1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the interfaces that ARM uses to lower LLVM code into a 11 // selection DAG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "arm-isel" 16 #include "ARMISelLowering.h" 17 #include "ARM.h" 18 #include "ARMCallingConv.h" 19 #include "ARMConstantPoolValue.h" 20 #include "ARMMachineFunctionInfo.h" 21 #include "ARMPerfectShuffle.h" 22 #include "ARMSubtarget.h" 23 #include "ARMTargetMachine.h" 24 #include "ARMTargetObjectFile.h" 25 #include "MCTargetDesc/ARMAddressingModes.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/ADT/StringExtras.h" 28 #include "llvm/CodeGen/CallingConvLower.h" 29 #include "llvm/CodeGen/IntrinsicLowering.h" 30 #include "llvm/CodeGen/MachineBasicBlock.h" 31 #include "llvm/CodeGen/MachineFrameInfo.h" 32 #include "llvm/CodeGen/MachineFunction.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineModuleInfo.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h" 36 #include "llvm/CodeGen/SelectionDAG.h" 37 #include "llvm/IR/CallingConv.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/IR/GlobalValue.h" 41 #include "llvm/IR/Instruction.h" 42 #include "llvm/IR/Instructions.h" 43 #include "llvm/IR/Intrinsics.h" 44 #include "llvm/IR/Type.h" 45 #include "llvm/MC/MCSectionMachO.h" 46 #include "llvm/Support/CommandLine.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/MathExtras.h" 49 #include "llvm/Support/raw_ostream.h" 50 #include "llvm/Target/TargetOptions.h" 51 using namespace llvm; 52 53 STATISTIC(NumTailCalls, "Number of tail calls"); 
54 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); 55 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); 56 57 // This option should go away when tail calls fully work. 58 static cl::opt<bool> 59 EnableARMTailCalls("arm-tail-calls", cl::Hidden, 60 cl::desc("Generate tail calls (TEMPORARY OPTION)."), 61 cl::init(false)); 62 63 cl::opt<bool> 64 EnableARMLongCalls("arm-long-calls", cl::Hidden, 65 cl::desc("Generate calls via indirect call instructions"), 66 cl::init(false)); 67 68 static cl::opt<bool> 69 ARMInterworking("arm-interworking", cl::Hidden, 70 cl::desc("Enable / disable ARM interworking (for debugging only)"), 71 cl::init(true)); 72 73 namespace { 74 class ARMCCState : public CCState { 75 public: 76 ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, 77 const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs, 78 LLVMContext &C, ParmContext PC) 79 : CCState(CC, isVarArg, MF, TM, locs, C) { 80 assert(((PC == Call) || (PC == Prologue)) && 81 "ARMCCState users must specify whether their context is call" 82 "or prologue generation."); 83 CallOrPrologue = PC; 84 } 85 }; 86 } 87 88 // The APCS parameter registers. 
89 static const uint16_t GPRArgRegs[] = { 90 ARM::R0, ARM::R1, ARM::R2, ARM::R3 91 }; 92 93 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, 94 MVT PromotedBitwiseVT) { 95 if (VT != PromotedLdStVT) { 96 setOperationAction(ISD::LOAD, VT, Promote); 97 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); 98 99 setOperationAction(ISD::STORE, VT, Promote); 100 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); 101 } 102 103 MVT ElemTy = VT.getVectorElementType(); 104 if (ElemTy != MVT::i64 && ElemTy != MVT::f64) 105 setOperationAction(ISD::SETCC, VT, Custom); 106 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 107 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 108 if (ElemTy == MVT::i32) { 109 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 110 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 111 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 112 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 113 } else { 114 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 115 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 116 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 117 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 118 } 119 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 120 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 121 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); 122 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 123 setOperationAction(ISD::SELECT, VT, Expand); 124 setOperationAction(ISD::SELECT_CC, VT, Expand); 125 setOperationAction(ISD::VSELECT, VT, Expand); 126 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 127 if (VT.isInteger()) { 128 setOperationAction(ISD::SHL, VT, Custom); 129 setOperationAction(ISD::SRA, VT, Custom); 130 setOperationAction(ISD::SRL, VT, Custom); 131 } 132 133 // Promote all bit-wise operations. 
134 if (VT.isInteger() && VT != PromotedBitwiseVT) { 135 setOperationAction(ISD::AND, VT, Promote); 136 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); 137 setOperationAction(ISD::OR, VT, Promote); 138 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); 139 setOperationAction(ISD::XOR, VT, Promote); 140 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); 141 } 142 143 // Neon does not support vector divide/remainder operations. 144 setOperationAction(ISD::SDIV, VT, Expand); 145 setOperationAction(ISD::UDIV, VT, Expand); 146 setOperationAction(ISD::FDIV, VT, Expand); 147 setOperationAction(ISD::SREM, VT, Expand); 148 setOperationAction(ISD::UREM, VT, Expand); 149 setOperationAction(ISD::FREM, VT, Expand); 150 } 151 152 void ARMTargetLowering::addDRTypeForNEON(MVT VT) { 153 addRegisterClass(VT, &ARM::DPRRegClass); 154 addTypeForNEON(VT, MVT::f64, MVT::v2i32); 155 } 156 157 void ARMTargetLowering::addQRTypeForNEON(MVT VT) { 158 addRegisterClass(VT, &ARM::QPRRegClass); 159 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); 160 } 161 162 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { 163 if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin()) 164 return new TargetLoweringObjectFileMachO(); 165 166 return new ARMElfTargetObjectFile(); 167 } 168 169 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) 170 : TargetLowering(TM, createTLOF(TM)) { 171 Subtarget = &TM.getSubtarget<ARMSubtarget>(); 172 RegInfo = TM.getRegisterInfo(); 173 Itins = TM.getInstrItineraryData(); 174 175 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 176 177 if (Subtarget->isTargetDarwin()) { 178 // Uses VFP for Thumb libfuncs if available. 179 if (Subtarget->isThumb() && Subtarget->hasVFP2()) { 180 // Single-precision floating-point arithmetic. 
181 setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); 182 setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); 183 setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); 184 setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); 185 186 // Double-precision floating-point arithmetic. 187 setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); 188 setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); 189 setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); 190 setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); 191 192 // Single-precision comparisons. 193 setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); 194 setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); 195 setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); 196 setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); 197 setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); 198 setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); 199 setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); 200 setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); 201 202 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 203 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); 204 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 205 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 206 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 207 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 208 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 209 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 210 211 // Double-precision comparisons. 
212 setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); 213 setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); 214 setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); 215 setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); 216 setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); 217 setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); 218 setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); 219 setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); 220 221 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 222 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); 223 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 224 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 225 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 226 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 227 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 228 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 229 230 // Floating-point to integer conversions. 231 // i64 conversions are done via library routines even when generating VFP 232 // instructions, so use the same ones. 233 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); 234 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); 235 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); 236 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); 237 238 // Conversions between floating types. 239 setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); 240 setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); 241 242 // Integer to floating-point conversions. 243 // i64 conversions are done via library routines even when generating VFP 244 // instructions, so use the same ones. 245 // FIXME: There appears to be some naming inconsistency in ARM libgcc: 246 // e.g., __floatunsidf vs. __floatunssidfvfp. 
247 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); 248 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); 249 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); 250 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); 251 } 252 } 253 254 // These libcalls are not available in 32-bit. 255 setLibcallName(RTLIB::SHL_I128, 0); 256 setLibcallName(RTLIB::SRL_I128, 0); 257 setLibcallName(RTLIB::SRA_I128, 0); 258 259 if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { 260 // Double-precision floating-point arithmetic helper functions 261 // RTABI chapter 4.1.2, Table 2 262 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); 263 setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); 264 setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); 265 setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); 266 setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); 267 setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); 268 setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); 269 setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); 270 271 // Double-precision floating-point comparison helper functions 272 // RTABI chapter 4.1.2, Table 3 273 setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); 274 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 275 setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); 276 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); 277 setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); 278 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 279 setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); 280 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 281 setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); 282 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 283 setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); 284 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 285 setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); 286 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 287 setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); 288 
setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 289 setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); 290 setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); 291 setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); 292 setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); 293 setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); 294 setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); 295 setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); 296 setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); 297 298 // Single-precision floating-point arithmetic helper functions 299 // RTABI chapter 4.1.2, Table 4 300 setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); 301 setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); 302 setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); 303 setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); 304 setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); 305 setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); 306 setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); 307 setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); 308 309 // Single-precision floating-point comparison helper functions 310 // RTABI chapter 4.1.2, Table 5 311 setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); 312 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 313 setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); 314 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); 315 setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); 316 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 317 setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); 318 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 319 setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); 320 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 321 setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); 322 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 323 setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); 324 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 325 
setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 326 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 327 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 328 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 329 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 330 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 331 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 332 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 333 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 334 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 335 336 // Floating-point to integer conversions. 337 // RTABI chapter 4.1.2, Table 6 338 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 339 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 340 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 341 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 342 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 343 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 344 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 345 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 346 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 347 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 348 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 349 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 350 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 351 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 352 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 353 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 354 355 // Conversions between floating types. 
356 // RTABI chapter 4.1.2, Table 7 357 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 358 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 359 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 360 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 361 362 // Integer to floating-point conversions. 363 // RTABI chapter 4.1.2, Table 8 364 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); 365 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); 366 setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); 367 setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); 368 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); 369 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); 370 setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); 371 setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); 372 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 373 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 374 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 375 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 376 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 377 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 378 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 379 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 380 381 // Long long helper functions 382 // RTABI chapter 4.2, Table 9 383 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); 384 setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); 385 setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); 386 setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); 387 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); 388 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); 389 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); 390 
setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); 391 setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); 392 setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); 393 394 // Integer division functions 395 // RTABI chapter 4.3.1 396 setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); 397 setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); 398 setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); 399 setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); 400 setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); 401 setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); 402 setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); 403 setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); 404 setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); 405 setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); 406 setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); 407 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); 408 setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); 409 setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); 410 setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); 411 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); 412 413 // Memory operations 414 // RTABI chapter 4.3.4 415 setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); 416 setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); 417 setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); 418 setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); 419 setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); 420 setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); 421 } 422 423 // Use divmod compiler-rt calls for iOS 5.0 and later. 
424 if (Subtarget->getTargetTriple().getOS() == Triple::IOS && 425 !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { 426 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); 427 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); 428 } 429 430 if (Subtarget->isThumb1Only()) 431 addRegisterClass(MVT::i32, &ARM::tGPRRegClass); 432 else 433 addRegisterClass(MVT::i32, &ARM::GPRRegClass); 434 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 435 !Subtarget->isThumb1Only()) { 436 addRegisterClass(MVT::f32, &ARM::SPRRegClass); 437 if (!Subtarget->isFPOnlySP()) 438 addRegisterClass(MVT::f64, &ARM::DPRRegClass); 439 440 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 441 } 442 443 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 444 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { 445 for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 446 InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) 447 setTruncStoreAction((MVT::SimpleValueType)VT, 448 (MVT::SimpleValueType)InnerVT, Expand); 449 setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); 450 setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); 451 setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); 452 } 453 454 setOperationAction(ISD::ConstantFP, MVT::f32, Custom); 455 456 if (Subtarget->hasNEON()) { 457 addDRTypeForNEON(MVT::v2f32); 458 addDRTypeForNEON(MVT::v8i8); 459 addDRTypeForNEON(MVT::v4i16); 460 addDRTypeForNEON(MVT::v2i32); 461 addDRTypeForNEON(MVT::v1i64); 462 463 addQRTypeForNEON(MVT::v4f32); 464 addQRTypeForNEON(MVT::v2f64); 465 addQRTypeForNEON(MVT::v16i8); 466 addQRTypeForNEON(MVT::v8i16); 467 addQRTypeForNEON(MVT::v4i32); 468 addQRTypeForNEON(MVT::v2i64); 469 470 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but 471 // neither Neon nor VFP support any arithmetic operations on it. 472 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively 473 // supported for v4f32. 
474 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 475 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 476 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 477 // FIXME: Code duplication: FDIV and FREM are expanded always, see 478 // ARMTargetLowering::addTypeForNEON method for details. 479 setOperationAction(ISD::FDIV, MVT::v2f64, Expand); 480 setOperationAction(ISD::FREM, MVT::v2f64, Expand); 481 // FIXME: Create unittest. 482 // In another words, find a way when "copysign" appears in DAG with vector 483 // operands. 484 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); 485 // FIXME: Code duplication: SETCC has custom operation action, see 486 // ARMTargetLowering::addTypeForNEON method for details. 487 setOperationAction(ISD::SETCC, MVT::v2f64, Expand); 488 // FIXME: Create unittest for FNEG and for FABS. 489 setOperationAction(ISD::FNEG, MVT::v2f64, Expand); 490 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 491 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); 492 setOperationAction(ISD::FSIN, MVT::v2f64, Expand); 493 setOperationAction(ISD::FCOS, MVT::v2f64, Expand); 494 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); 495 setOperationAction(ISD::FPOW, MVT::v2f64, Expand); 496 setOperationAction(ISD::FLOG, MVT::v2f64, Expand); 497 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); 498 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); 499 setOperationAction(ISD::FEXP, MVT::v2f64, Expand); 500 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); 501 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. 
502 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); 503 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); 504 setOperationAction(ISD::FRINT, MVT::v2f64, Expand); 505 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); 506 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); 507 setOperationAction(ISD::FMA, MVT::v2f64, Expand); 508 509 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); 510 setOperationAction(ISD::FSIN, MVT::v4f32, Expand); 511 setOperationAction(ISD::FCOS, MVT::v4f32, Expand); 512 setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); 513 setOperationAction(ISD::FPOW, MVT::v4f32, Expand); 514 setOperationAction(ISD::FLOG, MVT::v4f32, Expand); 515 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); 516 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); 517 setOperationAction(ISD::FEXP, MVT::v4f32, Expand); 518 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); 519 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); 520 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); 521 setOperationAction(ISD::FRINT, MVT::v4f32, Expand); 522 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); 523 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); 524 525 // Mark v2f32 intrinsics. 
526 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); 527 setOperationAction(ISD::FSIN, MVT::v2f32, Expand); 528 setOperationAction(ISD::FCOS, MVT::v2f32, Expand); 529 setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); 530 setOperationAction(ISD::FPOW, MVT::v2f32, Expand); 531 setOperationAction(ISD::FLOG, MVT::v2f32, Expand); 532 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); 533 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); 534 setOperationAction(ISD::FEXP, MVT::v2f32, Expand); 535 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); 536 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); 537 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); 538 setOperationAction(ISD::FRINT, MVT::v2f32, Expand); 539 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); 540 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); 541 542 // Neon does not support some operations on v1i64 and v2i64 types. 543 setOperationAction(ISD::MUL, MVT::v1i64, Expand); 544 // Custom handling for some quad-vector types to detect VMULL. 545 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 546 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 547 setOperationAction(ISD::MUL, MVT::v2i64, Custom); 548 // Custom handling for some vector types to avoid expensive expansions 549 setOperationAction(ISD::SDIV, MVT::v4i16, Custom); 550 setOperationAction(ISD::SDIV, MVT::v8i8, Custom); 551 setOperationAction(ISD::UDIV, MVT::v4i16, Custom); 552 setOperationAction(ISD::UDIV, MVT::v8i8, Custom); 553 setOperationAction(ISD::SETCC, MVT::v1i64, Expand); 554 setOperationAction(ISD::SETCC, MVT::v2i64, Expand); 555 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with 556 // a destination type that is wider than the source, and nor does 557 // it have a FP_TO_[SU]INT instruction with a narrower destination than 558 // source. 
559 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); 560 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); 561 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); 562 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); 563 564 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); 565 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 566 567 // Custom expand long extensions to vectors. 568 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); 569 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); 570 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); 571 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); 572 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); 573 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); 574 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); 575 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); 576 577 // NEON does not have single instruction CTPOP for vectors with element 578 // types wider than 8-bits. However, custom lowering can leverage the 579 // v8i8/v16i8 vcnt instruction. 580 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); 581 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); 582 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); 583 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); 584 585 // NEON only has FMA instructions as of VFP4. 
586 if (!Subtarget->hasVFP4()) { 587 setOperationAction(ISD::FMA, MVT::v2f32, Expand); 588 setOperationAction(ISD::FMA, MVT::v4f32, Expand); 589 } 590 591 setTargetDAGCombine(ISD::INTRINSIC_VOID); 592 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 593 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 594 setTargetDAGCombine(ISD::SHL); 595 setTargetDAGCombine(ISD::SRL); 596 setTargetDAGCombine(ISD::SRA); 597 setTargetDAGCombine(ISD::SIGN_EXTEND); 598 setTargetDAGCombine(ISD::ZERO_EXTEND); 599 setTargetDAGCombine(ISD::ANY_EXTEND); 600 setTargetDAGCombine(ISD::SELECT_CC); 601 setTargetDAGCombine(ISD::BUILD_VECTOR); 602 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 603 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); 604 setTargetDAGCombine(ISD::STORE); 605 setTargetDAGCombine(ISD::FP_TO_SINT); 606 setTargetDAGCombine(ISD::FP_TO_UINT); 607 setTargetDAGCombine(ISD::FDIV); 608 609 // It is legal to extload from v4i8 to v4i16 or v4i32. 610 MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, 611 MVT::v4i16, MVT::v2i16, 612 MVT::v2i32}; 613 for (unsigned i = 0; i < 6; ++i) { 614 setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); 615 setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); 616 setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); 617 } 618 } 619 620 // ARM and Thumb2 support UMLAL/SMLAL. 621 if (!Subtarget->isThumb1Only()) 622 setTargetDAGCombine(ISD::ADDC); 623 624 625 computeRegisterProperties(); 626 627 // ARM does not have f32 extending load. 628 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 629 630 // ARM does not have i1 sign extending load. 631 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 632 633 // ARM supports all 4 flavors of integer indexed load / store. 
634 if (!Subtarget->isThumb1Only()) { 635 for (unsigned im = (unsigned)ISD::PRE_INC; 636 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { 637 setIndexedLoadAction(im, MVT::i1, Legal); 638 setIndexedLoadAction(im, MVT::i8, Legal); 639 setIndexedLoadAction(im, MVT::i16, Legal); 640 setIndexedLoadAction(im, MVT::i32, Legal); 641 setIndexedStoreAction(im, MVT::i1, Legal); 642 setIndexedStoreAction(im, MVT::i8, Legal); 643 setIndexedStoreAction(im, MVT::i16, Legal); 644 setIndexedStoreAction(im, MVT::i32, Legal); 645 } 646 } 647 648 // i64 operation support. 649 setOperationAction(ISD::MUL, MVT::i64, Expand); 650 setOperationAction(ISD::MULHU, MVT::i32, Expand); 651 if (Subtarget->isThumb1Only()) { 652 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 653 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 654 } 655 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() 656 || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP())) 657 setOperationAction(ISD::MULHS, MVT::i32, Expand); 658 659 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 660 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 661 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 662 setOperationAction(ISD::SRL, MVT::i64, Custom); 663 setOperationAction(ISD::SRA, MVT::i64, Custom); 664 665 if (!Subtarget->isThumb1Only()) { 666 // FIXME: We should do this for Thumb1 as well. 667 setOperationAction(ISD::ADDC, MVT::i32, Custom); 668 setOperationAction(ISD::ADDE, MVT::i32, Custom); 669 setOperationAction(ISD::SUBC, MVT::i32, Custom); 670 setOperationAction(ISD::SUBE, MVT::i32, Custom); 671 } 672 673 // ARM does not have ROTL. 674 setOperationAction(ISD::ROTL, MVT::i32, Expand); 675 setOperationAction(ISD::CTTZ, MVT::i32, Custom); 676 setOperationAction(ISD::CTPOP, MVT::i32, Expand); 677 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) 678 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 679 680 // These just redirect to CTTZ and CTLZ on ARM. 
681 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); 682 setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); 683 684 // Only ARMv6 has BSWAP. 685 if (!Subtarget->hasV6Ops()) 686 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 687 688 if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && 689 !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { 690 // These are expanded into libcalls if the cpu doesn't have HW divider. 691 setOperationAction(ISD::SDIV, MVT::i32, Expand); 692 setOperationAction(ISD::UDIV, MVT::i32, Expand); 693 } 694 setOperationAction(ISD::SREM, MVT::i32, Expand); 695 setOperationAction(ISD::UREM, MVT::i32, Expand); 696 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 697 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 698 699 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 700 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 701 setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); 702 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 703 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 704 705 setOperationAction(ISD::TRAP, MVT::Other, Legal); 706 707 // Use the default implementation. 708 setOperationAction(ISD::VASTART, MVT::Other, Custom); 709 setOperationAction(ISD::VAARG, MVT::Other, Expand); 710 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 711 setOperationAction(ISD::VAEND, MVT::Other, Expand); 712 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 713 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 714 715 if (!Subtarget->isTargetDarwin()) { 716 // Non-Darwin platforms may return values in these registers via the 717 // personality function. 
718 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 719 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 720 setExceptionPointerRegister(ARM::R0); 721 setExceptionSelectorRegister(ARM::R1); 722 } 723 724 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 725 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use 726 // the default expansion. 727 // FIXME: This should be checking for v6k, not just v6. 728 if (Subtarget->hasDataBarrier() || 729 (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { 730 // membarrier needs custom lowering; the rest are legal and handled 731 // normally. 732 setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); 733 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 734 // Custom lowering for 64-bit ops 735 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); 736 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); 737 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); 738 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); 739 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); 740 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); 741 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); 742 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); 743 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); 744 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); 745 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); 746 // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. 747 setInsertFencesForAtomic(true); 748 } else { 749 // Set them all for expansion, which will force libcalls. 
750 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); 751 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); 752 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); 753 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); 754 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); 755 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 756 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); 757 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); 758 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); 759 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); 760 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); 761 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); 762 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); 763 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); 764 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the 765 // Unordered/Monotonic case. 766 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); 767 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); 768 // Since the libcalls include locking, fold in the fences 769 setShouldFoldAtomicFences(true); 770 } 771 772 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 773 774 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. 775 if (!Subtarget->hasV6Ops()) { 776 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 777 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 778 } 779 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 780 781 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 782 !Subtarget->isThumb1Only()) { 783 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR 784 // iff target supports vfp2. 785 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 786 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 787 } 788 789 // We want to custom lower some of our intrinsics. 
790 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 791 if (Subtarget->isTargetDarwin()) { 792 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 793 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 794 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); 795 } 796 797 setOperationAction(ISD::SETCC, MVT::i32, Expand); 798 setOperationAction(ISD::SETCC, MVT::f32, Expand); 799 setOperationAction(ISD::SETCC, MVT::f64, Expand); 800 setOperationAction(ISD::SELECT, MVT::i32, Custom); 801 setOperationAction(ISD::SELECT, MVT::f32, Custom); 802 setOperationAction(ISD::SELECT, MVT::f64, Custom); 803 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 804 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 805 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 806 807 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 808 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 809 setOperationAction(ISD::BR_CC, MVT::f32, Custom); 810 setOperationAction(ISD::BR_CC, MVT::f64, Custom); 811 setOperationAction(ISD::BR_JT, MVT::Other, Custom); 812 813 // We don't support sin/cos/fmod/copysign/pow 814 setOperationAction(ISD::FSIN, MVT::f64, Expand); 815 setOperationAction(ISD::FSIN, MVT::f32, Expand); 816 setOperationAction(ISD::FCOS, MVT::f32, Expand); 817 setOperationAction(ISD::FCOS, MVT::f64, Expand); 818 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 819 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 820 setOperationAction(ISD::FREM, MVT::f64, Expand); 821 setOperationAction(ISD::FREM, MVT::f32, Expand); 822 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 823 !Subtarget->isThumb1Only()) { 824 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 825 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 826 } 827 setOperationAction(ISD::FPOW, MVT::f64, Expand); 828 setOperationAction(ISD::FPOW, MVT::f32, Expand); 829 830 if (!Subtarget->hasVFP4()) { 831 setOperationAction(ISD::FMA, MVT::f64, Expand); 832 
setOperationAction(ISD::FMA, MVT::f32, Expand); 833 } 834 835 // Various VFP goodness 836 if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { 837 // int <-> fp are custom expanded into bit_convert + ARMISD ops. 838 if (Subtarget->hasVFP2()) { 839 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 840 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 841 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 842 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 843 } 844 // Special handling for half-precision FP. 845 if (!Subtarget->hasFP16()) { 846 setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); 847 setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); 848 } 849 } 850 851 // We have target-specific dag combine patterns for the following nodes: 852 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine 853 setTargetDAGCombine(ISD::ADD); 854 setTargetDAGCombine(ISD::SUB); 855 setTargetDAGCombine(ISD::MUL); 856 setTargetDAGCombine(ISD::AND); 857 setTargetDAGCombine(ISD::OR); 858 setTargetDAGCombine(ISD::XOR); 859 860 if (Subtarget->hasV6Ops()) 861 setTargetDAGCombine(ISD::SRL); 862 863 setStackPointerRegisterToSaveRestore(ARM::SP); 864 865 if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() || 866 !Subtarget->hasVFP2()) 867 setSchedulingPreference(Sched::RegPressure); 868 else 869 setSchedulingPreference(Sched::Hybrid); 870 871 //// temporary - rewrite interface to use type 872 MaxStoresPerMemset = 8; 873 MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; 874 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores 875 MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; 876 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores 877 MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; 878 879 // On ARM arguments smaller than 4 bytes are extended, so all arguments 880 // are at least 4 bytes aligned. 
setMinStackArgumentAlignment(4);

  BenefitFromCodePlacementOpt = true;

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->isLikeA9();

  // Alignment argument is 1 in Thumb mode and 2 otherwise.
  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.

/// findRepresentativeClass - Return the (representative register class, cost)
/// pair for the value type VT. Every floating-point and vector type listed
/// below is represented by DPR with a per-type cost; any other type is
/// forwarded to TargetLowering::findRepresentativeClass.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(MVT VT) const{
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as the representative register class for all floating point
  // and vector types. There are 32 SPR registers and 32 DPR registers, so
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  // 128-bit vector types: cost 2.
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  // 256-bit vector type: cost 4.
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  // 512-bit vector type: cost 8.
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

/// getTargetNodeName - Return the printable name of the ARM-specific DAG node
/// Opcode. Each listed ARMISD opcode maps to its own spelling as a string;
/// any opcode that is not listed yields a null pointer (0).
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMN:           return "ARMISD::CMN";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";

  case ARMISD::CMOV:          return "ARMISD::CMOV";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::ADDC:          return "ARMISD::ADDC";
  case ARMISD::ADDE:          return "ARMISD::ADDE";
  case ARMISD::SUBC:          return "ARMISD::SUBC";
  case ARMISD::SUBE:          return "ARMISD::SUBE";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
  case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  case ARMISD::VZIP:          return "ARMISD::VZIP";
  case ARMISD::VUZP:          return "ARMISD::VUZP";
  case ARMISD::VTRN:          return "ARMISD::VTRN";
  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
  case ARMISD::UMLAL:         return "ARMISD::UMLAL";
  case ARMISD::SMLAL:         return "ARMISD::SMLAL";
  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX:          return "ARMISD::FMAX";
  case ARMISD::FMIN:          return "ARMISD::FMIN";
  case ARMISD::BFI:           return "ARMISD::BFI";
  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
  case ARMISD::VBSL:          return "ARMISD::VBSL";
  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
  }
}

/// getSetCCResultType - Return the result type of a SETCC whose operands have
/// type VT: the pointer type for non-vector VT, otherwise VT with its element
/// type changed to integer.
EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector()) return getPointerTy();
  return VT.changeVectorElementTypeToInteger();
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  // Everything else (and both types above when NEON is absent) uses the
  // generic mapping.
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
1099 FastISel * 1100 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, 1101 const TargetLibraryInfo *libInfo) const { 1102 return ARM::createFastISel(funcInfo, libInfo); 1103 } 1104 1105 /// getMaximalGlobalOffset - Returns the maximal possible offset which can 1106 /// be used for loads / stores from the global. 1107 unsigned ARMTargetLowering::getMaximalGlobalOffset() const { 1108 return (Subtarget->isThumb1Only() ? 127 : 4095); 1109 } 1110 1111 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { 1112 unsigned NumVals = N->getNumValues(); 1113 if (!NumVals) 1114 return Sched::RegPressure; 1115 1116 for (unsigned i = 0; i != NumVals; ++i) { 1117 EVT VT = N->getValueType(i); 1118 if (VT == MVT::Glue || VT == MVT::Other) 1119 continue; 1120 if (VT.isFloatingPoint() || VT.isVector()) 1121 return Sched::ILP; 1122 } 1123 1124 if (!N->isMachineOpcode()) 1125 return Sched::RegPressure; 1126 1127 // Load are scheduled for latency even if there instruction itinerary 1128 // is not available. 
1129 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 1130 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 1131 1132 if (MCID.getNumDefs() == 0) 1133 return Sched::RegPressure; 1134 if (!Itins->isEmpty() && 1135 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) 1136 return Sched::ILP; 1137 1138 return Sched::RegPressure; 1139 } 1140 1141 //===----------------------------------------------------------------------===// 1142 // Lowering Code 1143 //===----------------------------------------------------------------------===// 1144 1145 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 1146 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 1147 switch (CC) { 1148 default: llvm_unreachable("Unknown condition code!"); 1149 case ISD::SETNE: return ARMCC::NE; 1150 case ISD::SETEQ: return ARMCC::EQ; 1151 case ISD::SETGT: return ARMCC::GT; 1152 case ISD::SETGE: return ARMCC::GE; 1153 case ISD::SETLT: return ARMCC::LT; 1154 case ISD::SETLE: return ARMCC::LE; 1155 case ISD::SETUGT: return ARMCC::HI; 1156 case ISD::SETUGE: return ARMCC::HS; 1157 case ISD::SETULT: return ARMCC::LO; 1158 case ISD::SETULE: return ARMCC::LS; 1159 } 1160 } 1161 1162 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
1163 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 1164 ARMCC::CondCodes &CondCode2) { 1165 CondCode2 = ARMCC::AL; 1166 switch (CC) { 1167 default: llvm_unreachable("Unknown FP condition!"); 1168 case ISD::SETEQ: 1169 case ISD::SETOEQ: CondCode = ARMCC::EQ; break; 1170 case ISD::SETGT: 1171 case ISD::SETOGT: CondCode = ARMCC::GT; break; 1172 case ISD::SETGE: 1173 case ISD::SETOGE: CondCode = ARMCC::GE; break; 1174 case ISD::SETOLT: CondCode = ARMCC::MI; break; 1175 case ISD::SETOLE: CondCode = ARMCC::LS; break; 1176 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; 1177 case ISD::SETO: CondCode = ARMCC::VC; break; 1178 case ISD::SETUO: CondCode = ARMCC::VS; break; 1179 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; 1180 case ISD::SETUGT: CondCode = ARMCC::HI; break; 1181 case ISD::SETUGE: CondCode = ARMCC::PL; break; 1182 case ISD::SETLT: 1183 case ISD::SETULT: CondCode = ARMCC::LT; break; 1184 case ISD::SETLE: 1185 case ISD::SETULE: CondCode = ARMCC::LE; break; 1186 case ISD::SETNE: 1187 case ISD::SETUNE: CondCode = ARMCC::NE; break; 1188 } 1189 } 1190 1191 //===----------------------------------------------------------------------===// 1192 // Calling Convention Implementation 1193 //===----------------------------------------------------------------------===// 1194 1195 #include "ARMGenCallingConv.inc" 1196 1197 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the 1198 /// given CallingConvention value. 1199 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, 1200 bool Return, 1201 bool isVarArg) const { 1202 switch (CC) { 1203 default: 1204 llvm_unreachable("Unsupported calling convention"); 1205 case CallingConv::Fast: 1206 if (Subtarget->hasVFP2() && !isVarArg) { 1207 if (!Subtarget->isAAPCS_ABI()) 1208 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); 1209 // For AAPCS ABI targets, just use VFP variant of the calling convention. 1210 return (Return ? 
RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1211 } 1212 // Fallthrough 1213 case CallingConv::C: { 1214 // Use target triple & subtarget features to do actual dispatch. 1215 if (!Subtarget->isAAPCS_ABI()) 1216 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 1217 else if (Subtarget->hasVFP2() && 1218 getTargetMachine().Options.FloatABIType == FloatABI::Hard && 1219 !isVarArg) 1220 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1221 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1222 } 1223 case CallingConv::ARM_AAPCS_VFP: 1224 if (!isVarArg) 1225 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1226 // Fallthrough 1227 case CallingConv::ARM_AAPCS: 1228 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1229 case CallingConv::ARM_APCS: 1230 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 1231 case CallingConv::GHC: 1232 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); 1233 } 1234 } 1235 1236 /// LowerCallResult - Lower the result values of a call into the 1237 /// appropriate copies out of appropriate physical registers. 1238 SDValue 1239 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 1240 CallingConv::ID CallConv, bool isVarArg, 1241 const SmallVectorImpl<ISD::InputArg> &Ins, 1242 DebugLoc dl, SelectionDAG &DAG, 1243 SmallVectorImpl<SDValue> &InVals) const { 1244 1245 // Assign locations to each value returned by this call. 1246 SmallVector<CCValAssign, 16> RVLocs; 1247 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1248 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1249 CCInfo.AnalyzeCallResult(Ins, 1250 CCAssignFnForNode(CallConv, /* Return*/ true, 1251 isVarArg)); 1252 1253 // Copy all of the result registers out of their specified physreg. 1254 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1255 CCValAssign VA = RVLocs[i]; 1256 1257 SDValue Val; 1258 if (VA.needsCustom()) { 1259 // Handle f64 or half of a v2f64. 
1260 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1261 InFlag); 1262 Chain = Lo.getValue(1); 1263 InFlag = Lo.getValue(2); 1264 VA = RVLocs[++i]; // skip ahead to next loc 1265 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1266 InFlag); 1267 Chain = Hi.getValue(1); 1268 InFlag = Hi.getValue(2); 1269 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1270 1271 if (VA.getLocVT() == MVT::v2f64) { 1272 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1273 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1274 DAG.getConstant(0, MVT::i32)); 1275 1276 VA = RVLocs[++i]; // skip ahead to next loc 1277 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1278 Chain = Lo.getValue(1); 1279 InFlag = Lo.getValue(2); 1280 VA = RVLocs[++i]; // skip ahead to next loc 1281 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1282 Chain = Hi.getValue(1); 1283 InFlag = Hi.getValue(2); 1284 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1285 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1286 DAG.getConstant(1, MVT::i32)); 1287 } 1288 } else { 1289 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1290 InFlag); 1291 Chain = Val.getValue(1); 1292 InFlag = Val.getValue(2); 1293 } 1294 1295 switch (VA.getLocInfo()) { 1296 default: llvm_unreachable("Unknown loc info!"); 1297 case CCValAssign::Full: break; 1298 case CCValAssign::BCvt: 1299 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 1300 break; 1301 } 1302 1303 InVals.push_back(Val); 1304 } 1305 1306 return Chain; 1307 } 1308 1309 /// LowerMemOpCallTo - Store the argument to the stack. 
1310 SDValue 1311 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 1312 SDValue StackPtr, SDValue Arg, 1313 DebugLoc dl, SelectionDAG &DAG, 1314 const CCValAssign &VA, 1315 ISD::ArgFlagsTy Flags) const { 1316 unsigned LocMemOffset = VA.getLocMemOffset(); 1317 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1318 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 1319 return DAG.getStore(Chain, dl, Arg, PtrOff, 1320 MachinePointerInfo::getStack(LocMemOffset), 1321 false, false, 0); 1322 } 1323 1324 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, 1325 SDValue Chain, SDValue &Arg, 1326 RegsToPassVector &RegsToPass, 1327 CCValAssign &VA, CCValAssign &NextVA, 1328 SDValue &StackPtr, 1329 SmallVector<SDValue, 8> &MemOpChains, 1330 ISD::ArgFlagsTy Flags) const { 1331 1332 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1333 DAG.getVTList(MVT::i32, MVT::i32), Arg); 1334 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 1335 1336 if (NextVA.isRegLoc()) 1337 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 1338 else { 1339 assert(NextVA.isMemLoc()); 1340 if (StackPtr.getNode() == 0) 1341 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1342 1343 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 1344 dl, DAG, NextVA, 1345 Flags)); 1346 } 1347 } 1348 1349 /// LowerCall - Lowering a call into a callseq_start <- 1350 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 1351 /// nodes. 
1352 SDValue 1353 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 1354 SmallVectorImpl<SDValue> &InVals) const { 1355 SelectionDAG &DAG = CLI.DAG; 1356 DebugLoc &dl = CLI.DL; 1357 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; 1358 SmallVector<SDValue, 32> &OutVals = CLI.OutVals; 1359 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; 1360 SDValue Chain = CLI.Chain; 1361 SDValue Callee = CLI.Callee; 1362 bool &isTailCall = CLI.IsTailCall; 1363 CallingConv::ID CallConv = CLI.CallConv; 1364 bool doesNotRet = CLI.DoesNotReturn; 1365 bool isVarArg = CLI.IsVarArg; 1366 1367 MachineFunction &MF = DAG.getMachineFunction(); 1368 bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1369 bool IsSibCall = false; 1370 // Disable tail calls if they're not supported. 1371 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 1372 isTailCall = false; 1373 if (isTailCall) { 1374 // Check if it's really possible to do a tail call. 1375 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1376 isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), 1377 Outs, OutVals, Ins, DAG); 1378 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1379 // detected sibcalls. 1380 if (isTailCall) { 1381 ++NumTailCalls; 1382 IsSibCall = true; 1383 } 1384 } 1385 1386 // Analyze operands of the call, assigning locations to each operand. 1387 SmallVector<CCValAssign, 16> ArgLocs; 1388 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1389 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1390 CCInfo.AnalyzeCallOperands(Outs, 1391 CCAssignFnForNode(CallConv, /* Return*/ false, 1392 isVarArg)); 1393 1394 // Get a count of how many bytes are to be pushed on the stack. 1395 unsigned NumBytes = CCInfo.getNextStackOffset(); 1396 1397 // For tail calls, memory operands are available in our caller's stack. 1398 if (IsSibCall) 1399 NumBytes = 0; 1400 1401 // Adjust the stack pointer for the new arguments... 
1402 // These operations are automatically eliminated by the prolog/epilog pass 1403 if (!IsSibCall) 1404 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 1405 1406 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1407 1408 RegsToPassVector RegsToPass; 1409 SmallVector<SDValue, 8> MemOpChains; 1410 1411 // Walk the register/memloc assignments, inserting copies/loads. In the case 1412 // of tail call optimization, arguments are handled later. 1413 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1414 i != e; 1415 ++i, ++realArgIdx) { 1416 CCValAssign &VA = ArgLocs[i]; 1417 SDValue Arg = OutVals[realArgIdx]; 1418 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1419 bool isByVal = Flags.isByVal(); 1420 1421 // Promote the value if needed. 1422 switch (VA.getLocInfo()) { 1423 default: llvm_unreachable("Unknown loc info!"); 1424 case CCValAssign::Full: break; 1425 case CCValAssign::SExt: 1426 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 1427 break; 1428 case CCValAssign::ZExt: 1429 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 1430 break; 1431 case CCValAssign::AExt: 1432 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 1433 break; 1434 case CCValAssign::BCvt: 1435 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1436 break; 1437 } 1438 1439 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 1440 if (VA.needsCustom()) { 1441 if (VA.getLocVT() == MVT::v2f64) { 1442 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1443 DAG.getConstant(0, MVT::i32)); 1444 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1445 DAG.getConstant(1, MVT::i32)); 1446 1447 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 1448 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1449 1450 VA = ArgLocs[++i]; // skip ahead to next loc 1451 if (VA.isRegLoc()) { 1452 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 1453 VA, 
ArgLocs[++i], StackPtr, MemOpChains, Flags); 1454 } else { 1455 assert(VA.isMemLoc()); 1456 1457 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 1458 dl, DAG, VA, Flags)); 1459 } 1460 } else { 1461 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 1462 StackPtr, MemOpChains, Flags); 1463 } 1464 } else if (VA.isRegLoc()) { 1465 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1466 } else if (isByVal) { 1467 assert(VA.isMemLoc()); 1468 unsigned offset = 0; 1469 1470 // True if this byval aggregate will be split between registers 1471 // and memory. 1472 if (CCInfo.isFirstByValRegValid()) { 1473 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1474 unsigned int i, j; 1475 for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { 1476 SDValue Const = DAG.getConstant(4*i, MVT::i32); 1477 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 1478 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 1479 MachinePointerInfo(), 1480 false, false, false, 0); 1481 MemOpChains.push_back(Load.getValue(1)); 1482 RegsToPass.push_back(std::make_pair(j, Load)); 1483 } 1484 offset = ARM::R4 - CCInfo.getFirstByValReg(); 1485 CCInfo.clearFirstByValReg(); 1486 } 1487 1488 if (Flags.getByValSize() - 4*offset > 0) { 1489 unsigned LocMemOffset = VA.getLocMemOffset(); 1490 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1491 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1492 StkPtrOff); 1493 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1494 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1495 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1496 MVT::i32); 1497 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); 1498 1499 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 1500 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; 1501 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, 1502 Ops, 
array_lengthof(Ops))); 1503 } 1504 } else if (!IsSibCall) { 1505 assert(VA.isMemLoc()); 1506 1507 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1508 dl, DAG, VA, Flags)); 1509 } 1510 } 1511 1512 if (!MemOpChains.empty()) 1513 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1514 &MemOpChains[0], MemOpChains.size()); 1515 1516 // Build a sequence of copy-to-reg nodes chained together with token chain 1517 // and flag operands which copy the outgoing args into the appropriate regs. 1518 SDValue InFlag; 1519 // Tail call byval lowering might overwrite argument registers so in case of 1520 // tail call optimization the copies to registers are lowered later. 1521 if (!isTailCall) 1522 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1523 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1524 RegsToPass[i].second, InFlag); 1525 InFlag = Chain.getValue(1); 1526 } 1527 1528 // For tail calls lower the arguments to the 'real' stack slot. 1529 if (isTailCall) { 1530 // Force all the incoming stack arguments to be loaded from the stack 1531 // before any new outgoing arguments are stored to the stack, because the 1532 // outgoing stack slots may alias the incoming argument stack slots, and 1533 // the alias isn't otherwise explicit. This is slightly more conservative 1534 // than necessary, because it means that each store effectively depends 1535 // on every argument instead of just those arguments it would clobber. 1536 1537 // Do not flag preceding copytoreg stuff together with the following stuff. 
1538 InFlag = SDValue(); 1539 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1540 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1541 RegsToPass[i].second, InFlag); 1542 InFlag = Chain.getValue(1); 1543 } 1544 InFlag =SDValue(); 1545 } 1546 1547 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1548 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1549 // node so that legalize doesn't hack it. 1550 bool isDirect = false; 1551 bool isARMFunc = false; 1552 bool isLocalARMFunc = false; 1553 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1554 1555 if (EnableARMLongCalls) { 1556 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1557 && "long-calls with non-static relocation model!"); 1558 // Handle a global address or an external symbol. If it's not one of 1559 // those, the target's already in a register, so we don't need to do 1560 // anything extra. 1561 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1562 const GlobalValue *GV = G->getGlobal(); 1563 // Create a constant pool entry for the callee address 1564 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1565 ARMConstantPoolValue *CPV = 1566 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1567 1568 // Get the address of the callee into a register 1569 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1570 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1571 Callee = DAG.getLoad(getPointerTy(), dl, 1572 DAG.getEntryNode(), CPAddr, 1573 MachinePointerInfo::getConstantPool(), 1574 false, false, false, 0); 1575 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1576 const char *Sym = S->getSymbol(); 1577 1578 // Create a constant pool entry for the callee address 1579 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1580 ARMConstantPoolValue *CPV = 1581 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1582 
ARMPCLabelIndex, 0); 1583 // Get the address of the callee into a register 1584 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1585 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1586 Callee = DAG.getLoad(getPointerTy(), dl, 1587 DAG.getEntryNode(), CPAddr, 1588 MachinePointerInfo::getConstantPool(), 1589 false, false, false, 0); 1590 } 1591 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1592 const GlobalValue *GV = G->getGlobal(); 1593 isDirect = true; 1594 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1595 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1596 getTargetMachine().getRelocationModel() != Reloc::Static; 1597 isARMFunc = !Subtarget->isThumb() || isStub; 1598 // ARM call to a local ARM function is predicable. 1599 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1600 // tBX takes a register source operand. 1601 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1602 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1603 ARMConstantPoolValue *CPV = 1604 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); 1605 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1606 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1607 Callee = DAG.getLoad(getPointerTy(), dl, 1608 DAG.getEntryNode(), CPAddr, 1609 MachinePointerInfo::getConstantPool(), 1610 false, false, false, 0); 1611 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1612 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1613 getPointerTy(), Callee, PICLabel); 1614 } else { 1615 // On ELF targets for PIC code, direct calls should go through the PLT 1616 unsigned OpFlags = 0; 1617 if (Subtarget->isTargetELF() && 1618 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1619 OpFlags = ARMII::MO_PLT; 1620 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1621 } 1622 } else if (ExternalSymbolSDNode 
*S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1623 isDirect = true; 1624 bool isStub = Subtarget->isTargetDarwin() && 1625 getTargetMachine().getRelocationModel() != Reloc::Static; 1626 isARMFunc = !Subtarget->isThumb() || isStub; 1627 // tBX takes a register source operand. 1628 const char *Sym = S->getSymbol(); 1629 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1630 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1631 ARMConstantPoolValue *CPV = 1632 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1633 ARMPCLabelIndex, 4); 1634 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1635 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1636 Callee = DAG.getLoad(getPointerTy(), dl, 1637 DAG.getEntryNode(), CPAddr, 1638 MachinePointerInfo::getConstantPool(), 1639 false, false, false, 0); 1640 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1641 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1642 getPointerTy(), Callee, PICLabel); 1643 } else { 1644 unsigned OpFlags = 0; 1645 // On ELF targets for PIC code, direct calls should go through the PLT 1646 if (Subtarget->isTargetELF() && 1647 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1648 OpFlags = ARMII::MO_PLT; 1649 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1650 } 1651 } 1652 1653 // FIXME: handle tail calls differently. 1654 unsigned CallOpc; 1655 bool HasMinSizeAttr = MF.getFunction()->getAttributes(). 1656 hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); 1657 if (Subtarget->isThumb()) { 1658 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1659 CallOpc = ARMISD::CALL_NOLINK; 1660 else 1661 CallOpc = isARMFunc ? 
ARMISD::CALL : ARMISD::tCALL; 1662 } else { 1663 if (!isDirect && !Subtarget->hasV5TOps()) 1664 CallOpc = ARMISD::CALL_NOLINK; 1665 else if (doesNotRet && isDirect && Subtarget->hasRAS() && 1666 // Emit regular call when code size is the priority 1667 !HasMinSizeAttr) 1668 // "mov lr, pc; b _foo" to avoid confusing the RSP 1669 CallOpc = ARMISD::CALL_NOLINK; 1670 else 1671 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; 1672 } 1673 1674 std::vector<SDValue> Ops; 1675 Ops.push_back(Chain); 1676 Ops.push_back(Callee); 1677 1678 // Add argument registers to the end of the list so that they are known live 1679 // into the call. 1680 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1681 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1682 RegsToPass[i].second.getValueType())); 1683 1684 // Add a register mask operand representing the call-preserved registers. 1685 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 1686 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 1687 assert(Mask && "Missing call preserved mask for calling convention"); 1688 Ops.push_back(DAG.getRegisterMask(Mask)); 1689 1690 if (InFlag.getNode()) 1691 Ops.push_back(InFlag); 1692 1693 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1694 if (isTailCall) 1695 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1696 1697 // Returns a chain and a flag for retval copy to use. 1698 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1699 InFlag = Chain.getValue(1); 1700 1701 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1702 DAG.getIntPtrConstant(0, true), InFlag); 1703 if (!Ins.empty()) 1704 InFlag = Chain.getValue(1); 1705 1706 // Handle result values, copying them out of physregs into vregs that we 1707 // return. 
1708 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, 1709 dl, DAG, InVals); 1710 } 1711 1712 /// HandleByVal - Every parameter *after* a byval parameter is passed 1713 /// on the stack. Remember the next parameter register to allocate, 1714 /// and then confiscate the rest of the parameter registers to insure 1715 /// this. 1716 void 1717 ARMTargetLowering::HandleByVal( 1718 CCState *State, unsigned &size, unsigned Align) const { 1719 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 1720 assert((State->getCallOrPrologue() == Prologue || 1721 State->getCallOrPrologue() == Call) && 1722 "unhandled ParmContext"); 1723 if ((!State->isFirstByValRegValid()) && 1724 (ARM::R0 <= reg) && (reg <= ARM::R3)) { 1725 if (Subtarget->isAAPCS_ABI() && Align > 4) { 1726 unsigned AlignInRegs = Align / 4; 1727 unsigned Waste = (ARM::R4 - reg) % AlignInRegs; 1728 for (unsigned i = 0; i < Waste; ++i) 1729 reg = State->AllocateReg(GPRArgRegs, 4); 1730 } 1731 if (reg != 0) { 1732 State->setFirstByValReg(reg); 1733 // At a call site, a byval parameter that is split between 1734 // registers and memory needs its size truncated here. In a 1735 // function prologue, such byval parameters are reassembled in 1736 // memory, and are not truncated. 1737 if (State->getCallOrPrologue() == Call) { 1738 unsigned excess = 4 * (ARM::R4 - reg); 1739 assert(size >= excess && "expected larger existing stack allocation"); 1740 size -= excess; 1741 } 1742 } 1743 } 1744 // Confiscate any remaining parameter registers to preclude their 1745 // assignment to subsequent parameters. 1746 while (State->AllocateReg(GPRArgRegs, 4)) 1747 ; 1748 } 1749 1750 /// MatchingStackOffset - Return true if the given stack call argument is 1751 /// already available in the same position (relatively) of the caller's 1752 /// incoming argument stack. 
1753 static 1754 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1755 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1756 const TargetInstrInfo *TII) { 1757 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1758 int FI = INT_MAX; 1759 if (Arg.getOpcode() == ISD::CopyFromReg) { 1760 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1761 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1762 return false; 1763 MachineInstr *Def = MRI->getVRegDef(VR); 1764 if (!Def) 1765 return false; 1766 if (!Flags.isByVal()) { 1767 if (!TII->isLoadFromStackSlot(Def, FI)) 1768 return false; 1769 } else { 1770 return false; 1771 } 1772 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1773 if (Flags.isByVal()) 1774 // ByVal argument is passed in as a pointer but it's now being 1775 // dereferenced. e.g. 1776 // define @foo(%struct.X* %A) { 1777 // tail call @bar(%struct.X* byval %A) 1778 // } 1779 return false; 1780 SDValue Ptr = Ld->getBasePtr(); 1781 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1782 if (!FINode) 1783 return false; 1784 FI = FINode->getIndex(); 1785 } else 1786 return false; 1787 1788 assert(FI != INT_MAX); 1789 if (!MFI->isFixedObjectIndex(FI)) 1790 return false; 1791 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1792 } 1793 1794 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 1795 /// for tail call optimization. Targets which want to do tail call 1796 /// optimization should implement this function. 
1797 bool 1798 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1799 CallingConv::ID CalleeCC, 1800 bool isVarArg, 1801 bool isCalleeStructRet, 1802 bool isCallerStructRet, 1803 const SmallVectorImpl<ISD::OutputArg> &Outs, 1804 const SmallVectorImpl<SDValue> &OutVals, 1805 const SmallVectorImpl<ISD::InputArg> &Ins, 1806 SelectionDAG& DAG) const { 1807 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1808 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1809 bool CCMatch = CallerCC == CalleeCC; 1810 1811 // Look for obvious safe cases to perform tail call optimization that do not 1812 // require ABI changes. This is what gcc calls sibcall. 1813 1814 // Do not sibcall optimize vararg calls unless the call site is not passing 1815 // any arguments. 1816 if (isVarArg && !Outs.empty()) 1817 return false; 1818 1819 // Also avoid sibcall optimization if either caller or callee uses struct 1820 // return semantics. 1821 if (isCalleeStructRet || isCallerStructRet) 1822 return false; 1823 1824 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1825 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 1826 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 1827 // support in the assembler and linker to be used. This would need to be 1828 // fixed to fully support tail calls in Thumb1. 1829 // 1830 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1831 // LR. This means if we need to reload LR, it takes an extra instructions, 1832 // which outweighs the value of the tail call; but here we don't know yet 1833 // whether LR is going to be used. Probably the right approach is to 1834 // generate the tail call here and turn it back into CALL/RET in 1835 // emitEpilogue if LR is used. 
1836 1837 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1838 // but we need to make sure there are enough registers; the only valid 1839 // registers are the 4 used for parameters. We don't currently do this 1840 // case. 1841 if (Subtarget->isThumb1Only()) 1842 return false; 1843 1844 // If the calling conventions do not match, then we'd better make sure the 1845 // results are returned in the same way as what the caller expects. 1846 if (!CCMatch) { 1847 SmallVector<CCValAssign, 16> RVLocs1; 1848 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), 1849 getTargetMachine(), RVLocs1, *DAG.getContext(), Call); 1850 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1851 1852 SmallVector<CCValAssign, 16> RVLocs2; 1853 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), 1854 getTargetMachine(), RVLocs2, *DAG.getContext(), Call); 1855 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1856 1857 if (RVLocs1.size() != RVLocs2.size()) 1858 return false; 1859 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1860 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1861 return false; 1862 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1863 return false; 1864 if (RVLocs1[i].isRegLoc()) { 1865 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1866 return false; 1867 } else { 1868 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1869 return false; 1870 } 1871 } 1872 } 1873 1874 // If Caller's vararg or byval argument has been split between registers and 1875 // stack, do not perform tail call, since part of the argument is in caller's 1876 // local frame. 1877 const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). 1878 getInfo<ARMFunctionInfo>(); 1879 if (AFI_Caller->getVarArgsRegSaveSize()) 1880 return false; 1881 1882 // If the callee takes no arguments then go on to check the results of the 1883 // call. 
1884 if (!Outs.empty()) { 1885 // Check if stack adjustment is needed. For now, do not do this if any 1886 // argument is passed on the stack. 1887 SmallVector<CCValAssign, 16> ArgLocs; 1888 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 1889 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1890 CCInfo.AnalyzeCallOperands(Outs, 1891 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1892 if (CCInfo.getNextStackOffset()) { 1893 MachineFunction &MF = DAG.getMachineFunction(); 1894 1895 // Check if the arguments are already laid out in the right way as 1896 // the caller's fixed stack objects. 1897 MachineFrameInfo *MFI = MF.getFrameInfo(); 1898 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1899 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 1900 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1901 i != e; 1902 ++i, ++realArgIdx) { 1903 CCValAssign &VA = ArgLocs[i]; 1904 EVT RegVT = VA.getLocVT(); 1905 SDValue Arg = OutVals[realArgIdx]; 1906 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1907 if (VA.getLocInfo() == CCValAssign::Indirect) 1908 return false; 1909 if (VA.needsCustom()) { 1910 // f64 and vector types are split into multiple registers or 1911 // register/stack-slot combinations. The types will not match 1912 // the registers; give up on memory f64 refs until we figure 1913 // out what to do about this. 
1914 if (!VA.isRegLoc()) 1915 return false; 1916 if (!ArgLocs[++i].isRegLoc()) 1917 return false; 1918 if (RegVT == MVT::v2f64) { 1919 if (!ArgLocs[++i].isRegLoc()) 1920 return false; 1921 if (!ArgLocs[++i].isRegLoc()) 1922 return false; 1923 } 1924 } else if (!VA.isRegLoc()) { 1925 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1926 MFI, MRI, TII)) 1927 return false; 1928 } 1929 } 1930 } 1931 } 1932 1933 return true; 1934 } 1935 1936 bool 1937 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 1938 MachineFunction &MF, bool isVarArg, 1939 const SmallVectorImpl<ISD::OutputArg> &Outs, 1940 LLVMContext &Context) const { 1941 SmallVector<CCValAssign, 16> RVLocs; 1942 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); 1943 return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, 1944 isVarArg)); 1945 } 1946 1947 SDValue 1948 ARMTargetLowering::LowerReturn(SDValue Chain, 1949 CallingConv::ID CallConv, bool isVarArg, 1950 const SmallVectorImpl<ISD::OutputArg> &Outs, 1951 const SmallVectorImpl<SDValue> &OutVals, 1952 DebugLoc dl, SelectionDAG &DAG) const { 1953 1954 // CCValAssign - represent the assignment of the return value to a location. 1955 SmallVector<CCValAssign, 16> RVLocs; 1956 1957 // CCState - Info about the registers and stack slots. 1958 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1959 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1960 1961 // Analyze outgoing return values. 1962 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1963 isVarArg)); 1964 1965 SDValue Flag; 1966 SmallVector<SDValue, 4> RetOps; 1967 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 1968 1969 // Copy the result values into the output registers. 
1970 for (unsigned i = 0, realRVLocIdx = 0; 1971 i != RVLocs.size(); 1972 ++i, ++realRVLocIdx) { 1973 CCValAssign &VA = RVLocs[i]; 1974 assert(VA.isRegLoc() && "Can only return in registers!"); 1975 1976 SDValue Arg = OutVals[realRVLocIdx]; 1977 1978 switch (VA.getLocInfo()) { 1979 default: llvm_unreachable("Unknown loc info!"); 1980 case CCValAssign::Full: break; 1981 case CCValAssign::BCvt: 1982 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1983 break; 1984 } 1985 1986 if (VA.needsCustom()) { 1987 if (VA.getLocVT() == MVT::v2f64) { 1988 // Extract the first half and return it in two registers. 1989 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1990 DAG.getConstant(0, MVT::i32)); 1991 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1992 DAG.getVTList(MVT::i32, MVT::i32), Half); 1993 1994 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1995 Flag = Chain.getValue(1); 1996 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 1997 VA = RVLocs[++i]; // skip ahead to next loc 1998 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1999 HalfGPRs.getValue(1), Flag); 2000 Flag = Chain.getValue(1); 2001 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2002 VA = RVLocs[++i]; // skip ahead to next loc 2003 2004 // Extract the 2nd half and fall through to handle it as an f64 value. 2005 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2006 DAG.getConstant(1, MVT::i32)); 2007 } 2008 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 2009 // available. 
2010 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 2011 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 2012 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 2013 Flag = Chain.getValue(1); 2014 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2015 VA = RVLocs[++i]; // skip ahead to next loc 2016 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 2017 Flag); 2018 } else 2019 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 2020 2021 // Guarantee that all emitted copies are 2022 // stuck together, avoiding something bad. 2023 Flag = Chain.getValue(1); 2024 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2025 } 2026 2027 // Update chain and glue. 2028 RetOps[0] = Chain; 2029 if (Flag.getNode()) 2030 RetOps.push_back(Flag); 2031 2032 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, 2033 RetOps.data(), RetOps.size()); 2034 } 2035 2036 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 2037 if (N->getNumValues() != 1) 2038 return false; 2039 if (!N->hasNUsesOfValue(1, 0)) 2040 return false; 2041 2042 SDValue TCChain = Chain; 2043 SDNode *Copy = *N->use_begin(); 2044 if (Copy->getOpcode() == ISD::CopyToReg) { 2045 // If the copy has a glue operand, we conservatively assume it isn't safe to 2046 // perform a tail call. 2047 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 2048 return false; 2049 TCChain = Copy->getOperand(0); 2050 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { 2051 SDNode *VMov = Copy; 2052 // f64 returned in a pair of GPRs. 
2053 SmallPtrSet<SDNode*, 2> Copies; 2054 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2055 UI != UE; ++UI) { 2056 if (UI->getOpcode() != ISD::CopyToReg) 2057 return false; 2058 Copies.insert(*UI); 2059 } 2060 if (Copies.size() > 2) 2061 return false; 2062 2063 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2064 UI != UE; ++UI) { 2065 SDValue UseChain = UI->getOperand(0); 2066 if (Copies.count(UseChain.getNode())) 2067 // Second CopyToReg 2068 Copy = *UI; 2069 else 2070 // First CopyToReg 2071 TCChain = UseChain; 2072 } 2073 } else if (Copy->getOpcode() == ISD::BITCAST) { 2074 // f32 returned in a single GPR. 2075 if (!Copy->hasOneUse()) 2076 return false; 2077 Copy = *Copy->use_begin(); 2078 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) 2079 return false; 2080 Chain = Copy->getOperand(0); 2081 } else { 2082 return false; 2083 } 2084 2085 bool HasRet = false; 2086 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 2087 UI != UE; ++UI) { 2088 if (UI->getOpcode() != ARMISD::RET_FLAG) 2089 return false; 2090 HasRet = true; 2091 } 2092 2093 if (!HasRet) 2094 return false; 2095 2096 Chain = TCChain; 2097 return true; 2098 } 2099 2100 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 2101 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 2102 return false; 2103 2104 if (!CI->isTailCall()) 2105 return false; 2106 2107 return !Subtarget->isThumb1Only(); 2108 } 2109 2110 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2111 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 2112 // one of the above mentioned nodes. It has to be wrapped because otherwise 2113 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2114 // be used to form addressing mode. These wrapped nodes will be selected 2115 // into MOVi. 
2116 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 2117 EVT PtrVT = Op.getValueType(); 2118 // FIXME there is no actual debug info here 2119 DebugLoc dl = Op.getDebugLoc(); 2120 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2121 SDValue Res; 2122 if (CP->isMachineConstantPoolEntry()) 2123 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2124 CP->getAlignment()); 2125 else 2126 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2127 CP->getAlignment()); 2128 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 2129 } 2130 2131 unsigned ARMTargetLowering::getJumpTableEncoding() const { 2132 return MachineJumpTableInfo::EK_Inline; 2133 } 2134 2135 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 2136 SelectionDAG &DAG) const { 2137 MachineFunction &MF = DAG.getMachineFunction(); 2138 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2139 unsigned ARMPCLabelIndex = 0; 2140 DebugLoc DL = Op.getDebugLoc(); 2141 EVT PtrVT = getPointerTy(); 2142 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 2143 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2144 SDValue CPAddr; 2145 if (RelocM == Reloc::Static) { 2146 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 2147 } else { 2148 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 2149 ARMPCLabelIndex = AFI->createPICLabelUId(); 2150 ARMConstantPoolValue *CPV = 2151 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 2152 ARMCP::CPBlockAddress, PCAdj); 2153 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2154 } 2155 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 2156 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 2157 MachinePointerInfo::getConstantPool(), 2158 false, false, false, 0); 2159 if (RelocM == Reloc::Static) 2160 return Result; 2161 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2162 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 2163 } 2164 2165 // Lower ISD::GlobalTLSAddress using the "general dynamic" model 2166 SDValue 2167 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 2168 SelectionDAG &DAG) const { 2169 DebugLoc dl = GA->getDebugLoc(); 2170 EVT PtrVT = getPointerTy(); 2171 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2172 MachineFunction &MF = DAG.getMachineFunction(); 2173 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2174 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2175 ARMConstantPoolValue *CPV = 2176 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2177 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 2178 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2179 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 2180 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 2181 MachinePointerInfo::getConstantPool(), 2182 false, false, false, 0); 2183 SDValue Chain = Argument.getValue(1); 2184 2185 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2186 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 2187 2188 // call __tls_get_addr. 
2189 ArgListTy Args; 2190 ArgListEntry Entry; 2191 Entry.Node = Argument; 2192 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 2193 Args.push_back(Entry); 2194 // FIXME: is there useful debug info available here? 2195 TargetLowering::CallLoweringInfo CLI(Chain, 2196 (Type *) Type::getInt32Ty(*DAG.getContext()), 2197 false, false, false, false, 2198 0, CallingConv::C, /*isTailCall=*/false, 2199 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 2200 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 2201 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2202 return CallResult.first; 2203 } 2204 2205 // Lower ISD::GlobalTLSAddress using the "initial exec" or 2206 // "local exec" model. 2207 SDValue 2208 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2209 SelectionDAG &DAG, 2210 TLSModel::Model model) const { 2211 const GlobalValue *GV = GA->getGlobal(); 2212 DebugLoc dl = GA->getDebugLoc(); 2213 SDValue Offset; 2214 SDValue Chain = DAG.getEntryNode(); 2215 EVT PtrVT = getPointerTy(); 2216 // Get the Thread Pointer 2217 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2218 2219 if (model == TLSModel::InitialExec) { 2220 MachineFunction &MF = DAG.getMachineFunction(); 2221 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2222 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2223 // Initial exec model. 2224 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 2225 ARMConstantPoolValue *CPV = 2226 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2227 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 2228 true); 2229 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2230 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2231 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2232 MachinePointerInfo::getConstantPool(), 2233 false, false, false, 0); 2234 Chain = Offset.getValue(1); 2235 2236 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2237 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2238 2239 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2240 MachinePointerInfo::getConstantPool(), 2241 false, false, false, 0); 2242 } else { 2243 // local exec model 2244 assert(model == TLSModel::LocalExec); 2245 ARMConstantPoolValue *CPV = 2246 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 2247 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2248 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2249 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2250 MachinePointerInfo::getConstantPool(), 2251 false, false, false, 0); 2252 } 2253 2254 // The address of the thread local variable is the add of the thread 2255 // pointer with the offset of the variable. 
2256 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2257 } 2258 2259 SDValue 2260 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2261 // TODO: implement the "local dynamic" model 2262 assert(Subtarget->isTargetELF() && 2263 "TLS not implemented for non-ELF targets"); 2264 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2265 2266 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 2267 2268 switch (model) { 2269 case TLSModel::GeneralDynamic: 2270 case TLSModel::LocalDynamic: 2271 return LowerToTLSGeneralDynamicModel(GA, DAG); 2272 case TLSModel::InitialExec: 2273 case TLSModel::LocalExec: 2274 return LowerToTLSExecModels(GA, DAG, model); 2275 } 2276 llvm_unreachable("bogus TLS model"); 2277 } 2278 2279 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2280 SelectionDAG &DAG) const { 2281 EVT PtrVT = getPointerTy(); 2282 DebugLoc dl = Op.getDebugLoc(); 2283 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2284 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2285 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2286 ARMConstantPoolValue *CPV = 2287 ARMConstantPoolConstant::Create(GV, 2288 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2289 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2290 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2291 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2292 CPAddr, 2293 MachinePointerInfo::getConstantPool(), 2294 false, false, false, 0); 2295 SDValue Chain = Result.getValue(1); 2296 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2297 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2298 if (!UseGOTOFF) 2299 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2300 MachinePointerInfo::getGOT(), 2301 false, false, false, 0); 2302 return Result; 2303 } 2304 2305 // If we have T2 ops, we can materialize the address directly via movt/movw 2306 // pair. 
This is always cheaper. 2307 if (Subtarget->useMovt()) { 2308 ++NumMovwMovt; 2309 // FIXME: Once remat is capable of dealing with instructions with register 2310 // operands, expand this into two nodes. 2311 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2312 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2313 } else { 2314 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2315 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2316 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2317 MachinePointerInfo::getConstantPool(), 2318 false, false, false, 0); 2319 } 2320 } 2321 2322 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2323 SelectionDAG &DAG) const { 2324 EVT PtrVT = getPointerTy(); 2325 DebugLoc dl = Op.getDebugLoc(); 2326 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2327 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2328 2329 // FIXME: Enable this for static codegen when tool issues are fixed. Also 2330 // update ARMFastISel::ARMMaterializeGV. 2331 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 2332 ++NumMovwMovt; 2333 // FIXME: Once remat is capable of dealing with instructions with register 2334 // operands, expand this into two nodes. 2335 if (RelocM == Reloc::Static) 2336 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2337 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2338 2339 unsigned Wrapper = (RelocM == Reloc::PIC_) 2340 ? 
ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2341 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2342 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2343 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2344 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2345 MachinePointerInfo::getGOT(), 2346 false, false, false, 0); 2347 return Result; 2348 } 2349 2350 unsigned ARMPCLabelIndex = 0; 2351 SDValue CPAddr; 2352 if (RelocM == Reloc::Static) { 2353 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2354 } else { 2355 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2356 ARMPCLabelIndex = AFI->createPICLabelUId(); 2357 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2358 ARMConstantPoolValue *CPV = 2359 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 2360 PCAdj); 2361 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2362 } 2363 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2364 2365 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2366 MachinePointerInfo::getConstantPool(), 2367 false, false, false, 0); 2368 SDValue Chain = Result.getValue(1); 2369 2370 if (RelocM == Reloc::PIC_) { 2371 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2372 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2373 } 2374 2375 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2376 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2377 false, false, false, 0); 2378 2379 return Result; 2380 } 2381 2382 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2383 SelectionDAG &DAG) const { 2384 assert(Subtarget->isTargetELF() && 2385 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2386 MachineFunction &MF = DAG.getMachineFunction(); 2387 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2388 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2389 EVT PtrVT = getPointerTy(); 2390 DebugLoc dl = 
Op.getDebugLoc(); 2391 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2392 ARMConstantPoolValue *CPV = 2393 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2394 ARMPCLabelIndex, PCAdj); 2395 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2396 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2397 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2398 MachinePointerInfo::getConstantPool(), 2399 false, false, false, 0); 2400 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2401 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2402 } 2403 2404 SDValue 2405 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2406 DebugLoc dl = Op.getDebugLoc(); 2407 SDValue Val = DAG.getConstant(0, MVT::i32); 2408 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 2409 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 2410 Op.getOperand(1), Val); 2411 } 2412 2413 SDValue 2414 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2415 DebugLoc dl = Op.getDebugLoc(); 2416 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2417 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2418 } 2419 2420 SDValue 2421 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2422 const ARMSubtarget *Subtarget) const { 2423 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2424 DebugLoc dl = Op.getDebugLoc(); 2425 switch (IntNo) { 2426 default: return SDValue(); // Don't custom lower most intrinsics. 
2427 case Intrinsic::arm_thread_pointer: { 2428 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2429 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2430 } 2431 case Intrinsic::eh_sjlj_lsda: { 2432 MachineFunction &MF = DAG.getMachineFunction(); 2433 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2434 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2435 EVT PtrVT = getPointerTy(); 2436 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2437 SDValue CPAddr; 2438 unsigned PCAdj = (RelocM != Reloc::PIC_) 2439 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2440 ARMConstantPoolValue *CPV = 2441 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 2442 ARMCP::CPLSDA, PCAdj); 2443 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2444 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2445 SDValue Result = 2446 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2447 MachinePointerInfo::getConstantPool(), 2448 false, false, false, 0); 2449 2450 if (RelocM == Reloc::PIC_) { 2451 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2452 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2453 } 2454 return Result; 2455 } 2456 case Intrinsic::arm_neon_vmulls: 2457 case Intrinsic::arm_neon_vmullu: { 2458 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2459 ? ARMISD::VMULLs : ARMISD::VMULLu; 2460 return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), 2461 Op.getOperand(1), Op.getOperand(2)); 2462 } 2463 } 2464 } 2465 2466 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 2467 const ARMSubtarget *Subtarget) { 2468 DebugLoc dl = Op.getDebugLoc(); 2469 if (!Subtarget->hasDataBarrier()) { 2470 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2471 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2472 // here. 2473 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2474 "Unexpected ISD::MEMBARRIER encountered. 
Should be libcall!"); 2475 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2476 DAG.getConstant(0, MVT::i32)); 2477 } 2478 2479 SDValue Op5 = Op.getOperand(5); 2480 bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; 2481 unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 2482 unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); 2483 bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); 2484 2485 ARM_MB::MemBOpt DMBOpt; 2486 if (isDeviceBarrier) 2487 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; 2488 else 2489 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; 2490 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2491 DAG.getConstant(DMBOpt, MVT::i32)); 2492 } 2493 2494 2495 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 2496 const ARMSubtarget *Subtarget) { 2497 // FIXME: handle "fence singlethread" more efficiently. 2498 DebugLoc dl = Op.getDebugLoc(); 2499 if (!Subtarget->hasDataBarrier()) { 2500 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2501 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2502 // here. 2503 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2504 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2505 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2506 DAG.getConstant(0, MVT::i32)); 2507 } 2508 2509 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2510 DAG.getConstant(ARM_MB::ISH, MVT::i32)); 2511 } 2512 2513 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2514 const ARMSubtarget *Subtarget) { 2515 // ARM pre v5TE and Thumb1 does not have preload instructions. 2516 if (!(Subtarget->isThumb2() || 2517 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2518 // Just preserve the chain. 
2519 return Op.getOperand(0); 2520 2521 DebugLoc dl = Op.getDebugLoc(); 2522 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2523 if (!isRead && 2524 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2525 // ARMv7 with MP extension has PLDW. 2526 return Op.getOperand(0); 2527 2528 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2529 if (Subtarget->isThumb()) { 2530 // Invert the bits. 2531 isRead = ~isRead & 1; 2532 isData = ~isData & 1; 2533 } 2534 2535 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2536 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2537 DAG.getConstant(isData, MVT::i32)); 2538 } 2539 2540 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2541 MachineFunction &MF = DAG.getMachineFunction(); 2542 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2543 2544 // vastart just stores the address of the VarArgsFrameIndex slot into the 2545 // memory location argument. 2546 DebugLoc dl = Op.getDebugLoc(); 2547 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2548 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2549 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2550 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2551 MachinePointerInfo(SV), false, false, 0); 2552 } 2553 2554 SDValue 2555 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2556 SDValue &Root, SelectionDAG &DAG, 2557 DebugLoc dl) const { 2558 MachineFunction &MF = DAG.getMachineFunction(); 2559 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2560 2561 const TargetRegisterClass *RC; 2562 if (AFI->isThumb1OnlyFunction()) 2563 RC = &ARM::tGPRRegClass; 2564 else 2565 RC = &ARM::GPRRegClass; 2566 2567 // Transform the arguments stored in physical registers into virtual ones. 
2568 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2569 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2570 2571 SDValue ArgValue2; 2572 if (NextVA.isMemLoc()) { 2573 MachineFrameInfo *MFI = MF.getFrameInfo(); 2574 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2575 2576 // Create load node to retrieve arguments from the stack. 2577 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2578 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2579 MachinePointerInfo::getFixedStack(FI), 2580 false, false, false, 0); 2581 } else { 2582 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2583 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2584 } 2585 2586 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2587 } 2588 2589 void 2590 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2591 unsigned &VARegSize, unsigned &VARegSaveSize) 2592 const { 2593 unsigned NumGPRs; 2594 if (CCInfo.isFirstByValRegValid()) 2595 NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); 2596 else { 2597 unsigned int firstUnalloced; 2598 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2599 sizeof(GPRArgRegs) / 2600 sizeof(GPRArgRegs[0])); 2601 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2602 } 2603 2604 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2605 VARegSize = NumGPRs * 4; 2606 VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2607 } 2608 2609 // The remaining GPRs hold either the beginning of variable-argument 2610 // data, or the beginning of an aggregate passed by value (usually 2611 // byval). Either way, we allocate stack slots adjacent to the data 2612 // provided by our caller, and store the unallocated registers there. 2613 // If this is a variadic function, the va_list pointer will begin with 2614 // these values; otherwise, this reassembles a (byval) structure that 2615 // was split between registers and memory. 
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                                        DebugLoc dl, SDValue &Chain,
                                        const Value *OrigArg,
                                        unsigned OffsetFromOrigArg,
                                        unsigned ArgOffset,
                                        bool ForceMutable) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Index into GPRArgRegs of the first register that has to be spilled.
  unsigned firstRegToSaveIndex;
  if (CCInfo.isFirstByValRegValid())
    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
  else {
    firstRegToSaveIndex = CCInfo.getFirstUnallocated
      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
  }

  unsigned VARegSize, VARegSaveSize;
  computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
  if (VARegSaveSize) {
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    AFI->setVarArgsRegSaveSize(VARegSaveSize);
    // The fixed object's offset is ArgOffset plus the alignment padding
    // (VARegSaveSize - VARegSize).
    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
                                                     ArgOffset + VARegSaveSize
                                                     - VARegSize,
                                                     false));
    SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
                                    getPointerTy());

    // One store per remaining register; FIN advances by 4 bytes each time.
    SmallVector<SDValue, 4> MemOps;
    for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
      const TargetRegisterClass *RC;
      if (AFI->isThumb1OnlyFunction())
        RC = &ARM::tGPRRegClass;
      else
        RC = &ARM::GPRRegClass;

      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
      SDValue Store =
        DAG.getStore(Val.getValue(1), dl, Val, FIN,
                     MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
                     false, false, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                        DAG.getConstant(4, getPointerTy()));
    }
    // Merge the independent stores into a single chain.
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  } else
    // This will point to the next argument passed via stack.
    AFI->setVarArgsFrameIndex(
        MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
}

/// LowerFormalArguments - Lower the incoming arguments described by Ins into
/// DAG values appended to InVals, using the locations assigned by the calling
/// convention analysis.  Returns the (possibly updated) chain.
SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
  CCInfo.AnalyzeFormalArguments(Ins,
                                CCAssignFnForNode(CallConv, /* Return*/ false,
                                                  isVarArg));

  // NOTE(review): ArgValues is not used anywhere in this function.
  SmallVector<SDValue, 16> ArgValues;
  int lastInsIndex = -1;
  SDValue ArgValue;
  // CurOrigArg tracks the IR argument that the current location belongs to.
  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
    CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();

      if (VA.needsCustom()) {
        // f64 and vector types are split up into multiple registers or
        // combinations of registers and stack slots.
        // Each ++i below consumes one further ArgLocs entry.
        if (VA.getLocVT() == MVT::v2f64) {
          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
                                                   Chain, DAG, dl);
          VA = ArgLocs[++i]; // skip ahead to next loc
          SDValue ArgValue2;
          if (VA.isMemLoc()) {
            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                    MachinePointerInfo::getFixedStack(FI),
                                    false, false, false, 0);
          } else {
            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
                                             Chain, DAG, dl);
          }
          // Build the v2f64 from its two f64 halves.
          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
        } else
          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

      } else {
        const TargetRegisterClass *RC;

        // Pick the register class matching the location type.
        if (RegVT == MVT::f32)
          RC = &ARM::SPRRegClass;
        else if (RegVT == MVT::f64)
          RC = &ARM::DPRRegClass;
        else if (RegVT == MVT::v2f64)
          RC = &ARM::QPRRegClass;
        else if (RegVT == MVT::i32)
          RC = AFI->isThumb1OnlyFunction() ?
            (const TargetRegisterClass*)&ARM::tGPRRegClass :
            (const TargetRegisterClass*)&ARM::GPRRegClass;
        else
          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");

        // Transform the arguments in physical registers into virtual ones.
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
      }

      // If this is an 8 or 16-bit value, it is really passed promoted
      // to 32 bits.  Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);

    } else { // !VA.isRegLoc(): the argument lives in memory.

      // sanity check
      assert(VA.isMemLoc());
      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");

      int index = ArgLocs[i].getValNo();

      // Some Ins[] entries become multiple ArgLoc[] entries.
      // Process them only once.
      if (index != lastInsIndex)
        {
          ISD::ArgFlagsTy Flags = Ins[index].Flags;
          // FIXME: For now, all byval parameter objects are marked mutable.
          // This can be changed with more analysis.
          // In case of tail call optimization mark all arguments mutable.
          // Since they could be overwritten by lowering of arguments in case of
          // a tail call.
          if (Flags.isByVal()) {
            // NOTE(review): this local shadows the function-level AFI and is
            // initialised from the same MF.getInfo<ARMFunctionInfo>() call.
            ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
            if (!AFI->getVarArgsFrameIndex()) {
              // No VarArgsFrameIndex recorded yet: spill the registers and
              // use the resulting frame index for this byval argument.
              VarArgStyleRegisters(CCInfo, DAG,
                                   dl, Chain, CurOrigArg,
                                   Ins[VA.getValNo()].PartOffset,
                                   VA.getLocMemOffset(),
                                   true /*force mutable frames*/);
              int VAFrameIndex = AFI->getVarArgsFrameIndex();
              InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
            } else {
              int FI = MFI->CreateFixedObject(Flags.getByValSize(),
                                              VA.getLocMemOffset(), false);
              InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
            }
          } else {
            int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
                                            VA.getLocMemOffset(), true);

            // Create load nodes to retrieve arguments from the stack.
            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, false, 0));
          }
          lastInsIndex = index;
        }
    }
  }

  // varargs
  if (isVarArg)
    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
                         CCInfo.getNextStackOffset());

  return Chain;
}

/// isFloatingPointZero - Return true if this is +0.0.
/// Recognises both a ConstantFP node and a load (extending or not) whose
/// address is an ARMISD::Wrapper around a constant-pool entry holding +0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
          return CFP->getValueAPF().isPosZero();
    }
  }
  return false;
}

/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
/// If RHS is a constant that is not a legal compare immediate, the constant is
/// adjusted by one (with a matching change of condition code) when that makes
/// it legal.  ARMcc receives the resulting ARM condition code as an i32
/// constant.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &ARMcc, SelectionDAG &DAG,
                             DebugLoc dl) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    unsigned C = RHSC->getZExtValue();
    if (!isLegalICmpImmediate(C)) {
      // Constant does not fit, try adjusting it by one?
      // Each case excludes the boundary value for which C-1 / C+1 would wrap.
      switch (CC) {
      default: break;
      case ISD::SETLT:
      case ISD::SETGE:
        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if (C != 0 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      }
    }
  }

  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
  ARMISD::NodeType CompareType;
  switch (CondCode) {
  default:
    CompareType = ARMISD::CMP;
    break;
  case ARMCC::EQ:
  case ARMCC::NE:
    // Uses only Z Flag
    CompareType = ARMISD::CMPZ;
    break;
  }
  ARMcc = DAG.getConstant(CondCode, MVT::i32);
  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}

/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
/// A compare against +0.0 uses the single-operand CMPFPw0 form.
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                             DebugLoc dl) const {
  SDValue Cmp;
  if (!isFloatingPointZero(RHS))
    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
  else
    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}

/// duplicateCmp - Glue values can have only one use, so this function
/// duplicates a comparison node.
/// Handles CMP/CMPZ directly and FMSTAT wrapping either CMPFP or CMPFPw0.
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
  unsigned Opc = Cmp.getOpcode();
  DebugLoc DL = Cmp.getDebugLoc();
  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));

  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
  // Re-create the VFP compare underneath the FMSTAT, then the FMSTAT itself.
  Cmp = Cmp.getOperand(0);
  Opc = Cmp.getOpcode();
  if (Opc == ARMISD::CMPFP)
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
  else {
    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
  }
  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}

/// LowerSELECT - Lower ISD::SELECT.  A single-use CMOV of the constants 0/1
/// used as the condition is folded into one CMOV on the select operands;
/// otherwise the condition is masked to its low bit and the select is
/// rewritten as a select_cc against zero.
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue SelectTrue = Op.getOperand(1);
  SDValue SelectFalse = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();

  // Convert:
  //
  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
  //
  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *CMOVTrue =
      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
    const ConstantSDNode *CMOVFalse =
      dyn_cast<ConstantSDNode>(Cond.getOperand(1));

    if (CMOVTrue && CMOVFalse) {
      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();

      // True/False stay null unless the constants are exactly (1,0) or (0,1).
      SDValue True;
      SDValue False;
      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
        True = SelectTrue;
        False = SelectFalse;
      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
        True = SelectFalse;
        False = SelectTrue;
      }

      if (True.getNode() && False.getNode()) {
        EVT VT = Op.getValueType();
        SDValue ARMcc = Cond.getOperand(2);
        SDValue CCR = Cond.getOperand(3);
        // The glue result of the original compare can only have one user, so
        // the new CMOV gets its own copy of the compare.
        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
        assert(True.getValueType() == VT);
        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
      }
    }
  }

  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
  // undefined bits before doing a full-word comparison with zero.
  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
                     DAG.getConstant(1, Cond.getValueType()));

  return DAG.getSelectCC(dl, Cond,
                         DAG.getConstant(0, Cond.getValueType()),
                         SelectTrue, SelectFalse, ISD::SETNE);
}

/// LowerSELECT_CC - Lower ISD::SELECT_CC to ARMISD::CMOV.  i32 compares use
/// getARMCmp; every other compare type goes through the VFP path, which may
/// need a second CMOV when the FP condition maps to two ARM condition codes.
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDValue TrueVal = Op.getOperand(2);
  SDValue FalseVal = Op.getOperand(3);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
                               ARMcc, CCR, Cmp);
  // CondCode2 == AL means no second condition is required.
  if (CondCode2 != ARMCC::AL) {
    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
    // FIXME: Needs another CMP because flag can have but one use.
    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
                         Result, TrueVal, ARMcc2, CCR, Cmp2);
  }
  return Result;
}

/// canChangeToInt - Given the fp compare operand, return true if it is suitable
/// to morph to an integer compare sequence.
/// SeenZero is set to true when the operand is +0.0; it is left untouched
/// otherwise.
static bool canChangeToInt(SDValue Op, bool &SeenZero,
                           const ARMSubtarget *Subtarget) {
  SDNode *N = Op.getNode();
  if (!N->hasOneUse())
    // Otherwise it requires moving the value from fp to integer registers.
    return false;
  if (!N->getNumValues())
    return false;
  EVT VT = Op.getValueType();
  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
    // vmrs are very slow, e.g. cortex-a8.
    return false;

  if (isFloatingPointZero(Op)) {
    SeenZero = true;
    return true;
  }
  return ISD::isNormalLoad(N);
}

/// bitcastf32Toi32 - Produce the i32 form of an f32 compare operand: constant
/// 0 for +0.0, or an i32 load that reuses the chain, address and memory flags
/// of the original load.  Any other operand is unreachable.
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
  if (isFloatingPointZero(Op))
    return DAG.getConstant(0, MVT::i32);

  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
                       Ld->isVolatile(), Ld->isNonTemporal(),
                       Ld->isInvariant(), Ld->getAlignment());

  llvm_unreachable("Unknown VFP cmp argument!");
}

/// expandf64Toi32 - Split an f64 compare operand into two i32 values.
/// RetVal1 is loaded from the original address and RetVal2 from that address
/// plus 4; both are constant 0 when the operand is +0.0.  Any other operand
/// is unreachable.
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                           SDValue &RetVal1, SDValue &RetVal2) {
  if (isFloatingPointZero(Op)) {
    RetVal1 = DAG.getConstant(0, MVT::i32);
    RetVal2 = DAG.getConstant(0, MVT::i32);
    return;
  }

  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
    SDValue Ptr = Ld->getBasePtr();
    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                          Ld->getChain(), Ptr,
                          Ld->getPointerInfo(),
                          Ld->isVolatile(), Ld->isNonTemporal(),
                          Ld->isInvariant(), Ld->getAlignment());

    EVT PtrType = Ptr.getValueType();
    // The second word sits 4 bytes further on, so its alignment is derived
    // from the original alignment and that offset.
    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                          Ld->getChain(), NewPtr,
                          Ld->getPointerInfo().getWithOffset(4),
                          Ld->isVolatile(), Ld->isNonTemporal(),
                          Ld->isInvariant(), NewAlign);
    return;
  }

  llvm_unreachable("Unknown VFP cmp argument!");
}

/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
/// f32 and even f64 comparisons to integer ones.
/// Returns a null SDValue when the transformation does not apply.
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  bool LHSSeenZero = false;
  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
  bool RHSSeenZero = false;
  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
    // If unsafe fp math optimization is enabled and there are no other uses of
    // the CMP operands, and the condition code is EQ or NE, we can optimize it
    // to an integer comparison.
    if (CC == ISD::SETOEQ)
      CC = ISD::SETEQ;
    else if (CC == ISD::SETUNE)
      CC = ISD::SETNE;

    // 0x7fffffff clears bit 31 of the word it is ANDed with.
    SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
    SDValue ARMcc;
    if (LHS.getValueType() == MVT::f32) {
      LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                        bitcastf32Toi32(LHS, DAG), Mask);
      RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                        bitcastf32Toi32(RHS, DAG), Mask);
      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                         Chain, Dest, ARMcc, CCR, Cmp);
    }

    // f64: compare as two i32 words; only the second word of each operand is
    // masked.
    SDValue LHS1, LHS2;
    SDValue RHS1, RHS2;
    expandf64Toi32(LHS, DAG, LHS1, LHS2);
    expandf64Toi32(RHS, DAG, RHS1, RHS2);
    LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
    RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
    ARMcc = DAG.getConstant(CondCode, MVT::i32);
    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
  }

  return SDValue();
}

/// LowerBR_CC - Lower ISD::BR_CC to ARMISD::BRCOND.  i32 compares use
/// getARMCmp; f32/f64 compares use a VFP compare and emit a second BRCOND
/// when the FP condition maps to two ARM condition codes.
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                       Chain, Dest, ARMcc, CCR, Cmp);
  }

  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);

  // Under unsafe FP math, equality tests may be turned into integer compares.
  if (getTargetMachine().Options.UnsafeFPMath &&
      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
       CC == ISD::SETNE || CC == ISD::SETUNE)) {
    SDValue Result = OptimizeVFPBrcond(Op, DAG);
    if (Result.getNode())
      return Result;
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  if (CondCode2 != ARMCC::AL) {
    // The second branch is chained on the first and consumes its glue result
    // instead of a fresh compare.
    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  }
  return Res;
}

/// LowerBR_JT - Lower ISD::BR_JT.  The entry address is Table + Index * 4.
/// Thumb2 emits BR2_JT on that address; otherwise the entry is loaded (and,
/// for PIC, added to the table address) before emitting BR_JT.
SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Table = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();

  EVT PTy = getPointerTy();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
  // Scale the index by the 4-byte entry size.
  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
  if (Subtarget->isThumb2()) {
    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
    // which does another jump to the destination. This also makes it easier
    // to translate it to TBB / TBH later.
    // FIXME: This might not work if the function is extremely large.
    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, Op.getOperand(2), JTI, UId);
  }
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
    // PIC: the loaded entry is added to the table address to form the target.
    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(),
                       false, false, false, 0);
    Chain = Addr.getValue(1);
    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  } else {
    // Non-PIC: the loaded entry is used as the branch target directly.
    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(),
                       false, false, false, 0);
    Chain = Addr.getValue(1);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  }
}

/// LowerVectorFP_TO_INT - Custom lowering for vector FP_TO_SINT/FP_TO_UINT.
/// Conversions from f32 elements to i32 elements are returned unchanged,
/// v4f32 -> v4i16 is done as v4f32 -> v4i32 followed by a truncate, and
/// everything else is unrolled.
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  if (Op.getValueType().getVectorElementType() == MVT::i32) {
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
      return Op;
    return DAG.UnrollVectorOp(Op.getNode());
  }

  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
         "Invalid type for custom lowering!");
  if (VT != MVT::v4i16)
    return DAG.UnrollVectorOp(Op.getNode());

  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}

/// LowerFP_TO_INT - Lower FP_TO_SINT/FP_TO_UINT.  Vector types are forwarded
/// to LowerVectorFP_TO_INT; scalars become ARMISD::FTOSI/FTOUI, whose f32
/// typed result is then bitcast to i32.
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorFP_TO_INT(Op, DAG);

  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Invalid opcode!");
  case ISD::FP_TO_SINT:
    Opc = ARMISD::FTOSI;
    break;
  case ISD::FP_TO_UINT:
    Opc = ARMISD::FTOUI;
    break;
  }
  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}
3271 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3272 EVT VT = Op.getValueType(); 3273 DebugLoc dl = Op.getDebugLoc(); 3274 3275 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3276 if (VT.getVectorElementType() == MVT::f32) 3277 return Op; 3278 return DAG.UnrollVectorOp(Op.getNode()); 3279 } 3280 3281 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3282 "Invalid type for custom lowering!"); 3283 if (VT != MVT::v4f32) 3284 return DAG.UnrollVectorOp(Op.getNode()); 3285 3286 unsigned CastOpc; 3287 unsigned Opc; 3288 switch (Op.getOpcode()) { 3289 default: llvm_unreachable("Invalid opcode!"); 3290 case ISD::SINT_TO_FP: 3291 CastOpc = ISD::SIGN_EXTEND; 3292 Opc = ISD::SINT_TO_FP; 3293 break; 3294 case ISD::UINT_TO_FP: 3295 CastOpc = ISD::ZERO_EXTEND; 3296 Opc = ISD::UINT_TO_FP; 3297 break; 3298 } 3299 3300 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3301 return DAG.getNode(Opc, dl, VT, Op); 3302 } 3303 3304 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3305 EVT VT = Op.getValueType(); 3306 if (VT.isVector()) 3307 return LowerVectorINT_TO_FP(Op, DAG); 3308 3309 DebugLoc dl = Op.getDebugLoc(); 3310 unsigned Opc; 3311 3312 switch (Op.getOpcode()) { 3313 default: llvm_unreachable("Invalid opcode!"); 3314 case ISD::SINT_TO_FP: 3315 Opc = ARMISD::SITOF; 3316 break; 3317 case ISD::UINT_TO_FP: 3318 Opc = ARMISD::UITOF; 3319 break; 3320 } 3321 3322 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3323 return DAG.getNode(Opc, dl, VT, Op); 3324 } 3325 3326 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3327 // Implement fcopysign with a fabs and a conditional fneg. 
3328 SDValue Tmp0 = Op.getOperand(0); 3329 SDValue Tmp1 = Op.getOperand(1); 3330 DebugLoc dl = Op.getDebugLoc(); 3331 EVT VT = Op.getValueType(); 3332 EVT SrcVT = Tmp1.getValueType(); 3333 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3334 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3335 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3336 3337 if (UseNEON) { 3338 // Use VBSL to copy the sign bit. 3339 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3340 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3341 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3342 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3343 if (VT == MVT::f64) 3344 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3345 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3346 DAG.getConstant(32, MVT::i32)); 3347 else /*if (VT == MVT::f32)*/ 3348 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3349 if (SrcVT == MVT::f32) { 3350 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3351 if (VT == MVT::f64) 3352 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3353 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3354 DAG.getConstant(32, MVT::i32)); 3355 } else if (VT == MVT::f32) 3356 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3357 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3358 DAG.getConstant(32, MVT::i32)); 3359 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3360 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3361 3362 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3363 MVT::i32); 3364 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3365 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3366 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3367 3368 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3369 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3370 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3371 if (VT == MVT::f32) { 3372 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3373 Res = 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3374 DAG.getConstant(0, MVT::i32)); 3375 } else { 3376 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3377 } 3378 3379 return Res; 3380 } 3381 3382 // Bitcast operand 1 to i32. 3383 if (SrcVT == MVT::f64) 3384 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3385 &Tmp1, 1).getValue(1); 3386 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3387 3388 // Or in the signbit with integer operations. 3389 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3390 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3391 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3392 if (VT == MVT::f32) { 3393 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3394 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3395 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3396 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3397 } 3398 3399 // f64: Or the high part with signbit and then combine two parts. 3400 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3401 &Tmp0, 1); 3402 SDValue Lo = Tmp0.getValue(0); 3403 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3404 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3405 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3406 } 3407 3408 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3409 MachineFunction &MF = DAG.getMachineFunction(); 3410 MachineFrameInfo *MFI = MF.getFrameInfo(); 3411 MFI->setReturnAddressIsTaken(true); 3412 3413 EVT VT = Op.getValueType(); 3414 DebugLoc dl = Op.getDebugLoc(); 3415 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3416 if (Depth) { 3417 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3418 SDValue Offset = DAG.getConstant(4, MVT::i32); 3419 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3420 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3421 MachinePointerInfo(), false, false, false, 0); 
3422 } 3423 3424 // Return LR, which contains the return address. Mark it an implicit live-in. 3425 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3426 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3427 } 3428 3429 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3430 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3431 MFI->setFrameAddressIsTaken(true); 3432 3433 EVT VT = Op.getValueType(); 3434 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 3435 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3436 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3437 ? ARM::R7 : ARM::R11; 3438 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3439 while (Depth--) 3440 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3441 MachinePointerInfo(), 3442 false, false, false, 0); 3443 return FrameAddr; 3444 } 3445 3446 /// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), 3447 /// and size(DestVec) > 128-bits. 3448 /// This is achieved by doing the one extension from the SrcVec, splitting the 3449 /// result, extending these parts, and then concatenating these into the 3450 /// destination. 3451 static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { 3452 SDValue Op = N->getOperand(0); 3453 EVT SrcVT = Op.getValueType(); 3454 EVT DestVT = N->getValueType(0); 3455 3456 assert(DestVT.getSizeInBits() > 128 && 3457 "Custom sext/zext expansion needs >128-bit vector."); 3458 // If this is a normal length extension, use the default expansion. 
3459 if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && 3460 SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) 3461 return SDValue(); 3462 3463 DebugLoc dl = N->getDebugLoc(); 3464 unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); 3465 unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); 3466 unsigned NumElts = SrcVT.getVectorNumElements(); 3467 LLVMContext &Ctx = *DAG.getContext(); 3468 SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; 3469 3470 EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), 3471 NumElts); 3472 EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), 3473 NumElts/2); 3474 EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), 3475 NumElts/2); 3476 3477 Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); 3478 SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, 3479 DAG.getIntPtrConstant(0)); 3480 SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, 3481 DAG.getIntPtrConstant(NumElts/2)); 3482 ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); 3483 ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); 3484 return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); 3485 } 3486 3487 /// ExpandBITCAST - If the target supports VFP, this function is called to 3488 /// expand a bit convert where either the source or destination type is i64 to 3489 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3490 /// operand type is illegal (e.g., v2f32 for a target that doesn't support 3491 /// vectors), since the legalizer won't know what to do with that. 3492 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3493 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3494 DebugLoc dl = N->getDebugLoc(); 3495 SDValue Op = N->getOperand(0); 3496 3497 // This function is only supposed to be called for i64 types, either as the 3498 // source or destination of the bit convert. 
3499 EVT SrcVT = Op.getValueType(); 3500 EVT DstVT = N->getValueType(0); 3501 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3502 "ExpandBITCAST called for non-i64 type"); 3503 3504 // Turn i64->f64 into VMOVDRR. 3505 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3506 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3507 DAG.getConstant(0, MVT::i32)); 3508 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3509 DAG.getConstant(1, MVT::i32)); 3510 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3511 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3512 } 3513 3514 // Turn f64->i64 into VMOVRRD. 3515 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 3516 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 3517 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 3518 // Merge the pieces into a single i64 value. 3519 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 3520 } 3521 3522 return SDValue(); 3523 } 3524 3525 /// getZeroVector - Returns a vector of specified type with all zero elements. 3526 /// Zero vectors are used to represent vector negation and in those cases 3527 /// will be implemented with the NEON VNEG instruction. However, VNEG does 3528 /// not support i64 elements, so sometimes the zero vectors will need to be 3529 /// explicitly constructed. Regardless, use a canonical VMOV to create the 3530 /// zero vector. 3531 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 3532 assert(VT.isVector() && "Expected a vector type"); 3533 // The canonical modified immediate encoding of a zero vector is....0! 3534 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 3535 EVT VmovVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; 3536 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 3537 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3538 } 3539 3540 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two 3541 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 3542 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 3543 SelectionDAG &DAG) const { 3544 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3545 EVT VT = Op.getValueType(); 3546 unsigned VTBits = VT.getSizeInBits(); 3547 DebugLoc dl = Op.getDebugLoc(); 3548 SDValue ShOpLo = Op.getOperand(0); 3549 SDValue ShOpHi = Op.getOperand(1); 3550 SDValue ShAmt = Op.getOperand(2); 3551 SDValue ARMcc; 3552 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 3553 3554 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 3555 3556 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3557 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3558 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 3559 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3560 DAG.getConstant(VTBits, MVT::i32)); 3561 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 3562 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3563 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 3564 3565 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3566 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3567 ARMcc, DAG, dl); 3568 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 3569 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 3570 CCR, Cmp); 3571 3572 SDValue Ops[2] = { Lo, Hi }; 3573 return DAG.getMergeValues(Ops, 2, dl); 3574 } 3575 3576 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 3577 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
3578 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 3579 SelectionDAG &DAG) const { 3580 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3581 EVT VT = Op.getValueType(); 3582 unsigned VTBits = VT.getSizeInBits(); 3583 DebugLoc dl = Op.getDebugLoc(); 3584 SDValue ShOpLo = Op.getOperand(0); 3585 SDValue ShOpHi = Op.getOperand(1); 3586 SDValue ShAmt = Op.getOperand(2); 3587 SDValue ARMcc; 3588 3589 assert(Op.getOpcode() == ISD::SHL_PARTS); 3590 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3591 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3592 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 3593 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3594 DAG.getConstant(VTBits, MVT::i32)); 3595 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 3596 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 3597 3598 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3599 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3600 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3601 ARMcc, DAG, dl); 3602 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 3603 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 3604 CCR, Cmp); 3605 3606 SDValue Ops[2] = { Lo, Hi }; 3607 return DAG.getMergeValues(Ops, 2, dl); 3608 } 3609 3610 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 3611 SelectionDAG &DAG) const { 3612 // The rounding mode is in bits 23:22 of the FPSCR. 3613 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 3614 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 3615 // so that the shift + and get folded into a bitfield extract. 
3616 DebugLoc dl = Op.getDebugLoc(); 3617 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 3618 DAG.getConstant(Intrinsic::arm_get_fpscr, 3619 MVT::i32)); 3620 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 3621 DAG.getConstant(1U << 22, MVT::i32)); 3622 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 3623 DAG.getConstant(22, MVT::i32)); 3624 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 3625 DAG.getConstant(3, MVT::i32)); 3626 } 3627 3628 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 3629 const ARMSubtarget *ST) { 3630 EVT VT = N->getValueType(0); 3631 DebugLoc dl = N->getDebugLoc(); 3632 3633 if (!ST->hasV6T2Ops()) 3634 return SDValue(); 3635 3636 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 3637 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 3638 } 3639 3640 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count 3641 /// for each 16-bit element from operand, repeated. The basic idea is to 3642 /// leverage vcnt to get the 8-bit counts, gather and add the results. 3643 /// 3644 /// Trace for v4i16: 3645 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 3646 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) 3647 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) 3648 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] 3649 /// [b0 b1 b2 b3 b4 b5 b6 b7] 3650 /// +[b1 b0 b3 b2 b5 b4 b7 b6] 3651 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, 3652 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) 3653 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { 3654 EVT VT = N->getValueType(0); 3655 DebugLoc DL = N->getDebugLoc(); 3656 3657 EVT VT8Bit = VT.is64BitVector() ? 
MVT::v8i8 : MVT::v16i8; 3658 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); 3659 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); 3660 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); 3661 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); 3662 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); 3663 } 3664 3665 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the 3666 /// bit-count for each 16-bit element from the operand. We need slightly 3667 /// different sequencing for v4i16 and v8i16 to stay within NEON's available 3668 /// 64/128-bit registers. 3669 /// 3670 /// Trace for v4i16: 3671 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 3672 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) 3673 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] 3674 /// v4i16:Extracted = [k0 k1 k2 k3 ] 3675 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { 3676 EVT VT = N->getValueType(0); 3677 DebugLoc DL = N->getDebugLoc(); 3678 3679 SDValue BitCounts = getCTPOP16BitCounts(N, DAG); 3680 if (VT.is64BitVector()) { 3681 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); 3682 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, 3683 DAG.getIntPtrConstant(0)); 3684 } else { 3685 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, 3686 BitCounts, DAG.getIntPtrConstant(0)); 3687 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); 3688 } 3689 } 3690 3691 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the 3692 /// bit-count for each 32-bit element from the operand. The idea here is 3693 /// to split the vector into 16-bit elements, leverage the 16-bit count 3694 /// routine, and then combine the results. 
///
/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
/// input    = [v0    v1    ] (vi: 32-bit elements)
/// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
/// vrev: N0 = [k1 k0 k3 k2 ]
///            [k0 k1 k2 k3 ]
///       N1 =+[k1 k0 k3 k2 ]
///            [k0 k2 k1 k3 ]
///       N2 =+[k1 k3 k0 k2 ]
///            [k0    k2    k1    k3    ]
/// Extended =+[k1    k3    k0    k2    ]
///            [k0    k2    ]
/// Extracted=+[k1    k3    ]
///
static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // View of the operand as 16-bit elements, same total width as VT.
  EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;

  SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
  // The 16-bit routine takes an SDNode and reads its operand 0 and result
  // type, so the BITCAST node itself is passed in.
  SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
  // Add each 16-bit count to its neighbour within the same 32-bit element,
  // then gather the sums.
  SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
  SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
  SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);

  if (VT.is64BitVector()) {
    // v2i32: widen v4i16 to v4i32 first, then keep the low two elements.
    SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
                       DAG.getIntPtrConstant(0));
  } else {
    // v4i32: keep the low four 16-bit sums first, then widen them to 32 bits.
    SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
                                    DAG.getIntPtrConstant(0));
    return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
  }
}

/// LowerCTPOP - Custom lowering for vector CTPOP on NEON. Only v2i32, v4i32,
/// v4i16 and v8i16 results are expected; the work is dispatched on the element
/// width to the 32-bit or 16-bit helper.
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);

  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
  assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
          VT == MVT::v4i16 || VT == MVT::v8i16) &&
         "Unexpected type for custom ctpop lowering");

  if (VT.getVectorElementType() == MVT::i32)
    return lowerCTPOP32BitElements(N, DAG);
  else
    return lowerCTPOP16BitElements(N, DAG);
}

/// LowerShift - Custom lowering for vector SHL/SRA/SRL using the NEON vshift
/// intrinsics. Scalar shifts are not handled here and yield an empty SDValue.
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  if (!VT.isVector())
    return SDValue();

  // Lower vector shifts on NEON to use VSHL.
  assert(ST->hasNEON() && "unexpected vector shift");

  // Left shifts translate directly to the vshiftu intrinsic.
  if (N->getOpcode() == ISD::SHL)
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
                       N->getOperand(0), N->getOperand(1));

  assert((N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");

  // NEON uses the same intrinsics for both left and right shifts.  For
  // right shifts, the shift amounts are negative, so negate the vector of
  // shift amounts.
  EVT ShiftVT = N->getOperand(1).getValueType();
  // Negation is expressed as (zero vector - amounts).
  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
                                     getZeroVector(ShiftVT, DAG, dl),
                                     N->getOperand(1));
  // SRA uses the signed intrinsic, SRL the unsigned one.
  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
                             Intrinsic::arm_neon_vshifts :
                             Intrinsic::arm_neon_vshiftu);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(vshiftInt, MVT::i32),
                     N->getOperand(0), NegatedCount);
}

/// Expand64BitShift - Custom expansion of an i64 SRL/SRA by the constant 1
/// into a flag-setting shift of the high word followed by an RRX of the low
/// word. Returns an empty SDValue (generic lowering) for any other result
/// type, any other shift amount, or on Thumb1-only subtargets.
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
                                const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // We can get here for a node like i32 = ISD::SHL i32, i64
  if (VT != MVT::i64)
    return SDValue();

  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "Unknown shift to lower!");

  // We only lower SRA, SRL of 1 here, all others use generic lowering.
  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
    return SDValue();

  // Thumb1 has no RRX, so leave the node to the generic lowering there.
  if (ST->isThumb1Only()) return SDValue();

  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(1, MVT::i32));

  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
  // captures the result into a carry flag.
  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);

  // The low part is an ARMISD::RRX operand, which shifts the carry in.
  // The carry travels as the glue result (value 1) of the node above.
  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

  // Merge the pieces into a single i64 value.
  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}

/// LowerVSETCC - Custom lowering for vector SETCC (operands: LHS, RHS,
/// condition code) to the NEON compare nodes.
///
/// Each condition code is mapped to one compare opcode plus two adjustments:
/// Swap (exchange the operands before emitting the compare) and Invert
/// (bitwise-NOT the final result). FP conditions without a direct compare are
/// built as the OR of two compares. Integer equality tests against zero of an
/// AND become VTST, and compares against an all-zeros vector are folded into
/// the compare-with-zero forms.
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  SDValue TmpOp0, TmpOp1;
  bool Invert = false;
  bool Swap = false;
  unsigned Opc = 0;

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  EVT VT = Op.getValueType();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  DebugLoc dl = Op.getDebugLoc();

  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
    // The case labels below rely on fallthrough: a label first sets Invert or
    // Swap and then drops into the label that picks the opcode.
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal FP comparison");
    case ISD::SETUNE:
    case ISD::SETNE:  Invert = true; // Fallthrough
    case ISD::SETOEQ:
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETOLT:
    case ISD::SETLT: Swap = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    case ISD::SETOLE:
    case ISD::SETLE:  Swap = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE: Opc = ARMISD::VCGE; break;
    // The unordered predicates are the inverse of the opposite ordered
    // compare: ULE == !OGT, ULT == !OGE (UGE/UGT additionally swap).
    case ISD::SETUGE: Swap = true; // Fallthrough
    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
    case ISD::SETUGT: Swap = true; // Fallthrough
    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
    case ISD::SETUEQ: Invert = true; // Fallthrough
    case ISD::SETONE:
      // Expand this to (OLT | OGT).
      TmpOp0 = Op0;
      TmpOp1 = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
      break;
    case ISD::SETUO: Invert = true; // Fallthrough
    case ISD::SETO:
      // Expand this to (OLT | OGE).
      TmpOp0 = Op0;
      TmpOp1 = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
      break;
    }
  } else {
    // Integer comparisons.
    // As above, the first label of each pair falls through into the second.
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal integer comparison");
    case ISD::SETNE:  Invert = true; // Fallthrough
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETLT:  Swap = true; // Fallthrough
    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    case ISD::SETLE:  Swap = true; // Fallthrough
    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
    case ISD::SETULT: Swap = true; // Fallthrough
    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
    case ISD::SETULE: Swap = true; // Fallthrough
    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
    }

    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
    if (Opc == ARMISD::VCEQ) {

      // Pick the operand that is compared against an all-zeros vector, if any.
      SDValue AndOp;
      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
        AndOp = Op0;
      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
        AndOp = Op1;

      // Ignore bitconvert.
      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
        AndOp = AndOp.getOperand(0);

      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
        Opc = ARMISD::VTST;
        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
        // VTST computes the "ne" sense, the opposite of VCEQ, so the pending
        // inversion flips.
        Invert = !Invert;
      }
    }
  }

  if (Swap)
    std::swap(Op0, Op1);

  // If one of the operands is a constant vector zero, attempt to fold the
  // comparison to a specialized compare-against-zero form.
  SDValue SingleOp;
  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
    SingleOp = Op0;
  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    // Zero is on the left: (0 >= x) is (x <= 0) and (0 > x) is (x < 0).
    if (Opc == ARMISD::VCGE)
      Opc = ARMISD::VCLEZ;
    else if (Opc == ARMISD::VCGT)
      Opc = ARMISD::VCLTZ;
    SingleOp = Op1;
  }

  SDValue Result;
  if (SingleOp.getNode()) {
    switch (Opc) {
    case ARMISD::VCEQ:
      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
    case ARMISD::VCGE:
      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
    case ARMISD::VCLEZ:
      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
    case ARMISD::VCGT:
      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
    case ARMISD::VCLTZ:
      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
    default:
      // No compare-with-zero form for this opcode; emit the two-operand node.
      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
    }
  } else {
    Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
  }

  if (Invert)
    Result = DAG.getNOT(dl, Result, VT);

  return Result;
}

/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV).  If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                 unsigned SplatBitSize, SelectionDAG &DAG,
                                 EVT &VT, bool is128Bits, NEONModImmType type) {
  unsigned OpCmode, Imm;

  // SplatBitSize is set to the smallest size that splats the vector, so a
  // zero vector will always have SplatBitSize == 8.  However, NEON modified
  // immediate instructions others than VMOV do not support the 8-bit encoding
  // of a zero vector, and the default encoding of zero is supposed to be the
  // 32-bit version.
3965 if (SplatBits == 0) 3966 SplatBitSize = 32; 3967 3968 switch (SplatBitSize) { 3969 case 8: 3970 if (type != VMOVModImm) 3971 return SDValue(); 3972 // Any 1-byte value is OK. Op=0, Cmode=1110. 3973 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 3974 OpCmode = 0xe; 3975 Imm = SplatBits; 3976 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 3977 break; 3978 3979 case 16: 3980 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 3981 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 3982 if ((SplatBits & ~0xff) == 0) { 3983 // Value = 0x00nn: Op=x, Cmode=100x. 3984 OpCmode = 0x8; 3985 Imm = SplatBits; 3986 break; 3987 } 3988 if ((SplatBits & ~0xff00) == 0) { 3989 // Value = 0xnn00: Op=x, Cmode=101x. 3990 OpCmode = 0xa; 3991 Imm = SplatBits >> 8; 3992 break; 3993 } 3994 return SDValue(); 3995 3996 case 32: 3997 // NEON's 32-bit VMOV supports splat values where: 3998 // * only one byte is nonzero, or 3999 // * the least significant byte is 0xff and the second byte is nonzero, or 4000 // * the least significant 2 bytes are 0xff and the third is nonzero. 4001 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 4002 if ((SplatBits & ~0xff) == 0) { 4003 // Value = 0x000000nn: Op=x, Cmode=000x. 4004 OpCmode = 0; 4005 Imm = SplatBits; 4006 break; 4007 } 4008 if ((SplatBits & ~0xff00) == 0) { 4009 // Value = 0x0000nn00: Op=x, Cmode=001x. 4010 OpCmode = 0x2; 4011 Imm = SplatBits >> 8; 4012 break; 4013 } 4014 if ((SplatBits & ~0xff0000) == 0) { 4015 // Value = 0x00nn0000: Op=x, Cmode=010x. 4016 OpCmode = 0x4; 4017 Imm = SplatBits >> 16; 4018 break; 4019 } 4020 if ((SplatBits & ~0xff000000) == 0) { 4021 // Value = 0xnn000000: Op=x, Cmode=011x. 
4022 OpCmode = 0x6; 4023 Imm = SplatBits >> 24; 4024 break; 4025 } 4026 4027 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 4028 if (type == OtherModImm) return SDValue(); 4029 4030 if ((SplatBits & ~0xffff) == 0 && 4031 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 4032 // Value = 0x0000nnff: Op=x, Cmode=1100. 4033 OpCmode = 0xc; 4034 Imm = SplatBits >> 8; 4035 SplatBits |= 0xff; 4036 break; 4037 } 4038 4039 if ((SplatBits & ~0xffffff) == 0 && 4040 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 4041 // Value = 0x00nnffff: Op=x, Cmode=1101. 4042 OpCmode = 0xd; 4043 Imm = SplatBits >> 16; 4044 SplatBits |= 0xffff; 4045 break; 4046 } 4047 4048 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 4049 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 4050 // VMOV.I32. A (very) minor optimization would be to replicate the value 4051 // and fall through here to test for a valid 64-bit splat. But, then the 4052 // caller would also need to check and handle the change in size. 4053 return SDValue(); 4054 4055 case 64: { 4056 if (type != VMOVModImm) 4057 return SDValue(); 4058 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 4059 uint64_t BitMask = 0xff; 4060 uint64_t Val = 0; 4061 unsigned ImmMask = 1; 4062 Imm = 0; 4063 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 4064 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 4065 Val |= BitMask; 4066 Imm |= ImmMask; 4067 } else if ((SplatBits & BitMask) != 0) { 4068 return SDValue(); 4069 } 4070 BitMask <<= 8; 4071 ImmMask <<= 1; 4072 } 4073 // Op=1, Cmode=1110. 4074 OpCmode = 0x1e; 4075 SplatBits = Val; 4076 VT = is128Bits ? 
MVT::v2i64 : MVT::v1i64; 4077 break; 4078 } 4079 4080 default: 4081 llvm_unreachable("unexpected size for isNEONModifiedImm"); 4082 } 4083 4084 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 4085 return DAG.getTargetConstant(EncodedVal, MVT::i32); 4086 } 4087 4088 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, 4089 const ARMSubtarget *ST) const { 4090 if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) 4091 return SDValue(); 4092 4093 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); 4094 assert(Op.getValueType() == MVT::f32 && 4095 "ConstantFP custom lowering should only occur for f32."); 4096 4097 // Try splatting with a VMOV.f32... 4098 APFloat FPVal = CFP->getValueAPF(); 4099 int ImmVal = ARM_AM::getFP32Imm(FPVal); 4100 if (ImmVal != -1) { 4101 DebugLoc DL = Op.getDebugLoc(); 4102 SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); 4103 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, 4104 NewVal); 4105 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, 4106 DAG.getConstant(0, MVT::i32)); 4107 } 4108 4109 // If that fails, try a VMOV.i32 4110 EVT VMovVT; 4111 unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); 4112 SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, 4113 VMOVModImm); 4114 if (NewVal != SDValue()) { 4115 DebugLoc DL = Op.getDebugLoc(); 4116 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, 4117 NewVal); 4118 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4119 VecConstant); 4120 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4121 DAG.getConstant(0, MVT::i32)); 4122 } 4123 4124 // Finally, try a VMVN.i32 4125 NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, 4126 VMVNModImm); 4127 if (NewVal != SDValue()) { 4128 DebugLoc DL = Op.getDebugLoc(); 4129 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); 4130 SDValue 
VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4131 VecConstant); 4132 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4133 DAG.getConstant(0, MVT::i32)); 4134 } 4135 4136 return SDValue(); 4137 } 4138 4139 // check if an VEXT instruction can handle the shuffle mask when the 4140 // vector sources of the shuffle are the same. 4141 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { 4142 unsigned NumElts = VT.getVectorNumElements(); 4143 4144 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4145 if (M[0] < 0) 4146 return false; 4147 4148 Imm = M[0]; 4149 4150 // If this is a VEXT shuffle, the immediate value is the index of the first 4151 // element. The other shuffle indices must be the successive elements after 4152 // the first one. 4153 unsigned ExpectedElt = Imm; 4154 for (unsigned i = 1; i < NumElts; ++i) { 4155 // Increment the expected index. If it wraps around, just follow it 4156 // back to index zero and keep going. 4157 ++ExpectedElt; 4158 if (ExpectedElt == NumElts) 4159 ExpectedElt = 0; 4160 4161 if (M[i] < 0) continue; // ignore UNDEF indices 4162 if (ExpectedElt != static_cast<unsigned>(M[i])) 4163 return false; 4164 } 4165 4166 return true; 4167 } 4168 4169 4170 static bool isVEXTMask(ArrayRef<int> M, EVT VT, 4171 bool &ReverseVEXT, unsigned &Imm) { 4172 unsigned NumElts = VT.getVectorNumElements(); 4173 ReverseVEXT = false; 4174 4175 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4176 if (M[0] < 0) 4177 return false; 4178 4179 Imm = M[0]; 4180 4181 // If this is a VEXT shuffle, the immediate value is the index of the first 4182 // element. The other shuffle indices must be the successive elements after 4183 // the first one. 4184 unsigned ExpectedElt = Imm; 4185 for (unsigned i = 1; i < NumElts; ++i) { 4186 // Increment the expected index. If it wraps around, it may still be 4187 // a VEXT but the source vectors must be swapped. 
4188 ExpectedElt += 1; 4189 if (ExpectedElt == NumElts * 2) { 4190 ExpectedElt = 0; 4191 ReverseVEXT = true; 4192 } 4193 4194 if (M[i] < 0) continue; // ignore UNDEF indices 4195 if (ExpectedElt != static_cast<unsigned>(M[i])) 4196 return false; 4197 } 4198 4199 // Adjust the index value if the source operands will be swapped. 4200 if (ReverseVEXT) 4201 Imm -= NumElts; 4202 4203 return true; 4204 } 4205 4206 /// isVREVMask - Check if a vector shuffle corresponds to a VREV 4207 /// instruction with the specified blocksize. (The order of the elements 4208 /// within each block of the vector is reversed.) 4209 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 4210 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 4211 "Only possible block sizes for VREV are: 16, 32, 64"); 4212 4213 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4214 if (EltSz == 64) 4215 return false; 4216 4217 unsigned NumElts = VT.getVectorNumElements(); 4218 unsigned BlockElts = M[0] + 1; 4219 // If the first shuffle index is UNDEF, be optimistic. 4220 if (M[0] < 0) 4221 BlockElts = BlockSize / EltSz; 4222 4223 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 4224 return false; 4225 4226 for (unsigned i = 0; i < NumElts; ++i) { 4227 if (M[i] < 0) continue; // ignore UNDEF indices 4228 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 4229 return false; 4230 } 4231 4232 return true; 4233 } 4234 4235 static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 4236 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 4237 // range, then 0 is placed into the resulting vector. So pretty much any mask 4238 // of 8 elements can work here. 
4239 return VT == MVT::v8i8 && M.size() == 8; 4240 } 4241 4242 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4243 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4244 if (EltSz == 64) 4245 return false; 4246 4247 unsigned NumElts = VT.getVectorNumElements(); 4248 WhichResult = (M[0] == 0 ? 0 : 1); 4249 for (unsigned i = 0; i < NumElts; i += 2) { 4250 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 4251 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 4252 return false; 4253 } 4254 return true; 4255 } 4256 4257 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 4258 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4259 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 4260 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4261 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4262 if (EltSz == 64) 4263 return false; 4264 4265 unsigned NumElts = VT.getVectorNumElements(); 4266 WhichResult = (M[0] == 0 ? 0 : 1); 4267 for (unsigned i = 0; i < NumElts; i += 2) { 4268 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 4269 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 4270 return false; 4271 } 4272 return true; 4273 } 4274 4275 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4276 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4277 if (EltSz == 64) 4278 return false; 4279 4280 unsigned NumElts = VT.getVectorNumElements(); 4281 WhichResult = (M[0] == 0 ? 0 : 1); 4282 for (unsigned i = 0; i != NumElts; ++i) { 4283 if (M[i] < 0) continue; // ignore UNDEF indices 4284 if ((unsigned) M[i] != 2 * i + WhichResult) 4285 return false; 4286 } 4287 4288 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 
4289 if (VT.is64BitVector() && EltSz == 32) 4290 return false; 4291 4292 return true; 4293 } 4294 4295 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 4296 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4297 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 4298 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4299 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4300 if (EltSz == 64) 4301 return false; 4302 4303 unsigned Half = VT.getVectorNumElements() / 2; 4304 WhichResult = (M[0] == 0 ? 0 : 1); 4305 for (unsigned j = 0; j != 2; ++j) { 4306 unsigned Idx = WhichResult; 4307 for (unsigned i = 0; i != Half; ++i) { 4308 int MIdx = M[i + j * Half]; 4309 if (MIdx >= 0 && (unsigned) MIdx != Idx) 4310 return false; 4311 Idx += 2; 4312 } 4313 } 4314 4315 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4316 if (VT.is64BitVector() && EltSz == 32) 4317 return false; 4318 4319 return true; 4320 } 4321 4322 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4323 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4324 if (EltSz == 64) 4325 return false; 4326 4327 unsigned NumElts = VT.getVectorNumElements(); 4328 WhichResult = (M[0] == 0 ? 0 : 1); 4329 unsigned Idx = WhichResult * NumElts / 2; 4330 for (unsigned i = 0; i != NumElts; i += 2) { 4331 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 4332 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 4333 return false; 4334 Idx += 1; 4335 } 4336 4337 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4338 if (VT.is64BitVector() && EltSz == 32) 4339 return false; 4340 4341 return true; 4342 } 4343 4344 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 4345 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4346 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 
4347 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4348 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4349 if (EltSz == 64) 4350 return false; 4351 4352 unsigned NumElts = VT.getVectorNumElements(); 4353 WhichResult = (M[0] == 0 ? 0 : 1); 4354 unsigned Idx = WhichResult * NumElts / 2; 4355 for (unsigned i = 0; i != NumElts; i += 2) { 4356 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 4357 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 4358 return false; 4359 Idx += 1; 4360 } 4361 4362 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4363 if (VT.is64BitVector() && EltSz == 32) 4364 return false; 4365 4366 return true; 4367 } 4368 4369 /// \return true if this is a reverse operation on an vector. 4370 static bool isReverseMask(ArrayRef<int> M, EVT VT) { 4371 unsigned NumElts = VT.getVectorNumElements(); 4372 // Make sure the mask has the right size. 4373 if (NumElts != M.size()) 4374 return false; 4375 4376 // Look for <15, ..., 3, -1, 1, 0>. 4377 for (unsigned i = 0; i != NumElts; ++i) 4378 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) 4379 return false; 4380 4381 return true; 4382 } 4383 4384 // If N is an integer constant that can be moved into a register in one 4385 // instruction, return an SDValue of such a constant (will become a MOV 4386 // instruction). Otherwise return null. 
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
                                     const ARMSubtarget *ST, DebugLoc dl) {
  // dl is not used in this function.
  uint64_t Val;
  if (!isa<ConstantSDNode>(N))
    return SDValue();
  Val = cast<ConstantSDNode>(N)->getZExtValue();

  if (ST->isThumb1Only()) {
    // NOTE(review): Val is a 64-bit quantity, so "~Val <= 255" can only hold
    // when the upper 56 bits of Val are all ones -- confirm this is the
    // intended test for constants narrower than 64 bits.
    if (Val <= 255 || ~Val <= 255)
      return DAG.getConstant(Val, MVT::i32);
  } else {
    // Accept the value if either it or its complement has a shifter-operand
    // immediate encoding (getSOImmVal returns -1 when there is none).
    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
      return DAG.getConstant(Val, MVT::i32);
  }
  return SDValue();
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
//
// The strategies are tried in this order:
//   1. constant splats that fit a VMOV / VMVN / VMOV.f32 immediate;
//   2. all-undef vectors and vectors where only element 0 is defined;
//   3. vectors with a dominant value (VDUP / VDUPLANE plus lane inserts, or,
//      for constants, a retry as an i32 vector or a VDUP of a one-instruction
//      constant);
//   4. reconstruction as a shuffle of at most two source vectors;
//   5. ARMISD::BUILD_VECTOR for element sizes of 32 bits or more.
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                             const ARMSubtarget *ST) const {
  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize <= 64) {
      // Check if an immediate VMOV works.
      EVT VmovVT;
      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
                                      SplatUndef.getZExtValue(), SplatBitSize,
                                      DAG, VmovVT, VT.is128BitVector(),
                                      VMOVModImm);
      if (Val.getNode()) {
        // The immediate is created in the type chosen by isNEONModifiedImm
        // and then cast back to the requested type.
        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
      }

      // Try an immediate VMVN.
      uint64_t NegatedImm = (~SplatBits).getZExtValue();
      Val = isNEONModifiedImm(NegatedImm,
                                      SplatUndef.getZExtValue(), SplatBitSize,
                                      DAG, VmovVT, VT.is128BitVector(),
                                      VMVNModImm);
      if (Val.getNode()) {
        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
      }

      // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
      if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
        int ImmVal = ARM_AM::getFP32Imm(SplatBits);
        if (ImmVal != -1) {
          SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
          return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
        }
      }
    }
  }

  // Scan through the operands to see if only one value is used.
  //
  // As an optimisation, even if more than one value is used it may be more
  // profitable to splat with one value then change some lanes.
  //
  // Heuristically we decide to do this if the vector has a "dominant" value,
  // defined as splatted to more than half of the lanes.
  unsigned NumElts = VT.getVectorNumElements();
  bool isOnlyLowElement = true;  // no defined operand other than element 0
  bool usesOnlyOneValue = true;  // exactly one distinct defined operand
  bool hasDominantValue = false; // some operand fills more than half the lanes
  bool isConstant = true;        // every defined operand is a constant

  // Map of the number of times a particular SDValue appears in the
  // element list.
  DenseMap<SDValue, unsigned> ValueCounts;
  SDValue Value;
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
    if (V.getOpcode() == ISD::UNDEF)
      continue;
    if (i > 0)
      isOnlyLowElement = false;
    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
      isConstant = false;

    // Create the entry with a zero count if V has not been seen before.
    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];

    // Is this value dominant? (takes up more than half of the lanes)
    if (++Count > (NumElts / 2)) {
      hasDominantValue = true;
      Value = V;
    }
  }
  if (ValueCounts.size() != 1)
    usesOnlyOneValue = false;
  // Without a dominant value, fall back to whichever entry the map yields
  // first.
  if (!Value.getNode() && ValueCounts.size() > 0)
    Value = ValueCounts.begin()->first;

  // Every operand was UNDEF.
  if (ValueCounts.size() == 0)
    return DAG.getUNDEF(VT);

  if (isOnlyLowElement)
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);

  unsigned EltSize = VT.getVectorElementType().getSizeInBits();

  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
  // i32 and try again.
  if (hasDominantValue && EltSize <= 32) {
    if (!isConstant) {
      SDValue N;

      // If we are VDUPing a value that comes directly from a vector, that will
      // cause an unnecessary move to and from a GPR, where instead we could
      // just use VDUPLANE. We can only do this if the lane being extracted
      // is at a constant index, as the VDUP from lane instructions only have
      // constant-index forms.
      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          isa<ConstantSDNode>(Value->getOperand(1))) {
        // We need to create a new undef vector to use for the VDUPLANE if the
        // size of the vector from which we get the value is different than the
        // size of the vector that we need to create. We will insert the element
        // such that the register coalescer will remove unnecessary copies.
        if (VT != Value->getOperand(0).getValueType()) {
          ConstantSDNode *constIndex;
          constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
          assert(constIndex && "The index is not a constant!");
          // Fold the source lane number into the lane range of the result
          // type.
          unsigned index = constIndex->getAPIntValue().getLimitedValue() %
                             VT.getVectorNumElements();
          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
                        Value, DAG.getConstant(index, MVT::i32)),
                           DAG.getConstant(index, MVT::i32));
        } else
          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                        Value->getOperand(0), Value->getOperand(1));
      } else
        N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);

      if (!usesOnlyOneValue) {
        // The dominant value was splatted as 'N', but we now have to insert
        // all differing elements.
        for (unsigned I = 0; I < NumElts; ++I) {
          if (Op.getOperand(I) == Value)
            continue;
          SmallVector<SDValue, 3> Ops;
          Ops.push_back(N);
          Ops.push_back(Op.getOperand(I));
          Ops.push_back(DAG.getConstant(I, MVT::i32));
          N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
        }
      }
      return N;
    }
    // From here on every defined operand is a constant.  For floating-point
    // element types, cast each operand to i32 and retry the whole lowering on
    // the resulting integer vector.
    if (VT.getVectorElementType().isFloatingPoint()) {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumElts; ++i)
        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
                                  Op.getOperand(i)));
      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
      SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
      Val = LowerBUILD_VECTOR(Val, DAG, ST);
      if (Val.getNode())
        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
    }
    // A single constant that fits one move instruction can be splatted with
    // VDUP.
    if (usesOnlyOneValue) {
      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
      if (isConstant && Val.getNode())
        return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
    }
  }

  // If all elements are constants and the case above didn't get hit, fall back
  // to the default expansion, which will generate a load from the constant
  // pool.
  if (isConstant)
    return SDValue();

  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
  if (NumElts >= 4) {
    SDValue shuffle = ReconstructShuffle(Op, DAG);
    if (shuffle != SDValue())
      return shuffle;
  }

  // Vectors with 32- or 64-bit elements can be built by directly assigning
  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
  // will be legalized.
  if (EltSize >= 32) {
    // Do the expansion with floating-point types, since that is what the VFP
    // registers are defined to use, and since i64 is not legal.
    EVT EltVT = EVT::getFloatingPointVT(EltSize);
    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0; i < NumElts; ++i)
      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
  }

  return SDValue();
}

// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
//
// Op is a BUILD_VECTOR whose defined operands must all be constant-index
// EXTRACT_VECTOR_ELTs from at most two source vectors with the same element
// type as the result.  Returns the equivalent vector shuffle, or a null
// SDValue if the pattern does not fit or the resulting mask is not legal.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
                                              SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  unsigned NumElts = VT.getVectorNumElements();

  // Parallel arrays: for each distinct source vector, the smallest and the
  // largest element number extracted from it.
  SmallVector<SDValue, 2> SourceVecs;
  SmallVector<unsigned, 2> MinElts;
  SmallVector<unsigned, 2> MaxElts;

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
    if (V.getOpcode() == ISD::UNDEF)
      continue;
    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
      // A shuffle can only come from building a vector from various
      // elements of other vectors.
      return SDValue();
    } else if (V.getOperand(0).getValueType().getVectorElementType() !=
               VT.getVectorElementType()) {
      // This code doesn't know how to handle shuffles where the vector
      // element types do not match (this happens because type legalization
      // promotes the return type of EXTRACT_VECTOR_ELT).
      // FIXME: It might be appropriate to extend this code to handle
      // mismatched types.
      return SDValue();
    }

    // Record this extraction against the appropriate vector if possible...
    SDValue SourceVec = V.getOperand(0);
    // If the element number isn't a constant, we can't effectively
    // analyze what's going on.
    if (!isa<ConstantSDNode>(V.getOperand(1)))
      return SDValue();
    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
    bool FoundSource = false;
    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
      if (SourceVecs[j] == SourceVec) {
        if (MinElts[j] > EltNo)
          MinElts[j] = EltNo;
        if (MaxElts[j] < EltNo)
          MaxElts[j] = EltNo;
        FoundSource = true;
        break;
      }
    }

    // Or record a new source if not...
    if (!FoundSource) {
      SourceVecs.push_back(SourceVec);
      MinElts.push_back(EltNo);
      MaxElts.push_back(EltNo);
    }
  }

  // Currently only do something sane when at most two source vectors
  // involved.
  if (SourceVecs.size() > 2)
    return SDValue();

  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
  // Element number in each original source that corresponds to element 0 of
  // the matching shuffle source.
  int VEXTOffsets[2] = {0, 0};

  // This loop extracts the usage patterns of the source vectors
  // and prepares appropriate SDValues for a shuffle if possible.
  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
    if (SourceVecs[i].getValueType() == VT) {
      // No VEXT necessary
      ShuffleSrcs[i] = SourceVecs[i];
      VEXTOffsets[i] = 0;
      continue;
    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
      // It probably isn't worth padding out a smaller vector just to
      // break it down again in a shuffle.
      return SDValue();
    }

    // Since only 64-bit and 128-bit vectors are legal on ARM and
    // we've eliminated the other cases...
    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
           "unexpected vector sizes in ReconstructShuffle");

    if (MaxElts[i] - MinElts[i] >= NumElts) {
      // Span too large for a VEXT to cope
      return SDValue();
    }

    if (MinElts[i] >= NumElts) {
      // The extraction can just take the second half
      VEXTOffsets[i] = NumElts;
      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
                                   SourceVecs[i],
                                   DAG.getIntPtrConstant(NumElts));
    } else if (MaxElts[i] < NumElts) {
      // The extraction can just take the first half
      VEXTOffsets[i] = 0;
      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
                                   SourceVecs[i],
                                   DAG.getIntPtrConstant(0));
    } else {
      // An actual VEXT is needed
      // The used elements straddle both halves: take a window of NumElts
      // elements that starts at the smallest element number used.
      VEXTOffsets[i] = MinElts[i];
      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
                                     SourceVecs[i],
                                     DAG.getIntPtrConstant(0));
      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
                                     SourceVecs[i],
                                     DAG.getIntPtrConstant(NumElts));
      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
    }
  }

  SmallVector<int, 8> Mask;

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Entry = Op.getOperand(i);
    if (Entry.getOpcode() == ISD::UNDEF) {
      Mask.push_back(-1);
      continue;
    }

    SDValue ExtractVec = Entry.getOperand(0);
    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
                                          .getOperand(1))->getSExtValue();
    // Rebase the element number onto the prepared shuffle source; elements of
    // the second source are numbered from NumElts upwards.
    if (ExtractVec == SourceVecs[0]) {
      Mask.push_back(ExtractElt - VEXTOffsets[0]);
    } else {
      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
    }
  }

  // Final check before we try to produce nonsense...
  if (isShuffleMaskLegal(Mask, VT))
    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
                                &Mask[0]);

  return SDValue();
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
                                      EVT VT) const {
  if (VT.getVectorNumElements() == 4 &&
      (VT.is128BitVector() || VT.is64BitVector())) {
    // Each of the four mask entries becomes a base-9 digit: 0-7 name an
    // element of the two sources and 8 stands for UNDEF.
    unsigned PFIndexes[4];
    for (unsigned i = 0; i != 4; ++i) {
      if (M[i] < 0)
        PFIndexes[i] = 8;
      else
        PFIndexes[i] = M[i];
    }

    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    // The cost is stored in the bits from bit 30 upwards.
    // NOTE(review): if PFEntry is 32 bits wide this value is at most 3, so
    // the test below would always pass -- confirm against the table.
    unsigned Cost = (PFEntry >> 30);

    if (Cost <= 4)
      return true;
  }

  // Out-parameters of the mask predicates; their values are not used here.
  bool ReverseVEXT;
  unsigned Imm, WhichResult;

  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
  return (EltSize >= 32 ||
          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
          isVREVMask(M, VT, 64) ||
          isVREVMask(M, VT, 32) ||
          isVREVMask(M, VT, 16) ||
          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
          isVTBLMask(M, VT) ||
          isVTRNMask(M, VT, WhichResult) ||
          isVUZPMask(M, VT, WhichResult) ||
          isVZIPMask(M, VT, WhichResult) ||
          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
          isVZIP_v_undef_Mask(M, VT, WhichResult) ||
          ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      DebugLoc dl) {
  // Entry layout as decoded here: bits 26-29 hold the operation, bits 13-25
  // the table index of the left operand and bits 0-12 the table index of the
  // right operand.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VREV,
    OP_VDUP0,
    OP_VDUP1,
    OP_VDUP2,
    OP_VDUP3,
    OP_VEXT1,
    OP_VEXT2,
    OP_VEXT3,
    OP_VUZPL, // VUZP, left result
    OP_VUZPR, // VUZP, right result
    OP_VZIPL, // VZIP, left result
    OP_VZIPR, // VZIP, right result
    OP_VTRNL, // VTRN, left result
    OP_VTRNR  // VTRN, right result
  };

  if (OpNum == OP_COPY) {
    // The two values compared here are the base-9 encodings of the masks
    // <0,1,2,3> (the left input unchanged) and <4,5,6,7> (the right input
    // unchanged).
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Build both operands first by recursing through the table entries they
  // refer to; the recursion ends at OP_COPY entries.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
  EVT VT = OpLHS.getValueType();

  switch (OpNum) {
  default: llvm_unreachable("Unknown shuffle opcode!");
  case OP_VREV:
    // VREV divides the vector in half and swaps within the half.
    if (VT.getVectorElementType() == MVT::i32 ||
        VT.getVectorElementType() == MVT::f32)
      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
    // vrev <4 x i16> -> VREV32
    if (VT.getVectorElementType() == MVT::i16)
      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
    // vrev <4 x i8> -> VREV16
    assert(VT.getVectorElementType() == MVT::i8);
    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
  case OP_VDUP0:
  case OP_VDUP1:
  case OP_VDUP2:
  case OP_VDUP3:
    // The lane number is the distance from OP_VDUP0.
    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
  case OP_VEXT1:
  case OP_VEXT2:
  case OP_VEXT3:
    // The extract position is 1, 2 or 3 for OP_VEXT1..OP_VEXT3.
    return DAG.getNode(ARMISD::VEXT, dl, VT,
                       OpLHS, OpRHS,
                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
  case OP_VUZPL:
  case OP_VUZPR:
    // The node has two results; pick the left (0) or right (1) one.
    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
  case OP_VZIPL:
  case OP_VZIPR:
    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
  case OP_VTRNL:
  case OP_VTRNR:
    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
  }
}

/// LowerVECTOR_SHUFFLEv8i8 - Lower a shuffle with a VTBL table lookup.  The
/// shuffle mask is turned into a constant v8i8 index vector; ARMISD::VTBL1 is
/// used when the second shuffle operand is UNDEF and ARMISD::VTBL2 otherwise.
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
                                       ArrayRef<int> ShuffleMask,
                                       SelectionDAG &DAG) {
  // Check to see if we can use the VTBL instruction.
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();

  // One i32 constant per mask entry; UNDEF entries are passed through as -1.
  SmallVector<SDValue, 8> VTBLMask;
  for (ArrayRef<int>::iterator
         I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
    VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));

  // Only the first 8 entries of VTBLMask are used below.
  if (V2.getNode()->getOpcode() == ISD::UNDEF)
    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
                                   &VTBLMask[0], 8));

  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
                                 &VTBLMask[0], 8));
}

/// LowerReverse_VECTOR_SHUFFLEv16i8_v8i16 - Lower a whole-vector reversal of a
/// v16i8 or v8i16 value as a VREV64 followed by a VEXT of the result with
/// itself.  Only the first shuffle operand is used.
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
                                                      SelectionDAG &DAG) {
  DebugLoc DL = Op.getDebugLoc();
  SDValue OpLHS = Op.getOperand(0);
  EVT VT = OpLHS.getValueType();

  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
         "Expect an v8i16/v16i8 type");
  OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
  // For a v16i8 type: after the VREV64 each 64-bit half is reversed in place,
  // i.e. the lanes are <7, ..., 0, 15, ..., 8>.  The VEXT of that value with
  // itself, starting half way through, then swaps the two halves and yields
  // <15, ..., 0>.  The v8i16 case is similar.
  unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
  return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
                     DAG.getConstant(ExtractNum, MVT::i32));
}

static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  // Convert shuffles that are directly supported on NEON to target-specific
  // DAG nodes, instead of keeping them as shuffles and matching them again
  // during code selection.  This is more efficient and avoids the possibility
  // of inconsistencies between legalization and selection.
  // FIXME: floating-point vectors should be canonicalized to integer vectors
  // of the same time so that they get CSEd properly.
  ArrayRef<int> ShuffleMask = SVN->getMask();

  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
  if (EltSize <= 32) {
    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
      int Lane = SVN->getSplatIndex();
      // If this is undef splat, generate it via "just" vdup, if possible.
      if (Lane == -1) Lane = 0;

      // Test if V1 is a SCALAR_TO_VECTOR.
      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
      }
      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
      // (and probably will turn into a SCALAR_TO_VECTOR once legalization
      // reaches it).
      if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
          !isa<ConstantSDNode>(V1.getOperand(0))) {
        bool IsScalarToVector = true;
        for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
          if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
            IsScalarToVector = false;
            break;
          }
        if (IsScalarToVector)
          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
      }
      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
                         DAG.getConstant(Lane, MVT::i32));
    }

    bool ReverseVEXT;
    unsigned Imm;
    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
      if (ReverseVEXT)
        std::swap(V1, V2);
      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
                         DAG.getConstant(Imm, MVT::i32));
    }

    if (isVREVMask(ShuffleMask, VT, 64))
      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
    if (isVREVMask(ShuffleMask, VT, 32))
      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
    if (isVREVMask(ShuffleMask, VT, 16))
      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);

    if (V2->getOpcode() == ISD::UNDEF &&
        isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
                         DAG.getConstant(Imm, MVT::i32));
    }

    // Check for Neon shuffles that modify both input vectors in place.
    // If both results are used, i.e., if there are two shuffles with the same
    // source operands and with masks corresponding to both results of one of
    // these operations, DAG memoization will ensure that a single node is
    // used for both shuffles.
    unsigned WhichResult;
    if (isVTRNMask(ShuffleMask, VT, WhichResult))
      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
                         V1, V2).getValue(WhichResult);
    if (isVUZPMask(ShuffleMask, VT, WhichResult))
      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
                         V1, V2).getValue(WhichResult);
    if (isVZIPMask(ShuffleMask, VT, WhichResult))
      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
                         V1, V2).getValue(WhichResult);

    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
                         V1, V1).getValue(WhichResult);
    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
                         V1, V1).getValue(WhichResult);
    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
                         V1, V1).getValue(WhichResult);
  }

  // If the shuffle is not directly supported and it has 4 elements, use
  // the PerfectShuffle-generated table to synthesize it from other shuffles.
4997 unsigned NumElts = VT.getVectorNumElements(); 4998 if (NumElts == 4) { 4999 unsigned PFIndexes[4]; 5000 for (unsigned i = 0; i != 4; ++i) { 5001 if (ShuffleMask[i] < 0) 5002 PFIndexes[i] = 8; 5003 else 5004 PFIndexes[i] = ShuffleMask[i]; 5005 } 5006 5007 // Compute the index in the perfect shuffle table. 5008 unsigned PFTableIndex = 5009 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5010 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5011 unsigned Cost = (PFEntry >> 30); 5012 5013 if (Cost <= 4) 5014 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5015 } 5016 5017 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 5018 if (EltSize >= 32) { 5019 // Do the expansion with floating-point types, since that is what the VFP 5020 // registers are defined to use, and since i64 is not legal. 5021 EVT EltVT = EVT::getFloatingPointVT(EltSize); 5022 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 5023 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 5024 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 5025 SmallVector<SDValue, 8> Ops; 5026 for (unsigned i = 0; i < NumElts; ++i) { 5027 if (ShuffleMask[i] < 0) 5028 Ops.push_back(DAG.getUNDEF(EltVT)); 5029 else 5030 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 5031 ShuffleMask[i] < (int)NumElts ? 
V1 : V2, 5032 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 5033 MVT::i32))); 5034 } 5035 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 5036 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 5037 } 5038 5039 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) 5040 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); 5041 5042 if (VT == MVT::v8i8) { 5043 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 5044 if (NewOp.getNode()) 5045 return NewOp; 5046 } 5047 5048 return SDValue(); 5049 } 5050 5051 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5052 // INSERT_VECTOR_ELT is legal only for immediate indexes. 5053 SDValue Lane = Op.getOperand(2); 5054 if (!isa<ConstantSDNode>(Lane)) 5055 return SDValue(); 5056 5057 return Op; 5058 } 5059 5060 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5061 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 5062 SDValue Lane = Op.getOperand(1); 5063 if (!isa<ConstantSDNode>(Lane)) 5064 return SDValue(); 5065 5066 SDValue Vec = Op.getOperand(0); 5067 if (Op.getValueType() == MVT::i32 && 5068 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 5069 DebugLoc dl = Op.getDebugLoc(); 5070 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 5071 } 5072 5073 return Op; 5074 } 5075 5076 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 5077 // The only time a CONCAT_VECTORS operation can have legal types is when 5078 // two 64-bit vectors are concatenated to a 128-bit vector. 
5079 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 5080 "unexpected CONCAT_VECTORS"); 5081 DebugLoc dl = Op.getDebugLoc(); 5082 SDValue Val = DAG.getUNDEF(MVT::v2f64); 5083 SDValue Op0 = Op.getOperand(0); 5084 SDValue Op1 = Op.getOperand(1); 5085 if (Op0.getOpcode() != ISD::UNDEF) 5086 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5087 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 5088 DAG.getIntPtrConstant(0)); 5089 if (Op1.getOpcode() != ISD::UNDEF) 5090 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5091 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 5092 DAG.getIntPtrConstant(1)); 5093 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 5094 } 5095 5096 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 5097 /// element has been zero/sign-extended, depending on the isSigned parameter, 5098 /// from an integer type half its size. 5099 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 5100 bool isSigned) { 5101 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 5102 EVT VT = N->getValueType(0); 5103 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 5104 SDNode *BVN = N->getOperand(0).getNode(); 5105 if (BVN->getValueType(0) != MVT::v4i32 || 5106 BVN->getOpcode() != ISD::BUILD_VECTOR) 5107 return false; 5108 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 
1 : 0; 5109 unsigned HiElt = 1 - LoElt; 5110 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 5111 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 5112 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 5113 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 5114 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 5115 return false; 5116 if (isSigned) { 5117 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 5118 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 5119 return true; 5120 } else { 5121 if (Hi0->isNullValue() && Hi1->isNullValue()) 5122 return true; 5123 } 5124 return false; 5125 } 5126 5127 if (N->getOpcode() != ISD::BUILD_VECTOR) 5128 return false; 5129 5130 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 5131 SDNode *Elt = N->getOperand(i).getNode(); 5132 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 5133 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5134 unsigned HalfSize = EltSize / 2; 5135 if (isSigned) { 5136 if (!isIntN(HalfSize, C->getSExtValue())) 5137 return false; 5138 } else { 5139 if (!isUIntN(HalfSize, C->getZExtValue())) 5140 return false; 5141 } 5142 continue; 5143 } 5144 return false; 5145 } 5146 5147 return true; 5148 } 5149 5150 /// isSignExtended - Check if a node is a vector value that is sign-extended 5151 /// or a constant BUILD_VECTOR with sign-extended elements. 5152 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 5153 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 5154 return true; 5155 if (isExtendedBUILD_VECTOR(N, DAG, true)) 5156 return true; 5157 return false; 5158 } 5159 5160 /// isZeroExtended - Check if a node is a vector value that is zero-extended 5161 /// or a constant BUILD_VECTOR with zero-extended elements. 
5162 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 5163 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 5164 return true; 5165 if (isExtendedBUILD_VECTOR(N, DAG, false)) 5166 return true; 5167 return false; 5168 } 5169 5170 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total 5171 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. 5172 /// We insert the required extension here to get the vector to fill a D register. 5173 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, 5174 const EVT &OrigTy, 5175 const EVT &ExtTy, 5176 unsigned ExtOpcode) { 5177 // The vector originally had a size of OrigTy. It was then extended to ExtTy. 5178 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than 5179 // 64-bits we need to insert a new extension so that it will be 64-bits. 5180 assert(ExtTy.is128BitVector() && "Unexpected extension size"); 5181 if (OrigTy.getSizeInBits() >= 64) 5182 return N; 5183 5184 // Must extend size to at least 64 bits to be used as an operand for VMULL. 5185 MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; 5186 EVT NewVT; 5187 switch (OrigSimpleTy) { 5188 default: llvm_unreachable("Unexpected Orig Vector Type"); 5189 case MVT::v2i8: 5190 case MVT::v2i16: 5191 NewVT = MVT::v2i32; 5192 break; 5193 case MVT::v4i8: 5194 NewVT = MVT::v4i16; 5195 break; 5196 } 5197 return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); 5198 } 5199 5200 /// SkipLoadExtensionForVMULL - return a load of the original vector size that 5201 /// does not do any sign/zero extension. If the original vector is less 5202 /// than 64 bits, an appropriate extension will be added after the load to 5203 /// reach a total size of 64 bits. We have to add the extension separately 5204 /// because ARM does not have a sign/zero extending load for vectors. 
5205 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { 5206 SDValue NonExtendingLoad = 5207 DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), 5208 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 5209 LD->isNonTemporal(), LD->isInvariant(), 5210 LD->getAlignment()); 5211 unsigned ExtOp = 0; 5212 switch (LD->getExtensionType()) { 5213 default: llvm_unreachable("Unexpected LoadExtType"); 5214 case ISD::EXTLOAD: 5215 case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; 5216 case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; 5217 } 5218 MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; 5219 MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; 5220 return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, 5221 MemType, ExtType, ExtOp); 5222 } 5223 5224 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, 5225 /// extending load, or BUILD_VECTOR with extended elements, return the 5226 /// unextended value. The unextended vector should be 64 bits so that it can 5227 /// be used as an operand to a VMULL instruction. If the original vector size 5228 /// before extension is less than 64 bits we add a an extension to resize 5229 /// the vector to 64 bits. 5230 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { 5231 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 5232 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, 5233 N->getOperand(0)->getValueType(0), 5234 N->getValueType(0), 5235 N->getOpcode()); 5236 5237 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 5238 return SkipLoadExtensionForVMULL(LD, DAG); 5239 5240 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 5241 // have been legalized as a BITCAST from v4i32. 
5242 if (N->getOpcode() == ISD::BITCAST) { 5243 SDNode *BVN = N->getOperand(0).getNode(); 5244 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 5245 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 5246 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 5247 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, 5248 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 5249 } 5250 // Construct a new BUILD_VECTOR with elements truncated to half the size. 5251 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 5252 EVT VT = N->getValueType(0); 5253 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 5254 unsigned NumElts = VT.getVectorNumElements(); 5255 MVT TruncVT = MVT::getIntegerVT(EltSize); 5256 SmallVector<SDValue, 8> Ops; 5257 for (unsigned i = 0; i != NumElts; ++i) { 5258 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 5259 const APInt &CInt = C->getAPIntValue(); 5260 // Element types smaller than 32 bits are not legal, so use i32 elements. 5261 // The values are implicitly truncated so sext vs. zext doesn't matter. 
5262 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); 5263 } 5264 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 5265 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 5266 } 5267 5268 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 5269 unsigned Opcode = N->getOpcode(); 5270 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 5271 SDNode *N0 = N->getOperand(0).getNode(); 5272 SDNode *N1 = N->getOperand(1).getNode(); 5273 return N0->hasOneUse() && N1->hasOneUse() && 5274 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 5275 } 5276 return false; 5277 } 5278 5279 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 5280 unsigned Opcode = N->getOpcode(); 5281 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 5282 SDNode *N0 = N->getOperand(0).getNode(); 5283 SDNode *N1 = N->getOperand(1).getNode(); 5284 return N0->hasOneUse() && N1->hasOneUse() && 5285 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 5286 } 5287 return false; 5288 } 5289 5290 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 5291 // Multiplications are only custom-lowered for 128-bit vectors so that 5292 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 5293 EVT VT = Op.getValueType(); 5294 assert(VT.is128BitVector() && VT.isInteger() && 5295 "unexpected type for custom-lowering ISD::MUL"); 5296 SDNode *N0 = Op.getOperand(0).getNode(); 5297 SDNode *N1 = Op.getOperand(1).getNode(); 5298 unsigned NewOpc = 0; 5299 bool isMLA = false; 5300 bool isN0SExt = isSignExtended(N0, DAG); 5301 bool isN1SExt = isSignExtended(N1, DAG); 5302 if (isN0SExt && isN1SExt) 5303 NewOpc = ARMISD::VMULLs; 5304 else { 5305 bool isN0ZExt = isZeroExtended(N0, DAG); 5306 bool isN1ZExt = isZeroExtended(N1, DAG); 5307 if (isN0ZExt && isN1ZExt) 5308 NewOpc = ARMISD::VMULLu; 5309 else if (isN1SExt || isN1ZExt) { 5310 // Look for (s/zext A + s/zext B) * (s/zext C). 
We want to turn these 5311 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 5312 if (isN1SExt && isAddSubSExt(N0, DAG)) { 5313 NewOpc = ARMISD::VMULLs; 5314 isMLA = true; 5315 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 5316 NewOpc = ARMISD::VMULLu; 5317 isMLA = true; 5318 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 5319 std::swap(N0, N1); 5320 NewOpc = ARMISD::VMULLu; 5321 isMLA = true; 5322 } 5323 } 5324 5325 if (!NewOpc) { 5326 if (VT == MVT::v2i64) 5327 // Fall through to expand this. It is not legal. 5328 return SDValue(); 5329 else 5330 // Other vector multiplications are legal. 5331 return Op; 5332 } 5333 } 5334 5335 // Legalize to a VMULL instruction. 5336 DebugLoc DL = Op.getDebugLoc(); 5337 SDValue Op0; 5338 SDValue Op1 = SkipExtensionForVMULL(N1, DAG); 5339 if (!isMLA) { 5340 Op0 = SkipExtensionForVMULL(N0, DAG); 5341 assert(Op0.getValueType().is64BitVector() && 5342 Op1.getValueType().is64BitVector() && 5343 "unexpected types for extended operands to VMULL"); 5344 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 5345 } 5346 5347 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 5348 // isel lowering to take advantage of no-stall back to back vmul + vmla. 
5349 // vmull q0, d4, d6 5350 // vmlal q0, d5, d6 5351 // is faster than 5352 // vaddl q0, d4, d5 5353 // vmovl q1, d6 5354 // vmul q0, q0, q1 5355 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); 5356 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); 5357 EVT Op1VT = Op1.getValueType(); 5358 return DAG.getNode(N0->getOpcode(), DL, VT, 5359 DAG.getNode(NewOpc, DL, VT, 5360 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 5361 DAG.getNode(NewOpc, DL, VT, 5362 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 5363 } 5364 5365 static SDValue 5366 LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { 5367 // Convert to float 5368 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 5369 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 5370 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 5371 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 5372 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 5373 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 5374 // Get reciprocal estimate. 5375 // float4 recip = vrecpeq_f32(yf); 5376 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5377 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 5378 // Because char has a smaller range than uchar, we can actually get away 5379 // without any newton steps. This requires that we use a weird bias 5380 // of 0xb000, however (again, this has been exhaustively tested). 5381 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 5382 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 5383 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 5384 Y = DAG.getConstant(0xb000, MVT::i32); 5385 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 5386 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 5387 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 5388 // Convert back to short. 
5389 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 5390 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 5391 return X; 5392 } 5393 5394 static SDValue 5395 LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { 5396 SDValue N2; 5397 // Convert to float. 5398 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 5399 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 5400 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 5401 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 5402 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 5403 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 5404 5405 // Use reciprocal estimate and one refinement step. 5406 // float4 recip = vrecpeq_f32(yf); 5407 // recip *= vrecpsq_f32(yf, recip); 5408 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5409 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 5410 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5411 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 5412 N1, N2); 5413 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 5414 // Because short has a smaller range than ushort, we can actually get away 5415 // with only a single newton step. This requires that we use a weird bias 5416 // of 89, however (again, this has been exhaustively tested). 5417 // float4 result = as_float4(as_int4(xf*recip) + 0x89); 5418 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 5419 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 5420 N1 = DAG.getConstant(0x89, MVT::i32); 5421 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 5422 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 5423 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 5424 // Convert back to integer and return. 
5425 // return vmovn_s32(vcvt_s32_f32(result)); 5426 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 5427 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 5428 return N0; 5429 } 5430 5431 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 5432 EVT VT = Op.getValueType(); 5433 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 5434 "unexpected type for custom-lowering ISD::SDIV"); 5435 5436 DebugLoc dl = Op.getDebugLoc(); 5437 SDValue N0 = Op.getOperand(0); 5438 SDValue N1 = Op.getOperand(1); 5439 SDValue N2, N3; 5440 5441 if (VT == MVT::v8i8) { 5442 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 5443 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 5444 5445 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5446 DAG.getIntPtrConstant(4)); 5447 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5448 DAG.getIntPtrConstant(4)); 5449 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5450 DAG.getIntPtrConstant(0)); 5451 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5452 DAG.getIntPtrConstant(0)); 5453 5454 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 5455 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 5456 5457 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 5458 N0 = LowerCONCAT_VECTORS(N0, DAG); 5459 5460 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 5461 return N0; 5462 } 5463 return LowerSDIV_v4i16(N0, N1, dl, DAG); 5464 } 5465 5466 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 5467 EVT VT = Op.getValueType(); 5468 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 5469 "unexpected type for custom-lowering ISD::UDIV"); 5470 5471 DebugLoc dl = Op.getDebugLoc(); 5472 SDValue N0 = Op.getOperand(0); 5473 SDValue N1 = Op.getOperand(1); 5474 SDValue N2, N3; 5475 5476 if (VT == MVT::v8i8) { 5477 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 5478 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 5479 5480 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, 
MVT::v4i16, N0, 5481 DAG.getIntPtrConstant(4)); 5482 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5483 DAG.getIntPtrConstant(4)); 5484 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5485 DAG.getIntPtrConstant(0)); 5486 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5487 DAG.getIntPtrConstant(0)); 5488 5489 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 5490 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 5491 5492 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 5493 N0 = LowerCONCAT_VECTORS(N0, DAG); 5494 5495 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 5496 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), 5497 N0); 5498 return N0; 5499 } 5500 5501 // v4i16 sdiv ... Convert to float. 5502 // float4 yf = vcvt_f32_s32(vmovl_u16(y)); 5503 // float4 xf = vcvt_f32_s32(vmovl_u16(x)); 5504 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); 5505 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); 5506 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 5507 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 5508 5509 // Use reciprocal estimate and two refinement steps. 
5510 // float4 recip = vrecpeq_f32(yf); 5511 // recip *= vrecpsq_f32(yf, recip); 5512 // recip *= vrecpsq_f32(yf, recip); 5513 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5514 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1); 5515 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5516 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 5517 BN1, N2); 5518 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 5519 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5520 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 5521 BN1, N2); 5522 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 5523 // Simply multiplying by the reciprocal estimate can leave us a few ulps 5524 // too low, so we add 2 ulps (exhaustive testing shows that this is enough, 5525 // and that it will never cause us to return an answer too large). 5526 // float4 result = as_float4(as_int4(xf*recip) + 2); 5527 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 5528 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 5529 N1 = DAG.getConstant(2, MVT::i32); 5530 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 5531 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 5532 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 5533 // Convert back to integer and return. 
5534 // return vmovn_u32(vcvt_s32_f32(result)); 5535 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 5536 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 5537 return N0; 5538 } 5539 5540 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { 5541 EVT VT = Op.getNode()->getValueType(0); 5542 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 5543 5544 unsigned Opc; 5545 bool ExtraOp = false; 5546 switch (Op.getOpcode()) { 5547 default: llvm_unreachable("Invalid code"); 5548 case ISD::ADDC: Opc = ARMISD::ADDC; break; 5549 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; 5550 case ISD::SUBC: Opc = ARMISD::SUBC; break; 5551 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; 5552 } 5553 5554 if (!ExtraOp) 5555 return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), 5556 Op.getOperand(1)); 5557 return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), 5558 Op.getOperand(1), Op.getOperand(2)); 5559 } 5560 5561 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { 5562 // Monotonic load/store is legal for all targets 5563 if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) 5564 return Op; 5565 5566 // Aquire/Release load/store is not legal for targets without a 5567 // dmb or equivalent available. 
5568 return SDValue(); 5569 } 5570 5571 5572 static void 5573 ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, 5574 SelectionDAG &DAG, unsigned NewOp) { 5575 DebugLoc dl = Node->getDebugLoc(); 5576 assert (Node->getValueType(0) == MVT::i64 && 5577 "Only know how to expand i64 atomics"); 5578 5579 SmallVector<SDValue, 6> Ops; 5580 Ops.push_back(Node->getOperand(0)); // Chain 5581 Ops.push_back(Node->getOperand(1)); // Ptr 5582 // Low part of Val1 5583 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5584 Node->getOperand(2), DAG.getIntPtrConstant(0))); 5585 // High part of Val1 5586 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5587 Node->getOperand(2), DAG.getIntPtrConstant(1))); 5588 if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { 5589 // High part of Val1 5590 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5591 Node->getOperand(3), DAG.getIntPtrConstant(0))); 5592 // High part of Val2 5593 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5594 Node->getOperand(3), DAG.getIntPtrConstant(1))); 5595 } 5596 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 5597 SDValue Result = 5598 DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, 5599 cast<MemSDNode>(Node)->getMemOperand()); 5600 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; 5601 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); 5602 Results.push_back(Result.getValue(2)); 5603 } 5604 5605 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5606 switch (Op.getOpcode()) { 5607 default: llvm_unreachable("Don't know how to custom lower this!"); 5608 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5609 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5610 case ISD::GlobalAddress: 5611 return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : 5612 LowerGlobalAddressELF(Op, DAG); 5613 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5614 case ISD::SELECT: return LowerSELECT(Op, DAG); 5615 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5616 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 5617 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 5618 case ISD::VASTART: return LowerVASTART(Op, DAG); 5619 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 5620 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 5621 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 5622 case ISD::SINT_TO_FP: 5623 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5624 case ISD::FP_TO_SINT: 5625 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 5626 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5627 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5628 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5629 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 5630 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 5631 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 5632 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 5633 Subtarget); 5634 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 5635 case ISD::SHL: 5636 case ISD::SRL: 5637 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 5638 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 5639 case ISD::SRL_PARTS: 5640 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 5641 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 5642 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); 5643 case ISD::SETCC: return LowerVSETCC(Op, DAG); 5644 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); 5645 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 5646 case ISD::VECTOR_SHUFFLE: return 
LowerVECTOR_SHUFFLE(Op, DAG); 5647 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5648 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5649 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 5650 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5651 case ISD::MUL: return LowerMUL(Op, DAG); 5652 case ISD::SDIV: return LowerSDIV(Op, DAG); 5653 case ISD::UDIV: return LowerUDIV(Op, DAG); 5654 case ISD::ADDC: 5655 case ISD::ADDE: 5656 case ISD::SUBC: 5657 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 5658 case ISD::ATOMIC_LOAD: 5659 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 5660 } 5661 } 5662 5663 /// ReplaceNodeResults - Replace the results of node with an illegal result 5664 /// type with new values built out of custom code. 5665 void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 5666 SmallVectorImpl<SDValue>&Results, 5667 SelectionDAG &DAG) const { 5668 SDValue Res; 5669 switch (N->getOpcode()) { 5670 default: 5671 llvm_unreachable("Don't know how to custom expand this!"); 5672 case ISD::BITCAST: 5673 Res = ExpandBITCAST(N, DAG); 5674 break; 5675 case ISD::SIGN_EXTEND: 5676 case ISD::ZERO_EXTEND: 5677 Res = ExpandVectorExtension(N, DAG); 5678 break; 5679 case ISD::SRL: 5680 case ISD::SRA: 5681 Res = Expand64BitShift(N, DAG, Subtarget); 5682 break; 5683 case ISD::ATOMIC_LOAD_ADD: 5684 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); 5685 return; 5686 case ISD::ATOMIC_LOAD_AND: 5687 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); 5688 return; 5689 case ISD::ATOMIC_LOAD_NAND: 5690 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); 5691 return; 5692 case ISD::ATOMIC_LOAD_OR: 5693 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); 5694 return; 5695 case ISD::ATOMIC_LOAD_SUB: 5696 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); 5697 return; 5698 case ISD::ATOMIC_LOAD_XOR: 5699 ReplaceATOMIC_OP_64(N, Results, DAG, 
ARMISD::ATOMXOR64_DAG); 5700 return; 5701 case ISD::ATOMIC_SWAP: 5702 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); 5703 return; 5704 case ISD::ATOMIC_CMP_SWAP: 5705 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); 5706 return; 5707 case ISD::ATOMIC_LOAD_MIN: 5708 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG); 5709 return; 5710 case ISD::ATOMIC_LOAD_UMIN: 5711 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); 5712 return; 5713 case ISD::ATOMIC_LOAD_MAX: 5714 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); 5715 return; 5716 case ISD::ATOMIC_LOAD_UMAX: 5717 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); 5718 return; 5719 } 5720 if (Res.getNode()) 5721 Results.push_back(Res); 5722 } 5723 5724 //===----------------------------------------------------------------------===// 5725 // ARM Scheduler Hooks 5726 //===----------------------------------------------------------------------===// 5727 5728 MachineBasicBlock * 5729 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 5730 MachineBasicBlock *BB, 5731 unsigned Size) const { 5732 unsigned dest = MI->getOperand(0).getReg(); 5733 unsigned ptr = MI->getOperand(1).getReg(); 5734 unsigned oldval = MI->getOperand(2).getReg(); 5735 unsigned newval = MI->getOperand(3).getReg(); 5736 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5737 DebugLoc dl = MI->getDebugLoc(); 5738 bool isThumb2 = Subtarget->isThumb2(); 5739 5740 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5741 unsigned scratch = MRI.createVirtualRegister(isThumb2 ? 
5742 (const TargetRegisterClass*)&ARM::rGPRRegClass : 5743 (const TargetRegisterClass*)&ARM::GPRRegClass); 5744 5745 if (isThumb2) { 5746 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 5747 MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); 5748 MRI.constrainRegClass(newval, &ARM::rGPRRegClass); 5749 } 5750 5751 unsigned ldrOpc, strOpc; 5752 switch (Size) { 5753 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5754 case 1: 5755 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5756 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5757 break; 5758 case 2: 5759 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5760 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5761 break; 5762 case 4: 5763 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5764 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5765 break; 5766 } 5767 5768 MachineFunction *MF = BB->getParent(); 5769 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5770 MachineFunction::iterator It = BB; 5771 ++It; // insert the new blocks after the current block 5772 5773 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5774 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5775 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5776 MF->insert(It, loop1MBB); 5777 MF->insert(It, loop2MBB); 5778 MF->insert(It, exitMBB); 5779 5780 // Transfer the remainder of BB and its successor edges to exitMBB. 5781 exitMBB->splice(exitMBB->begin(), BB, 5782 llvm::next(MachineBasicBlock::iterator(MI)), 5783 BB->end()); 5784 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5785 5786 // thisMBB: 5787 // ... 
5788 // fallthrough --> loop1MBB 5789 BB->addSuccessor(loop1MBB); 5790 5791 // loop1MBB: 5792 // ldrex dest, [ptr] 5793 // cmp dest, oldval 5794 // bne exitMBB 5795 BB = loop1MBB; 5796 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5797 if (ldrOpc == ARM::t2LDREX) 5798 MIB.addImm(0); 5799 AddDefaultPred(MIB); 5800 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5801 .addReg(dest).addReg(oldval)); 5802 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5803 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5804 BB->addSuccessor(loop2MBB); 5805 BB->addSuccessor(exitMBB); 5806 5807 // loop2MBB: 5808 // strex scratch, newval, [ptr] 5809 // cmp scratch, #0 5810 // bne loop1MBB 5811 BB = loop2MBB; 5812 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); 5813 if (strOpc == ARM::t2STREX) 5814 MIB.addImm(0); 5815 AddDefaultPred(MIB); 5816 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5817 .addReg(scratch).addImm(0)); 5818 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5819 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5820 BB->addSuccessor(loop1MBB); 5821 BB->addSuccessor(exitMBB); 5822 5823 // exitMBB: 5824 // ... 5825 BB = exitMBB; 5826 5827 MI->eraseFromParent(); // The instruction is gone now. 5828 5829 return BB; 5830 } 5831 5832 MachineBasicBlock * 5833 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 5834 unsigned Size, unsigned BinOpcode) const { 5835 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
5836 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5837 5838 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5839 MachineFunction *MF = BB->getParent(); 5840 MachineFunction::iterator It = BB; 5841 ++It; 5842 5843 unsigned dest = MI->getOperand(0).getReg(); 5844 unsigned ptr = MI->getOperand(1).getReg(); 5845 unsigned incr = MI->getOperand(2).getReg(); 5846 DebugLoc dl = MI->getDebugLoc(); 5847 bool isThumb2 = Subtarget->isThumb2(); 5848 5849 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5850 if (isThumb2) { 5851 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 5852 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 5853 } 5854 5855 unsigned ldrOpc, strOpc; 5856 switch (Size) { 5857 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5858 case 1: 5859 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5860 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5861 break; 5862 case 2: 5863 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5864 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5865 break; 5866 case 4: 5867 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5868 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5869 break; 5870 } 5871 5872 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5873 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5874 MF->insert(It, loopMBB); 5875 MF->insert(It, exitMBB); 5876 5877 // Transfer the remainder of BB and its successor edges to exitMBB. 5878 exitMBB->splice(exitMBB->begin(), BB, 5879 llvm::next(MachineBasicBlock::iterator(MI)), 5880 BB->end()); 5881 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5882 5883 const TargetRegisterClass *TRC = isThumb2 ? 5884 (const TargetRegisterClass*)&ARM::rGPRRegClass : 5885 (const TargetRegisterClass*)&ARM::GPRRegClass; 5886 unsigned scratch = MRI.createVirtualRegister(TRC); 5887 unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); 5888 5889 // thisMBB: 5890 // ... 
5891 // fallthrough --> loopMBB 5892 BB->addSuccessor(loopMBB); 5893 5894 // loopMBB: 5895 // ldrex dest, ptr 5896 // <binop> scratch2, dest, incr 5897 // strex scratch, scratch2, ptr 5898 // cmp scratch, #0 5899 // bne- loopMBB 5900 // fallthrough --> exitMBB 5901 BB = loopMBB; 5902 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5903 if (ldrOpc == ARM::t2LDREX) 5904 MIB.addImm(0); 5905 AddDefaultPred(MIB); 5906 if (BinOpcode) { 5907 // operand order needs to go the other way for NAND 5908 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 5909 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5910 addReg(incr).addReg(dest)).addReg(0); 5911 else 5912 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5913 addReg(dest).addReg(incr)).addReg(0); 5914 } 5915 5916 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5917 if (strOpc == ARM::t2STREX) 5918 MIB.addImm(0); 5919 AddDefaultPred(MIB); 5920 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5921 .addReg(scratch).addImm(0)); 5922 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5923 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5924 5925 BB->addSuccessor(loopMBB); 5926 BB->addSuccessor(exitMBB); 5927 5928 // exitMBB: 5929 // ... 5930 BB = exitMBB; 5931 5932 MI->eraseFromParent(); // The instruction is gone now. 
5933 5934 return BB; 5935 } 5936 5937 MachineBasicBlock * 5938 ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 5939 MachineBasicBlock *BB, 5940 unsigned Size, 5941 bool signExtend, 5942 ARMCC::CondCodes Cond) const { 5943 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5944 5945 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5946 MachineFunction *MF = BB->getParent(); 5947 MachineFunction::iterator It = BB; 5948 ++It; 5949 5950 unsigned dest = MI->getOperand(0).getReg(); 5951 unsigned ptr = MI->getOperand(1).getReg(); 5952 unsigned incr = MI->getOperand(2).getReg(); 5953 unsigned oldval = dest; 5954 DebugLoc dl = MI->getDebugLoc(); 5955 bool isThumb2 = Subtarget->isThumb2(); 5956 5957 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5958 if (isThumb2) { 5959 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 5960 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 5961 } 5962 5963 unsigned ldrOpc, strOpc, extendOpc; 5964 switch (Size) { 5965 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5966 case 1: 5967 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5968 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5969 extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; 5970 break; 5971 case 2: 5972 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5973 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5974 extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 5975 break; 5976 case 4: 5977 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5978 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5979 extendOpc = 0; 5980 break; 5981 } 5982 5983 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5984 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5985 MF->insert(It, loopMBB); 5986 MF->insert(It, exitMBB); 5987 5988 // Transfer the remainder of BB and its successor edges to exitMBB. 
5989 exitMBB->splice(exitMBB->begin(), BB, 5990 llvm::next(MachineBasicBlock::iterator(MI)), 5991 BB->end()); 5992 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5993 5994 const TargetRegisterClass *TRC = isThumb2 ? 5995 (const TargetRegisterClass*)&ARM::rGPRRegClass : 5996 (const TargetRegisterClass*)&ARM::GPRRegClass; 5997 unsigned scratch = MRI.createVirtualRegister(TRC); 5998 unsigned scratch2 = MRI.createVirtualRegister(TRC); 5999 6000 // thisMBB: 6001 // ... 6002 // fallthrough --> loopMBB 6003 BB->addSuccessor(loopMBB); 6004 6005 // loopMBB: 6006 // ldrex dest, ptr 6007 // (sign extend dest, if required) 6008 // cmp dest, incr 6009 // cmov.cond scratch2, incr, dest 6010 // strex scratch, scratch2, ptr 6011 // cmp scratch, #0 6012 // bne- loopMBB 6013 // fallthrough --> exitMBB 6014 BB = loopMBB; 6015 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6016 if (ldrOpc == ARM::t2LDREX) 6017 MIB.addImm(0); 6018 AddDefaultPred(MIB); 6019 6020 // Sign extend the value, if necessary. 6021 if (signExtend && extendOpc) { 6022 oldval = MRI.createVirtualRegister(&ARM::GPRRegClass); 6023 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) 6024 .addReg(dest) 6025 .addImm(0)); 6026 } 6027 6028 // Build compare and cmov instructions. 6029 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6030 .addReg(oldval).addReg(incr)); 6031 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 6032 .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); 6033 6034 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 6035 if (strOpc == ARM::t2STREX) 6036 MIB.addImm(0); 6037 AddDefaultPred(MIB); 6038 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6039 .addReg(scratch).addImm(0)); 6040 BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2Bcc : ARM::Bcc)) 6041 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6042 6043 BB->addSuccessor(loopMBB); 6044 BB->addSuccessor(exitMBB); 6045 6046 // exitMBB: 6047 // ... 6048 BB = exitMBB; 6049 6050 MI->eraseFromParent(); // The instruction is gone now. 6051 6052 return BB; 6053 } 6054 6055 MachineBasicBlock * 6056 ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, 6057 unsigned Op1, unsigned Op2, 6058 bool NeedsCarry, bool IsCmpxchg, 6059 bool IsMinMax, ARMCC::CondCodes CC) const { 6060 // This also handles ATOMIC_SWAP, indicated by Op1==0. 6061 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6062 6063 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6064 MachineFunction *MF = BB->getParent(); 6065 MachineFunction::iterator It = BB; 6066 ++It; 6067 6068 unsigned destlo = MI->getOperand(0).getReg(); 6069 unsigned desthi = MI->getOperand(1).getReg(); 6070 unsigned ptr = MI->getOperand(2).getReg(); 6071 unsigned vallo = MI->getOperand(3).getReg(); 6072 unsigned valhi = MI->getOperand(4).getReg(); 6073 DebugLoc dl = MI->getDebugLoc(); 6074 bool isThumb2 = Subtarget->isThumb2(); 6075 6076 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6077 if (isThumb2) { 6078 MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); 6079 MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); 6080 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6081 } 6082 6083 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6084 MachineBasicBlock *contBB = 0, *cont2BB = 0; 6085 if (IsCmpxchg || IsMinMax) 6086 contBB = MF->CreateMachineBasicBlock(LLVM_BB); 6087 if (IsCmpxchg) 6088 cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); 6089 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6090 6091 MF->insert(It, loopMBB); 6092 if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); 6093 if (IsCmpxchg) MF->insert(It, cont2BB); 6094 MF->insert(It, exitMBB); 6095 6096 // Transfer the remainder of BB and its successor 
edges to exitMBB. 6097 exitMBB->splice(exitMBB->begin(), BB, 6098 llvm::next(MachineBasicBlock::iterator(MI)), 6099 BB->end()); 6100 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6101 6102 const TargetRegisterClass *TRC = isThumb2 ? 6103 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6104 (const TargetRegisterClass*)&ARM::GPRRegClass; 6105 unsigned storesuccess = MRI.createVirtualRegister(TRC); 6106 6107 // thisMBB: 6108 // ... 6109 // fallthrough --> loopMBB 6110 BB->addSuccessor(loopMBB); 6111 6112 // loopMBB: 6113 // ldrexd r2, r3, ptr 6114 // <binopa> r0, r2, incr 6115 // <binopb> r1, r3, incr 6116 // strexd storesuccess, r0, r1, ptr 6117 // cmp storesuccess, #0 6118 // bne- loopMBB 6119 // fallthrough --> exitMBB 6120 BB = loopMBB; 6121 6122 // Load 6123 if (isThumb2) { 6124 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) 6125 .addReg(destlo, RegState::Define) 6126 .addReg(desthi, RegState::Define) 6127 .addReg(ptr)); 6128 } else { 6129 unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6130 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) 6131 .addReg(GPRPair0, RegState::Define).addReg(ptr)); 6132 // Copy r2/r3 into dest. (This copy will normally be coalesced.) 6133 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) 6134 .addReg(GPRPair0, 0, ARM::gsub_0); 6135 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) 6136 .addReg(GPRPair0, 0, ARM::gsub_1); 6137 } 6138 6139 unsigned StoreLo, StoreHi; 6140 if (IsCmpxchg) { 6141 // Add early exit 6142 for (unsigned i = 0; i < 2; i++) { 6143 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : 6144 ARM::CMPrr)) 6145 .addReg(i == 0 ? destlo : desthi) 6146 .addReg(i == 0 ? vallo : valhi)); 6147 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6148 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6149 BB->addSuccessor(exitMBB); 6150 BB->addSuccessor(i == 0 ? contBB : cont2BB); 6151 BB = (i == 0 ? 
contBB : cont2BB); 6152 } 6153 6154 // Copy to physregs for strexd 6155 StoreLo = MI->getOperand(5).getReg(); 6156 StoreHi = MI->getOperand(6).getReg(); 6157 } else if (Op1) { 6158 // Perform binary operation 6159 unsigned tmpRegLo = MRI.createVirtualRegister(TRC); 6160 AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) 6161 .addReg(destlo).addReg(vallo)) 6162 .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); 6163 unsigned tmpRegHi = MRI.createVirtualRegister(TRC); 6164 AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) 6165 .addReg(desthi).addReg(valhi)) 6166 .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); 6167 6168 StoreLo = tmpRegLo; 6169 StoreHi = tmpRegHi; 6170 } else { 6171 // Copy to physregs for strexd 6172 StoreLo = vallo; 6173 StoreHi = valhi; 6174 } 6175 if (IsMinMax) { 6176 // Compare and branch to exit block. 6177 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6178 .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); 6179 BB->addSuccessor(exitMBB); 6180 BB->addSuccessor(contBB); 6181 BB = contBB; 6182 StoreLo = vallo; 6183 StoreHi = valhi; 6184 } 6185 6186 // Store 6187 if (isThumb2) { 6188 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) 6189 .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); 6190 } else { 6191 // Marshal a pair... 
6192 unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6193 unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6194 unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6195 BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); 6196 BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) 6197 .addReg(UndefPair) 6198 .addReg(StoreLo) 6199 .addImm(ARM::gsub_0); 6200 BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) 6201 .addReg(r1) 6202 .addReg(StoreHi) 6203 .addImm(ARM::gsub_1); 6204 6205 // ...and store it 6206 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) 6207 .addReg(StorePair).addReg(ptr)); 6208 } 6209 // Cmp+jump 6210 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6211 .addReg(storesuccess).addImm(0)); 6212 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6213 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6214 6215 BB->addSuccessor(loopMBB); 6216 BB->addSuccessor(exitMBB); 6217 6218 // exitMBB: 6219 // ... 6220 BB = exitMBB; 6221 6222 MI->eraseFromParent(); // The instruction is gone now. 6223 6224 return BB; 6225 } 6226 6227 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and 6228 /// registers the function context. 
6229 void ARMTargetLowering:: 6230 SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, 6231 MachineBasicBlock *DispatchBB, int FI) const { 6232 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6233 DebugLoc dl = MI->getDebugLoc(); 6234 MachineFunction *MF = MBB->getParent(); 6235 MachineRegisterInfo *MRI = &MF->getRegInfo(); 6236 MachineConstantPool *MCP = MF->getConstantPool(); 6237 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 6238 const Function *F = MF->getFunction(); 6239 6240 bool isThumb = Subtarget->isThumb(); 6241 bool isThumb2 = Subtarget->isThumb2(); 6242 6243 unsigned PCLabelId = AFI->createPICLabelUId(); 6244 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; 6245 ARMConstantPoolValue *CPV = 6246 ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); 6247 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); 6248 6249 const TargetRegisterClass *TRC = isThumb ? 6250 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6251 (const TargetRegisterClass*)&ARM::GPRRegClass; 6252 6253 // Grab constant pool and fixed stack memory operands. 6254 MachineMemOperand *CPMMO = 6255 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), 6256 MachineMemOperand::MOLoad, 4, 4); 6257 6258 MachineMemOperand *FIMMOSt = 6259 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 6260 MachineMemOperand::MOStore, 4, 4); 6261 6262 // Load the address of the dispatch MBB into the jump buffer. 6263 if (isThumb2) { 6264 // Incoming value: jbuf 6265 // ldr.n r5, LCPI1_1 6266 // orr r5, r5, #1 6267 // add r5, pc 6268 // str r5, [$jbuf, #+4] ; &jbuf[1] 6269 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6270 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) 6271 .addConstantPoolIndex(CPI) 6272 .addMemOperand(CPMMO)); 6273 // Set the low bit because of thumb mode. 
6274 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6275 AddDefaultCC( 6276 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) 6277 .addReg(NewVReg1, RegState::Kill) 6278 .addImm(0x01))); 6279 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6280 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) 6281 .addReg(NewVReg2, RegState::Kill) 6282 .addImm(PCLabelId); 6283 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) 6284 .addReg(NewVReg3, RegState::Kill) 6285 .addFrameIndex(FI) 6286 .addImm(36) // &jbuf[1] :: pc 6287 .addMemOperand(FIMMOSt)); 6288 } else if (isThumb) { 6289 // Incoming value: jbuf 6290 // ldr.n r1, LCPI1_4 6291 // add r1, pc 6292 // mov r2, #1 6293 // orrs r1, r2 6294 // add r2, $jbuf, #+4 ; &jbuf[1] 6295 // str r1, [r2] 6296 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6297 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) 6298 .addConstantPoolIndex(CPI) 6299 .addMemOperand(CPMMO)); 6300 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6301 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) 6302 .addReg(NewVReg1, RegState::Kill) 6303 .addImm(PCLabelId); 6304 // Set the low bit because of thumb mode. 
6305 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6306 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) 6307 .addReg(ARM::CPSR, RegState::Define) 6308 .addImm(1)); 6309 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6310 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) 6311 .addReg(ARM::CPSR, RegState::Define) 6312 .addReg(NewVReg2, RegState::Kill) 6313 .addReg(NewVReg3, RegState::Kill)); 6314 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6315 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) 6316 .addFrameIndex(FI) 6317 .addImm(36)); // &jbuf[1] :: pc 6318 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) 6319 .addReg(NewVReg4, RegState::Kill) 6320 .addReg(NewVReg5, RegState::Kill) 6321 .addImm(0) 6322 .addMemOperand(FIMMOSt)); 6323 } else { 6324 // Incoming value: jbuf 6325 // ldr r1, LCPI1_1 6326 // add r1, pc, r1 6327 // str r1, [$jbuf, #+4] ; &jbuf[1] 6328 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6329 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) 6330 .addConstantPoolIndex(CPI) 6331 .addImm(0) 6332 .addMemOperand(CPMMO)); 6333 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6334 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) 6335 .addReg(NewVReg1, RegState::Kill) 6336 .addImm(PCLabelId)); 6337 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) 6338 .addReg(NewVReg2, RegState::Kill) 6339 .addFrameIndex(FI) 6340 .addImm(36) // &jbuf[1] :: pc 6341 .addMemOperand(FIMMOSt)); 6342 } 6343 } 6344 6345 MachineBasicBlock *ARMTargetLowering:: 6346 EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { 6347 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6348 DebugLoc dl = MI->getDebugLoc(); 6349 MachineFunction *MF = MBB->getParent(); 6350 MachineRegisterInfo *MRI = &MF->getRegInfo(); 6351 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 6352 MachineFrameInfo *MFI = 
MF->getFrameInfo(); 6353 int FI = MFI->getFunctionContextIndex(); 6354 6355 const TargetRegisterClass *TRC = Subtarget->isThumb() ? 6356 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6357 (const TargetRegisterClass*)&ARM::GPRnopcRegClass; 6358 6359 // Get a mapping of the call site numbers to all of the landing pads they're 6360 // associated with. 6361 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; 6362 unsigned MaxCSNum = 0; 6363 MachineModuleInfo &MMI = MF->getMMI(); 6364 for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; 6365 ++BB) { 6366 if (!BB->isLandingPad()) continue; 6367 6368 // FIXME: We should assert that the EH_LABEL is the first MI in the landing 6369 // pad. 6370 for (MachineBasicBlock::iterator 6371 II = BB->begin(), IE = BB->end(); II != IE; ++II) { 6372 if (!II->isEHLabel()) continue; 6373 6374 MCSymbol *Sym = II->getOperand(0).getMCSymbol(); 6375 if (!MMI.hasCallSiteLandingPad(Sym)) continue; 6376 6377 SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym); 6378 for (SmallVectorImpl<unsigned>::iterator 6379 CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); 6380 CSI != CSE; ++CSI) { 6381 CallSiteNumToLPad[*CSI].push_back(BB); 6382 MaxCSNum = std::max(MaxCSNum, *CSI); 6383 } 6384 break; 6385 } 6386 } 6387 6388 // Get an ordered list of the machine basic blocks for the jump table. 
6389 std::vector<MachineBasicBlock*> LPadList; 6390 SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs; 6391 LPadList.reserve(CallSiteNumToLPad.size()); 6392 for (unsigned I = 1; I <= MaxCSNum; ++I) { 6393 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I]; 6394 for (SmallVectorImpl<MachineBasicBlock*>::iterator 6395 II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { 6396 LPadList.push_back(*II); 6397 InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); 6398 } 6399 } 6400 6401 assert(!LPadList.empty() && 6402 "No landing pad destinations for the dispatch jump table!"); 6403 6404 // Create the jump table and associated information. 6405 MachineJumpTableInfo *JTI = 6406 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); 6407 unsigned MJTI = JTI->createJumpTableIndex(LPadList); 6408 unsigned UId = AFI->createJumpTableUId(); 6409 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 6410 6411 // Create the MBBs for the dispatch code. 6412 6413 // Shove the dispatch's address into the return slot in the function context. 6414 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); 6415 DispatchBB->setIsLandingPad(); 6416 6417 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); 6418 unsigned trap_opcode; 6419 if (Subtarget->isThumb()) 6420 trap_opcode = ARM::tTRAP; 6421 else 6422 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP; 6423 6424 BuildMI(TrapBB, dl, TII->get(trap_opcode)); 6425 DispatchBB->addSuccessor(TrapBB); 6426 6427 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); 6428 DispatchBB->addSuccessor(DispContBB); 6429 6430 // Insert and MBBs. 6431 MF->insert(MF->end(), DispatchBB); 6432 MF->insert(MF->end(), DispContBB); 6433 MF->insert(MF->end(), TrapBB); 6434 6435 // Insert code into the entry block that creates and registers the function 6436 // context. 
6437 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); 6438 6439 MachineMemOperand *FIMMOLd = 6440 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 6441 MachineMemOperand::MOLoad | 6442 MachineMemOperand::MOVolatile, 4, 4); 6443 6444 MachineInstrBuilder MIB; 6445 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); 6446 6447 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); 6448 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 6449 6450 // Add a register mask with no preserved registers. This results in all 6451 // registers being marked as clobbered. 6452 MIB.addRegMask(RI.getNoPreservedMask()); 6453 6454 unsigned NumLPads = LPadList.size(); 6455 if (Subtarget->isThumb2()) { 6456 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6457 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) 6458 .addFrameIndex(FI) 6459 .addImm(4) 6460 .addMemOperand(FIMMOLd)); 6461 6462 if (NumLPads < 256) { 6463 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) 6464 .addReg(NewVReg1) 6465 .addImm(LPadList.size())); 6466 } else { 6467 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6468 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) 6469 .addImm(NumLPads & 0xFFFF)); 6470 6471 unsigned VReg2 = VReg1; 6472 if ((NumLPads & 0xFFFF0000) != 0) { 6473 VReg2 = MRI->createVirtualRegister(TRC); 6474 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) 6475 .addReg(VReg1) 6476 .addImm(NumLPads >> 16)); 6477 } 6478 6479 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) 6480 .addReg(NewVReg1) 6481 .addReg(VReg2)); 6482 } 6483 6484 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) 6485 .addMBB(TrapBB) 6486 .addImm(ARMCC::HI) 6487 .addReg(ARM::CPSR); 6488 6489 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6490 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) 6491 .addJumpTableIndex(MJTI) 6492 .addImm(UId)); 6493 
6494 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6495 AddDefaultCC( 6496 AddDefaultPred( 6497 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) 6498 .addReg(NewVReg3, RegState::Kill) 6499 .addReg(NewVReg1) 6500 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 6501 6502 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) 6503 .addReg(NewVReg4, RegState::Kill) 6504 .addReg(NewVReg1) 6505 .addJumpTableIndex(MJTI) 6506 .addImm(UId); 6507 } else if (Subtarget->isThumb()) { 6508 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6509 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) 6510 .addFrameIndex(FI) 6511 .addImm(1) 6512 .addMemOperand(FIMMOLd)); 6513 6514 if (NumLPads < 256) { 6515 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) 6516 .addReg(NewVReg1) 6517 .addImm(NumLPads)); 6518 } else { 6519 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6520 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6521 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 6522 6523 // MachineConstantPool wants an explicit alignment. 
6524 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 6525 if (Align == 0) 6526 Align = getDataLayout()->getTypeAllocSize(C->getType()); 6527 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6528 6529 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6530 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) 6531 .addReg(VReg1, RegState::Define) 6532 .addConstantPoolIndex(Idx)); 6533 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) 6534 .addReg(NewVReg1) 6535 .addReg(VReg1)); 6536 } 6537 6538 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) 6539 .addMBB(TrapBB) 6540 .addImm(ARMCC::HI) 6541 .addReg(ARM::CPSR); 6542 6543 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6544 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) 6545 .addReg(ARM::CPSR, RegState::Define) 6546 .addReg(NewVReg1) 6547 .addImm(2)); 6548 6549 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6550 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) 6551 .addJumpTableIndex(MJTI) 6552 .addImm(UId)); 6553 6554 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6555 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) 6556 .addReg(ARM::CPSR, RegState::Define) 6557 .addReg(NewVReg2, RegState::Kill) 6558 .addReg(NewVReg3)); 6559 6560 MachineMemOperand *JTMMOLd = 6561 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 6562 MachineMemOperand::MOLoad, 4, 4); 6563 6564 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6565 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) 6566 .addReg(NewVReg4, RegState::Kill) 6567 .addImm(0) 6568 .addMemOperand(JTMMOLd)); 6569 6570 unsigned NewVReg6 = NewVReg5; 6571 if (RelocM == Reloc::PIC_) { 6572 NewVReg6 = MRI->createVirtualRegister(TRC); 6573 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) 6574 .addReg(ARM::CPSR, RegState::Define) 6575 .addReg(NewVReg5, RegState::Kill) 6576 
.addReg(NewVReg3)); 6577 } 6578 6579 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) 6580 .addReg(NewVReg6, RegState::Kill) 6581 .addJumpTableIndex(MJTI) 6582 .addImm(UId); 6583 } else { 6584 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6585 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) 6586 .addFrameIndex(FI) 6587 .addImm(4) 6588 .addMemOperand(FIMMOLd)); 6589 6590 if (NumLPads < 256) { 6591 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) 6592 .addReg(NewVReg1) 6593 .addImm(NumLPads)); 6594 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { 6595 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6596 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) 6597 .addImm(NumLPads & 0xFFFF)); 6598 6599 unsigned VReg2 = VReg1; 6600 if ((NumLPads & 0xFFFF0000) != 0) { 6601 VReg2 = MRI->createVirtualRegister(TRC); 6602 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) 6603 .addReg(VReg1) 6604 .addImm(NumLPads >> 16)); 6605 } 6606 6607 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 6608 .addReg(NewVReg1) 6609 .addReg(VReg2)); 6610 } else { 6611 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6612 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6613 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 6614 6615 // MachineConstantPool wants an explicit alignment. 
6616 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 6617 if (Align == 0) 6618 Align = getDataLayout()->getTypeAllocSize(C->getType()); 6619 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6620 6621 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6622 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) 6623 .addReg(VReg1, RegState::Define) 6624 .addConstantPoolIndex(Idx) 6625 .addImm(0)); 6626 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 6627 .addReg(NewVReg1) 6628 .addReg(VReg1, RegState::Kill)); 6629 } 6630 6631 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) 6632 .addMBB(TrapBB) 6633 .addImm(ARMCC::HI) 6634 .addReg(ARM::CPSR); 6635 6636 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6637 AddDefaultCC( 6638 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) 6639 .addReg(NewVReg1) 6640 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 6641 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6642 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) 6643 .addJumpTableIndex(MJTI) 6644 .addImm(UId)); 6645 6646 MachineMemOperand *JTMMOLd = 6647 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 6648 MachineMemOperand::MOLoad, 4, 4); 6649 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6650 AddDefaultPred( 6651 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) 6652 .addReg(NewVReg3, RegState::Kill) 6653 .addReg(NewVReg4) 6654 .addImm(0) 6655 .addMemOperand(JTMMOLd)); 6656 6657 if (RelocM == Reloc::PIC_) { 6658 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) 6659 .addReg(NewVReg5, RegState::Kill) 6660 .addReg(NewVReg4) 6661 .addJumpTableIndex(MJTI) 6662 .addImm(UId); 6663 } else { 6664 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr)) 6665 .addReg(NewVReg5, RegState::Kill) 6666 .addJumpTableIndex(MJTI) 6667 .addImm(UId); 6668 } 6669 } 6670 6671 // Add the jump table entries as successors to the MBB. 
6672 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; 6673 for (std::vector<MachineBasicBlock*>::iterator 6674 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { 6675 MachineBasicBlock *CurMBB = *I; 6676 if (SeenMBBs.insert(CurMBB)) 6677 DispContBB->addSuccessor(CurMBB); 6678 } 6679 6680 // N.B. the order the invoke BBs are processed in doesn't matter here. 6681 const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); 6682 SmallVector<MachineBasicBlock*, 64> MBBLPads; 6683 for (SmallPtrSet<MachineBasicBlock*, 64>::iterator 6684 I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { 6685 MachineBasicBlock *BB = *I; 6686 6687 // Remove the landing pad successor from the invoke block and replace it 6688 // with the new dispatch block. 6689 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), 6690 BB->succ_end()); 6691 while (!Successors.empty()) { 6692 MachineBasicBlock *SMBB = Successors.pop_back_val(); 6693 if (SMBB->isLandingPad()) { 6694 BB->removeSuccessor(SMBB); 6695 MBBLPads.push_back(SMBB); 6696 } 6697 } 6698 6699 BB->addSuccessor(DispatchBB); 6700 6701 // Find the invoke call and mark all of the callee-saved registers as 6702 // 'implicit defined' so that they're spilled. This prevents code from 6703 // moving instructions to before the EH block, where they will never be 6704 // executed. 
6705 for (MachineBasicBlock::reverse_iterator 6706 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { 6707 if (!II->isCall()) continue; 6708 6709 DenseMap<unsigned, bool> DefRegs; 6710 for (MachineInstr::mop_iterator 6711 OI = II->operands_begin(), OE = II->operands_end(); 6712 OI != OE; ++OI) { 6713 if (!OI->isReg()) continue; 6714 DefRegs[OI->getReg()] = true; 6715 } 6716 6717 MachineInstrBuilder MIB(*MF, &*II); 6718 6719 for (unsigned i = 0; SavedRegs[i] != 0; ++i) { 6720 unsigned Reg = SavedRegs[i]; 6721 if (Subtarget->isThumb2() && 6722 !ARM::tGPRRegClass.contains(Reg) && 6723 !ARM::hGPRRegClass.contains(Reg)) 6724 continue; 6725 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg)) 6726 continue; 6727 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg)) 6728 continue; 6729 if (!DefRegs[Reg]) 6730 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); 6731 } 6732 6733 break; 6734 } 6735 } 6736 6737 // Mark all former landing pads as non-landing pads. The dispatch is the only 6738 // landing pad now. 6739 for (SmallVectorImpl<MachineBasicBlock*>::iterator 6740 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) 6741 (*I)->setIsLandingPad(false); 6742 6743 // The instruction is gone now. 6744 MI->eraseFromParent(); 6745 6746 return MBB; 6747 } 6748 6749 static 6750 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 6751 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 6752 E = MBB->succ_end(); I != E; ++I) 6753 if (*I != Succ) 6754 return *I; 6755 llvm_unreachable("Expecting a BB with two successors!"); 6756 } 6757 6758 MachineBasicBlock *ARMTargetLowering:: 6759 EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { 6760 // This pseudo instruction has 3 operands: dst, src, size 6761 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). 6762 // Otherwise, we will generate unrolled scalar copies. 
6763 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6764 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6765 MachineFunction::iterator It = BB; 6766 ++It; 6767 6768 unsigned dest = MI->getOperand(0).getReg(); 6769 unsigned src = MI->getOperand(1).getReg(); 6770 unsigned SizeVal = MI->getOperand(2).getImm(); 6771 unsigned Align = MI->getOperand(3).getImm(); 6772 DebugLoc dl = MI->getDebugLoc(); 6773 6774 bool isThumb2 = Subtarget->isThumb2(); 6775 MachineFunction *MF = BB->getParent(); 6776 MachineRegisterInfo &MRI = MF->getRegInfo(); 6777 unsigned ldrOpc, strOpc, UnitSize = 0; 6778 6779 const TargetRegisterClass *TRC = isThumb2 ? 6780 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6781 (const TargetRegisterClass*)&ARM::GPRRegClass; 6782 const TargetRegisterClass *TRC_Vec = 0; 6783 6784 if (Align & 1) { 6785 ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; 6786 strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; 6787 UnitSize = 1; 6788 } else if (Align & 2) { 6789 ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST; 6790 strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; 6791 UnitSize = 2; 6792 } else { 6793 // Check whether we can use NEON instructions. 6794 if (!MF->getFunction()->getAttributes(). 6795 hasAttribute(AttributeSet::FunctionIndex, 6796 Attribute::NoImplicitFloat) && 6797 Subtarget->hasNEON()) { 6798 if ((Align % 16 == 0) && SizeVal >= 16) { 6799 ldrOpc = ARM::VLD1q32wb_fixed; 6800 strOpc = ARM::VST1q32wb_fixed; 6801 UnitSize = 16; 6802 TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; 6803 } 6804 else if ((Align % 8 == 0) && SizeVal >= 8) { 6805 ldrOpc = ARM::VLD1d32wb_fixed; 6806 strOpc = ARM::VST1d32wb_fixed; 6807 UnitSize = 8; 6808 TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; 6809 } 6810 } 6811 // Can't use NEON instructions. 6812 if (UnitSize == 0) { 6813 ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; 6814 strOpc = isThumb2 ? 
ARM::t2STR_POST : ARM::STR_POST_IMM; 6815 UnitSize = 4; 6816 } 6817 } 6818 6819 unsigned BytesLeft = SizeVal % UnitSize; 6820 unsigned LoopSize = SizeVal - BytesLeft; 6821 6822 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { 6823 // Use LDR and STR to copy. 6824 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) 6825 // [destOut] = STR_POST(scratch, destIn, UnitSize) 6826 unsigned srcIn = src; 6827 unsigned destIn = dest; 6828 for (unsigned i = 0; i < LoopSize; i+=UnitSize) { 6829 unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); 6830 unsigned srcOut = MRI.createVirtualRegister(TRC); 6831 unsigned destOut = MRI.createVirtualRegister(TRC); 6832 if (UnitSize >= 8) { 6833 AddDefaultPred(BuildMI(*BB, MI, dl, 6834 TII->get(ldrOpc), scratch) 6835 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); 6836 6837 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6838 .addReg(destIn).addImm(0).addReg(scratch)); 6839 } else if (isThumb2) { 6840 AddDefaultPred(BuildMI(*BB, MI, dl, 6841 TII->get(ldrOpc), scratch) 6842 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); 6843 6844 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6845 .addReg(scratch).addReg(destIn) 6846 .addImm(UnitSize)); 6847 } else { 6848 AddDefaultPred(BuildMI(*BB, MI, dl, 6849 TII->get(ldrOpc), scratch) 6850 .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0) 6851 .addImm(UnitSize)); 6852 6853 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6854 .addReg(scratch).addReg(destIn) 6855 .addReg(0).addImm(UnitSize)); 6856 } 6857 srcIn = srcOut; 6858 destIn = destOut; 6859 } 6860 6861 // Handle the leftover bytes with LDRB and STRB. 6862 // [scratch, srcOut] = LDRB_POST(srcIn, 1) 6863 // [destOut] = STRB_POST(scratch, destIn, 1) 6864 ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; 6865 strOpc = isThumb2 ? 
ARM::t2STRB_POST : ARM::STRB_POST_IMM; 6866 for (unsigned i = 0; i < BytesLeft; i++) { 6867 unsigned scratch = MRI.createVirtualRegister(TRC); 6868 unsigned srcOut = MRI.createVirtualRegister(TRC); 6869 unsigned destOut = MRI.createVirtualRegister(TRC); 6870 if (isThumb2) { 6871 AddDefaultPred(BuildMI(*BB, MI, dl, 6872 TII->get(ldrOpc),scratch) 6873 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); 6874 6875 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6876 .addReg(scratch).addReg(destIn) 6877 .addReg(0).addImm(1)); 6878 } else { 6879 AddDefaultPred(BuildMI(*BB, MI, dl, 6880 TII->get(ldrOpc),scratch) 6881 .addReg(srcOut, RegState::Define).addReg(srcIn) 6882 .addReg(0).addImm(1)); 6883 6884 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6885 .addReg(scratch).addReg(destIn) 6886 .addReg(0).addImm(1)); 6887 } 6888 srcIn = srcOut; 6889 destIn = destOut; 6890 } 6891 MI->eraseFromParent(); // The instruction is gone now. 6892 return BB; 6893 } 6894 6895 // Expand the pseudo op to a loop. 6896 // thisMBB: 6897 // ... 6898 // movw varEnd, # --> with thumb2 6899 // movt varEnd, # 6900 // ldrcp varEnd, idx --> without thumb2 6901 // fallthrough --> loopMBB 6902 // loopMBB: 6903 // PHI varPhi, varEnd, varLoop 6904 // PHI srcPhi, src, srcLoop 6905 // PHI destPhi, dst, destLoop 6906 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) 6907 // [destLoop] = STR_POST(scratch, destPhi, UnitSize) 6908 // subs varLoop, varPhi, #UnitSize 6909 // bne loopMBB 6910 // fallthrough --> exitMBB 6911 // exitMBB: 6912 // epilogue to handle left-over bytes 6913 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 6914 // [destOut] = STRB_POST(scratch, destLoop, 1) 6915 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6916 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6917 MF->insert(It, loopMBB); 6918 MF->insert(It, exitMBB); 6919 6920 // Transfer the remainder of BB and its successor edges to exitMBB. 
6921 exitMBB->splice(exitMBB->begin(), BB, 6922 llvm::next(MachineBasicBlock::iterator(MI)), 6923 BB->end()); 6924 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6925 6926 // Load an immediate to varEnd. 6927 unsigned varEnd = MRI.createVirtualRegister(TRC); 6928 if (isThumb2) { 6929 unsigned VReg1 = varEnd; 6930 if ((LoopSize & 0xFFFF0000) != 0) 6931 VReg1 = MRI.createVirtualRegister(TRC); 6932 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1) 6933 .addImm(LoopSize & 0xFFFF)); 6934 6935 if ((LoopSize & 0xFFFF0000) != 0) 6936 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) 6937 .addReg(VReg1) 6938 .addImm(LoopSize >> 16)); 6939 } else { 6940 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6941 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6942 const Constant *C = ConstantInt::get(Int32Ty, LoopSize); 6943 6944 // MachineConstantPool wants an explicit alignment. 6945 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 6946 if (Align == 0) 6947 Align = getDataLayout()->getTypeAllocSize(C->getType()); 6948 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6949 6950 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) 6951 .addReg(varEnd, RegState::Define) 6952 .addConstantPoolIndex(Idx) 6953 .addImm(0)); 6954 } 6955 BB->addSuccessor(loopMBB); 6956 6957 // Generate the loop body: 6958 // varPhi = PHI(varLoop, varEnd) 6959 // srcPhi = PHI(srcLoop, src) 6960 // destPhi = PHI(destLoop, dst) 6961 MachineBasicBlock *entryBB = BB; 6962 BB = loopMBB; 6963 unsigned varLoop = MRI.createVirtualRegister(TRC); 6964 unsigned varPhi = MRI.createVirtualRegister(TRC); 6965 unsigned srcLoop = MRI.createVirtualRegister(TRC); 6966 unsigned srcPhi = MRI.createVirtualRegister(TRC); 6967 unsigned destLoop = MRI.createVirtualRegister(TRC); 6968 unsigned destPhi = MRI.createVirtualRegister(TRC); 6969 6970 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) 6971 .addReg(varLoop).addMBB(loopMBB) 6972 
.addReg(varEnd).addMBB(entryBB); 6973 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) 6974 .addReg(srcLoop).addMBB(loopMBB) 6975 .addReg(src).addMBB(entryBB); 6976 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) 6977 .addReg(destLoop).addMBB(loopMBB) 6978 .addReg(dest).addMBB(entryBB); 6979 6980 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) 6981 // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) 6982 unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); 6983 if (UnitSize >= 8) { 6984 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) 6985 .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); 6986 6987 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) 6988 .addReg(destPhi).addImm(0).addReg(scratch)); 6989 } else if (isThumb2) { 6990 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) 6991 .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); 6992 6993 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) 6994 .addReg(scratch).addReg(destPhi) 6995 .addImm(UnitSize)); 6996 } else { 6997 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) 6998 .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0) 6999 .addImm(UnitSize)); 7000 7001 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) 7002 .addReg(scratch).addReg(destPhi) 7003 .addReg(0).addImm(UnitSize)); 7004 } 7005 7006 // Decrement loop variable by UnitSize. 7007 MachineInstrBuilder MIB = BuildMI(BB, dl, 7008 TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); 7009 AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); 7010 MIB->getOperand(5).setReg(ARM::CPSR); 7011 MIB->getOperand(5).setIsDef(true); 7012 7013 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 7014 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 7015 7016 // loopMBB can loop back to loopMBB or fall through to exitMBB. 7017 BB->addSuccessor(loopMBB); 7018 BB->addSuccessor(exitMBB); 7019 7020 // Add epilogue to handle BytesLeft. 
7021 BB = exitMBB; 7022 MachineInstr *StartOfExit = exitMBB->begin(); 7023 ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; 7024 strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; 7025 7026 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 7027 // [destOut] = STRB_POST(scratch, destLoop, 1) 7028 unsigned srcIn = srcLoop; 7029 unsigned destIn = destLoop; 7030 for (unsigned i = 0; i < BytesLeft; i++) { 7031 unsigned scratch = MRI.createVirtualRegister(TRC); 7032 unsigned srcOut = MRI.createVirtualRegister(TRC); 7033 unsigned destOut = MRI.createVirtualRegister(TRC); 7034 if (isThumb2) { 7035 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, 7036 TII->get(ldrOpc),scratch) 7037 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); 7038 7039 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) 7040 .addReg(scratch).addReg(destIn) 7041 .addImm(1)); 7042 } else { 7043 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, 7044 TII->get(ldrOpc),scratch) 7045 .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); 7046 7047 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) 7048 .addReg(scratch).addReg(destIn) 7049 .addReg(0).addImm(1)); 7050 } 7051 srcIn = srcOut; 7052 destIn = destOut; 7053 } 7054 7055 MI->eraseFromParent(); // The instruction is gone now. 7056 return BB; 7057 } 7058 7059 MachineBasicBlock * 7060 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 7061 MachineBasicBlock *BB) const { 7062 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 7063 DebugLoc dl = MI->getDebugLoc(); 7064 bool isThumb2 = Subtarget->isThumb2(); 7065 switch (MI->getOpcode()) { 7066 default: { 7067 MI->dump(); 7068 llvm_unreachable("Unexpected instr type to insert"); 7069 } 7070 // The Thumb2 pre-indexed stores have the same MI operands, they just 7071 // define them differently in the .td files from the isel patterns, so 7072 // they need pseudos. 
7073 case ARM::t2STR_preidx: 7074 MI->setDesc(TII->get(ARM::t2STR_PRE)); 7075 return BB; 7076 case ARM::t2STRB_preidx: 7077 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 7078 return BB; 7079 case ARM::t2STRH_preidx: 7080 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 7081 return BB; 7082 7083 case ARM::STRi_preidx: 7084 case ARM::STRBi_preidx: { 7085 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 7086 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 7087 // Decode the offset. 7088 unsigned Offset = MI->getOperand(4).getImm(); 7089 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 7090 Offset = ARM_AM::getAM2Offset(Offset); 7091 if (isSub) 7092 Offset = -Offset; 7093 7094 MachineMemOperand *MMO = *MI->memoperands_begin(); 7095 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 7096 .addOperand(MI->getOperand(0)) // Rn_wb 7097 .addOperand(MI->getOperand(1)) // Rt 7098 .addOperand(MI->getOperand(2)) // Rn 7099 .addImm(Offset) // offset (skip GPR==zero_reg) 7100 .addOperand(MI->getOperand(5)) // pred 7101 .addOperand(MI->getOperand(6)) 7102 .addMemOperand(MMO); 7103 MI->eraseFromParent(); 7104 return BB; 7105 } 7106 case ARM::STRr_preidx: 7107 case ARM::STRBr_preidx: 7108 case ARM::STRH_preidx: { 7109 unsigned NewOpc; 7110 switch (MI->getOpcode()) { 7111 default: llvm_unreachable("unexpected opcode!"); 7112 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 7113 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 7114 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 7115 } 7116 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 7117 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 7118 MIB.addOperand(MI->getOperand(i)); 7119 MI->eraseFromParent(); 7120 return BB; 7121 } 7122 case ARM::ATOMIC_LOAD_ADD_I8: 7123 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7124 case ARM::ATOMIC_LOAD_ADD_I16: 7125 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? 
ARM::t2ADDrr : ARM::ADDrr); 7126 case ARM::ATOMIC_LOAD_ADD_I32: 7127 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7128 7129 case ARM::ATOMIC_LOAD_AND_I8: 7130 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7131 case ARM::ATOMIC_LOAD_AND_I16: 7132 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7133 case ARM::ATOMIC_LOAD_AND_I32: 7134 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7135 7136 case ARM::ATOMIC_LOAD_OR_I8: 7137 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7138 case ARM::ATOMIC_LOAD_OR_I16: 7139 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7140 case ARM::ATOMIC_LOAD_OR_I32: 7141 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7142 7143 case ARM::ATOMIC_LOAD_XOR_I8: 7144 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7145 case ARM::ATOMIC_LOAD_XOR_I16: 7146 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7147 case ARM::ATOMIC_LOAD_XOR_I32: 7148 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7149 7150 case ARM::ATOMIC_LOAD_NAND_I8: 7151 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7152 case ARM::ATOMIC_LOAD_NAND_I16: 7153 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7154 case ARM::ATOMIC_LOAD_NAND_I32: 7155 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7156 7157 case ARM::ATOMIC_LOAD_SUB_I8: 7158 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 7159 case ARM::ATOMIC_LOAD_SUB_I16: 7160 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 7161 case ARM::ATOMIC_LOAD_SUB_I32: 7162 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 7163 7164 case ARM::ATOMIC_LOAD_MIN_I8: 7165 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 7166 case ARM::ATOMIC_LOAD_MIN_I16: 7167 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 7168 case ARM::ATOMIC_LOAD_MIN_I32: 7169 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 7170 7171 case ARM::ATOMIC_LOAD_MAX_I8: 7172 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 7173 case ARM::ATOMIC_LOAD_MAX_I16: 7174 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 7175 case ARM::ATOMIC_LOAD_MAX_I32: 7176 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 7177 7178 case ARM::ATOMIC_LOAD_UMIN_I8: 7179 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 7180 case ARM::ATOMIC_LOAD_UMIN_I16: 7181 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 7182 case ARM::ATOMIC_LOAD_UMIN_I32: 7183 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 7184 7185 case ARM::ATOMIC_LOAD_UMAX_I8: 7186 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 7187 case ARM::ATOMIC_LOAD_UMAX_I16: 7188 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 7189 case ARM::ATOMIC_LOAD_UMAX_I32: 7190 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 7191 7192 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 7193 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 7194 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 7195 7196 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 7197 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 7198 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 7199 7200 7201 case ARM::ATOMADD6432: 7202 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, 7203 isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, 7204 /*NeedsCarry*/ true); 7205 case ARM::ATOMSUB6432: 7206 return EmitAtomicBinary64(MI, BB, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr, 7207 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7208 /*NeedsCarry*/ true); 7209 case ARM::ATOMOR6432: 7210 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, 7211 isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7212 case ARM::ATOMXOR6432: 7213 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, 7214 isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7215 case ARM::ATOMAND6432: 7216 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, 7217 isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7218 case ARM::ATOMSWAP6432: 7219 return EmitAtomicBinary64(MI, BB, 0, 0, false); 7220 case ARM::ATOMCMPXCHG6432: 7221 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7222 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7223 /*NeedsCarry*/ false, /*IsCmpxchg*/true); 7224 case ARM::ATOMMIN6432: 7225 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7226 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7227 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7228 /*IsMinMax*/ true, ARMCC::LT); 7229 case ARM::ATOMMAX6432: 7230 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7231 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7232 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7233 /*IsMinMax*/ true, ARMCC::GE); 7234 case ARM::ATOMUMIN6432: 7235 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7236 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7237 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7238 /*IsMinMax*/ true, ARMCC::LO); 7239 case ARM::ATOMUMAX6432: 7240 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7241 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7242 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7243 /*IsMinMax*/ true, ARMCC::HS); 7244 7245 case ARM::tMOVCCr_pseudo: { 7246 // To "insert" a SELECT_CC instruction, we actually have to insert the 7247 // diamond control-flow pattern. 
The incoming instruction knows the 7248 // destination vreg to set, the condition code register to branch on, the 7249 // true/false values to select between, and a branch opcode to use. 7250 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7251 MachineFunction::iterator It = BB; 7252 ++It; 7253 7254 // thisMBB: 7255 // ... 7256 // TrueVal = ... 7257 // cmpTY ccX, r1, r2 7258 // bCC copy1MBB 7259 // fallthrough --> copy0MBB 7260 MachineBasicBlock *thisMBB = BB; 7261 MachineFunction *F = BB->getParent(); 7262 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7263 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7264 F->insert(It, copy0MBB); 7265 F->insert(It, sinkMBB); 7266 7267 // Transfer the remainder of BB and its successor edges to sinkMBB. 7268 sinkMBB->splice(sinkMBB->begin(), BB, 7269 llvm::next(MachineBasicBlock::iterator(MI)), 7270 BB->end()); 7271 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7272 7273 BB->addSuccessor(copy0MBB); 7274 BB->addSuccessor(sinkMBB); 7275 7276 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 7277 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 7278 7279 // copy0MBB: 7280 // %FalseValue = ... 7281 // # fallthrough to sinkMBB 7282 BB = copy0MBB; 7283 7284 // Update machine-CFG edges 7285 BB->addSuccessor(sinkMBB); 7286 7287 // sinkMBB: 7288 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7289 // ... 7290 BB = sinkMBB; 7291 BuildMI(*BB, BB->begin(), dl, 7292 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 7293 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 7294 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 7295 7296 MI->eraseFromParent(); // The pseudo instruction is gone now. 7297 return BB; 7298 } 7299 7300 case ARM::BCCi64: 7301 case ARM::BCCZi64: { 7302 // If there is an unconditional branch to the other successor, remove it. 
7303 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 7304 7305 // Compare both parts that make up the double comparison separately for 7306 // equality. 7307 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 7308 7309 unsigned LHS1 = MI->getOperand(1).getReg(); 7310 unsigned LHS2 = MI->getOperand(2).getReg(); 7311 if (RHSisZero) { 7312 AddDefaultPred(BuildMI(BB, dl, 7313 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7314 .addReg(LHS1).addImm(0)); 7315 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7316 .addReg(LHS2).addImm(0) 7317 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7318 } else { 7319 unsigned RHS1 = MI->getOperand(3).getReg(); 7320 unsigned RHS2 = MI->getOperand(4).getReg(); 7321 AddDefaultPred(BuildMI(BB, dl, 7322 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7323 .addReg(LHS1).addReg(RHS1)); 7324 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7325 .addReg(LHS2).addReg(RHS2) 7326 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7327 } 7328 7329 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 7330 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 7331 if (MI->getOperand(0).getImm() == ARMCC::NE) 7332 std::swap(destMBB, exitMBB); 7333 7334 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 7335 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 7336 if (isThumb2) 7337 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 7338 else 7339 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 7340 7341 MI->eraseFromParent(); // The pseudo instruction is gone now. 
7342 return BB; 7343 } 7344 7345 case ARM::Int_eh_sjlj_setjmp: 7346 case ARM::Int_eh_sjlj_setjmp_nofp: 7347 case ARM::tInt_eh_sjlj_setjmp: 7348 case ARM::t2Int_eh_sjlj_setjmp: 7349 case ARM::t2Int_eh_sjlj_setjmp_nofp: 7350 EmitSjLjDispatchBlock(MI, BB); 7351 return BB; 7352 7353 case ARM::ABS: 7354 case ARM::t2ABS: { 7355 // To insert an ABS instruction, we have to insert the 7356 // diamond control-flow pattern. The incoming instruction knows the 7357 // source vreg to test against 0, the destination vreg to set, 7358 // the condition code register to branch on, the 7359 // true/false values to select between, and a branch opcode to use. 7360 // It transforms 7361 // V1 = ABS V0 7362 // into 7363 // V2 = MOVS V0 7364 // BCC (branch to SinkBB if V0 >= 0) 7365 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 7366 // SinkBB: V1 = PHI(V2, V3) 7367 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7368 MachineFunction::iterator BBI = BB; 7369 ++BBI; 7370 MachineFunction *Fn = BB->getParent(); 7371 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7372 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7373 Fn->insert(BBI, RSBBB); 7374 Fn->insert(BBI, SinkBB); 7375 7376 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 7377 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 7378 bool isThumb2 = Subtarget->isThumb2(); 7379 MachineRegisterInfo &MRI = Fn->getRegInfo(); 7380 // In Thumb mode S must not be specified if source register is the SP or 7381 // PC and if destination register is the SP, so restrict register class 7382 unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? 7383 (const TargetRegisterClass*)&ARM::rGPRRegClass : 7384 (const TargetRegisterClass*)&ARM::GPRRegClass); 7385 7386 // Transfer the remainder of BB and its successor edges to sinkMBB. 
7387 SinkBB->splice(SinkBB->begin(), BB, 7388 llvm::next(MachineBasicBlock::iterator(MI)), 7389 BB->end()); 7390 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 7391 7392 BB->addSuccessor(RSBBB); 7393 BB->addSuccessor(SinkBB); 7394 7395 // fall through to SinkMBB 7396 RSBBB->addSuccessor(SinkBB); 7397 7398 // insert a cmp at the end of BB 7399 AddDefaultPred(BuildMI(BB, dl, 7400 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7401 .addReg(ABSSrcReg).addImm(0)); 7402 7403 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 7404 BuildMI(BB, dl, 7405 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 7406 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 7407 7408 // insert rsbri in RSBBB 7409 // Note: BCC and rsbri will be converted into predicated rsbmi 7410 // by if-conversion pass 7411 BuildMI(*RSBBB, RSBBB->begin(), dl, 7412 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 7413 .addReg(ABSSrcReg, RegState::Kill) 7414 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 7415 7416 // insert PHI in SinkBB, 7417 // reuse ABSDstReg to not change uses of ABS instruction 7418 BuildMI(*SinkBB, SinkBB->begin(), dl, 7419 TII->get(ARM::PHI), ABSDstReg) 7420 .addReg(NewRsbDstReg).addMBB(RSBBB) 7421 .addReg(ABSSrcReg).addMBB(BB); 7422 7423 // remove ABS instruction 7424 MI->eraseFromParent(); 7425 7426 // return last added BB 7427 return SinkBB; 7428 } 7429 case ARM::COPY_STRUCT_BYVAL_I32: 7430 ++NumLoopByVals; 7431 return EmitStructByval(MI, BB); 7432 } 7433 } 7434 7435 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 7436 SDNode *Node) const { 7437 if (!MI->hasPostISelHook()) { 7438 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 7439 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 7440 return; 7441 } 7442 7443 const MCInstrDesc *MCID = &MI->getDesc(); 7444 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 7445 // RSC. 
Coming out of isel, they have an implicit CPSR def, but the optional 7446 // operand is still set to noreg. If needed, set the optional operand's 7447 // register to CPSR, and remove the redundant implicit def. 7448 // 7449 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 7450 7451 // Rename pseudo opcodes. 7452 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 7453 if (NewOpc) { 7454 const ARMBaseInstrInfo *TII = 7455 static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); 7456 MCID = &TII->get(NewOpc); 7457 7458 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 7459 "converted opcode should be the same except for cc_out"); 7460 7461 MI->setDesc(*MCID); 7462 7463 // Add the optional cc_out operand 7464 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 7465 } 7466 unsigned ccOutIdx = MCID->getNumOperands() - 1; 7467 7468 // Any ARM instruction that sets the 's' bit should specify an optional 7469 // "cc_out" operand in the last operand position. 7470 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 7471 assert(!NewOpc && "Optional cc_out operand required"); 7472 return; 7473 } 7474 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 7475 // since we already have an optional CPSR def. 
7476 bool definesCPSR = false; 7477 bool deadCPSR = false; 7478 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 7479 i != e; ++i) { 7480 const MachineOperand &MO = MI->getOperand(i); 7481 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 7482 definesCPSR = true; 7483 if (MO.isDead()) 7484 deadCPSR = true; 7485 MI->RemoveOperand(i); 7486 break; 7487 } 7488 } 7489 if (!definesCPSR) { 7490 assert(!NewOpc && "Optional cc_out operand required"); 7491 return; 7492 } 7493 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 7494 if (deadCPSR) { 7495 assert(!MI->getOperand(ccOutIdx).getReg() && 7496 "expect uninitialized optional cc_out operand"); 7497 return; 7498 } 7499 7500 // If this instruction was defined with an optional CPSR def and its dag node 7501 // had a live implicit CPSR def, then activate the optional CPSR def. 7502 MachineOperand &MO = MI->getOperand(ccOutIdx); 7503 MO.setReg(ARM::CPSR); 7504 MO.setIsDef(true); 7505 } 7506 7507 //===----------------------------------------------------------------------===// 7508 // ARM Optimization Hooks 7509 //===----------------------------------------------------------------------===// 7510 7511 // Helper function that checks if N is a null or all ones constant. 7512 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { 7513 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); 7514 if (!C) 7515 return false; 7516 return AllOnes ? C->isAllOnesValue() : C->isNullValue(); 7517 } 7518 7519 // Return true if N is conditionally 0 or all ones. 7520 // Detects these expressions where cc is an i1 value: 7521 // 7522 // (select cc 0, y) [AllOnes=0] 7523 // (select cc y, 0) [AllOnes=0] 7524 // (zext cc) [AllOnes=0] 7525 // (sext cc) [AllOnes=0/1] 7526 // (select cc -1, y) [AllOnes=1] 7527 // (select cc y, -1) [AllOnes=1] 7528 // 7529 // Invert is set when N is the null/all ones constant when CC is false. 7530 // OtherOp is set to the alternative value of N. 
7531 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, 7532 SDValue &CC, bool &Invert, 7533 SDValue &OtherOp, 7534 SelectionDAG &DAG) { 7535 switch (N->getOpcode()) { 7536 default: return false; 7537 case ISD::SELECT: { 7538 CC = N->getOperand(0); 7539 SDValue N1 = N->getOperand(1); 7540 SDValue N2 = N->getOperand(2); 7541 if (isZeroOrAllOnes(N1, AllOnes)) { 7542 Invert = false; 7543 OtherOp = N2; 7544 return true; 7545 } 7546 if (isZeroOrAllOnes(N2, AllOnes)) { 7547 Invert = true; 7548 OtherOp = N1; 7549 return true; 7550 } 7551 return false; 7552 } 7553 case ISD::ZERO_EXTEND: 7554 // (zext cc) can never be the all ones value. 7555 if (AllOnes) 7556 return false; 7557 // Fall through. 7558 case ISD::SIGN_EXTEND: { 7559 EVT VT = N->getValueType(0); 7560 CC = N->getOperand(0); 7561 if (CC.getValueType() != MVT::i1) 7562 return false; 7563 Invert = !AllOnes; 7564 if (AllOnes) 7565 // When looking for an AllOnes constant, N is an sext, and the 'other' 7566 // value is 0. 7567 OtherOp = DAG.getConstant(0, VT); 7568 else if (N->getOpcode() == ISD::ZERO_EXTEND) 7569 // When looking for a 0 constant, N can be zext or sext. 7570 OtherOp = DAG.getConstant(1, VT); 7571 else 7572 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); 7573 return true; 7574 } 7575 } 7576 } 7577 7578 // Combine a constant select operand into its use: 7579 // 7580 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 7581 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 7582 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] 7583 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 7584 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 7585 // 7586 // The transform is rejected if the select doesn't have a constant operand that 7587 // is null, or all ones when AllOnes is set. 
7588 // 7589 // Also recognize sext/zext from i1: 7590 // 7591 // (add (zext cc), x) -> (select cc (add x, 1), x) 7592 // (add (sext cc), x) -> (select cc (add x, -1), x) 7593 // 7594 // These transformations eventually create predicated instructions. 7595 // 7596 // @param N The node to transform. 7597 // @param Slct The N operand that is a select. 7598 // @param OtherOp The other N operand (x above). 7599 // @param DCI Context. 7600 // @param AllOnes Require the select constant to be all ones instead of null. 7601 // @returns The new node, or SDValue() on failure. 7602 static 7603 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 7604 TargetLowering::DAGCombinerInfo &DCI, 7605 bool AllOnes = false) { 7606 SelectionDAG &DAG = DCI.DAG; 7607 EVT VT = N->getValueType(0); 7608 SDValue NonConstantVal; 7609 SDValue CCOp; 7610 bool SwapSelectOps; 7611 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, 7612 NonConstantVal, DAG)) 7613 return SDValue(); 7614 7615 // Slct is now know to be the desired identity constant when CC is true. 7616 SDValue TrueVal = OtherOp; 7617 SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, 7618 OtherOp, NonConstantVal); 7619 // Unless SwapSelectOps says CC should be false. 7620 if (SwapSelectOps) 7621 std::swap(TrueVal, FalseVal); 7622 7623 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 7624 CCOp, TrueVal, FalseVal); 7625 } 7626 7627 // Attempt combineSelectAndUse on each operand of a commutative operator N. 
7628 static 7629 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, 7630 TargetLowering::DAGCombinerInfo &DCI) { 7631 SDValue N0 = N->getOperand(0); 7632 SDValue N1 = N->getOperand(1); 7633 if (N0.getNode()->hasOneUse()) { 7634 SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); 7635 if (Result.getNode()) 7636 return Result; 7637 } 7638 if (N1.getNode()->hasOneUse()) { 7639 SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); 7640 if (Result.getNode()) 7641 return Result; 7642 } 7643 return SDValue(); 7644 } 7645 7646 // AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction 7647 // (only after legalization). 7648 static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, 7649 TargetLowering::DAGCombinerInfo &DCI, 7650 const ARMSubtarget *Subtarget) { 7651 7652 // Only perform optimization if after legalize, and if NEON is available. We 7653 // also expected both operands to be BUILD_VECTORs. 7654 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() 7655 || N0.getOpcode() != ISD::BUILD_VECTOR 7656 || N1.getOpcode() != ISD::BUILD_VECTOR) 7657 return SDValue(); 7658 7659 // Check output type since VPADDL operand elements can only be 8, 16, or 32. 7660 EVT VT = N->getValueType(0); 7661 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) 7662 return SDValue(); 7663 7664 // Check that the vector operands are of the right form. 7665 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR 7666 // operands, where N is the size of the formed vector. 7667 // Each EXTRACT_VECTOR should have the same input vector and odd or even 7668 // index such that we have a pair wise add pattern. 7669 7670 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing. 
7671 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 7672 return SDValue(); 7673 SDValue Vec = N0->getOperand(0)->getOperand(0); 7674 SDNode *V = Vec.getNode(); 7675 unsigned nextIndex = 0; 7676 7677 // For each operands to the ADD which are BUILD_VECTORs, 7678 // check to see if each of their operands are an EXTRACT_VECTOR with 7679 // the same vector and appropriate index. 7680 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { 7681 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT 7682 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 7683 7684 SDValue ExtVec0 = N0->getOperand(i); 7685 SDValue ExtVec1 = N1->getOperand(i); 7686 7687 // First operand is the vector, verify its the same. 7688 if (V != ExtVec0->getOperand(0).getNode() || 7689 V != ExtVec1->getOperand(0).getNode()) 7690 return SDValue(); 7691 7692 // Second is the constant, verify its correct. 7693 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1)); 7694 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1)); 7695 7696 // For the constant, we want to see all the even or all the odd. 7697 if (!C0 || !C1 || C0->getZExtValue() != nextIndex 7698 || C1->getZExtValue() != nextIndex+1) 7699 return SDValue(); 7700 7701 // Increment index. 7702 nextIndex+=2; 7703 } else 7704 return SDValue(); 7705 } 7706 7707 // Create VPADDL node. 7708 SelectionDAG &DAG = DCI.DAG; 7709 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7710 7711 // Build operand list. 7712 SmallVector<SDValue, 8> Ops; 7713 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, 7714 TLI.getPointerTy())); 7715 7716 // Input is the vector. 7717 Ops.push_back(Vec); 7718 7719 // Get widened type and narrowed type. 
7720 MVT widenType; 7721 unsigned numElem = VT.getVectorNumElements(); 7722 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { 7723 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; 7724 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; 7725 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; 7726 default: 7727 llvm_unreachable("Invalid vector element type for padd optimization."); 7728 } 7729 7730 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 7731 widenType, &Ops[0], Ops.size()); 7732 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); 7733 } 7734 7735 static SDValue findMUL_LOHI(SDValue V) { 7736 if (V->getOpcode() == ISD::UMUL_LOHI || 7737 V->getOpcode() == ISD::SMUL_LOHI) 7738 return V; 7739 return SDValue(); 7740 } 7741 7742 static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, 7743 TargetLowering::DAGCombinerInfo &DCI, 7744 const ARMSubtarget *Subtarget) { 7745 7746 if (Subtarget->isThumb1Only()) return SDValue(); 7747 7748 // Only perform the checks after legalize when the pattern is available. 7749 if (DCI.isBeforeLegalize()) return SDValue(); 7750 7751 // Look for multiply add opportunities. 7752 // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where 7753 // each add nodes consumes a value from ISD::UMUL_LOHI and there is 7754 // a glue link from the first add to the second add. 7755 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by 7756 // a S/UMLAL instruction. 7757 // loAdd UMUL_LOHI 7758 // \ / :lo \ :hi 7759 // \ / \ [no multiline comment] 7760 // ADDC | hiAdd 7761 // \ :glue / / 7762 // \ / / 7763 // ADDE 7764 // 7765 assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); 7766 SDValue AddcOp0 = AddcNode->getOperand(0); 7767 SDValue AddcOp1 = AddcNode->getOperand(1); 7768 7769 // Check if the two operands are from the same mul_lohi node. 
7770 if (AddcOp0.getNode() == AddcOp1.getNode()) 7771 return SDValue(); 7772 7773 assert(AddcNode->getNumValues() == 2 && 7774 AddcNode->getValueType(0) == MVT::i32 && 7775 AddcNode->getValueType(1) == MVT::Glue && 7776 "Expect ADDC with two result values: i32, glue"); 7777 7778 // Check that the ADDC adds the low result of the S/UMUL_LOHI. 7779 if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && 7780 AddcOp0->getOpcode() != ISD::SMUL_LOHI && 7781 AddcOp1->getOpcode() != ISD::UMUL_LOHI && 7782 AddcOp1->getOpcode() != ISD::SMUL_LOHI) 7783 return SDValue(); 7784 7785 // Look for the glued ADDE. 7786 SDNode* AddeNode = AddcNode->getGluedUser(); 7787 if (AddeNode == NULL) 7788 return SDValue(); 7789 7790 // Make sure it is really an ADDE. 7791 if (AddeNode->getOpcode() != ISD::ADDE) 7792 return SDValue(); 7793 7794 assert(AddeNode->getNumOperands() == 3 && 7795 AddeNode->getOperand(2).getValueType() == MVT::Glue && 7796 "ADDE node has the wrong inputs"); 7797 7798 // Check for the triangle shape. 7799 SDValue AddeOp0 = AddeNode->getOperand(0); 7800 SDValue AddeOp1 = AddeNode->getOperand(1); 7801 7802 // Make sure that the ADDE operands are not coming from the same node. 7803 if (AddeOp0.getNode() == AddeOp1.getNode()) 7804 return SDValue(); 7805 7806 // Find the MUL_LOHI node walking up ADDE's operands. 7807 bool IsLeftOperandMUL = false; 7808 SDValue MULOp = findMUL_LOHI(AddeOp0); 7809 if (MULOp == SDValue()) 7810 MULOp = findMUL_LOHI(AddeOp1); 7811 else 7812 IsLeftOperandMUL = true; 7813 if (MULOp == SDValue()) 7814 return SDValue(); 7815 7816 // Figure out the right opcode. 7817 unsigned Opc = MULOp->getOpcode(); 7818 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; 7819 7820 // Figure out the high and low input values to the MLAL node. 
7821 SDValue* HiMul = &MULOp; 7822 SDValue* HiAdd = NULL; 7823 SDValue* LoMul = NULL; 7824 SDValue* LowAdd = NULL; 7825 7826 if (IsLeftOperandMUL) 7827 HiAdd = &AddeOp1; 7828 else 7829 HiAdd = &AddeOp0; 7830 7831 7832 if (AddcOp0->getOpcode() == Opc) { 7833 LoMul = &AddcOp0; 7834 LowAdd = &AddcOp1; 7835 } 7836 if (AddcOp1->getOpcode() == Opc) { 7837 LoMul = &AddcOp1; 7838 LowAdd = &AddcOp0; 7839 } 7840 7841 if (LoMul == NULL) 7842 return SDValue(); 7843 7844 if (LoMul->getNode() != HiMul->getNode()) 7845 return SDValue(); 7846 7847 // Create the merged node. 7848 SelectionDAG &DAG = DCI.DAG; 7849 7850 // Build operand list. 7851 SmallVector<SDValue, 8> Ops; 7852 Ops.push_back(LoMul->getOperand(0)); 7853 Ops.push_back(LoMul->getOperand(1)); 7854 Ops.push_back(*LowAdd); 7855 Ops.push_back(*HiAdd); 7856 7857 SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(), 7858 DAG.getVTList(MVT::i32, MVT::i32), 7859 &Ops[0], Ops.size()); 7860 7861 // Replace the ADDs' nodes uses by the MLA node's values. 7862 SDValue HiMLALResult(MLALNode.getNode(), 1); 7863 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); 7864 7865 SDValue LoMLALResult(MLALNode.getNode(), 0); 7866 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); 7867 7868 // Return original node to notify the driver to stop replacing. 7869 SDValue resNode(AddcNode, 0); 7870 return resNode; 7871 } 7872 7873 /// PerformADDCCombine - Target-specific dag combine transform from 7874 /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL. 7875 static SDValue PerformADDCCombine(SDNode *N, 7876 TargetLowering::DAGCombinerInfo &DCI, 7877 const ARMSubtarget *Subtarget) { 7878 7879 return AddCombineTo64bitMLAL(N, DCI, Subtarget); 7880 7881 } 7882 7883 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 7884 /// operands N0 and N1. This is a helper for PerformADDCombine that is 7885 /// called with the default operands, and if that fails, with commuted 7886 /// operands. 
7887 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 7888 TargetLowering::DAGCombinerInfo &DCI, 7889 const ARMSubtarget *Subtarget){ 7890 7891 // Attempt to create vpaddl for this add. 7892 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); 7893 if (Result.getNode()) 7894 return Result; 7895 7896 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 7897 if (N0.getNode()->hasOneUse()) { 7898 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 7899 if (Result.getNode()) return Result; 7900 } 7901 return SDValue(); 7902 } 7903 7904 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 7905 /// 7906 static SDValue PerformADDCombine(SDNode *N, 7907 TargetLowering::DAGCombinerInfo &DCI, 7908 const ARMSubtarget *Subtarget) { 7909 SDValue N0 = N->getOperand(0); 7910 SDValue N1 = N->getOperand(1); 7911 7912 // First try with the default operand order. 7913 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); 7914 if (Result.getNode()) 7915 return Result; 7916 7917 // If that didn't work, try again with the operands commuted. 7918 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 7919 } 7920 7921 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 7922 /// 7923 static SDValue PerformSUBCombine(SDNode *N, 7924 TargetLowering::DAGCombinerInfo &DCI) { 7925 SDValue N0 = N->getOperand(0); 7926 SDValue N1 = N->getOperand(1); 7927 7928 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 7929 if (N1.getNode()->hasOneUse()) { 7930 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 7931 if (Result.getNode()) return Result; 7932 } 7933 7934 return SDValue(); 7935 } 7936 7937 /// PerformVMULCombine 7938 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 7939 /// special multiplier accumulator forwarding. 
7940 /// vmul d3, d0, d2 7941 /// vmla d3, d1, d2 7942 /// is faster than 7943 /// vadd d3, d0, d1 7944 /// vmul d3, d3, d2 7945 static SDValue PerformVMULCombine(SDNode *N, 7946 TargetLowering::DAGCombinerInfo &DCI, 7947 const ARMSubtarget *Subtarget) { 7948 if (!Subtarget->hasVMLxForwarding()) 7949 return SDValue(); 7950 7951 SelectionDAG &DAG = DCI.DAG; 7952 SDValue N0 = N->getOperand(0); 7953 SDValue N1 = N->getOperand(1); 7954 unsigned Opcode = N0.getOpcode(); 7955 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 7956 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 7957 Opcode = N1.getOpcode(); 7958 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 7959 Opcode != ISD::FADD && Opcode != ISD::FSUB) 7960 return SDValue(); 7961 std::swap(N0, N1); 7962 } 7963 7964 EVT VT = N->getValueType(0); 7965 DebugLoc DL = N->getDebugLoc(); 7966 SDValue N00 = N0->getOperand(0); 7967 SDValue N01 = N0->getOperand(1); 7968 return DAG.getNode(Opcode, DL, VT, 7969 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 7970 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 7971 } 7972 7973 static SDValue PerformMULCombine(SDNode *N, 7974 TargetLowering::DAGCombinerInfo &DCI, 7975 const ARMSubtarget *Subtarget) { 7976 SelectionDAG &DAG = DCI.DAG; 7977 7978 if (Subtarget->isThumb1Only()) 7979 return SDValue(); 7980 7981 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 7982 return SDValue(); 7983 7984 EVT VT = N->getValueType(0); 7985 if (VT.is64BitVector() || VT.is128BitVector()) 7986 return PerformVMULCombine(N, DCI, Subtarget); 7987 if (VT != MVT::i32) 7988 return SDValue(); 7989 7990 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7991 if (!C) 7992 return SDValue(); 7993 7994 int64_t MulAmt = C->getSExtValue(); 7995 unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); 7996 7997 ShiftAmt = ShiftAmt & (32 - 1); 7998 SDValue V = N->getOperand(0); 7999 DebugLoc DL = N->getDebugLoc(); 8000 8001 SDValue Res; 8002 MulAmt >>= ShiftAmt; 8003 8004 if (MulAmt >= 0) { 8005 if (isPowerOf2_32(MulAmt - 
1)) { 8006 // (mul x, 2^N + 1) => (add (shl x, N), x) 8007 Res = DAG.getNode(ISD::ADD, DL, VT, 8008 V, 8009 DAG.getNode(ISD::SHL, DL, VT, 8010 V, 8011 DAG.getConstant(Log2_32(MulAmt - 1), 8012 MVT::i32))); 8013 } else if (isPowerOf2_32(MulAmt + 1)) { 8014 // (mul x, 2^N - 1) => (sub (shl x, N), x) 8015 Res = DAG.getNode(ISD::SUB, DL, VT, 8016 DAG.getNode(ISD::SHL, DL, VT, 8017 V, 8018 DAG.getConstant(Log2_32(MulAmt + 1), 8019 MVT::i32)), 8020 V); 8021 } else 8022 return SDValue(); 8023 } else { 8024 uint64_t MulAmtAbs = -MulAmt; 8025 if (isPowerOf2_32(MulAmtAbs + 1)) { 8026 // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) 8027 Res = DAG.getNode(ISD::SUB, DL, VT, 8028 V, 8029 DAG.getNode(ISD::SHL, DL, VT, 8030 V, 8031 DAG.getConstant(Log2_32(MulAmtAbs + 1), 8032 MVT::i32))); 8033 } else if (isPowerOf2_32(MulAmtAbs - 1)) { 8034 // (mul x, -(2^N + 1)) => - (add (shl x, N), x) 8035 Res = DAG.getNode(ISD::ADD, DL, VT, 8036 V, 8037 DAG.getNode(ISD::SHL, DL, VT, 8038 V, 8039 DAG.getConstant(Log2_32(MulAmtAbs-1), 8040 MVT::i32))); 8041 Res = DAG.getNode(ISD::SUB, DL, VT, 8042 DAG.getConstant(0, MVT::i32),Res); 8043 8044 } else 8045 return SDValue(); 8046 } 8047 8048 if (ShiftAmt != 0) 8049 Res = DAG.getNode(ISD::SHL, DL, VT, 8050 Res, DAG.getConstant(ShiftAmt, MVT::i32)); 8051 8052 // Do not add new nodes to DAG combiner worklist. 
8053 DCI.CombineTo(N, Res, false); 8054 return SDValue(); 8055 } 8056 8057 static SDValue PerformANDCombine(SDNode *N, 8058 TargetLowering::DAGCombinerInfo &DCI, 8059 const ARMSubtarget *Subtarget) { 8060 8061 // Attempt to use immediate-form VBIC 8062 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 8063 DebugLoc dl = N->getDebugLoc(); 8064 EVT VT = N->getValueType(0); 8065 SelectionDAG &DAG = DCI.DAG; 8066 8067 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8068 return SDValue(); 8069 8070 APInt SplatBits, SplatUndef; 8071 unsigned SplatBitSize; 8072 bool HasAnyUndefs; 8073 if (BVN && 8074 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 8075 if (SplatBitSize <= 64) { 8076 EVT VbicVT; 8077 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 8078 SplatUndef.getZExtValue(), SplatBitSize, 8079 DAG, VbicVT, VT.is128BitVector(), 8080 OtherModImm); 8081 if (Val.getNode()) { 8082 SDValue Input = 8083 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 8084 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 8085 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 8086 } 8087 } 8088 } 8089 8090 if (!Subtarget->isThumb1Only()) { 8091 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) 8092 SDValue Result = combineSelectAndUseCommutative(N, true, DCI); 8093 if (Result.getNode()) 8094 return Result; 8095 } 8096 8097 return SDValue(); 8098 } 8099 8100 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR 8101 static SDValue PerformORCombine(SDNode *N, 8102 TargetLowering::DAGCombinerInfo &DCI, 8103 const ARMSubtarget *Subtarget) { 8104 // Attempt to use immediate-form VORR 8105 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 8106 DebugLoc dl = N->getDebugLoc(); 8107 EVT VT = N->getValueType(0); 8108 SelectionDAG &DAG = DCI.DAG; 8109 8110 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8111 return SDValue(); 8112 8113 APInt SplatBits, 
SplatUndef; 8114 unsigned SplatBitSize; 8115 bool HasAnyUndefs; 8116 if (BVN && Subtarget->hasNEON() && 8117 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 8118 if (SplatBitSize <= 64) { 8119 EVT VorrVT; 8120 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 8121 SplatUndef.getZExtValue(), SplatBitSize, 8122 DAG, VorrVT, VT.is128BitVector(), 8123 OtherModImm); 8124 if (Val.getNode()) { 8125 SDValue Input = 8126 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 8127 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 8128 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 8129 } 8130 } 8131 } 8132 8133 if (!Subtarget->isThumb1Only()) { 8134 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 8135 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 8136 if (Result.getNode()) 8137 return Result; 8138 } 8139 8140 // The code below optimizes (or (and X, Y), Z). 8141 // The AND operand needs to have a single user to make these optimizations 8142 // profitable. 8143 SDValue N0 = N->getOperand(0); 8144 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) 8145 return SDValue(); 8146 SDValue N1 = N->getOperand(1); 8147 8148 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 
8149 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 8150 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 8151 APInt SplatUndef; 8152 unsigned SplatBitSize; 8153 bool HasAnyUndefs; 8154 8155 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 8156 APInt SplatBits0; 8157 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 8158 HasAnyUndefs) && !HasAnyUndefs) { 8159 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 8160 APInt SplatBits1; 8161 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 8162 HasAnyUndefs) && !HasAnyUndefs && 8163 SplatBits0 == ~SplatBits1) { 8164 // Canonicalize the vector type to make instruction selection simpler. 8165 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 8166 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 8167 N0->getOperand(1), N0->getOperand(0), 8168 N1->getOperand(0)); 8169 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 8170 } 8171 } 8172 } 8173 8174 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 8175 // reasonable. 8176 8177 // BFI is only available on V6T2+ 8178 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 8179 return SDValue(); 8180 8181 DebugLoc DL = N->getDebugLoc(); 8182 // 1) or (and A, mask), val => ARMbfi A, val, mask 8183 // iff (val & mask) == val 8184 // 8185 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 8186 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 8187 // && mask == ~mask2 8188 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 8189 // && ~mask == mask2 8190 // (i.e., copy a bitfield value into another bitfield of the same width) 8191 8192 if (VT != MVT::i32) 8193 return SDValue(); 8194 8195 SDValue N00 = N0.getOperand(0); 8196 8197 // The value and the mask need to be constants so we can verify this is 8198 // actually a bitfield set. 
If the mask is 0xffff, we can do better 8199 // via a movt instruction, so don't use BFI in that case. 8200 SDValue MaskOp = N0.getOperand(1); 8201 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 8202 if (!MaskC) 8203 return SDValue(); 8204 unsigned Mask = MaskC->getZExtValue(); 8205 if (Mask == 0xffff) 8206 return SDValue(); 8207 SDValue Res; 8208 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 8209 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 8210 if (N1C) { 8211 unsigned Val = N1C->getZExtValue(); 8212 if ((Val & ~Mask) != Val) 8213 return SDValue(); 8214 8215 if (ARM::isBitFieldInvertedMask(Mask)) { 8216 Val >>= CountTrailingZeros_32(~Mask); 8217 8218 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 8219 DAG.getConstant(Val, MVT::i32), 8220 DAG.getConstant(Mask, MVT::i32)); 8221 8222 // Do not add new nodes to DAG combiner worklist. 8223 DCI.CombineTo(N, Res, false); 8224 return SDValue(); 8225 } 8226 } else if (N1.getOpcode() == ISD::AND) { 8227 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 8228 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8229 if (!N11C) 8230 return SDValue(); 8231 unsigned Mask2 = N11C->getZExtValue(); 8232 8233 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 8234 // as is to match. 8235 if (ARM::isBitFieldInvertedMask(Mask) && 8236 (Mask == ~Mask2)) { 8237 // The pack halfword instruction works better for masks that fit it, 8238 // so use that when it's available. 8239 if (Subtarget->hasT2ExtractPack() && 8240 (Mask == 0xffff || Mask == 0xffff0000)) 8241 return SDValue(); 8242 // 2a 8243 unsigned amt = CountTrailingZeros_32(Mask2); 8244 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 8245 DAG.getConstant(amt, MVT::i32)); 8246 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 8247 DAG.getConstant(Mask, MVT::i32)); 8248 // Do not add new nodes to DAG combiner worklist. 
8249 DCI.CombineTo(N, Res, false); 8250 return SDValue(); 8251 } else if (ARM::isBitFieldInvertedMask(~Mask) && 8252 (~Mask == Mask2)) { 8253 // The pack halfword instruction works better for masks that fit it, 8254 // so use that when it's available. 8255 if (Subtarget->hasT2ExtractPack() && 8256 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 8257 return SDValue(); 8258 // 2b 8259 unsigned lsb = CountTrailingZeros_32(Mask); 8260 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 8261 DAG.getConstant(lsb, MVT::i32)); 8262 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 8263 DAG.getConstant(Mask2, MVT::i32)); 8264 // Do not add new nodes to DAG combiner worklist. 8265 DCI.CombineTo(N, Res, false); 8266 return SDValue(); 8267 } 8268 } 8269 8270 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 8271 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 8272 ARM::isBitFieldInvertedMask(~Mask)) { 8273 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 8274 // where lsb(mask) == #shamt and masked bits of B are known zero. 8275 SDValue ShAmt = N00.getOperand(1); 8276 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 8277 unsigned LSB = CountTrailingZeros_32(Mask); 8278 if (ShAmtC != LSB) 8279 return SDValue(); 8280 8281 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 8282 DAG.getConstant(~Mask, MVT::i32)); 8283 8284 // Do not add new nodes to DAG combiner worklist. 
8285 DCI.CombineTo(N, Res, false); 8286 } 8287 8288 return SDValue(); 8289 } 8290 8291 static SDValue PerformXORCombine(SDNode *N, 8292 TargetLowering::DAGCombinerInfo &DCI, 8293 const ARMSubtarget *Subtarget) { 8294 EVT VT = N->getValueType(0); 8295 SelectionDAG &DAG = DCI.DAG; 8296 8297 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8298 return SDValue(); 8299 8300 if (!Subtarget->isThumb1Only()) { 8301 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 8302 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 8303 if (Result.getNode()) 8304 return Result; 8305 } 8306 8307 return SDValue(); 8308 } 8309 8310 /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 8311 /// the bits being cleared by the AND are not demanded by the BFI. 8312 static SDValue PerformBFICombine(SDNode *N, 8313 TargetLowering::DAGCombinerInfo &DCI) { 8314 SDValue N1 = N->getOperand(1); 8315 if (N1.getOpcode() == ISD::AND) { 8316 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8317 if (!N11C) 8318 return SDValue(); 8319 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 8320 unsigned LSB = CountTrailingZeros_32(~InvMask); 8321 unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; 8322 unsigned Mask = (1 << Width)-1; 8323 unsigned Mask2 = N11C->getZExtValue(); 8324 if ((Mask & (~Mask2)) == 0) 8325 return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), 8326 N->getOperand(0), N1.getOperand(0), 8327 N->getOperand(2)); 8328 } 8329 return SDValue(); 8330 } 8331 8332 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for 8333 /// ARMISD::VMOVRRD. 
8334 static SDValue PerformVMOVRRDCombine(SDNode *N, 8335 TargetLowering::DAGCombinerInfo &DCI) { 8336 // vmovrrd(vmovdrr x, y) -> x,y 8337 SDValue InDouble = N->getOperand(0); 8338 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 8339 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 8340 8341 // vmovrrd(load f64) -> (load i32), (load i32) 8342 SDNode *InNode = InDouble.getNode(); 8343 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 8344 InNode->getValueType(0) == MVT::f64 && 8345 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 8346 !cast<LoadSDNode>(InNode)->isVolatile()) { 8347 // TODO: Should this be done for non-FrameIndex operands? 8348 LoadSDNode *LD = cast<LoadSDNode>(InNode); 8349 8350 SelectionDAG &DAG = DCI.DAG; 8351 DebugLoc DL = LD->getDebugLoc(); 8352 SDValue BasePtr = LD->getBasePtr(); 8353 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 8354 LD->getPointerInfo(), LD->isVolatile(), 8355 LD->isNonTemporal(), LD->isInvariant(), 8356 LD->getAlignment()); 8357 8358 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 8359 DAG.getConstant(4, MVT::i32)); 8360 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 8361 LD->getPointerInfo(), LD->isVolatile(), 8362 LD->isNonTemporal(), LD->isInvariant(), 8363 std::min(4U, LD->getAlignment() / 2)); 8364 8365 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 8366 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 8367 DCI.RemoveFromWorklist(LD); 8368 DAG.DeleteNode(LD); 8369 return Result; 8370 } 8371 8372 return SDValue(); 8373 } 8374 8375 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for 8376 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 
8377 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 8378 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 8379 SDValue Op0 = N->getOperand(0); 8380 SDValue Op1 = N->getOperand(1); 8381 if (Op0.getOpcode() == ISD::BITCAST) 8382 Op0 = Op0.getOperand(0); 8383 if (Op1.getOpcode() == ISD::BITCAST) 8384 Op1 = Op1.getOperand(0); 8385 if (Op0.getOpcode() == ARMISD::VMOVRRD && 8386 Op0.getNode() == Op1.getNode() && 8387 Op0.getResNo() == 0 && Op1.getResNo() == 1) 8388 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 8389 N->getValueType(0), Op0.getOperand(0)); 8390 return SDValue(); 8391 } 8392 8393 /// PerformSTORECombine - Target-specific dag combine xforms for 8394 /// ISD::STORE. 8395 static SDValue PerformSTORECombine(SDNode *N, 8396 TargetLowering::DAGCombinerInfo &DCI) { 8397 StoreSDNode *St = cast<StoreSDNode>(N); 8398 if (St->isVolatile()) 8399 return SDValue(); 8400 8401 // Optimize trunc store (of multiple scalars) to shuffle and store. First, 8402 // pack all of the elements in one place. Next, store to memory in fewer 8403 // chunks. 8404 SDValue StVal = St->getValue(); 8405 EVT VT = StVal.getValueType(); 8406 if (St->isTruncatingStore() && VT.isVector()) { 8407 SelectionDAG &DAG = DCI.DAG; 8408 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8409 EVT StVT = St->getMemoryVT(); 8410 unsigned NumElems = VT.getVectorNumElements(); 8411 assert(StVT != VT && "Cannot truncate to the same type"); 8412 unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); 8413 unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); 8414 8415 // From, To sizes and ElemCount must be pow of two 8416 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); 8417 8418 // We are going to use the original vector elt for storing. 8419 // Accumulated smaller vector elements must be a multiple of the store size. 
8420 if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); 8421 8422 unsigned SizeRatio = FromEltSz / ToEltSz; 8423 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); 8424 8425 // Create a type on which we perform the shuffle. 8426 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), 8427 NumElems*SizeRatio); 8428 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); 8429 8430 DebugLoc DL = St->getDebugLoc(); 8431 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); 8432 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); 8433 for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; 8434 8435 // Can't shuffle using an illegal type. 8436 if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); 8437 8438 SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, 8439 DAG.getUNDEF(WideVec.getValueType()), 8440 ShuffleVec.data()); 8441 // At this point all of the data is stored at the bottom of the 8442 // register. We now need to save it to mem. 8443 8444 // Find the largest store unit 8445 MVT StoreType = MVT::i8; 8446 for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; 8447 tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { 8448 MVT Tp = (MVT::SimpleValueType)tp; 8449 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) 8450 StoreType = Tp; 8451 } 8452 // Didn't find a legal store type. 
8453 if (!TLI.isTypeLegal(StoreType)) 8454 return SDValue(); 8455 8456 // Bitcast the original vector into a vector of store-size units 8457 EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), 8458 StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); 8459 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); 8460 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); 8461 SmallVector<SDValue, 8> Chains; 8462 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, 8463 TLI.getPointerTy()); 8464 SDValue BasePtr = St->getBasePtr(); 8465 8466 // Perform one or more big stores into memory. 8467 unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); 8468 for (unsigned I = 0; I < E; I++) { 8469 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 8470 StoreType, ShuffWide, 8471 DAG.getIntPtrConstant(I)); 8472 SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, 8473 St->getPointerInfo(), St->isVolatile(), 8474 St->isNonTemporal(), St->getAlignment()); 8475 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 8476 Increment); 8477 Chains.push_back(Ch); 8478 } 8479 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], 8480 Chains.size()); 8481 } 8482 8483 if (!ISD::isNormalStore(St)) 8484 return SDValue(); 8485 8486 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and 8487 // ARM stores of arguments in the same cache line. 
8488 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 8489 StVal.getNode()->hasOneUse()) { 8490 SelectionDAG &DAG = DCI.DAG; 8491 DebugLoc DL = St->getDebugLoc(); 8492 SDValue BasePtr = St->getBasePtr(); 8493 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 8494 StVal.getNode()->getOperand(0), BasePtr, 8495 St->getPointerInfo(), St->isVolatile(), 8496 St->isNonTemporal(), St->getAlignment()); 8497 8498 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 8499 DAG.getConstant(4, MVT::i32)); 8500 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 8501 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 8502 St->isNonTemporal(), 8503 std::min(4U, St->getAlignment() / 2)); 8504 } 8505 8506 if (StVal.getValueType() != MVT::i64 || 8507 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 8508 return SDValue(); 8509 8510 // Bitcast an i64 store extracted from a vector to f64. 8511 // Otherwise, the i64 value will be legalized to a pair of i32 values. 8512 SelectionDAG &DAG = DCI.DAG; 8513 DebugLoc dl = StVal.getDebugLoc(); 8514 SDValue IntVec = StVal.getOperand(0); 8515 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 8516 IntVec.getValueType().getVectorNumElements()); 8517 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 8518 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 8519 Vec, StVal.getOperand(1)); 8520 dl = N->getDebugLoc(); 8521 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 8522 // Make the DAGCombiner fold the bitcasts. 8523 DCI.AddToWorklist(Vec.getNode()); 8524 DCI.AddToWorklist(ExtElt.getNode()); 8525 DCI.AddToWorklist(V.getNode()); 8526 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 8527 St->getPointerInfo(), St->isVolatile(), 8528 St->isNonTemporal(), St->getAlignment(), 8529 St->getTBAAInfo()); 8530 } 8531 8532 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 8533 /// are normal, non-volatile loads. 
/// If so, it is profitable to bitcast an
/// i64 vector to have f64 elements, since the value can then be loaded
/// directly into a VFP register.
///
/// Only the first getVectorNumElements() operands of N are inspected.
static bool hasNormalLoadOperand(SDNode *N) {
  unsigned NumElts = N->getValueType(0).getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    SDNode *Elt = N->getOperand(i).getNode();
    if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
      return true;
  }
  return false;
}

/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
/// ISD::BUILD_VECTOR.
///
/// For two-operand nodes the VMOVDRR combine is tried first.  Otherwise, when
/// the element type is i64 and at least one operand is a normal non-volatile
/// load, the vector is rebuilt with f64 elements and bitcast back to the
/// original type.  Returns an empty SDValue when neither applies.
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
                                          TargetLowering::DAGCombinerInfo &DCI){
  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
  // into a pair of GPRs, which is fine when the value is used as a scalar,
  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
  SelectionDAG &DAG = DCI.DAG;
  if (N->getNumOperands() == 2) {
    SDValue RV = PerformVMOVDRRCombine(N, DAG);
    if (RV.getNode())
      return RV;
  }

  // Load i64 elements as f64 values so that type legalization does not split
  // them up into i32 values.
  EVT VT = N->getValueType(0);
  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
    return SDValue();
  DebugLoc dl = N->getDebugLoc();
  SmallVector<SDValue, 8> Ops;
  unsigned NumElts = VT.getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
    Ops.push_back(V);
    // Make the DAGCombiner fold the bitcast.
    DCI.AddToWorklist(V.getNode());
  }
  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
///
/// Only fires when the vector element type is i64 and the inserted value
/// (operand 1) is a normal, non-volatile load; the insert is then redone on
/// an f64 vector and the result bitcast back to the original type.
static SDValue PerformInsertEltCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI) {
  // Bitcast an i64 load inserted into a vector to f64.
  // Otherwise, the i64 value will be legalized to a pair of i32 values.
  EVT VT = N->getValueType(0);
  SDNode *Elt = N->getOperand(1).getNode();
  if (VT.getVectorElementType() != MVT::i64 ||
      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  DebugLoc dl = N->getDebugLoc();
  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
                                 VT.getVectorNumElements());
  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
  // Make the DAGCombiner fold the bitcasts.
  DCI.AddToWorklist(Vec.getNode());
  DCI.AddToWorklist(V.getNode());
  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
                               Vec, V, N->getOperand(2));
  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
}

/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
///
/// Returns the rewritten shuffle, or an empty SDValue when the operands are
/// not both two-operand CONCAT_VECTORS with an undef second half or when one
/// of the checked types is not legal.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
  // The LLVM shufflevector instruction does not require the shuffle mask
  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
  // operands do not match the mask length, they are extended by concatenating
  // them with undef vectors.  That is probably the right thing for other
  // targets, but for NEON it is better to concatenate two double-register
  // size vector operands into a single quad-register size vector.  Do that
  // transformation here:
  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
  //   shuffle(concat(v1, v2), undef)
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
      Op0.getNumOperands() != 2 ||
      Op1.getNumOperands() != 2)
    return SDValue();
  SDValue Concat0Op1 = Op0.getOperand(1);
  SDValue Concat1Op1 = Op1.getOperand(1);
  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
      Concat1Op1.getOpcode() != ISD::UNDEF)
    return SDValue();
  // Skip the transformation if any of the types are illegal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  if (!TLI.isTypeLegal(VT) ||
      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
      !TLI.isTypeLegal(Concat1Op1.getValueType()))
    return SDValue();

  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
                                  Op0.getOperand(0), Op1.getOperand(0));
  // Translate the shuffle mask.
  // Indices into the low half of the first operand are kept as they are,
  // indices into the low half of the second operand are moved to the upper
  // half of the new concat, and every other index (including the negative
  // undef marker) becomes -1.
  SmallVector<int, 16> NewMask;
  unsigned NumElts = VT.getVectorNumElements();
  unsigned HalfElts = NumElts/2;
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  for (unsigned n = 0; n < NumElts; ++n) {
    int MaskElt = SVN->getMaskElt(n);
    int NewElt = -1;
    if (MaskElt < (int)HalfElts)
      NewElt = MaskElt;
    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
      NewElt = HalfElts + MaskElt - NumElts;
    NewMask.push_back(NewElt);
  }
  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
                              DAG.getUNDEF(VT), NewMask.data());
}

/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
/// NEON load/store intrinsics to merge base address updates.
///
/// Looks for an ISD::ADD user of the address operand that is independent of
/// N, and folds it into a new "_UPD" memory node that additionally produces
/// the incremented address.  Does nothing before legalization or when called
/// by the legalizer.  Always returns an empty SDValue; replacements are made
/// through DCI.CombineTo.
static SDValue CombineBaseUpdate(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
  // For intrinsic nodes operand 1 is read below as the intrinsic number, so
  // the address is taken from operand 2; otherwise it is operand 1.
  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
  SDValue Addr = N->getOperand(AddrOpIdx);

  // Search for a use of the address operand that is an increment.
  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User->getOpcode() != ISD::ADD ||
        UI.getUse().getResNo() != Addr.getResNo())
      continue;

    // Check that the add is independent of the load/store.  Otherwise, folding
    // it would create a cycle.
    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
      continue;

    // Find the new opcode for the updating load/store.
    bool isLoad = true;
    bool isLaneOp = false;
    unsigned NewOpc = 0;
    unsigned NumVecs = 0;
    if (isIntrinsic) {
      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
      switch (IntNo) {
      default: llvm_unreachable("unexpected intrinsic for Neon base update");
      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
        NumVecs = 1; break;
      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
        NumVecs = 2; break;
      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
        NumVecs = 3; break;
      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
        NumVecs = 4; break;
      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
        NumVecs = 2; isLaneOp = true; break;
      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
        NumVecs = 3; isLaneOp = true; break;
      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
        NumVecs = 4; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
        NumVecs = 1; isLoad = false; break;
      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
        NumVecs = 2; isLoad = false; break;
      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
        NumVecs = 3; isLoad = false; break;
      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
        NumVecs = 4; isLoad = false; break;
      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
        NumVecs = 2; isLoad = false; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
        NumVecs = 3; isLoad = false; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
        NumVecs = 4; isLoad = false; isLaneOp = true; break;
      }
    } else {
      // Non-intrinsic nodes handled here are the VLDnDUP nodes; they are
      // sized like lane operations (one element per vector).
      isLaneOp = true;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode for Neon base update");
      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
      }
    }

    // Find the size of memory referenced by the load/store.
    // Loads take the vector type from result 0; stores take it from the
    // operand that follows the address.
    EVT VecTy;
    if (isLoad)
      VecTy = N->getValueType(0);
    else
      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
    if (isLaneOp)
      NumBytes /= VecTy.getVectorNumElements();

    // If the increment is a constant, it must match the memory ref size.
    // Inc is whichever operand of the ADD is not the address.
    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
      uint64_t IncVal = CInc->getZExtValue();
      if (IncVal != NumBytes)
        continue;
    } else if (NumBytes >= 3 * 16) {
      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
      // separate instructions that make it harder to use a non-constant update.
      continue;
    }

    // Create the new updating load/store node.
    // Result list: NumResultVecs vectors, then the updated address (i32),
    // then the chain.  At most 4 + 2 = 6 entries are written.
    EVT Tys[6];
    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
    unsigned n;
    for (n = 0; n < NumResultVecs; ++n)
      Tys[n] = VecTy;
    Tys[n++] = MVT::i32;
    Tys[n] = MVT::Other;
    SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
    SmallVector<SDValue, 8> Ops;
    Ops.push_back(N->getOperand(0)); // incoming chain
    Ops.push_back(N->getOperand(AddrOpIdx));
    Ops.push_back(Inc);
    // Forward every operand that follows the address unchanged.
    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
      Ops.push_back(N->getOperand(i));
    }
    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
                                           Ops.data(), Ops.size(),
                                           MemInt->getMemoryVT(),
                                           MemInt->getMemOperand());

    // Update the uses.
    // N's results map to the vector results plus the chain of the new node
    // (the address result is skipped); the ADD is replaced by the address
    // result.
    std::vector<SDValue> NewResults;
    for (unsigned i = 0; i < NumResultVecs; ++i) {
      NewResults.push_back(SDValue(UpdN.getNode(), i));
    }
    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
    DCI.CombineTo(N, NewResults);
    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));

    // Only one increment is folded; stop walking the use list after the
    // replacements above.
    break;
  }
  return SDValue();
}

/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
/// return true.
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  // vldN-dup instructions only support 64-bit vectors for N > 1.
  if (!VT.is64BitVector())
    return false;

  // Check if the VDUPLANE operand is a vldN-lane intrinsic.
  SDNode *VLD = N->getOperand(0).getNode();
  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
    return false;
  unsigned NumVecs = 0;
  unsigned NewOpc = 0;
  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
  if (IntNo == Intrinsic::arm_neon_vld2lane) {
    NumVecs = 2;
    NewOpc = ARMISD::VLD2DUP;
  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
    NumVecs = 3;
    NewOpc = ARMISD::VLD3DUP;
  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
    NumVecs = 4;
    NewOpc = ARMISD::VLD4DUP;
  } else {
    return false;
  }

  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
  // numbers match the load.
  // The load's lane number is read from operand NumVecs+3.
  unsigned VLDLaneNo =
    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
       UI != UE; ++UI) {
    // Ignore uses of the chain result.
    if (UI.getUse().getResNo() == NumVecs)
      continue;
    SDNode *User = *UI;
    if (User->getOpcode() != ARMISD::VDUPLANE ||
        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
      return false;
  }

  // Create the vldN-dup node.
  // Result list: NumVecs vectors of N's type followed by the chain.
  EVT Tys[5];
  unsigned n;
  for (n = 0; n < NumVecs; ++n)
    Tys[n] = VT;
  Tys[n] = MVT::Other;
  SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
  // Only the chain (operand 0) and operand 2 of the lane load are carried
  // over.
  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
                                           Ops, 2, VLDMemInt->getMemoryVT(),
                                           VLDMemInt->getMemOperand());

  // Update the uses.
  // NOTE(review): CombineTo is called on users while VLD's use list is being
  // walked; confirm the iterator stays valid if CombineTo deletes a user.
  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
       UI != UE; ++UI) {
    unsigned ResNo = UI.getUse().getResNo();
    // Ignore uses of the chain result.
    if (ResNo == NumVecs)
      continue;
    SDNode *User = *UI;
    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
  }

  // Now the vldN-lane intrinsic is dead except for its chain result.
  // Update uses of the chain.
  std::vector<SDValue> VLDDupResults;
  for (unsigned n = 0; n < NumVecs; ++n)
    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
  DCI.CombineTo(VLD, VLDDupResults);

  return true;
}

/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
///
/// Returns N itself when CombineVLDDUP rewrote the node, a BITCAST of the
/// source when the source is already a VMOVIMM/VMVNIMM with a small enough
/// element size, and an empty SDValue otherwise.
static SDValue PerformVDUPLANECombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  SDValue Op = N->getOperand(0);

  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
  if (CombineVLDDUP(N, DCI))
    return SDValue(N, 0);

  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
  // redundant.  Ignore bit_converts for now; element sizes are checked below.
  while (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
    return SDValue();

  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
  // The canonical VMOV for a zero vector uses a 32-bit element size.
  // When the decoded immediate is zero the element size is treated as 8 so
  // that the size check below cannot reject it.
  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned EltBits;
  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
    EltSize = 8;
  EVT VT = N->getValueType(0);
  if (EltSize > VT.getVectorElementType().getSizeInBits())
    return SDValue();

  return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}

// isConstVecPow2 - Return true if each vector element is a power of 2, all
// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
//
// Every operand must be a ConstantFPSDNode whose value converts exactly to a
// 64-bit integer (signed or unsigned according to isSigned).  On success the
// common integer value is stored in C; C is left untouched on failure.
static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
{
  integerPart cN;
  integerPart c0 = 0;
  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
       I != E; I++) {
    // Note: this local C shadows the output parameter C inside the loop body.
    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
    if (!C)
      return false;

    bool isExact;
    APFloat APF = C->getValueAPF();
    if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
        != APFloat::opOK || !isExact)
      return false;

    // Remember the first element's value and require all others to equal it.
    c0 = (I == 0) ? cN : c0;
    if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
      return false;
  }
  C = c0;
  return true;
}

/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
/// can replace combinations of VMUL and VCVT (floating-point to integer)
/// when the VMUL has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
///  vmul.f32        d16, d17, d16
///  vcvt.s32.f32    d16, d16
/// becomes:
///  vcvt.s32.f32    d16, d16, #3
///
/// Requires NEON, a vector-typed operand that is an ISD::FMUL, and a
/// BUILD_VECTOR second multiplicand accepted by isConstVecPow2; otherwise an
/// empty SDValue is returned.
static SDValue PerformVCVTCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op = N->getOperand(0);

  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
      Op.getOpcode() != ISD::FMUL)
    return SDValue();

  uint64_t C;
  SDValue N0 = Op->getOperand(0);
  SDValue ConstVec = Op->getOperand(1);
  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;

  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
      !isConstVecPow2(ConstVec, isSigned, C))
    return SDValue();

  // Emit the fixed-point conversion intrinsic with Log2(C) as the immediate.
  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
    Intrinsic::arm_neon_vcvtfp2fxu;
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
                     N->getValueType(0),
                     DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
                     DAG.getConstant(Log2_64(C), MVT::i32));
}

/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
8969 /// 8970 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 8971 /// vcvt.f32.s32 d16, d16 8972 /// vdiv.f32 d16, d17, d16 8973 /// becomes: 8974 /// vcvt.f32.s32 d16, d16, #3 8975 static SDValue PerformVDIVCombine(SDNode *N, 8976 TargetLowering::DAGCombinerInfo &DCI, 8977 const ARMSubtarget *Subtarget) { 8978 SelectionDAG &DAG = DCI.DAG; 8979 SDValue Op = N->getOperand(0); 8980 unsigned OpOpcode = Op.getNode()->getOpcode(); 8981 8982 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 8983 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 8984 return SDValue(); 8985 8986 uint64_t C; 8987 SDValue ConstVec = N->getOperand(1); 8988 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 8989 8990 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 8991 !isConstVecPow2(ConstVec, isSigned, C)) 8992 return SDValue(); 8993 8994 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : 8995 Intrinsic::arm_neon_vcvtfxu2fp; 8996 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 8997 Op.getValueType(), 8998 DAG.getConstant(IntrinsicOpcode, MVT::i32), 8999 Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); 9000 } 9001 9002 /// Getvshiftimm - Check if this is a valid build_vector for the immediate 9003 /// operand of a vector shift operation, where all the elements of the 9004 /// build_vector must have the same constant integer value. 9005 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 9006 // Ignore bit_converts. 9007 while (Op.getOpcode() == ISD::BITCAST) 9008 Op = Op.getOperand(0); 9009 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 9010 APInt SplatBits, SplatUndef; 9011 unsigned SplatBitSize; 9012 bool HasAnyUndefs; 9013 if (! BVN || ! 
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 9014 HasAnyUndefs, ElementBits) || 9015 SplatBitSize > ElementBits) 9016 return false; 9017 Cnt = SplatBits.getSExtValue(); 9018 return true; 9019 } 9020 9021 /// isVShiftLImm - Check if this is a valid build_vector for the immediate 9022 /// operand of a vector shift left operation. That value must be in the range: 9023 /// 0 <= Value < ElementBits for a left shift; or 9024 /// 0 <= Value <= ElementBits for a long left shift. 9025 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 9026 assert(VT.isVector() && "vector shift count is not a vector type"); 9027 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 9028 if (! getVShiftImm(Op, ElementBits, Cnt)) 9029 return false; 9030 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 9031 } 9032 9033 /// isVShiftRImm - Check if this is a valid build_vector for the immediate 9034 /// operand of a vector shift right operation. For a shift opcode, the value 9035 /// is positive, but for an intrinsic the value count must be negative. The 9036 /// absolute value must be in the range: 9037 /// 1 <= |Value| <= ElementBits for a right shift; or 9038 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 9039 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 9040 int64_t &Cnt) { 9041 assert(VT.isVector() && "vector shift count is not a vector type"); 9042 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 9043 if (! getVShiftImm(Op, ElementBits, Cnt)) 9044 return false; 9045 if (isIntrinsic) 9046 Cnt = -Cnt; 9047 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 9048 } 9049 9050 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 
9051 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 9052 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 9053 switch (IntNo) { 9054 default: 9055 // Don't do anything for most intrinsics. 9056 break; 9057 9058 // Vector shifts: check for immediate versions and lower them. 9059 // Note: This is done during DAG combining instead of DAG legalizing because 9060 // the build_vectors for 64-bit vector element shift counts are generally 9061 // not legal, and it is hard to see their values after they get legalized to 9062 // loads from a constant pool. 9063 case Intrinsic::arm_neon_vshifts: 9064 case Intrinsic::arm_neon_vshiftu: 9065 case Intrinsic::arm_neon_vshiftls: 9066 case Intrinsic::arm_neon_vshiftlu: 9067 case Intrinsic::arm_neon_vshiftn: 9068 case Intrinsic::arm_neon_vrshifts: 9069 case Intrinsic::arm_neon_vrshiftu: 9070 case Intrinsic::arm_neon_vrshiftn: 9071 case Intrinsic::arm_neon_vqshifts: 9072 case Intrinsic::arm_neon_vqshiftu: 9073 case Intrinsic::arm_neon_vqshiftsu: 9074 case Intrinsic::arm_neon_vqshiftns: 9075 case Intrinsic::arm_neon_vqshiftnu: 9076 case Intrinsic::arm_neon_vqshiftnsu: 9077 case Intrinsic::arm_neon_vqrshiftns: 9078 case Intrinsic::arm_neon_vqrshiftnu: 9079 case Intrinsic::arm_neon_vqrshiftnsu: { 9080 EVT VT = N->getOperand(1).getValueType(); 9081 int64_t Cnt; 9082 unsigned VShiftOpc = 0; 9083 9084 switch (IntNo) { 9085 case Intrinsic::arm_neon_vshifts: 9086 case Intrinsic::arm_neon_vshiftu: 9087 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 9088 VShiftOpc = ARMISD::VSHL; 9089 break; 9090 } 9091 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 9092 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
9093 ARMISD::VSHRs : ARMISD::VSHRu); 9094 break; 9095 } 9096 return SDValue(); 9097 9098 case Intrinsic::arm_neon_vshiftls: 9099 case Intrinsic::arm_neon_vshiftlu: 9100 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 9101 break; 9102 llvm_unreachable("invalid shift count for vshll intrinsic"); 9103 9104 case Intrinsic::arm_neon_vrshifts: 9105 case Intrinsic::arm_neon_vrshiftu: 9106 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 9107 break; 9108 return SDValue(); 9109 9110 case Intrinsic::arm_neon_vqshifts: 9111 case Intrinsic::arm_neon_vqshiftu: 9112 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9113 break; 9114 return SDValue(); 9115 9116 case Intrinsic::arm_neon_vqshiftsu: 9117 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9118 break; 9119 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 9120 9121 case Intrinsic::arm_neon_vshiftn: 9122 case Intrinsic::arm_neon_vrshiftn: 9123 case Intrinsic::arm_neon_vqshiftns: 9124 case Intrinsic::arm_neon_vqshiftnu: 9125 case Intrinsic::arm_neon_vqshiftnsu: 9126 case Intrinsic::arm_neon_vqrshiftns: 9127 case Intrinsic::arm_neon_vqrshiftnu: 9128 case Intrinsic::arm_neon_vqrshiftnsu: 9129 // Narrowing shifts require an immediate right shift. 9130 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 9131 break; 9132 llvm_unreachable("invalid shift count for narrowing vector shift " 9133 "intrinsic"); 9134 9135 default: 9136 llvm_unreachable("unhandled vector shift"); 9137 } 9138 9139 switch (IntNo) { 9140 case Intrinsic::arm_neon_vshifts: 9141 case Intrinsic::arm_neon_vshiftu: 9142 // Opcode already set above. 9143 break; 9144 case Intrinsic::arm_neon_vshiftls: 9145 case Intrinsic::arm_neon_vshiftlu: 9146 if (Cnt == VT.getVectorElementType().getSizeInBits()) 9147 VShiftOpc = ARMISD::VSHLLi; 9148 else 9149 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
9150 ARMISD::VSHLLs : ARMISD::VSHLLu); 9151 break; 9152 case Intrinsic::arm_neon_vshiftn: 9153 VShiftOpc = ARMISD::VSHRN; break; 9154 case Intrinsic::arm_neon_vrshifts: 9155 VShiftOpc = ARMISD::VRSHRs; break; 9156 case Intrinsic::arm_neon_vrshiftu: 9157 VShiftOpc = ARMISD::VRSHRu; break; 9158 case Intrinsic::arm_neon_vrshiftn: 9159 VShiftOpc = ARMISD::VRSHRN; break; 9160 case Intrinsic::arm_neon_vqshifts: 9161 VShiftOpc = ARMISD::VQSHLs; break; 9162 case Intrinsic::arm_neon_vqshiftu: 9163 VShiftOpc = ARMISD::VQSHLu; break; 9164 case Intrinsic::arm_neon_vqshiftsu: 9165 VShiftOpc = ARMISD::VQSHLsu; break; 9166 case Intrinsic::arm_neon_vqshiftns: 9167 VShiftOpc = ARMISD::VQSHRNs; break; 9168 case Intrinsic::arm_neon_vqshiftnu: 9169 VShiftOpc = ARMISD::VQSHRNu; break; 9170 case Intrinsic::arm_neon_vqshiftnsu: 9171 VShiftOpc = ARMISD::VQSHRNsu; break; 9172 case Intrinsic::arm_neon_vqrshiftns: 9173 VShiftOpc = ARMISD::VQRSHRNs; break; 9174 case Intrinsic::arm_neon_vqrshiftnu: 9175 VShiftOpc = ARMISD::VQRSHRNu; break; 9176 case Intrinsic::arm_neon_vqrshiftnsu: 9177 VShiftOpc = ARMISD::VQRSHRNsu; break; 9178 } 9179 9180 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 9181 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 9182 } 9183 9184 case Intrinsic::arm_neon_vshiftins: { 9185 EVT VT = N->getOperand(1).getValueType(); 9186 int64_t Cnt; 9187 unsigned VShiftOpc = 0; 9188 9189 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 9190 VShiftOpc = ARMISD::VSLI; 9191 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 9192 VShiftOpc = ARMISD::VSRI; 9193 else { 9194 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 9195 } 9196 9197 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 9198 N->getOperand(1), N->getOperand(2), 9199 DAG.getConstant(Cnt, MVT::i32)); 9200 } 9201 9202 case Intrinsic::arm_neon_vqrshifts: 9203 case Intrinsic::arm_neon_vqrshiftu: 9204 // No immediate versions of these to check for. 
9205 break; 9206 } 9207 9208 return SDValue(); 9209 } 9210 9211 /// PerformShiftCombine - Checks for immediate versions of vector shifts and 9212 /// lowers them. As with the vector shift intrinsics, this is done during DAG 9213 /// combining instead of DAG legalizing because the build_vectors for 64-bit 9214 /// vector element shift counts are generally not legal, and it is hard to see 9215 /// their values after they get legalized to loads from a constant pool. 9216 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 9217 const ARMSubtarget *ST) { 9218 EVT VT = N->getValueType(0); 9219 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { 9220 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 9221 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 9222 SDValue N1 = N->getOperand(1); 9223 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 9224 SDValue N0 = N->getOperand(0); 9225 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && 9226 DAG.MaskedValueIsZero(N0.getOperand(0), 9227 APInt::getHighBitsSet(32, 16))) 9228 return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); 9229 } 9230 } 9231 9232 // Nothing to be done for scalar shifts. 9233 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9234 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 9235 return SDValue(); 9236 9237 assert(ST->hasNEON() && "unexpected vector shift"); 9238 int64_t Cnt; 9239 9240 switch (N->getOpcode()) { 9241 default: llvm_unreachable("unexpected shift opcode"); 9242 9243 case ISD::SHL: 9244 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 9245 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 9246 DAG.getConstant(Cnt, MVT::i32)); 9247 break; 9248 9249 case ISD::SRA: 9250 case ISD::SRL: 9251 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 9252 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
9253 ARMISD::VSHRs : ARMISD::VSHRu); 9254 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 9255 DAG.getConstant(Cnt, MVT::i32)); 9256 } 9257 } 9258 return SDValue(); 9259 } 9260 9261 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 9262 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 9263 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 9264 const ARMSubtarget *ST) { 9265 SDValue N0 = N->getOperand(0); 9266 9267 // Check for sign- and zero-extensions of vector extract operations of 8- 9268 // and 16-bit vector elements. NEON supports these directly. They are 9269 // handled during DAG combining because type legalization will promote them 9270 // to 32-bit types and it is messy to recognize the operations after that. 9271 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 9272 SDValue Vec = N0.getOperand(0); 9273 SDValue Lane = N0.getOperand(1); 9274 EVT VT = N->getValueType(0); 9275 EVT EltVT = N0.getValueType(); 9276 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9277 9278 if (VT == MVT::i32 && 9279 (EltVT == MVT::i8 || EltVT == MVT::i16) && 9280 TLI.isTypeLegal(Vec.getValueType()) && 9281 isa<ConstantSDNode>(Lane)) { 9282 9283 unsigned Opc = 0; 9284 switch (N->getOpcode()) { 9285 default: llvm_unreachable("unexpected opcode"); 9286 case ISD::SIGN_EXTEND: 9287 Opc = ARMISD::VGETLANEs; 9288 break; 9289 case ISD::ZERO_EXTEND: 9290 case ISD::ANY_EXTEND: 9291 Opc = ARMISD::VGETLANEu; 9292 break; 9293 } 9294 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 9295 } 9296 } 9297 9298 return SDValue(); 9299 } 9300 9301 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 9302 /// to match f32 max/min patterns to use NEON vmax/vmin instructions. 
9303 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 9304 const ARMSubtarget *ST) { 9305 // If the target supports NEON, try to use vmax/vmin instructions for f32 9306 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 9307 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 9308 // a NaN; only do the transformation when it matches that behavior. 9309 9310 // For now only do this when using NEON for FP operations; if using VFP, it 9311 // is not obvious that the benefit outweighs the cost of switching to the 9312 // NEON pipeline. 9313 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 9314 N->getValueType(0) != MVT::f32) 9315 return SDValue(); 9316 9317 SDValue CondLHS = N->getOperand(0); 9318 SDValue CondRHS = N->getOperand(1); 9319 SDValue LHS = N->getOperand(2); 9320 SDValue RHS = N->getOperand(3); 9321 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 9322 9323 unsigned Opcode = 0; 9324 bool IsReversed; 9325 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 9326 IsReversed = false; // x CC y ? x : y 9327 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 9328 IsReversed = true ; // x CC y ? y : x 9329 } else { 9330 return SDValue(); 9331 } 9332 9333 bool IsUnordered; 9334 switch (CC) { 9335 default: break; 9336 case ISD::SETOLT: 9337 case ISD::SETOLE: 9338 case ISD::SETLT: 9339 case ISD::SETLE: 9340 case ISD::SETULT: 9341 case ISD::SETULE: 9342 // If LHS is NaN, an ordered comparison will be false and the result will 9343 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 9344 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 9345 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 9346 if (!DAG.isKnownNeverNaN(IsUnordered ? 
RHS : LHS)) 9347 break; 9348 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 9349 // will return -0, so vmin can only be used for unsafe math or if one of 9350 // the operands is known to be nonzero. 9351 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 9352 !DAG.getTarget().Options.UnsafeFPMath && 9353 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 9354 break; 9355 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 9356 break; 9357 9358 case ISD::SETOGT: 9359 case ISD::SETOGE: 9360 case ISD::SETGT: 9361 case ISD::SETGE: 9362 case ISD::SETUGT: 9363 case ISD::SETUGE: 9364 // If LHS is NaN, an ordered comparison will be false and the result will 9365 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 9366 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 9367 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 9368 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 9369 break; 9370 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 9371 // will return +0, so vmax can only be used for unsafe math or if one of 9372 // the operands is known to be nonzero. 9373 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 9374 !DAG.getTarget().Options.UnsafeFPMath && 9375 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 9376 break; 9377 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 9378 break; 9379 } 9380 9381 if (!Opcode) 9382 return SDValue(); 9383 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 9384 } 9385 9386 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 9387 SDValue 9388 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 9389 SDValue Cmp = N->getOperand(4); 9390 if (Cmp.getOpcode() != ARMISD::CMPZ) 9391 // Only looking at EQ and NE cases. 
9392 return SDValue(); 9393 9394 EVT VT = N->getValueType(0); 9395 DebugLoc dl = N->getDebugLoc(); 9396 SDValue LHS = Cmp.getOperand(0); 9397 SDValue RHS = Cmp.getOperand(1); 9398 SDValue FalseVal = N->getOperand(0); 9399 SDValue TrueVal = N->getOperand(1); 9400 SDValue ARMcc = N->getOperand(2); 9401 ARMCC::CondCodes CC = 9402 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 9403 9404 // Simplify 9405 // mov r1, r0 9406 // cmp r1, x 9407 // mov r0, y 9408 // moveq r0, x 9409 // to 9410 // cmp r0, x 9411 // movne r0, y 9412 // 9413 // mov r1, r0 9414 // cmp r1, x 9415 // mov r0, x 9416 // movne r0, y 9417 // to 9418 // cmp r0, x 9419 // movne r0, y 9420 /// FIXME: Turn this into a target neutral optimization? 9421 SDValue Res; 9422 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 9423 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 9424 N->getOperand(3), Cmp); 9425 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 9426 SDValue ARMcc; 9427 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 9428 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 9429 N->getOperand(3), NewCmp); 9430 } 9431 9432 if (Res.getNode()) { 9433 APInt KnownZero, KnownOne; 9434 DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); 9435 // Capture demanded bits information that would be otherwise lost. 
9436 if (KnownZero == 0xfffffffe) 9437 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 9438 DAG.getValueType(MVT::i1)); 9439 else if (KnownZero == 0xffffff00) 9440 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 9441 DAG.getValueType(MVT::i8)); 9442 else if (KnownZero == 0xffff0000) 9443 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 9444 DAG.getValueType(MVT::i16)); 9445 } 9446 9447 return Res; 9448 } 9449 9450 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 9451 DAGCombinerInfo &DCI) const { 9452 switch (N->getOpcode()) { 9453 default: break; 9454 case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); 9455 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 9456 case ISD::SUB: return PerformSUBCombine(N, DCI); 9457 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 9458 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 9459 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); 9460 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); 9461 case ARMISD::BFI: return PerformBFICombine(N, DCI); 9462 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 9463 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 9464 case ISD::STORE: return PerformSTORECombine(N, DCI); 9465 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 9466 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 9467 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 9468 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 9469 case ISD::FP_TO_SINT: 9470 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 9471 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 9472 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 9473 case ISD::SHL: 9474 case ISD::SRA: 9475 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 9476 case ISD::SIGN_EXTEND: 9477 case ISD::ZERO_EXTEND: 9478 case 
ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 9479 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 9480 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); 9481 case ARMISD::VLD2DUP: 9482 case ARMISD::VLD3DUP: 9483 case ARMISD::VLD4DUP: 9484 return CombineBaseUpdate(N, DCI); 9485 case ISD::INTRINSIC_VOID: 9486 case ISD::INTRINSIC_W_CHAIN: 9487 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9488 case Intrinsic::arm_neon_vld1: 9489 case Intrinsic::arm_neon_vld2: 9490 case Intrinsic::arm_neon_vld3: 9491 case Intrinsic::arm_neon_vld4: 9492 case Intrinsic::arm_neon_vld2lane: 9493 case Intrinsic::arm_neon_vld3lane: 9494 case Intrinsic::arm_neon_vld4lane: 9495 case Intrinsic::arm_neon_vst1: 9496 case Intrinsic::arm_neon_vst2: 9497 case Intrinsic::arm_neon_vst3: 9498 case Intrinsic::arm_neon_vst4: 9499 case Intrinsic::arm_neon_vst2lane: 9500 case Intrinsic::arm_neon_vst3lane: 9501 case Intrinsic::arm_neon_vst4lane: 9502 return CombineBaseUpdate(N, DCI); 9503 default: break; 9504 } 9505 break; 9506 } 9507 return SDValue(); 9508 } 9509 9510 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 9511 EVT VT) const { 9512 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 9513 } 9514 9515 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { 9516 // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus 9517 bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); 9518 9519 switch (VT.getSimpleVT().SimpleTy) { 9520 default: 9521 return false; 9522 case MVT::i8: 9523 case MVT::i16: 9524 case MVT::i32: { 9525 // Unaligned access can use (for example) LRDB, LRDH, LDR 9526 if (AllowsUnaligned) { 9527 if (Fast) 9528 *Fast = Subtarget->hasV7Ops(); 9529 return true; 9530 } 9531 return false; 9532 } 9533 case MVT::f64: 9534 case MVT::v2f64: { 9535 // For any little-endian targets with neon, we can support unaligned ld/st 9536 // of D and Q 
(e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. 9537 // A big-endian target may also explictly support unaligned accesses 9538 if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { 9539 if (Fast) 9540 *Fast = true; 9541 return true; 9542 } 9543 return false; 9544 } 9545 } 9546 } 9547 9548 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, 9549 unsigned AlignCheck) { 9550 return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && 9551 (DstAlign == 0 || DstAlign % AlignCheck == 0)); 9552 } 9553 9554 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, 9555 unsigned DstAlign, unsigned SrcAlign, 9556 bool IsMemset, bool ZeroMemset, 9557 bool MemcpyStrSrc, 9558 MachineFunction &MF) const { 9559 const Function *F = MF.getFunction(); 9560 9561 // See if we can use NEON instructions for this... 9562 if ((!IsMemset || ZeroMemset) && 9563 Subtarget->hasNEON() && 9564 !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, 9565 Attribute::NoImplicitFloat)) { 9566 bool Fast; 9567 if (Size >= 16 && 9568 (memOpAlign(SrcAlign, DstAlign, 16) || 9569 (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) { 9570 return MVT::v2f64; 9571 } else if (Size >= 8 && 9572 (memOpAlign(SrcAlign, DstAlign, 8) || 9573 (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) { 9574 return MVT::f64; 9575 } 9576 } 9577 9578 // Lowering to i32/i16 if the size permits. 9579 if (Size >= 4) 9580 return MVT::i32; 9581 else if (Size >= 2) 9582 return MVT::i16; 9583 9584 // Let the target-independent logic figure it out. 
9585 return MVT::Other; 9586 } 9587 9588 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 9589 if (Val.getOpcode() != ISD::LOAD) 9590 return false; 9591 9592 EVT VT1 = Val.getValueType(); 9593 if (!VT1.isSimple() || !VT1.isInteger() || 9594 !VT2.isSimple() || !VT2.isInteger()) 9595 return false; 9596 9597 switch (VT1.getSimpleVT().SimpleTy) { 9598 default: break; 9599 case MVT::i1: 9600 case MVT::i8: 9601 case MVT::i16: 9602 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. 9603 return true; 9604 } 9605 9606 return false; 9607 } 9608 9609 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 9610 if (V < 0) 9611 return false; 9612 9613 unsigned Scale = 1; 9614 switch (VT.getSimpleVT().SimpleTy) { 9615 default: return false; 9616 case MVT::i1: 9617 case MVT::i8: 9618 // Scale == 1; 9619 break; 9620 case MVT::i16: 9621 // Scale == 2; 9622 Scale = 2; 9623 break; 9624 case MVT::i32: 9625 // Scale == 4; 9626 Scale = 4; 9627 break; 9628 } 9629 9630 if ((V & (Scale - 1)) != 0) 9631 return false; 9632 V /= Scale; 9633 return V == (V & ((1LL << 5) - 1)); 9634 } 9635 9636 static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 9637 const ARMSubtarget *Subtarget) { 9638 bool isNeg = false; 9639 if (V < 0) { 9640 isNeg = true; 9641 V = - V; 9642 } 9643 9644 switch (VT.getSimpleVT().SimpleTy) { 9645 default: return false; 9646 case MVT::i1: 9647 case MVT::i8: 9648 case MVT::i16: 9649 case MVT::i32: 9650 // + imm12 or - imm8 9651 if (isNeg) 9652 return V == (V & ((1LL << 8) - 1)); 9653 return V == (V & ((1LL << 12) - 1)); 9654 case MVT::f32: 9655 case MVT::f64: 9656 // Same as ARM mode. FIXME: NEON? 9657 if (!Subtarget->hasVFP2()) 9658 return false; 9659 if ((V & 3) != 0) 9660 return false; 9661 V >>= 2; 9662 return V == (V & ((1LL << 8) - 1)); 9663 } 9664 } 9665 9666 /// isLegalAddressImmediate - Return true if the integer value can be used 9667 /// as the offset of the target addressing mode for load / store of the 9668 /// given type. 
9669 static bool isLegalAddressImmediate(int64_t V, EVT VT, 9670 const ARMSubtarget *Subtarget) { 9671 if (V == 0) 9672 return true; 9673 9674 if (!VT.isSimple()) 9675 return false; 9676 9677 if (Subtarget->isThumb1Only()) 9678 return isLegalT1AddressImmediate(V, VT); 9679 else if (Subtarget->isThumb2()) 9680 return isLegalT2AddressImmediate(V, VT, Subtarget); 9681 9682 // ARM mode. 9683 if (V < 0) 9684 V = - V; 9685 switch (VT.getSimpleVT().SimpleTy) { 9686 default: return false; 9687 case MVT::i1: 9688 case MVT::i8: 9689 case MVT::i32: 9690 // +- imm12 9691 return V == (V & ((1LL << 12) - 1)); 9692 case MVT::i16: 9693 // +- imm8 9694 return V == (V & ((1LL << 8) - 1)); 9695 case MVT::f32: 9696 case MVT::f64: 9697 if (!Subtarget->hasVFP2()) // FIXME: NEON? 9698 return false; 9699 if ((V & 3) != 0) 9700 return false; 9701 V >>= 2; 9702 return V == (V & ((1LL << 8) - 1)); 9703 } 9704 } 9705 9706 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 9707 EVT VT) const { 9708 int Scale = AM.Scale; 9709 if (Scale < 0) 9710 return false; 9711 9712 switch (VT.getSimpleVT().SimpleTy) { 9713 default: return false; 9714 case MVT::i1: 9715 case MVT::i8: 9716 case MVT::i16: 9717 case MVT::i32: 9718 if (Scale == 1) 9719 return true; 9720 // r + r << imm 9721 Scale = Scale & ~1; 9722 return Scale == 2 || Scale == 4 || Scale == 8; 9723 case MVT::i64: 9724 // r + r 9725 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 9726 return true; 9727 return false; 9728 case MVT::isVoid: 9729 // Note, we allow "void" uses (basically, uses that aren't loads or 9730 // stores), because arm allows folding a scale into many arithmetic 9731 // operations. This should be made more precise and revisited later. 9732 9733 // Allow r << imm, but the imm has to be a multiple of two. 
9734 if (Scale & 1) return false; 9735 return isPowerOf2_32(Scale); 9736 } 9737 } 9738 9739 /// isLegalAddressingMode - Return true if the addressing mode represented 9740 /// by AM is legal for this target, for a load/store of the specified type. 9741 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 9742 Type *Ty) const { 9743 EVT VT = getValueType(Ty, true); 9744 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 9745 return false; 9746 9747 // Can never fold addr of global into load/store. 9748 if (AM.BaseGV) 9749 return false; 9750 9751 switch (AM.Scale) { 9752 case 0: // no scale reg, must be "r+i" or "r", or "i". 9753 break; 9754 case 1: 9755 if (Subtarget->isThumb1Only()) 9756 return false; 9757 // FALL THROUGH. 9758 default: 9759 // ARM doesn't support any R+R*scale+imm addr modes. 9760 if (AM.BaseOffs) 9761 return false; 9762 9763 if (!VT.isSimple()) 9764 return false; 9765 9766 if (Subtarget->isThumb2()) 9767 return isLegalT2ScaledAddressingMode(AM, VT); 9768 9769 int Scale = AM.Scale; 9770 switch (VT.getSimpleVT().SimpleTy) { 9771 default: return false; 9772 case MVT::i1: 9773 case MVT::i8: 9774 case MVT::i32: 9775 if (Scale < 0) Scale = -Scale; 9776 if (Scale == 1) 9777 return true; 9778 // r + r << imm 9779 return isPowerOf2_32(Scale & ~1); 9780 case MVT::i16: 9781 case MVT::i64: 9782 // r + r 9783 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 9784 return true; 9785 return false; 9786 9787 case MVT::isVoid: 9788 // Note, we allow "void" uses (basically, uses that aren't loads or 9789 // stores), because arm allows folding a scale into many arithmetic 9790 // operations. This should be made more precise and revisited later. 9791 9792 // Allow r << imm, but the imm has to be a multiple of two. 
9793 if (Scale & 1) return false; 9794 return isPowerOf2_32(Scale); 9795 } 9796 } 9797 return true; 9798 } 9799 9800 /// isLegalICmpImmediate - Return true if the specified immediate is legal 9801 /// icmp immediate, that is the target has icmp instructions which can compare 9802 /// a register against the immediate without having to materialize the 9803 /// immediate into a register. 9804 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 9805 // Thumb2 and ARM modes can use cmn for negative immediates. 9806 if (!Subtarget->isThumb()) 9807 return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; 9808 if (Subtarget->isThumb2()) 9809 return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; 9810 // Thumb1 doesn't have cmn, and only 8-bit immediates. 9811 return Imm >= 0 && Imm <= 255; 9812 } 9813 9814 /// isLegalAddImmediate - Return true if the specified immediate is a legal add 9815 /// *or sub* immediate, that is the target has add or sub instructions which can 9816 /// add a register with the immediate without having to materialize the 9817 /// immediate into a register. 9818 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 9819 // Same encoding for add/sub, just flip the sign. 9820 int64_t AbsImm = llvm::abs64(Imm); 9821 if (!Subtarget->isThumb()) 9822 return ARM_AM::getSOImmVal(AbsImm) != -1; 9823 if (Subtarget->isThumb2()) 9824 return ARM_AM::getT2SOImmVal(AbsImm) != -1; 9825 // Thumb1 only has 8-bit unsigned immediate. 
9826 return AbsImm >= 0 && AbsImm <= 255; 9827 } 9828 9829 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 9830 bool isSEXTLoad, SDValue &Base, 9831 SDValue &Offset, bool &isInc, 9832 SelectionDAG &DAG) { 9833 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 9834 return false; 9835 9836 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 9837 // AddressingMode 3 9838 Base = Ptr->getOperand(0); 9839 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 9840 int RHSC = (int)RHS->getZExtValue(); 9841 if (RHSC < 0 && RHSC > -256) { 9842 assert(Ptr->getOpcode() == ISD::ADD); 9843 isInc = false; 9844 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 9845 return true; 9846 } 9847 } 9848 isInc = (Ptr->getOpcode() == ISD::ADD); 9849 Offset = Ptr->getOperand(1); 9850 return true; 9851 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 9852 // AddressingMode 2 9853 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 9854 int RHSC = (int)RHS->getZExtValue(); 9855 if (RHSC < 0 && RHSC > -0x1000) { 9856 assert(Ptr->getOpcode() == ISD::ADD); 9857 isInc = false; 9858 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 9859 Base = Ptr->getOperand(0); 9860 return true; 9861 } 9862 } 9863 9864 if (Ptr->getOpcode() == ISD::ADD) { 9865 isInc = true; 9866 ARM_AM::ShiftOpc ShOpcVal= 9867 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 9868 if (ShOpcVal != ARM_AM::no_shift) { 9869 Base = Ptr->getOperand(1); 9870 Offset = Ptr->getOperand(0); 9871 } else { 9872 Base = Ptr->getOperand(0); 9873 Offset = Ptr->getOperand(1); 9874 } 9875 return true; 9876 } 9877 9878 isInc = (Ptr->getOpcode() == ISD::ADD); 9879 Base = Ptr->getOperand(0); 9880 Offset = Ptr->getOperand(1); 9881 return true; 9882 } 9883 9884 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 
9885 return false; 9886 } 9887 9888 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 9889 bool isSEXTLoad, SDValue &Base, 9890 SDValue &Offset, bool &isInc, 9891 SelectionDAG &DAG) { 9892 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 9893 return false; 9894 9895 Base = Ptr->getOperand(0); 9896 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 9897 int RHSC = (int)RHS->getZExtValue(); 9898 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 9899 assert(Ptr->getOpcode() == ISD::ADD); 9900 isInc = false; 9901 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 9902 return true; 9903 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 9904 isInc = Ptr->getOpcode() == ISD::ADD; 9905 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 9906 return true; 9907 } 9908 } 9909 9910 return false; 9911 } 9912 9913 /// getPreIndexedAddressParts - returns true by value, base pointer and 9914 /// offset pointer and addressing mode by reference if the node's address 9915 /// can be legally represented as pre-indexed load / store address. 
9916 bool 9917 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 9918 SDValue &Offset, 9919 ISD::MemIndexedMode &AM, 9920 SelectionDAG &DAG) const { 9921 if (Subtarget->isThumb1Only()) 9922 return false; 9923 9924 EVT VT; 9925 SDValue Ptr; 9926 bool isSEXTLoad = false; 9927 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 9928 Ptr = LD->getBasePtr(); 9929 VT = LD->getMemoryVT(); 9930 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 9931 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 9932 Ptr = ST->getBasePtr(); 9933 VT = ST->getMemoryVT(); 9934 } else 9935 return false; 9936 9937 bool isInc; 9938 bool isLegal = false; 9939 if (Subtarget->isThumb2()) 9940 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 9941 Offset, isInc, DAG); 9942 else 9943 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 9944 Offset, isInc, DAG); 9945 if (!isLegal) 9946 return false; 9947 9948 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 9949 return true; 9950 } 9951 9952 /// getPostIndexedAddressParts - returns true by value, base pointer and 9953 /// offset pointer and addressing mode by reference if this node can be 9954 /// combined with a load / store to form a post-indexed load / store. 
9955 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 9956 SDValue &Base, 9957 SDValue &Offset, 9958 ISD::MemIndexedMode &AM, 9959 SelectionDAG &DAG) const { 9960 if (Subtarget->isThumb1Only()) 9961 return false; 9962 9963 EVT VT; 9964 SDValue Ptr; 9965 bool isSEXTLoad = false; 9966 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 9967 VT = LD->getMemoryVT(); 9968 Ptr = LD->getBasePtr(); 9969 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 9970 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 9971 VT = ST->getMemoryVT(); 9972 Ptr = ST->getBasePtr(); 9973 } else 9974 return false; 9975 9976 bool isInc; 9977 bool isLegal = false; 9978 if (Subtarget->isThumb2()) 9979 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 9980 isInc, DAG); 9981 else 9982 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 9983 isInc, DAG); 9984 if (!isLegal) 9985 return false; 9986 9987 if (Ptr != Base) { 9988 // Swap base ptr and offset to catch more post-index load / store when 9989 // it's legal. In Thumb2 mode, offset must be an immediate. 9990 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 9991 !Subtarget->isThumb2()) 9992 std::swap(Base, Offset); 9993 9994 // Post-indexed load / store update the base pointer. 9995 if (Ptr != Base) 9996 return false; 9997 } 9998 9999 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 10000 return true; 10001 } 10002 10003 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 10004 APInt &KnownZero, 10005 APInt &KnownOne, 10006 const SelectionDAG &DAG, 10007 unsigned Depth) const { 10008 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); 10009 switch (Op.getOpcode()) { 10010 default: break; 10011 case ARMISD::CMOV: { 10012 // Bits are known zero/one if known on the LHS and RHS. 
10013 DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); 10014 if (KnownZero == 0 && KnownOne == 0) return; 10015 10016 APInt KnownZeroRHS, KnownOneRHS; 10017 DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); 10018 KnownZero &= KnownZeroRHS; 10019 KnownOne &= KnownOneRHS; 10020 return; 10021 } 10022 } 10023 } 10024 10025 //===----------------------------------------------------------------------===// 10026 // ARM Inline Assembly Support 10027 //===----------------------------------------------------------------------===// 10028 10029 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 10030 // Looking for "rev" which is V6+. 10031 if (!Subtarget->hasV6Ops()) 10032 return false; 10033 10034 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 10035 std::string AsmStr = IA->getAsmString(); 10036 SmallVector<StringRef, 4> AsmPieces; 10037 SplitString(AsmStr, AsmPieces, ";\n"); 10038 10039 switch (AsmPieces.size()) { 10040 default: return false; 10041 case 1: 10042 AsmStr = AsmPieces[0]; 10043 AsmPieces.clear(); 10044 SplitString(AsmStr, AsmPieces, " \t,"); 10045 10046 // rev $0, $1 10047 if (AsmPieces.size() == 3 && 10048 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 10049 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 10050 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 10051 if (Ty && Ty->getBitWidth() == 32) 10052 return IntrinsicLowering::LowerToByteSwap(CI); 10053 } 10054 break; 10055 } 10056 10057 return false; 10058 } 10059 10060 /// getConstraintType - Given a constraint letter, return the type of 10061 /// constraint it is for this target. 
10062 ARMTargetLowering::ConstraintType 10063 ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 10064 if (Constraint.size() == 1) { 10065 switch (Constraint[0]) { 10066 default: break; 10067 case 'l': return C_RegisterClass; 10068 case 'w': return C_RegisterClass; 10069 case 'h': return C_RegisterClass; 10070 case 'x': return C_RegisterClass; 10071 case 't': return C_RegisterClass; 10072 case 'j': return C_Other; // Constant for movw. 10073 // An address with a single base register. Due to the way we 10074 // currently handle addresses it is the same as an 'r' memory constraint. 10075 case 'Q': return C_Memory; 10076 } 10077 } else if (Constraint.size() == 2) { 10078 switch (Constraint[0]) { 10079 default: break; 10080 // All 'U+' constraints are addresses. 10081 case 'U': return C_Memory; 10082 } 10083 } 10084 return TargetLowering::getConstraintType(Constraint); 10085 } 10086 10087 /// Examine constraint type and operand type and determine a weight value. 10088 /// This object must already have been set up with the operand type 10089 /// and the current alternative constraint selected. 10090 TargetLowering::ConstraintWeight 10091 ARMTargetLowering::getSingleConstraintMatchWeight( 10092 AsmOperandInfo &info, const char *constraint) const { 10093 ConstraintWeight weight = CW_Invalid; 10094 Value *CallOperandVal = info.CallOperandVal; 10095 // If we don't have a value, we can't do a match, 10096 // but allow it at the lowest weight. 10097 if (CallOperandVal == NULL) 10098 return CW_Default; 10099 Type *type = CallOperandVal->getType(); 10100 // Look at the constraint type. 
10101 switch (*constraint) { 10102 default: 10103 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 10104 break; 10105 case 'l': 10106 if (type->isIntegerTy()) { 10107 if (Subtarget->isThumb()) 10108 weight = CW_SpecificReg; 10109 else 10110 weight = CW_Register; 10111 } 10112 break; 10113 case 'w': 10114 if (type->isFloatingPointTy()) 10115 weight = CW_Register; 10116 break; 10117 } 10118 return weight; 10119 } 10120 10121 typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 10122 RCPair 10123 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 10124 EVT VT) const { 10125 if (Constraint.size() == 1) { 10126 // GCC ARM Constraint Letters 10127 switch (Constraint[0]) { 10128 case 'l': // Low regs or general regs. 10129 if (Subtarget->isThumb()) 10130 return RCPair(0U, &ARM::tGPRRegClass); 10131 return RCPair(0U, &ARM::GPRRegClass); 10132 case 'h': // High regs or no regs. 10133 if (Subtarget->isThumb()) 10134 return RCPair(0U, &ARM::hGPRRegClass); 10135 break; 10136 case 'r': 10137 return RCPair(0U, &ARM::GPRRegClass); 10138 case 'w': 10139 if (VT == MVT::f32) 10140 return RCPair(0U, &ARM::SPRRegClass); 10141 if (VT.getSizeInBits() == 64) 10142 return RCPair(0U, &ARM::DPRRegClass); 10143 if (VT.getSizeInBits() == 128) 10144 return RCPair(0U, &ARM::QPRRegClass); 10145 break; 10146 case 'x': 10147 if (VT == MVT::f32) 10148 return RCPair(0U, &ARM::SPR_8RegClass); 10149 if (VT.getSizeInBits() == 64) 10150 return RCPair(0U, &ARM::DPR_8RegClass); 10151 if (VT.getSizeInBits() == 128) 10152 return RCPair(0U, &ARM::QPR_8RegClass); 10153 break; 10154 case 't': 10155 if (VT == MVT::f32) 10156 return RCPair(0U, &ARM::SPRRegClass); 10157 break; 10158 } 10159 } 10160 if (StringRef("{cc}").equals_lower(Constraint)) 10161 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); 10162 10163 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 10164 } 10165 10166 /// LowerAsmOperandForConstraint - 
Lower the specified operand into the Ops 10167 /// vector. If it is invalid, don't add anything to Ops. 10168 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 10169 std::string &Constraint, 10170 std::vector<SDValue>&Ops, 10171 SelectionDAG &DAG) const { 10172 SDValue Result(0, 0); 10173 10174 // Currently only support length 1 constraints. 10175 if (Constraint.length() != 1) return; 10176 10177 char ConstraintLetter = Constraint[0]; 10178 switch (ConstraintLetter) { 10179 default: break; 10180 case 'j': 10181 case 'I': case 'J': case 'K': case 'L': 10182 case 'M': case 'N': case 'O': 10183 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 10184 if (!C) 10185 return; 10186 10187 int64_t CVal64 = C->getSExtValue(); 10188 int CVal = (int) CVal64; 10189 // None of these constraints allow values larger than 32 bits. Check 10190 // that the value fits in an int. 10191 if (CVal != CVal64) 10192 return; 10193 10194 switch (ConstraintLetter) { 10195 case 'j': 10196 // Constant suitable for movw, must be between 0 and 10197 // 65535. 10198 if (Subtarget->hasV6T2Ops()) 10199 if (CVal >= 0 && CVal <= 65535) 10200 break; 10201 return; 10202 case 'I': 10203 if (Subtarget->isThumb1Only()) { 10204 // This must be a constant between 0 and 255, for ADD 10205 // immediates. 10206 if (CVal >= 0 && CVal <= 255) 10207 break; 10208 } else if (Subtarget->isThumb2()) { 10209 // A constant that can be used as an immediate value in a 10210 // data-processing instruction. 10211 if (ARM_AM::getT2SOImmVal(CVal) != -1) 10212 break; 10213 } else { 10214 // A constant that can be used as an immediate value in a 10215 // data-processing instruction. 10216 if (ARM_AM::getSOImmVal(CVal) != -1) 10217 break; 10218 } 10219 return; 10220 10221 case 'J': 10222 if (Subtarget->isThumb()) { // FIXME thumb2 10223 // This must be a constant between -255 and -1, for negated ADD 10224 // immediates. 
This can be used in GCC with an "n" modifier that 10225 // prints the negated value, for use with SUB instructions. It is 10226 // not useful otherwise but is implemented for compatibility. 10227 if (CVal >= -255 && CVal <= -1) 10228 break; 10229 } else { 10230 // This must be a constant between -4095 and 4095. It is not clear 10231 // what this constraint is intended for. Implemented for 10232 // compatibility with GCC. 10233 if (CVal >= -4095 && CVal <= 4095) 10234 break; 10235 } 10236 return; 10237 10238 case 'K': 10239 if (Subtarget->isThumb1Only()) { 10240 // A 32-bit value where only one byte has a nonzero value. Exclude 10241 // zero to match GCC. This constraint is used by GCC internally for 10242 // constants that can be loaded with a move/shift combination. 10243 // It is not useful otherwise but is implemented for compatibility. 10244 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 10245 break; 10246 } else if (Subtarget->isThumb2()) { 10247 // A constant whose bitwise inverse can be used as an immediate 10248 // value in a data-processing instruction. This can be used in GCC 10249 // with a "B" modifier that prints the inverted value, for use with 10250 // BIC and MVN instructions. It is not useful otherwise but is 10251 // implemented for compatibility. 10252 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 10253 break; 10254 } else { 10255 // A constant whose bitwise inverse can be used as an immediate 10256 // value in a data-processing instruction. This can be used in GCC 10257 // with a "B" modifier that prints the inverted value, for use with 10258 // BIC and MVN instructions. It is not useful otherwise but is 10259 // implemented for compatibility. 10260 if (ARM_AM::getSOImmVal(~CVal) != -1) 10261 break; 10262 } 10263 return; 10264 10265 case 'L': 10266 if (Subtarget->isThumb1Only()) { 10267 // This must be a constant between -7 and 7, 10268 // for 3-operand ADD/SUB immediate instructions. 
10269 if (CVal >= -7 && CVal < 7) 10270 break; 10271 } else if (Subtarget->isThumb2()) { 10272 // A constant whose negation can be used as an immediate value in a 10273 // data-processing instruction. This can be used in GCC with an "n" 10274 // modifier that prints the negated value, for use with SUB 10275 // instructions. It is not useful otherwise but is implemented for 10276 // compatibility. 10277 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 10278 break; 10279 } else { 10280 // A constant whose negation can be used as an immediate value in a 10281 // data-processing instruction. This can be used in GCC with an "n" 10282 // modifier that prints the negated value, for use with SUB 10283 // instructions. It is not useful otherwise but is implemented for 10284 // compatibility. 10285 if (ARM_AM::getSOImmVal(-CVal) != -1) 10286 break; 10287 } 10288 return; 10289 10290 case 'M': 10291 if (Subtarget->isThumb()) { // FIXME thumb2 10292 // This must be a multiple of 4 between 0 and 1020, for 10293 // ADD sp + immediate. 10294 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 10295 break; 10296 } else { 10297 // A power of two or a constant between 0 and 32. This is used in 10298 // GCC for the shift amount on shifted register operands, but it is 10299 // useful in general for any shift amounts. 10300 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 10301 break; 10302 } 10303 return; 10304 10305 case 'N': 10306 if (Subtarget->isThumb()) { // FIXME thumb2 10307 // This must be a constant between 0 and 31, for shift amounts. 10308 if (CVal >= 0 && CVal <= 31) 10309 break; 10310 } 10311 return; 10312 10313 case 'O': 10314 if (Subtarget->isThumb()) { // FIXME thumb2 10315 // This must be a multiple of 4 between -508 and 508, for 10316 // ADD/SUB sp = sp + immediate. 
10317 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 10318 break; 10319 } 10320 return; 10321 } 10322 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 10323 break; 10324 } 10325 10326 if (Result.getNode()) { 10327 Ops.push_back(Result); 10328 return; 10329 } 10330 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 10331 } 10332 10333 bool 10334 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 10335 // The ARM target isn't yet aware of offsets. 10336 return false; 10337 } 10338 10339 bool ARM::isBitFieldInvertedMask(unsigned v) { 10340 if (v == 0xffffffff) 10341 return 0; 10342 // there can be 1's on either or both "outsides", all the "inside" 10343 // bits must be 0's 10344 unsigned int lsb = 0, msb = 31; 10345 while (v & (1 << msb)) --msb; 10346 while (v & (1 << lsb)) ++lsb; 10347 for (unsigned int i = lsb; i <= msb; ++i) { 10348 if (v & (1 << i)) 10349 return 0; 10350 } 10351 return 1; 10352 } 10353 10354 /// isFPImmLegal - Returns true if the target can instruction select the 10355 /// specified FP immediate natively. If false, the legalizer will 10356 /// materialize the FP immediate as a load from a constant pool. 10357 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 10358 if (!Subtarget->hasVFP3()) 10359 return false; 10360 if (VT == MVT::f32) 10361 return ARM_AM::getFP32Imm(Imm) != -1; 10362 if (VT == MVT::f64) 10363 return ARM_AM::getFP64Imm(Imm) != -1; 10364 return false; 10365 } 10366 10367 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as 10368 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment 10369 /// specified in the intrinsic calls. 
10370 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 10371 const CallInst &I, 10372 unsigned Intrinsic) const { 10373 switch (Intrinsic) { 10374 case Intrinsic::arm_neon_vld1: 10375 case Intrinsic::arm_neon_vld2: 10376 case Intrinsic::arm_neon_vld3: 10377 case Intrinsic::arm_neon_vld4: 10378 case Intrinsic::arm_neon_vld2lane: 10379 case Intrinsic::arm_neon_vld3lane: 10380 case Intrinsic::arm_neon_vld4lane: { 10381 Info.opc = ISD::INTRINSIC_W_CHAIN; 10382 // Conservatively set memVT to the entire set of vectors loaded. 10383 uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; 10384 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 10385 Info.ptrVal = I.getArgOperand(0); 10386 Info.offset = 0; 10387 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 10388 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 10389 Info.vol = false; // volatile loads with NEON intrinsics not supported 10390 Info.readMem = true; 10391 Info.writeMem = false; 10392 return true; 10393 } 10394 case Intrinsic::arm_neon_vst1: 10395 case Intrinsic::arm_neon_vst2: 10396 case Intrinsic::arm_neon_vst3: 10397 case Intrinsic::arm_neon_vst4: 10398 case Intrinsic::arm_neon_vst2lane: 10399 case Intrinsic::arm_neon_vst3lane: 10400 case Intrinsic::arm_neon_vst4lane: { 10401 Info.opc = ISD::INTRINSIC_VOID; 10402 // Conservatively set memVT to the entire set of vectors stored. 
10403 unsigned NumElts = 0; 10404 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 10405 Type *ArgTy = I.getArgOperand(ArgI)->getType(); 10406 if (!ArgTy->isVectorTy()) 10407 break; 10408 NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; 10409 } 10410 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 10411 Info.ptrVal = I.getArgOperand(0); 10412 Info.offset = 0; 10413 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 10414 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 10415 Info.vol = false; // volatile stores with NEON intrinsics not supported 10416 Info.readMem = false; 10417 Info.writeMem = true; 10418 return true; 10419 } 10420 case Intrinsic::arm_strexd: { 10421 Info.opc = ISD::INTRINSIC_W_CHAIN; 10422 Info.memVT = MVT::i64; 10423 Info.ptrVal = I.getArgOperand(2); 10424 Info.offset = 0; 10425 Info.align = 8; 10426 Info.vol = true; 10427 Info.readMem = false; 10428 Info.writeMem = true; 10429 return true; 10430 } 10431 case Intrinsic::arm_ldrexd: { 10432 Info.opc = ISD::INTRINSIC_W_CHAIN; 10433 Info.memVT = MVT::i64; 10434 Info.ptrVal = I.getArgOperand(0); 10435 Info.offset = 0; 10436 Info.align = 8; 10437 Info.vol = true; 10438 Info.readMem = true; 10439 Info.writeMem = false; 10440 return true; 10441 } 10442 default: 10443 break; 10444 } 10445 10446 return false; 10447 } 10448