//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
               LLVMContext &C, ParmContext PC)
        : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
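// (r0-r3 carry the first four words of arguments under both APCS and AAPCS;
// the name here reflects the original APCS terminology.)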
static const uint16_t GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT, VT, Expand);
  setOperationAction(ISD::SELECT_CC, VT, Expand);
  setOperationAction(ISD::VSELECT, VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR,  VT, Promote);
    AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);
}

void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::QPRRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetIOS()) {
    // Uses VFP for Thumb libfuncs if available.
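    // (These *vfp entry points are hard-float variants of the generic
    // soft-float helpers: they take and return values in VFP registers,
    // sparing Thumb code the usual GPR marshalling.)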
    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
        Subtarget->hasARMOps()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
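  // (Clearing the name with a null pointer removes the default entry, so
  // legalization expands these 128-bit shifts inline instead of calling a
  // runtime routine that does not exist on this target.)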
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);

    // Floating-point to integer conversions.
    // RTABI chapter 4.1.2, Table 6
    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);

    // Conversions between floating types.
    // RTABI chapter 4.1.2, Table 7
    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
    setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
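  // (A single divmod call yields both the quotient and the remainder, which
  // is cheaper than issuing separate divide and modulo libcalls.)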
  if (Subtarget->getTargetTriple().isiOS() &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, &ARM::DPRRegClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    // The same applies to v4f32, though keep in mind that vadd, vsub, and
    // vmul are natively supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create unittest.
    // In other words, find a way to make "copysign" appear in the DAG with
    // vector operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
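    // (For reference, an FNEG of this type would come from IR such as
    //   fsub <2 x double> <double -0.0, double -0.0>, %v
    // and an FABS from a call to @llvm.fabs.v2f64.)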
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    setOperationAction(ISD::FMA, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

    // Mark v2f32 intrinsics.
    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
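    // (A MUL whose operands are both sign- or zero-extended from a narrower
    // vector, e.g. mul (sext <8 x i8> to <8 x i16>), (sext <8 x i8> to
    // <8 x i16>), can be selected as a single vmull.s8/vmull.u8 rather than
    // widening both operands and multiplying.)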
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, nor does it have a
    // FP_TO_[SU]INT instruction with a narrower destination than source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);

    // NEON does not have single instruction CTPOP for vectors with element
    // types wider than 8-bits. However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.
    setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
    setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);

    // NEON only has FMA instructions as of VFP4.
    if (!Subtarget->hasVFP4()) {
      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    }

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
                  MVT::v4i16, MVT::v2i16,
                  MVT::v2i32};
    for (unsigned i = 0; i < 6; ++i) {
      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    }
  }

  // ARM and Thumb2 support UMLAL/SMLAL.
  if (!Subtarget->isThumb1Only())
    setTargetDAGCombine(ISD::ADDC);

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
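  // (That is, pre- and post-indexed addressing with increment or decrement,
  // e.g. "ldr r0, [r1, #4]!" for a pre-indexed load with writeback and
  // "ldr r0, [r1], #4" for a post-indexed one.)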
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() ||
      (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC, MVT::i32, Custom);
    setOperationAction(ISD::ADDE, MVT::i32, Custom);
    setOperationAction(ISD::SUBC, MVT::i32, Custom);
    setOperationAction(ISD::SUBE, MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 and up have BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    // These are expanded into libcalls if the CPU doesn't have a HW divider.
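    // (Without sdiv/udiv instructions, an i32 division becomes a runtime
    // call, e.g. __aeabi_idiv under AAPCS or __divsi3 elsewhere.)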
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }

  // FIXME: Also set divmod for SREM on EABI
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  // Register based DivRem for AEABI (RTABI 4.2)
  if (Subtarget->isTargetAEABI()) {
    setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");

    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);

    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  }

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  if (!Subtarget->isTargetDarwin()) {
    // Non-Darwin platforms may return values in these registers via the
    // personality function.
    setExceptionPointerRegister(ARM::R0);
    setExceptionSelectorRegister(ARM::R1);
  }

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
    // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
    // handled normally.
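    // (The custom lowering turns ATOMIC_FENCE into a dmb, or into the
    // equivalent mcr p15 barrier write on cores without dmb; see
    // ARMISD::MEMBARRIER_MCR in getTargetNodeName below.)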
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    // Custom lowering for 64-bit ops
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
    // On v8, we have particularly efficient implementations of atomic fences
    // if they can be combined with nearby atomic loads and stores.
    if (!Subtarget->hasV8Ops()) {
      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
      setInsertFencesForAtomic(true);
    }
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
  } else {
    // If there's anything we can use as a barrier, go through custom lowering
    // for ATOMIC_FENCE.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);

    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
  }

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
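  // (On Darwin, exception handling is setjmp/longjmp based, which is why the
  // EH_SJLJ nodes below get custom lowering and the unwinder's resume entry
  // point is redirected to _Unwind_SjLj_Resume.)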
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // Combine sin / cos into one node or libcall if possible.
  if (Subtarget->hasSinCos()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
      // For iOS, we don't want the normal expansion of a libcall to sincos.
      // We want to issue a libcall to __sincos_stret instead.
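      // (__sincos_stret computes both results in a single call and returns
      // them as a small struct in registers, saving a second libcall.)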
      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
      setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  MaxStoresPerMemset = 8;
  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->isLikeA9();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
                                  bool isThumb2, unsigned &LdrOpc,
                                  unsigned &StrOpc) {
  static const unsigned LoadBares[4][2] =  {{ARM::LDREXB, ARM::t2LDREXB},
                                            {ARM::LDREXH, ARM::t2LDREXH},
                                            {ARM::LDREX,  ARM::t2LDREX},
                                            {ARM::LDREXD, ARM::t2LDREXD}};
  static const unsigned LoadAcqs[4][2] =   {{ARM::LDAEXB, ARM::t2LDAEXB},
                                            {ARM::LDAEXH, ARM::t2LDAEXH},
                                            {ARM::LDAEX,  ARM::t2LDAEX},
                                            {ARM::LDAEXD, ARM::t2LDAEXD}};
  static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
                                            {ARM::STREXH, ARM::t2STREXH},
                                            {ARM::STREX,  ARM::t2STREX},
                                            {ARM::STREXD, ARM::t2STREXD}};
  static const unsigned StoreRels[4][2] =  {{ARM::STLEXB, ARM::t2STLEXB},
                                            {ARM::STLEXH, ARM::t2STLEXH},
                                            {ARM::STLEX,  ARM::t2STLEX},
                                            {ARM::STLEXD, ARM::t2STLEXD}};

  const unsigned (*LoadOps)[2], (*StoreOps)[2];
  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    LoadOps = LoadAcqs;
  else
    LoadOps = LoadBares;

  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    StoreOps = StoreRels;
  else
    StoreOps = StoreBares;

  assert(isPowerOf2_32(Size) && Size <= 8 &&
         "unsupported size for atomic binary op!");

  LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
  StrOpc = StoreOps[Log2_32(Size)][isThumb2];
}

// FIXME: It might make sense to define the representative register class as
// the nearest super-register that has a non-null superset. For example,
// DPR_VFP2 is a super-register of SPR, and DPR is a superset of DPR_VFP2.
// Consequently, SPR's representative would be DPR_VFP2. This should work well
// if register pressure tracking were modified such that a register use would
// increment the pressure of the register class's representative and all of
// its super classes' representatives transitively. We have not implemented
// this because of the difficulty prior to coalescing of modeling operand
// register classes due to the common occurrence of cross class copies and
// subregister insertions and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(MVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMN: return "ARMISD::CMN";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";

  case ARMISD::CMOV: return "ARMISD::CMOV";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::ADDC: return "ARMISD::ADDC";
  case ARMISD::ADDE: return "ARMISD::ADDE";
  case ARMISD::SUBC: return "ARMISD::SUBC";
  case ARMISD::SUBE: return "ARMISD::SUBE";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
  case ARMISD::VSHRu: return "ARMISD::VSHRu";
  case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
  case ARMISD::VSHRN: return "ARMISD::VSHRN";
  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
  case ARMISD::VDUP: return "ARMISD::VDUP";
  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
  case ARMISD::VEXT: return "ARMISD::VEXT";
  case ARMISD::VREV64: return "ARMISD::VREV64";
  case ARMISD::VREV32: return "ARMISD::VREV32";
  case ARMISD::VREV16: return "ARMISD::VREV16";
  case ARMISD::VZIP: return "ARMISD::VZIP";
  case ARMISD::VUZP: return "ARMISD::VUZP";
  case ARMISD::VTRN: return "ARMISD::VTRN";
  case ARMISD::VTBL1: return "ARMISD::VTBL1";
  case ARMISD::VTBL2: return "ARMISD::VTBL2";
  case ARMISD::VMULLs: return "ARMISD::VMULLs";
  case ARMISD::VMULLu: return "ARMISD::VMULLu";
  case ARMISD::UMLAL: return "ARMISD::UMLAL";
  case ARMISD::SMLAL: return "ARMISD::SMLAL";
  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX: return "ARMISD::FMAX";
  case ARMISD::FMIN: return "ARMISD::FMIN";
  case ARMISD::VMAXNM: return "ARMISD::VMAXNM";
  case ARMISD::VMINNM: return "ARMISD::VMINNM";
  case ARMISD::BFI: return "ARMISD::BFI";
  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
  case ARMISD::VBSL: return "ARMISD::VBSL";
  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
  }
}

EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return getPointerTy();
  return VT.changeVectorElementTypeToInteger();
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
1200 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 1201 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 1202 1203 if (MCID.getNumDefs() == 0) 1204 return Sched::RegPressure; 1205 if (!Itins->isEmpty() && 1206 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) 1207 return Sched::ILP; 1208 1209 return Sched::RegPressure; 1210 } 1211 1212 //===----------------------------------------------------------------------===// 1213 // Lowering Code 1214 //===----------------------------------------------------------------------===// 1215 1216 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 1217 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 1218 switch (CC) { 1219 default: llvm_unreachable("Unknown condition code!"); 1220 case ISD::SETNE: return ARMCC::NE; 1221 case ISD::SETEQ: return ARMCC::EQ; 1222 case ISD::SETGT: return ARMCC::GT; 1223 case ISD::SETGE: return ARMCC::GE; 1224 case ISD::SETLT: return ARMCC::LT; 1225 case ISD::SETLE: return ARMCC::LE; 1226 case ISD::SETUGT: return ARMCC::HI; 1227 case ISD::SETUGE: return ARMCC::HS; 1228 case ISD::SETULT: return ARMCC::LO; 1229 case ISD::SETULE: return ARMCC::LS; 1230 } 1231 } 1232 1233 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 1234 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 1235 ARMCC::CondCodes &CondCode2) { 1236 CondCode2 = ARMCC::AL; 1237 switch (CC) { 1238 default: llvm_unreachable("Unknown FP condition!"); 1239 case ISD::SETEQ: 1240 case ISD::SETOEQ: CondCode = ARMCC::EQ; break; 1241 case ISD::SETGT: 1242 case ISD::SETOGT: CondCode = ARMCC::GT; break; 1243 case ISD::SETGE: 1244 case ISD::SETOGE: CondCode = ARMCC::GE; break; 1245 case ISD::SETOLT: CondCode = ARMCC::MI; break; 1246 case ISD::SETOLE: CondCode = ARMCC::LS; break; 1247 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; 1248 case ISD::SETO: CondCode = ARMCC::VC; break; 1249 case ISD::SETUO: CondCode = ARMCC::VS; break; 1250 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; 1251 case ISD::SETUGT: CondCode = ARMCC::HI; break; 1252 case ISD::SETUGE: CondCode = ARMCC::PL; break; 1253 case ISD::SETLT: 1254 case ISD::SETULT: CondCode = ARMCC::LT; break; 1255 case ISD::SETLE: 1256 case ISD::SETULE: CondCode = ARMCC::LE; break; 1257 case ISD::SETNE: 1258 case ISD::SETUNE: CondCode = ARMCC::NE; break; 1259 } 1260 } 1261 1262 //===----------------------------------------------------------------------===// 1263 // Calling Convention Implementation 1264 //===----------------------------------------------------------------------===// 1265 1266 #include "ARMGenCallingConv.inc" 1267 1268 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the 1269 /// given CallingConvention value. 1270 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, 1271 bool Return, 1272 bool isVarArg) const { 1273 switch (CC) { 1274 default: 1275 llvm_unreachable("Unsupported calling convention"); 1276 case CallingConv::Fast: 1277 if (Subtarget->hasVFP2() && !isVarArg) { 1278 if (!Subtarget->isAAPCS_ABI()) 1279 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); 1280 // For AAPCS ABI targets, just use VFP variant of the calling convention. 1281 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1282 } 1283 // Fallthrough 1284 case CallingConv::C: { 1285 // Use target triple & subtarget features to do actual dispatch. 1286 if (!Subtarget->isAAPCS_ABI()) 1287 return (Return ? 
RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fallthrough
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals,
                                   bool isThisReturn, SDValue ThisVal) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass 'this' value directly from the argument to return value, to avoid
    // reg unit interference
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
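      // Illustrative sketch: under a soft-float return convention an f64
      // comes back as two i32 halves in consecutive GPRs (e.g. R0/R1) and is
      // reassembled as f64 = ARMISD::VMOVDRR(Lo, Hi) by the code below.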
1341 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1342 InFlag); 1343 Chain = Lo.getValue(1); 1344 InFlag = Lo.getValue(2); 1345 VA = RVLocs[++i]; // skip ahead to next loc 1346 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1347 InFlag); 1348 Chain = Hi.getValue(1); 1349 InFlag = Hi.getValue(2); 1350 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1351 1352 if (VA.getLocVT() == MVT::v2f64) { 1353 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1354 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1355 DAG.getConstant(0, MVT::i32)); 1356 1357 VA = RVLocs[++i]; // skip ahead to next loc 1358 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1359 Chain = Lo.getValue(1); 1360 InFlag = Lo.getValue(2); 1361 VA = RVLocs[++i]; // skip ahead to next loc 1362 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1363 Chain = Hi.getValue(1); 1364 InFlag = Hi.getValue(2); 1365 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1366 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1367 DAG.getConstant(1, MVT::i32)); 1368 } 1369 } else { 1370 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1371 InFlag); 1372 Chain = Val.getValue(1); 1373 InFlag = Val.getValue(2); 1374 } 1375 1376 switch (VA.getLocInfo()) { 1377 default: llvm_unreachable("Unknown loc info!"); 1378 case CCValAssign::Full: break; 1379 case CCValAssign::BCvt: 1380 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 1381 break; 1382 } 1383 1384 InVals.push_back(Val); 1385 } 1386 1387 return Chain; 1388 } 1389 1390 /// LowerMemOpCallTo - Store the argument to the stack. 1391 SDValue 1392 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 1393 SDValue StackPtr, SDValue Arg, 1394 SDLoc dl, SelectionDAG &DAG, 1395 const CCValAssign &VA, 1396 ISD::ArgFlagsTy Flags) const { 1397 unsigned LocMemOffset = VA.getLocMemOffset(); 1398 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1399 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 1400 return DAG.getStore(Chain, dl, Arg, PtrOff, 1401 MachinePointerInfo::getStack(LocMemOffset), 1402 false, false, 0); 1403 } 1404 1405 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, 1406 SDValue Chain, SDValue &Arg, 1407 RegsToPassVector &RegsToPass, 1408 CCValAssign &VA, CCValAssign &NextVA, 1409 SDValue &StackPtr, 1410 SmallVectorImpl<SDValue> &MemOpChains, 1411 ISD::ArgFlagsTy Flags) const { 1412 1413 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1414 DAG.getVTList(MVT::i32, MVT::i32), Arg); 1415 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 1416 1417 if (NextVA.isRegLoc()) 1418 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 1419 else { 1420 assert(NextVA.isMemLoc()); 1421 if (StackPtr.getNode() == 0) 1422 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1423 1424 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 1425 dl, DAG, NextVA, 1426 Flags)); 1427 } 1428 } 1429 1430 /// LowerCall - Lowering a call into a callseq_start <- 1431 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 1432 /// nodes. 
1433 SDValue 1434 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 1435 SmallVectorImpl<SDValue> &InVals) const { 1436 SelectionDAG &DAG = CLI.DAG; 1437 SDLoc &dl = CLI.DL; 1438 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1439 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1440 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1441 SDValue Chain = CLI.Chain; 1442 SDValue Callee = CLI.Callee; 1443 bool &isTailCall = CLI.IsTailCall; 1444 CallingConv::ID CallConv = CLI.CallConv; 1445 bool doesNotRet = CLI.DoesNotReturn; 1446 bool isVarArg = CLI.IsVarArg; 1447 1448 MachineFunction &MF = DAG.getMachineFunction(); 1449 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1450 bool isThisReturn = false; 1451 bool isSibCall = false; 1452 // Disable tail calls if they're not supported. 1453 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 1454 isTailCall = false; 1455 if (isTailCall) { 1456 // Check if it's really possible to do a tail call. 1457 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1458 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), 1459 Outs, OutVals, Ins, DAG); 1460 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1461 // detected sibcalls. 1462 if (isTailCall) { 1463 ++NumTailCalls; 1464 isSibCall = true; 1465 } 1466 } 1467 1468 // Analyze operands of the call, assigning locations to each operand. 1469 SmallVector<CCValAssign, 16> ArgLocs; 1470 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1471 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1472 CCInfo.AnalyzeCallOperands(Outs, 1473 CCAssignFnForNode(CallConv, /* Return*/ false, 1474 isVarArg)); 1475 1476 // Get a count of how many bytes are to be pushed on the stack. 1477 unsigned NumBytes = CCInfo.getNextStackOffset(); 1478 1479 // For tail calls, memory operands are available in our caller's stack. 1480 if (isSibCall) 1481 NumBytes = 0; 1482 1483 // Adjust the stack pointer for the new arguments... 1484 // These operations are automatically eliminated by the prolog/epilog pass 1485 if (!isSibCall) 1486 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 1487 dl); 1488 1489 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1490 1491 RegsToPassVector RegsToPass; 1492 SmallVector<SDValue, 8> MemOpChains; 1493 1494 // Walk the register/memloc assignments, inserting copies/loads. In the case 1495 // of tail call optimization, arguments are handled later. 1496 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1497 i != e; 1498 ++i, ++realArgIdx) { 1499 CCValAssign &VA = ArgLocs[i]; 1500 SDValue Arg = OutVals[realArgIdx]; 1501 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1502 bool isByVal = Flags.isByVal(); 1503 1504 // Promote the value if needed. 
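    // For example (hypothetical argument): an i8 or i16 value assigned to a
    // 32-bit register slot is widened here first, so the register copy
    // emitted later always moves a full i32.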
switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
        assert(VA.getLocVT() == MVT::i32 &&
               "unexpected calling convention register assignment");
        assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
               "unexpected use of 'returned'");
        isThisReturn = true;
      }
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      assert(VA.isMemLoc());
      unsigned offset = 0;

      // True if this byval aggregate will be split between registers
      // and memory.
      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
      unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();

      if (CurByValIdx < ByValArgsCount) {

        unsigned RegBegin, RegEnd;
        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);

        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        unsigned int i, j;
        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
          SDValue Const = DAG.getConstant(4*i, MVT::i32);
          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false,
                                     DAG.InferPtrAlignment(AddArg));
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(j, Load));
        }

        // If the parameter extends beyond the register area, the "offset"
        // value helps us compute the stack slot for the remaining part.
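        // Example (illustrative): a 20-byte byval with R2 and R3 still free
        // is split as 8 bytes loaded into R2-R3 by the loop above (so
        // offset == 2 registers), with the remaining 12 bytes copied to the
        // outgoing stack area by the code that follows.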
1585 offset = RegEnd - RegBegin; 1586 1587 CCInfo.nextInRegsParam(); 1588 } 1589 1590 if (Flags.getByValSize() > 4*offset) { 1591 unsigned LocMemOffset = VA.getLocMemOffset(); 1592 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1593 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1594 StkPtrOff); 1595 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1596 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1597 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1598 MVT::i32); 1599 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); 1600 1601 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 1602 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; 1603 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, 1604 Ops, array_lengthof(Ops))); 1605 } 1606 } else if (!isSibCall) { 1607 assert(VA.isMemLoc()); 1608 1609 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1610 dl, DAG, VA, Flags)); 1611 } 1612 } 1613 1614 if (!MemOpChains.empty()) 1615 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1616 &MemOpChains[0], MemOpChains.size()); 1617 1618 // Build a sequence of copy-to-reg nodes chained together with token chain 1619 // and flag operands which copy the outgoing args into the appropriate regs. 1620 SDValue InFlag; 1621 // Tail call byval lowering might overwrite argument registers so in case of 1622 // tail call optimization the copies to registers are lowered later. 1623 if (!isTailCall) 1624 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1625 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1626 RegsToPass[i].second, InFlag); 1627 InFlag = Chain.getValue(1); 1628 } 1629 1630 // For tail calls lower the arguments to the 'real' stack slot. 1631 if (isTailCall) { 1632 // Force all the incoming stack arguments to be loaded from the stack 1633 // before any new outgoing arguments are stored to the stack, because the 1634 // outgoing stack slots may alias the incoming argument stack slots, and 1635 // the alias isn't otherwise explicit. This is slightly more conservative 1636 // than necessary, because it means that each store effectively depends 1637 // on every argument instead of just those arguments it would clobber. 1638 1639 // Do not flag preceding copytoreg stuff together with the following stuff. 1640 InFlag = SDValue(); 1641 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1642 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1643 RegsToPass[i].second, InFlag); 1644 InFlag = Chain.getValue(1); 1645 } 1646 InFlag = SDValue(); 1647 } 1648 1649 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1650 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1651 // node so that legalize doesn't hack it. 1652 bool isDirect = false; 1653 bool isARMFunc = false; 1654 bool isLocalARMFunc = false; 1655 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1656 1657 if (EnableARMLongCalls) { 1658 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1659 && "long-calls with non-static relocation model!"); 1660 // Handle a global address or an external symbol. If it's not one of 1661 // those, the target's already in a register, so we don't need to do 1662 // anything extra. 
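    // Resulting code sketch (illustrative):
    //     ldr r12, .LCPIx_y   @ callee address loaded from the constant pool
    //     blx r12             @ indirect call, not limited by branch range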
1663 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1664 const GlobalValue *GV = G->getGlobal(); 1665 // Create a constant pool entry for the callee address 1666 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1667 ARMConstantPoolValue *CPV = 1668 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1669 1670 // Get the address of the callee into a register 1671 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1672 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1673 Callee = DAG.getLoad(getPointerTy(), dl, 1674 DAG.getEntryNode(), CPAddr, 1675 MachinePointerInfo::getConstantPool(), 1676 false, false, false, 0); 1677 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1678 const char *Sym = S->getSymbol(); 1679 1680 // Create a constant pool entry for the callee address 1681 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1682 ARMConstantPoolValue *CPV = 1683 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1684 ARMPCLabelIndex, 0); 1685 // Get the address of the callee into a register 1686 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1687 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1688 Callee = DAG.getLoad(getPointerTy(), dl, 1689 DAG.getEntryNode(), CPAddr, 1690 MachinePointerInfo::getConstantPool(), 1691 false, false, false, 0); 1692 } 1693 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1694 const GlobalValue *GV = G->getGlobal(); 1695 isDirect = true; 1696 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1697 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1698 getTargetMachine().getRelocationModel() != Reloc::Static; 1699 isARMFunc = !Subtarget->isThumb() || isStub; 1700 // ARM call to a local ARM function is predicable. 1701 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1702 // tBX takes a register source operand. 1703 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1704 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1705 ARMConstantPoolValue *CPV = 1706 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); 1707 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1708 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1709 Callee = DAG.getLoad(getPointerTy(), dl, 1710 DAG.getEntryNode(), CPAddr, 1711 MachinePointerInfo::getConstantPool(), 1712 false, false, false, 0); 1713 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1714 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1715 getPointerTy(), Callee, PICLabel); 1716 } else { 1717 // On ELF targets for PIC code, direct calls should go through the PLT 1718 unsigned OpFlags = 0; 1719 if (Subtarget->isTargetELF() && 1720 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1721 OpFlags = ARMII::MO_PLT; 1722 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1723 } 1724 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1725 isDirect = true; 1726 bool isStub = Subtarget->isTargetDarwin() && 1727 getTargetMachine().getRelocationModel() != Reloc::Static; 1728 isARMFunc = !Subtarget->isThumb() || isStub; 1729 // tBX takes a register source operand. 
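    // (Illustrative: as with the global-address case above, pre-v5T Thumb
    // loads the callee's address from the constant pool and applies
    // ARMISD::PIC_ADD so the call can go through a register via tBX.)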
1730 const char *Sym = S->getSymbol(); 1731 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1732 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1733 ARMConstantPoolValue *CPV = 1734 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1735 ARMPCLabelIndex, 4); 1736 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1737 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1738 Callee = DAG.getLoad(getPointerTy(), dl, 1739 DAG.getEntryNode(), CPAddr, 1740 MachinePointerInfo::getConstantPool(), 1741 false, false, false, 0); 1742 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1743 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1744 getPointerTy(), Callee, PICLabel); 1745 } else { 1746 unsigned OpFlags = 0; 1747 // On ELF targets for PIC code, direct calls should go through the PLT 1748 if (Subtarget->isTargetELF() && 1749 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1750 OpFlags = ARMII::MO_PLT; 1751 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1752 } 1753 } 1754 1755 // FIXME: handle tail calls differently. 1756 unsigned CallOpc; 1757 bool HasMinSizeAttr = MF.getFunction()->getAttributes(). 1758 hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); 1759 if (Subtarget->isThumb()) { 1760 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1761 CallOpc = ARMISD::CALL_NOLINK; 1762 else 1763 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1764 } else { 1765 if (!isDirect && !Subtarget->hasV5TOps()) 1766 CallOpc = ARMISD::CALL_NOLINK; 1767 else if (doesNotRet && isDirect && Subtarget->hasRAS() && 1768 // Emit regular call when code size is the priority 1769 !HasMinSizeAttr) 1770 // "mov lr, pc; b _foo" to avoid confusing the RSP 1771 CallOpc = ARMISD::CALL_NOLINK; 1772 else 1773 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; 1774 } 1775 1776 std::vector<SDValue> Ops; 1777 Ops.push_back(Chain); 1778 Ops.push_back(Callee); 1779 1780 // Add argument registers to the end of the list so that they are known live 1781 // into the call. 1782 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1783 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1784 RegsToPass[i].second.getValueType())); 1785 1786 // Add a register mask operand representing the call-preserved registers. 1787 if (!isTailCall) { 1788 const uint32_t *Mask; 1789 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 1790 const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); 1791 if (isThisReturn) { 1792 // For 'this' returns, use the R0-preserving mask if applicable 1793 Mask = ARI->getThisReturnPreservedMask(CallConv); 1794 if (!Mask) { 1795 // Set isThisReturn to false if the calling convention is not one that 1796 // allows 'returned' to be modeled in this way, so LowerCallResult does 1797 // not try to pass 'this' straight through 1798 isThisReturn = false; 1799 Mask = ARI->getCallPreservedMask(CallConv); 1800 } 1801 } else 1802 Mask = ARI->getCallPreservedMask(CallConv); 1803 1804 assert(Mask && "Missing call preserved mask for calling convention"); 1805 Ops.push_back(DAG.getRegisterMask(Mask)); 1806 } 1807 1808 if (InFlag.getNode()) 1809 Ops.push_back(InFlag); 1810 1811 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1812 if (isTailCall) 1813 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1814 1815 // Returns a chain and a flag for retval copy to use. 
Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals, isThisReturn,
                         isThisReturn ? OutVals[0] : SDValue());
}

/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
ARMTargetLowering::HandleByVal(
    CCState *State, unsigned &size, unsigned Align) const {
  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
  assert((State->getCallOrPrologue() == Prologue ||
          State->getCallOrPrologue() == Call) &&
         "unhandled ParmContext");

  // For in-prologue parameters handling, we also introduce stack offset
  // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
  // This behaviour is outside the AAPCS rules (5.5 Parameter Passing) for
  // how NSAA should be evaluated (NSAA means "next stacked argument address").
  // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
  // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
  unsigned NSAAOffset = State->getNextStackOffset();
  if (State->getCallOrPrologue() != Call) {
    for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
      unsigned RB, RE;
      State->getInRegsParamInfo(i, RB, RE);
      assert(NSAAOffset >= (RE-RB)*4 &&
             "Stack offset for byval regs was not introduced?");
      NSAAOffset -= (RE-RB)*4;
    }
  }
  if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
    if (Subtarget->isAAPCS_ABI() && Align > 4) {
      unsigned AlignInRegs = Align / 4;
      unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
      for (unsigned i = 0; i < Waste; ++i)
        reg = State->AllocateReg(GPRArgRegs, 4);
    }
    if (reg != 0) {
      unsigned excess = 4 * (ARM::R4 - reg);

      // Special case when NSAA != SP and the parameter size is greater than
      // the size of all remaining GPR regs. In that case we cannot split the
      // parameter: it must go entirely to the stack. We must also set NCRN
      // to R4, wasting all remaining registers.
      if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
        while (State->AllocateReg(GPRArgRegs, 4))
          ;
        return;
      }

      // The first register for the byval parameter is the first register that
      // wasn't allocated before this method was called, i.e. "reg".
      // If the parameter is small enough to be saved in the range [reg, r4),
      // the end (one past the last) register is reg + param-size-in-regs;
      // otherwise the parameter is split between registers and stack, and the
      // end register is r4.
      unsigned ByValRegBegin = reg;
      unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
      State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
      // Note: the first register was already allocated at the beginning of
      // this method, so allocate the remaining registers we need.
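      // Worked example (illustrative): a 16-byte byval with 8-byte alignment
      // when R1 is the next free register. R1 is wasted for alignment, the
      // byval then starts at R2, excess = 8, and since 16 >= 8 the end
      // register is R4: ByValRegBegin/End = R2/R4, with 8 bytes left for the
      // stack at a call site.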
for (unsigned i = reg+1; i != ByValRegEnd; ++i)
        State->AllocateReg(GPRArgRegs, 4);
      // At a call site, a byval parameter that is split between
      // registers and memory needs its size truncated here. In a
      // function prologue, such byval parameters are reassembled in
      // memory, and are not truncated.
      if (State->getCallOrPrologue() == Call) {
        // Set the remaining size to 0, since in that case the whole
        // structure may be stored in registers.
        if (size < excess)
          size = 0;
        else
          size -= excess;
      }
    }
  }
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same relative position of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const TargetInstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!TargetRegisterInfo::isVirtualRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                                     bool isCalleeStructRet,
                                                     bool isCallerStructRet,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF->getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Do not sibcall optimize vararg calls unless the call site is not passing
  // any arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
if (CallerF->hasFnAttribute("interrupt"))
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
  // support in the assembler and linker to be used. This would need to be
  // fixed to fully support tail calls in Thumb1.
  //
  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
  // LR. This means if we need to reload LR, it takes an extra instruction,
  // which outweighs the value of the tail call; but here we don't know yet
  // whether LR is going to be used. Probably the right approach is to
  // generate the tail call here and turn it back into CALL/RET in
  // emitEpilogue if LR is used.

  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
  // but we need to make sure there are enough registers; the only valid
  // registers are the 4 used for parameters. We don't currently do this
  // case.
  if (Subtarget->isThumb1Only())
    return false;

  // If the calling conventions do not match, then we'd better make sure the
  // results are returned in the same way as what the caller expects.
  if (!CCMatch) {
    SmallVector<CCValAssign, 16> RVLocs1;
    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));

    SmallVector<CCValAssign, 16> RVLocs2;
    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));

    if (RVLocs1.size() != RVLocs2.size())
      return false;
    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
        return false;
      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
        return false;
      if (RVLocs1[i].isRegLoc()) {
        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
          return false;
      } else {
        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
          return false;
      }
    }
  }

  // If the caller's vararg or byval argument has been split between registers
  // and stack, do not perform the tail call, since part of the argument is in
  // the caller's local frame.
  const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
                                      getInfo<ARMFunctionInfo>();
  if (AFI_Caller->getArgRegsSaveSize())
    return false;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
2051 SmallVector<CCValAssign, 16> ArgLocs; 2052 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 2053 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 2054 CCInfo.AnalyzeCallOperands(Outs, 2055 CCAssignFnForNode(CalleeCC, false, isVarArg)); 2056 if (CCInfo.getNextStackOffset()) { 2057 MachineFunction &MF = DAG.getMachineFunction(); 2058 2059 // Check if the arguments are already laid out in the right way as 2060 // the caller's fixed stack objects. 2061 MachineFrameInfo *MFI = MF.getFrameInfo(); 2062 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 2063 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2064 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 2065 i != e; 2066 ++i, ++realArgIdx) { 2067 CCValAssign &VA = ArgLocs[i]; 2068 EVT RegVT = VA.getLocVT(); 2069 SDValue Arg = OutVals[realArgIdx]; 2070 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 2071 if (VA.getLocInfo() == CCValAssign::Indirect) 2072 return false; 2073 if (VA.needsCustom()) { 2074 // f64 and vector types are split into multiple registers or 2075 // register/stack-slot combinations. The types will not match 2076 // the registers; give up on memory f64 refs until we figure 2077 // out what to do about this. 2078 if (!VA.isRegLoc()) 2079 return false; 2080 if (!ArgLocs[++i].isRegLoc()) 2081 return false; 2082 if (RegVT == MVT::v2f64) { 2083 if (!ArgLocs[++i].isRegLoc()) 2084 return false; 2085 if (!ArgLocs[++i].isRegLoc()) 2086 return false; 2087 } 2088 } else if (!VA.isRegLoc()) { 2089 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 2090 MFI, MRI, TII)) 2091 return false; 2092 } 2093 } 2094 } 2095 } 2096 2097 return true; 2098 } 2099 2100 bool 2101 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 2102 MachineFunction &MF, bool isVarArg, 2103 const SmallVectorImpl<ISD::OutputArg> &Outs, 2104 LLVMContext &Context) const { 2105 SmallVector<CCValAssign, 16> RVLocs; 2106 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); 2107 return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, 2108 isVarArg)); 2109 } 2110 2111 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, 2112 SDLoc DL, SelectionDAG &DAG) { 2113 const MachineFunction &MF = DAG.getMachineFunction(); 2114 const Function *F = MF.getFunction(); 2115 2116 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); 2117 2118 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset 2119 // version of the "preferred return address". These offsets affect the return 2120 // instruction if this is a return from PL1 without hypervisor extensions. 2121 // IRQ/FIQ: +4 "subs pc, lr, #4" 2122 // SWI: 0 "subs pc, lr, #0" 2123 // ABORT: +4 "subs pc, lr, #4" 2124 // UNDEF: +4/+2 "subs pc, lr, #0" 2125 // UNDEF varies depending on where the exception came from ARM or Thumb 2126 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. 2127 2128 int64_t LROffset; 2129 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || 2130 IntKind == "ABORT") 2131 LROffset = 4; 2132 else if (IntKind == "SWI" || IntKind == "UNDEF") 2133 LROffset = 0; 2134 else 2135 report_fatal_error("Unsupported interrupt attribute. 
If present, value " 2136 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); 2137 2138 RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); 2139 2140 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, 2141 RetOps.data(), RetOps.size()); 2142 } 2143 2144 SDValue 2145 ARMTargetLowering::LowerReturn(SDValue Chain, 2146 CallingConv::ID CallConv, bool isVarArg, 2147 const SmallVectorImpl<ISD::OutputArg> &Outs, 2148 const SmallVectorImpl<SDValue> &OutVals, 2149 SDLoc dl, SelectionDAG &DAG) const { 2150 2151 // CCValAssign - represent the assignment of the return value to a location. 2152 SmallVector<CCValAssign, 16> RVLocs; 2153 2154 // CCState - Info about the registers and stack slots. 2155 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2156 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 2157 2158 // Analyze outgoing return values. 2159 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 2160 isVarArg)); 2161 2162 SDValue Flag; 2163 SmallVector<SDValue, 4> RetOps; 2164 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 2165 2166 // Copy the result values into the output registers. 2167 for (unsigned i = 0, realRVLocIdx = 0; 2168 i != RVLocs.size(); 2169 ++i, ++realRVLocIdx) { 2170 CCValAssign &VA = RVLocs[i]; 2171 assert(VA.isRegLoc() && "Can only return in registers!"); 2172 2173 SDValue Arg = OutVals[realRVLocIdx]; 2174 2175 switch (VA.getLocInfo()) { 2176 default: llvm_unreachable("Unknown loc info!"); 2177 case CCValAssign::Full: break; 2178 case CCValAssign::BCvt: 2179 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 2180 break; 2181 } 2182 2183 if (VA.needsCustom()) { 2184 if (VA.getLocVT() == MVT::v2f64) { 2185 // Extract the first half and return it in two registers. 2186 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2187 DAG.getConstant(0, MVT::i32)); 2188 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 2189 DAG.getVTList(MVT::i32, MVT::i32), Half); 2190 2191 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 2192 Flag = Chain.getValue(1); 2193 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2194 VA = RVLocs[++i]; // skip ahead to next loc 2195 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 2196 HalfGPRs.getValue(1), Flag); 2197 Flag = Chain.getValue(1); 2198 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2199 VA = RVLocs[++i]; // skip ahead to next loc 2200 2201 // Extract the 2nd half and fall through to handle it as an f64 value. 2202 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2203 DAG.getConstant(1, MVT::i32)); 2204 } 2205 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 2206 // available. 2207 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 2208 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 2209 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 2210 Flag = Chain.getValue(1); 2211 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2212 VA = RVLocs[++i]; // skip ahead to next loc 2213 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 2214 Flag); 2215 } else 2216 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 2217 2218 // Guarantee that all emitted copies are 2219 // stuck together, avoiding something bad. 2220 Flag = Chain.getValue(1); 2221 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2222 } 2223 2224 // Update chain and glue. 
2225 RetOps[0] = Chain; 2226 if (Flag.getNode()) 2227 RetOps.push_back(Flag); 2228 2229 // CPUs which aren't M-class use a special sequence to return from 2230 // exceptions (roughly, any instruction setting pc and cpsr simultaneously, 2231 // though we use "subs pc, lr, #N"). 2232 // 2233 // M-class CPUs actually use a normal return sequence with a special 2234 // (hardware-provided) value in LR, so the normal code path works. 2235 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && 2236 !Subtarget->isMClass()) { 2237 if (Subtarget->isThumb1Only()) 2238 report_fatal_error("interrupt attribute is not supported in Thumb1"); 2239 return LowerInterruptReturn(RetOps, dl, DAG); 2240 } 2241 2242 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, 2243 RetOps.data(), RetOps.size()); 2244 } 2245 2246 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 2247 if (N->getNumValues() != 1) 2248 return false; 2249 if (!N->hasNUsesOfValue(1, 0)) 2250 return false; 2251 2252 SDValue TCChain = Chain; 2253 SDNode *Copy = *N->use_begin(); 2254 if (Copy->getOpcode() == ISD::CopyToReg) { 2255 // If the copy has a glue operand, we conservatively assume it isn't safe to 2256 // perform a tail call. 2257 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 2258 return false; 2259 TCChain = Copy->getOperand(0); 2260 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { 2261 SDNode *VMov = Copy; 2262 // f64 returned in a pair of GPRs. 2263 SmallPtrSet<SDNode*, 2> Copies; 2264 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2265 UI != UE; ++UI) { 2266 if (UI->getOpcode() != ISD::CopyToReg) 2267 return false; 2268 Copies.insert(*UI); 2269 } 2270 if (Copies.size() > 2) 2271 return false; 2272 2273 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2274 UI != UE; ++UI) { 2275 SDValue UseChain = UI->getOperand(0); 2276 if (Copies.count(UseChain.getNode())) 2277 // Second CopyToReg 2278 Copy = *UI; 2279 else 2280 // First CopyToReg 2281 TCChain = UseChain; 2282 } 2283 } else if (Copy->getOpcode() == ISD::BITCAST) { 2284 // f32 returned in a single GPR. 2285 if (!Copy->hasOneUse()) 2286 return false; 2287 Copy = *Copy->use_begin(); 2288 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) 2289 return false; 2290 TCChain = Copy->getOperand(0); 2291 } else { 2292 return false; 2293 } 2294 2295 bool HasRet = false; 2296 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 2297 UI != UE; ++UI) { 2298 if (UI->getOpcode() != ARMISD::RET_FLAG && 2299 UI->getOpcode() != ARMISD::INTRET_FLAG) 2300 return false; 2301 HasRet = true; 2302 } 2303 2304 if (!HasRet) 2305 return false; 2306 2307 Chain = TCChain; 2308 return true; 2309 } 2310 2311 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 2312 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 2313 return false; 2314 2315 if (!CI->isTailCall()) 2316 return false; 2317 2318 return !Subtarget->isThumb1Only(); 2319 } 2320 2321 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2322 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 2323 // one of the above mentioned nodes. It has to be wrapped because otherwise 2324 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2325 // be used to form addressing mode. These wrapped nodes will be selected 2326 // into MOVi. 
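// For example (illustrative): Select of
//   (ARMISD::Wrapper (TargetConstantPool @CPI0_0))
// produces a real address-materializing instruction, whereas a bare
// TargetConstantPool node would just be returned unchanged by Select(N).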
2327 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 2328 EVT PtrVT = Op.getValueType(); 2329 // FIXME there is no actual debug info here 2330 SDLoc dl(Op); 2331 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2332 SDValue Res; 2333 if (CP->isMachineConstantPoolEntry()) 2334 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2335 CP->getAlignment()); 2336 else 2337 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2338 CP->getAlignment()); 2339 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 2340 } 2341 2342 unsigned ARMTargetLowering::getJumpTableEncoding() const { 2343 return MachineJumpTableInfo::EK_Inline; 2344 } 2345 2346 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 2347 SelectionDAG &DAG) const { 2348 MachineFunction &MF = DAG.getMachineFunction(); 2349 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2350 unsigned ARMPCLabelIndex = 0; 2351 SDLoc DL(Op); 2352 EVT PtrVT = getPointerTy(); 2353 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 2354 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2355 SDValue CPAddr; 2356 if (RelocM == Reloc::Static) { 2357 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 2358 } else { 2359 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2360 ARMPCLabelIndex = AFI->createPICLabelUId(); 2361 ARMConstantPoolValue *CPV = 2362 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 2363 ARMCP::CPBlockAddress, PCAdj); 2364 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2365 } 2366 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 2367 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 2368 MachinePointerInfo::getConstantPool(), 2369 false, false, false, 0); 2370 if (RelocM == Reloc::Static) 2371 return Result; 2372 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2373 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 2374 } 2375 2376 // Lower ISD::GlobalTLSAddress using the "general dynamic" model 2377 SDValue 2378 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 2379 SelectionDAG &DAG) const { 2380 SDLoc dl(GA); 2381 EVT PtrVT = getPointerTy(); 2382 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2383 MachineFunction &MF = DAG.getMachineFunction(); 2384 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2385 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2386 ARMConstantPoolValue *CPV = 2387 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2388 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 2389 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2390 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 2391 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 2392 MachinePointerInfo::getConstantPool(), 2393 false, false, false, 0); 2394 SDValue Chain = Argument.getValue(1); 2395 2396 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2397 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 2398 2399 // call __tls_get_addr. 2400 ArgListTy Args; 2401 ArgListEntry Entry; 2402 Entry.Node = Argument; 2403 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 2404 Args.push_back(Entry); 2405 // FIXME: is there useful debug info available here? 
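  // Sketch of the general-dynamic sequence built here (illustrative):
  //   arg  = load(constant-pool entry for var(TLSGD)) + pic-label
  //   addr = __tls_get_addr(arg)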
2406 TargetLowering::CallLoweringInfo CLI(Chain, 2407 (Type *) Type::getInt32Ty(*DAG.getContext()), 2408 false, false, false, false, 2409 0, CallingConv::C, /*isTailCall=*/false, 2410 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 2411 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 2412 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2413 return CallResult.first; 2414 } 2415 2416 // Lower ISD::GlobalTLSAddress using the "initial exec" or 2417 // "local exec" model. 2418 SDValue 2419 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2420 SelectionDAG &DAG, 2421 TLSModel::Model model) const { 2422 const GlobalValue *GV = GA->getGlobal(); 2423 SDLoc dl(GA); 2424 SDValue Offset; 2425 SDValue Chain = DAG.getEntryNode(); 2426 EVT PtrVT = getPointerTy(); 2427 // Get the Thread Pointer 2428 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2429 2430 if (model == TLSModel::InitialExec) { 2431 MachineFunction &MF = DAG.getMachineFunction(); 2432 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2433 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2434 // Initial exec model. 2435 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2436 ARMConstantPoolValue *CPV = 2437 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2438 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 2439 true); 2440 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2441 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2442 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2443 MachinePointerInfo::getConstantPool(), 2444 false, false, false, 0); 2445 Chain = Offset.getValue(1); 2446 2447 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2448 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2449 2450 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2451 MachinePointerInfo::getConstantPool(), 2452 false, false, false, 0); 2453 } else { 2454 // local exec model 2455 assert(model == TLSModel::LocalExec); 2456 ARMConstantPoolValue *CPV = 2457 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 2458 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2459 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2460 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2461 MachinePointerInfo::getConstantPool(), 2462 false, false, false, 0); 2463 } 2464 2465 // The address of the thread local variable is the add of the thread 2466 // pointer with the offset of the variable. 
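  // e.g. (illustrative):
  //   initial-exec: addr = threadptr + load(GOT entry for var(GOTTPOFF))
  //   local-exec:   addr = threadptr + var(TPOFF)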
2467 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2468 } 2469 2470 SDValue 2471 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2472 // TODO: implement the "local dynamic" model 2473 assert(Subtarget->isTargetELF() && 2474 "TLS not implemented for non-ELF targets"); 2475 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2476 2477 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 2478 2479 switch (model) { 2480 case TLSModel::GeneralDynamic: 2481 case TLSModel::LocalDynamic: 2482 return LowerToTLSGeneralDynamicModel(GA, DAG); 2483 case TLSModel::InitialExec: 2484 case TLSModel::LocalExec: 2485 return LowerToTLSExecModels(GA, DAG, model); 2486 } 2487 llvm_unreachable("bogus TLS model"); 2488 } 2489 2490 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2491 SelectionDAG &DAG) const { 2492 EVT PtrVT = getPointerTy(); 2493 SDLoc dl(Op); 2494 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2495 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2496 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2497 ARMConstantPoolValue *CPV = 2498 ARMConstantPoolConstant::Create(GV, 2499 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2500 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2501 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2502 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2503 CPAddr, 2504 MachinePointerInfo::getConstantPool(), 2505 false, false, false, 0); 2506 SDValue Chain = Result.getValue(1); 2507 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2508 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2509 if (!UseGOTOFF) 2510 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2511 MachinePointerInfo::getGOT(), 2512 false, false, false, 0); 2513 return Result; 2514 } 2515 2516 // If we have T2 ops, we can materialize the address directly via movt/movw 2517 // pair. This is always cheaper. 2518 if (Subtarget->useMovt()) { 2519 ++NumMovwMovt; 2520 // FIXME: Once remat is capable of dealing with instructions with register 2521 // operands, expand this into two nodes. 2522 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2523 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2524 } else { 2525 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2526 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2527 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2528 MachinePointerInfo::getConstantPool(), 2529 false, false, false, 0); 2530 } 2531 } 2532 2533 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2534 SelectionDAG &DAG) const { 2535 EVT PtrVT = getPointerTy(); 2536 SDLoc dl(Op); 2537 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2538 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2539 2540 if (Subtarget->useMovt()) { 2541 ++NumMovwMovt; 2542 // FIXME: Once remat is capable of dealing with instructions with register 2543 // operands, expand this into two nodes. 2544 if (RelocM == Reloc::Static) 2545 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2546 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2547 2548 unsigned Wrapper = 2549 RelocM == Reloc::PIC_ ? 
ARMISD::WrapperPIC : ARMISD::Wrapper; 2550 2551 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); 2552 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); 2553 2554 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2555 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2556 MachinePointerInfo::getGOT(), 2557 false, false, false, 0); 2558 return Result; 2559 } 2560 2561 unsigned ARMPCLabelIndex = 0; 2562 SDValue CPAddr; 2563 if (RelocM == Reloc::Static) { 2564 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2565 } else { 2566 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2567 ARMPCLabelIndex = AFI->createPICLabelUId(); 2568 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2569 ARMConstantPoolValue *CPV = 2570 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 2571 PCAdj); 2572 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2573 } 2574 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2575 2576 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2577 MachinePointerInfo::getConstantPool(), 2578 false, false, false, 0); 2579 SDValue Chain = Result.getValue(1); 2580 2581 if (RelocM == Reloc::PIC_) { 2582 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2583 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2584 } 2585 2586 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2587 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2588 false, false, false, 0); 2589 2590 return Result; 2591 } 2592 2593 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2594 SelectionDAG &DAG) const { 2595 assert(Subtarget->isTargetELF() && 2596 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2597 MachineFunction &MF = DAG.getMachineFunction(); 2598 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2599 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2600 EVT PtrVT = getPointerTy(); 2601 SDLoc dl(Op); 2602 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 2603 ARMConstantPoolValue *CPV = 2604 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2605 ARMPCLabelIndex, PCAdj); 2606 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2607 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2608 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2609 MachinePointerInfo::getConstantPool(), 2610 false, false, false, 0); 2611 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2612 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2613 } 2614 2615 SDValue 2616 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2617 SDLoc dl(Op); 2618 SDValue Val = DAG.getConstant(0, MVT::i32); 2619 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 2620 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 2621 Op.getOperand(1), Val); 2622 } 2623 2624 SDValue 2625 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2626 SDLoc dl(Op); 2627 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2628 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2629 } 2630 2631 SDValue 2632 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2633 const ARMSubtarget *Subtarget) const { 2634 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2635 SDLoc dl(Op); 2636 switch (IntNo) { 2637 default: return SDValue(); // Don't custom lower most intrinsics. 2638 case Intrinsic::arm_thread_pointer: { 2639 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2640 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2641 } 2642 case Intrinsic::eh_sjlj_lsda: { 2643 MachineFunction &MF = DAG.getMachineFunction(); 2644 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2645 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2646 EVT PtrVT = getPointerTy(); 2647 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2648 SDValue CPAddr; 2649 unsigned PCAdj = (RelocM != Reloc::PIC_) 2650 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2651 ARMConstantPoolValue *CPV = 2652 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 2653 ARMCP::CPLSDA, PCAdj); 2654 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2655 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2656 SDValue Result = 2657 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2658 MachinePointerInfo::getConstantPool(), 2659 false, false, false, 0); 2660 2661 if (RelocM == Reloc::PIC_) { 2662 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2663 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2664 } 2665 return Result; 2666 } 2667 case Intrinsic::arm_neon_vmulls: 2668 case Intrinsic::arm_neon_vmullu: { 2669 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2670 ? ARMISD::VMULLs : ARMISD::VMULLu; 2671 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), 2672 Op.getOperand(1), Op.getOperand(2)); 2673 } 2674 } 2675 } 2676 2677 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 2678 const ARMSubtarget *Subtarget) { 2679 // FIXME: handle "fence singlethread" more efficiently. 2680 SDLoc dl(Op); 2681 if (!Subtarget->hasDataBarrier()) { 2682 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2683 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2684 // here. 2685 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2686 "Unexpected ISD::ATOMIC_FENCE encountered. 
Should be libcall!"); 2687 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2688 DAG.getConstant(0, MVT::i32)); 2689 } 2690 2691 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); 2692 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); 2693 unsigned Domain = ARM_MB::ISH; 2694 if (Subtarget->isMClass()) { 2695 // Only a full system barrier exists in the M-class architectures. 2696 Domain = ARM_MB::SY; 2697 } else if (Subtarget->isSwift() && Ord == Release) { 2698 // Swift happens to implement ISHST barriers in a way that's compatible with 2699 // Release semantics but weaker than ISH so we'd be fools not to use 2700 // it. Beware: other processors probably don't! 2701 Domain = ARM_MB::ISHST; 2702 } 2703 2704 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), 2705 DAG.getConstant(Intrinsic::arm_dmb, MVT::i32), 2706 DAG.getConstant(Domain, MVT::i32)); 2707 } 2708 2709 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2710 const ARMSubtarget *Subtarget) { 2711 // ARM pre v5TE and Thumb1 does not have preload instructions. 2712 if (!(Subtarget->isThumb2() || 2713 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2714 // Just preserve the chain. 2715 return Op.getOperand(0); 2716 2717 SDLoc dl(Op); 2718 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2719 if (!isRead && 2720 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2721 // ARMv7 with MP extension has PLDW. 2722 return Op.getOperand(0); 2723 2724 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2725 if (Subtarget->isThumb()) { 2726 // Invert the bits. 2727 isRead = ~isRead & 1; 2728 isData = ~isData & 1; 2729 } 2730 2731 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2732 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2733 DAG.getConstant(isData, MVT::i32)); 2734 } 2735 2736 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2737 MachineFunction &MF = DAG.getMachineFunction(); 2738 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2739 2740 // vastart just stores the address of the VarArgsFrameIndex slot into the 2741 // memory location argument. 2742 SDLoc dl(Op); 2743 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2744 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2745 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2746 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2747 MachinePointerInfo(SV), false, false, 0); 2748 } 2749 2750 SDValue 2751 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2752 SDValue &Root, SelectionDAG &DAG, 2753 SDLoc dl) const { 2754 MachineFunction &MF = DAG.getMachineFunction(); 2755 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2756 2757 const TargetRegisterClass *RC; 2758 if (AFI->isThumb1OnlyFunction()) 2759 RC = &ARM::tGPRRegClass; 2760 else 2761 RC = &ARM::GPRRegClass; 2762 2763 // Transform the arguments stored in physical registers into virtual ones. 2764 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2765 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2766 2767 SDValue ArgValue2; 2768 if (NextVA.isMemLoc()) { 2769 MachineFrameInfo *MFI = MF.getFrameInfo(); 2770 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2771 2772 // Create load node to retrieve arguments from the stack. 
2773 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2774 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2775 MachinePointerInfo::getFixedStack(FI), 2776 false, false, false, 0); 2777 } else { 2778 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2779 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2780 } 2781 2782 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2783 } 2784 2785 void 2786 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2787 unsigned InRegsParamRecordIdx, 2788 unsigned ArgSize, 2789 unsigned &ArgRegsSize, 2790 unsigned &ArgRegsSaveSize) 2791 const { 2792 unsigned NumGPRs; 2793 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 2794 unsigned RBegin, REnd; 2795 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 2796 NumGPRs = REnd - RBegin; 2797 } else { 2798 unsigned int firstUnalloced; 2799 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2800 sizeof(GPRArgRegs) / 2801 sizeof(GPRArgRegs[0])); 2802 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2803 } 2804 2805 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2806 ArgRegsSize = NumGPRs * 4; 2807 2808 // If the parameter is split between the stack and GPRs... 2809 if (NumGPRs && Align == 8 && 2810 (ArgRegsSize < ArgSize || 2811 InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { 2812 // Add padding for the part of the parameter recovered from GPRs, so that 2813 // its last byte ends at an address of the form K*8 - 1. 2814 // This is needed because the remaining (stack) part of the parameter 2815 // keeps the stack alignment, and the "GPRs head" must attach to it 2816 // without a gap: 2817 // Stack: 2818 // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... 2819 // [ [padding] [GPRs head] ] [ Tail passed via stack .... 2820 // 2821 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2822 unsigned Padding = 2823 ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - 2824 (ArgRegsSize + AFI->getArgRegsSaveSize()); 2825 ArgRegsSaveSize = ArgRegsSize + Padding; 2826 } else 2827 // We don't need to extend the regs save size for byval parameters if they 2828 // are passed via GPRs only. 2829 ArgRegsSaveSize = ArgRegsSize; 2830 } 2831 2832 // The remaining GPRs hold either the beginning of variable-argument 2833 // data, or the beginning of an aggregate passed by value (usually 2834 // byval). Either way, we allocate stack slots adjacent to the data 2835 // provided by our caller, and store the unallocated registers there. 2836 // If this is a variadic function, the va_list pointer will begin with 2837 // these values; otherwise, this reassembles a (byval) structure that 2838 // was split between registers and memory. 2839 // Return: the frame index the registers were stored into. 2840 int 2841 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, 2842 SDLoc dl, SDValue &Chain, 2843 const Value *OrigArg, 2844 unsigned InRegsParamRecordIdx, 2845 unsigned OffsetFromOrigArg, 2846 unsigned ArgOffset, 2847 unsigned ArgSize, 2848 bool ForceMutable) const { 2849 2850 // Currently, two use-cases are possible: 2851 // Case #1: a non-vararg function where we meet the first byval parameter. 2852 // Set up the first unallocated register as the first byval register; 2853 // consume all remaining registers 2854 // (these two actions are performed by the HandleByVal method). 2855 // Then, here, we initialize the stack frame with 2856 // "store-reg" instructions. 2857 // Case #2: a vararg function that doesn't contain byval parameters.
2858 // The same: consume all remaining unallocated registers and 2859 // initialize the stack frame. 2860 2861 MachineFunction &MF = DAG.getMachineFunction(); 2862 MachineFrameInfo *MFI = MF.getFrameInfo(); 2863 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2864 unsigned firstRegToSaveIndex, lastRegToSaveIndex; 2865 unsigned RBegin, REnd; 2866 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 2867 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 2868 firstRegToSaveIndex = RBegin - ARM::R0; 2869 lastRegToSaveIndex = REnd - ARM::R0; 2870 } else { 2871 firstRegToSaveIndex = CCInfo.getFirstUnallocated 2872 (GPRArgRegs, array_lengthof(GPRArgRegs)); 2873 lastRegToSaveIndex = 4; 2874 } 2875 2876 unsigned ArgRegsSize, ArgRegsSaveSize; 2877 computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, 2878 ArgRegsSize, ArgRegsSaveSize); 2879 2880 // Store any by-val regs to their spots on the stack so that they may be 2881 // loaded by dereferencing the result of the formal parameter pointer or 2882 // va_next. Note: once the stack area for byval/varargs registers has been 2883 // initialized, it can't be initialized again. 2884 if (ArgRegsSaveSize) { 2885 2886 unsigned Padding = ArgRegsSaveSize - ArgRegsSize; 2887 2888 if (Padding) { 2889 assert(AFI->getStoredByValParamsPadding() == 0 && 2890 "The only parameter may be padded."); 2891 AFI->setStoredByValParamsPadding(Padding); 2892 } 2893 2894 int FrameIndex = MFI->CreateFixedObject( 2895 ArgRegsSaveSize, 2896 Padding + ArgOffset, 2897 false); 2898 SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); 2899 2900 SmallVector<SDValue, 4> MemOps; 2901 for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; 2902 ++firstRegToSaveIndex, ++i) { 2903 const TargetRegisterClass *RC; 2904 if (AFI->isThumb1OnlyFunction()) 2905 RC = &ARM::tGPRRegClass; 2906 else 2907 RC = &ARM::GPRRegClass; 2908 2909 unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); 2910 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2911 SDValue Store = 2912 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2913 MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i), 2914 false, false, 0); 2915 MemOps.push_back(Store); 2916 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2917 DAG.getConstant(4, getPointerTy())); 2918 } 2919 2920 AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); 2921 2922 if (!MemOps.empty()) 2923 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2924 &MemOps[0], MemOps.size()); 2925 return FrameIndex; 2926 } else 2927 // This will point to the next argument passed via the stack. 2928 return MFI->CreateFixedObject( 2929 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); 2930 } 2931 2932 // Set up the stack frame that the va_list pointer will start from. 2933 void 2934 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, 2935 SDLoc dl, SDValue &Chain, 2936 unsigned ArgOffset, 2937 bool ForceMutable) const { 2938 MachineFunction &MF = DAG.getMachineFunction(); 2939 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2940 2941 // Try to store any remaining integer argument regs to their spots on the 2942 // stack so that they may be loaded by dereferencing the result of va_next. 2943 // If there are no regs to be stored, just point at the address after the 2944 // last argument passed via the stack.
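// (Worked example, assuming AAPCS: for "int f(int a, ...)" the fixed
// argument consumes R0, so R1-R3 are spilled into a 12-byte fixed object;
// va_start then points at the first spilled word, and walking past R1-R3
// continues seamlessly into the caller's stack-passed arguments.)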
2946 int FrameIndex = 2947 StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), 2948 0, ArgOffset, 0, ForceMutable); 2949 2950 AFI->setVarArgsFrameIndex(FrameIndex); 2951 } 2952 2953 SDValue 2954 ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2955 CallingConv::ID CallConv, bool isVarArg, 2956 const SmallVectorImpl<ISD::InputArg> 2957 &Ins, 2958 SDLoc dl, SelectionDAG &DAG, 2959 SmallVectorImpl<SDValue> &InVals) 2960 const { 2961 MachineFunction &MF = DAG.getMachineFunction(); 2962 MachineFrameInfo *MFI = MF.getFrameInfo(); 2963 2964 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2965 2966 // Assign locations to all of the incoming arguments. 2967 SmallVector<CCValAssign, 16> ArgLocs; 2968 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2969 getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); 2970 CCInfo.AnalyzeFormalArguments(Ins, 2971 CCAssignFnForNode(CallConv, /* Return*/ false, 2972 isVarArg)); 2973 2974 SmallVector<SDValue, 16> ArgValues; 2975 int lastInsIndex = -1; 2976 SDValue ArgValue; 2977 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); 2978 unsigned CurArgIdx = 0; 2979 2980 // Initially ArgRegsSaveSize is zero. 2981 // Then we increase this value each time we meet a byval parameter. 2982 // We also increase this value in the case of a varargs function. 2983 AFI->setArgRegsSaveSize(0); 2984 2985 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2986 CCValAssign &VA = ArgLocs[i]; 2987 std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); 2988 CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; 2989 // Arguments stored in registers. 2990 if (VA.isRegLoc()) { 2991 EVT RegVT = VA.getLocVT(); 2992 2993 if (VA.needsCustom()) { 2994 // f64 and vector types are split up into multiple registers or 2995 // combinations of registers and stack slots. 2996 if (VA.getLocVT() == MVT::v2f64) { 2997 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2998 Chain, DAG, dl); 2999 VA = ArgLocs[++i]; // skip ahead to next loc 3000 SDValue ArgValue2; 3001 if (VA.isMemLoc()) { 3002 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 3003 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 3004 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 3005 MachinePointerInfo::getFixedStack(FI), 3006 false, false, false, 0); 3007 } else { 3008 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 3009 Chain, DAG, dl); 3010 } 3011 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 3012 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 3013 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 3014 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 3015 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 3016 } else 3017 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 3018 3019 } else { 3020 const TargetRegisterClass *RC; 3021 3022 if (RegVT == MVT::f32) 3023 RC = &ARM::SPRRegClass; 3024 else if (RegVT == MVT::f64) 3025 RC = &ARM::DPRRegClass; 3026 else if (RegVT == MVT::v2f64) 3027 RC = &ARM::QPRRegClass; 3028 else if (RegVT == MVT::i32) 3029 RC = AFI->isThumb1OnlyFunction() ? 3030 (const TargetRegisterClass*)&ARM::tGPRRegClass : 3031 (const TargetRegisterClass*)&ARM::GPRRegClass; 3032 else 3033 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 3034 3035 // Transform the arguments in physical registers into virtual ones.
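// (The incoming physical register is only live on function entry, so it is
// marked as a live-in and immediately copied into a fresh virtual register
// of the matching class; e.g. an i32 in R0 becomes addLiveIn(R0) plus a
// CopyFromReg, after which the value participates in normal register
// allocation like any other.)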
3036 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 3037 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 3038 } 3039 3040 // If this is an 8 or 16-bit value, it is really passed promoted 3041 // to 32 bits. Insert an assert[sz]ext to capture this, then 3042 // truncate to the right size. 3043 switch (VA.getLocInfo()) { 3044 default: llvm_unreachable("Unknown loc info!"); 3045 case CCValAssign::Full: break; 3046 case CCValAssign::BCvt: 3047 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 3048 break; 3049 case CCValAssign::SExt: 3050 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 3051 DAG.getValueType(VA.getValVT())); 3052 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 3053 break; 3054 case CCValAssign::ZExt: 3055 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 3056 DAG.getValueType(VA.getValVT())); 3057 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 3058 break; 3059 } 3060 3061 InVals.push_back(ArgValue); 3062 3063 } else { // VA.isRegLoc() 3064 3065 // sanity check 3066 assert(VA.isMemLoc()); 3067 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 3068 3069 int index = ArgLocs[i].getValNo(); 3070 3071 // Some Ins[] entries become multiple ArgLoc[] entries. 3072 // Process them only once. 3073 if (index != lastInsIndex) 3074 { 3075 ISD::ArgFlagsTy Flags = Ins[index].Flags; 3076 // FIXME: For now, all byval parameter objects are marked mutable. 3077 // This can be changed with more analysis. 3078 // In case of tail call optimization, mark all arguments mutable, 3079 // since they could be overwritten by the lowering of arguments in 3080 // case of a tail call. 3081 if (Flags.isByVal()) { 3082 unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); 3083 int FrameIndex = StoreByValRegs( 3084 CCInfo, DAG, dl, Chain, CurOrigArg, 3085 CurByValIndex, 3086 Ins[VA.getValNo()].PartOffset, 3087 VA.getLocMemOffset(), 3088 Flags.getByValSize(), 3089 true /*force mutable frames*/); 3090 InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); 3091 CCInfo.nextInRegsParam(); 3092 } else { 3093 unsigned FIOffset = VA.getLocMemOffset() + 3094 AFI->getStoredByValParamsPadding(); 3095 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 3096 FIOffset, true); 3097 3098 // Create load nodes to retrieve arguments from the stack. 3099 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 3100 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 3101 MachinePointerInfo::getFixedStack(FI), 3102 false, false, false, 0)); 3103 } 3104 lastInsIndex = index; 3105 } 3106 } 3107 } 3108 3109 // varargs 3110 if (isVarArg) 3111 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 3112 CCInfo.getNextStackOffset()); 3113 3114 return Chain; 3115 } 3116 3117 /// isFloatingPointZero - Return true if this is +0.0. 3118 static bool isFloatingPointZero(SDValue Op) { 3119 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 3120 return CFP->getValueAPF().isPosZero(); 3121 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 3122 // Maybe this has already been legalized into the constant pool?
3123 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 3124 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 3125 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 3126 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 3127 return CFP->getValueAPF().isPosZero(); 3128 } 3129 } 3130 return false; 3131 } 3132 3133 /// Returns the appropriate ARM CMP (cmp) and the corresponding condition code 3134 /// for the given operands. 3135 SDValue 3136 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 3137 SDValue &ARMcc, SelectionDAG &DAG, 3138 SDLoc dl) const { 3139 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 3140 unsigned C = RHSC->getZExtValue(); 3141 if (!isLegalICmpImmediate(C)) { 3142 // Constant does not fit; try adjusting it by one. 3143 switch (CC) { 3144 default: break; 3145 case ISD::SETLT: 3146 case ISD::SETGE: 3147 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 3148 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 3149 RHS = DAG.getConstant(C-1, MVT::i32); 3150 } 3151 break; 3152 case ISD::SETULT: 3153 case ISD::SETUGE: 3154 if (C != 0 && isLegalICmpImmediate(C-1)) { 3155 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 3156 RHS = DAG.getConstant(C-1, MVT::i32); 3157 } 3158 break; 3159 case ISD::SETLE: 3160 case ISD::SETGT: 3161 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 3162 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 3163 RHS = DAG.getConstant(C+1, MVT::i32); 3164 } 3165 break; 3166 case ISD::SETULE: 3167 case ISD::SETUGT: 3168 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 3169 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 3170 RHS = DAG.getConstant(C+1, MVT::i32); 3171 } 3172 break; 3173 } 3174 } 3175 } 3176 3177 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3178 ARMISD::NodeType CompareType; 3179 switch (CondCode) { 3180 default: 3181 CompareType = ARMISD::CMP; 3182 break; 3183 case ARMCC::EQ: 3184 case ARMCC::NE: 3185 // Uses only the Z flag. 3186 CompareType = ARMISD::CMPZ; 3187 break; 3188 } 3189 ARMcc = DAG.getConstant(CondCode, MVT::i32); 3190 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 3191 } 3192 3193 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 3194 SDValue 3195 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 3196 SDLoc dl) const { 3197 SDValue Cmp; 3198 if (!isFloatingPointZero(RHS)) 3199 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 3200 else 3201 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 3202 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 3203 } 3204 3205 /// duplicateCmp - Glue values can have only one use, so this function 3206 /// duplicates a comparison node.
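/// (For example, when an FMSTAT result would be needed by two CMOVs, the
/// second user gets a freshly rebuilt CMPFP/CMPFPw0 + FMSTAT chain instead
/// of reusing the glued value, which would violate the single-use rule.)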
3207 SDValue 3208 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 3209 unsigned Opc = Cmp.getOpcode(); 3210 SDLoc DL(Cmp); 3211 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 3212 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 3213 3214 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 3215 Cmp = Cmp.getOperand(0); 3216 Opc = Cmp.getOpcode(); 3217 if (Opc == ARMISD::CMPFP) 3218 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 3219 else { 3220 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 3221 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 3222 } 3223 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 3224 } 3225 3226 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 3227 SDValue Cond = Op.getOperand(0); 3228 SDValue SelectTrue = Op.getOperand(1); 3229 SDValue SelectFalse = Op.getOperand(2); 3230 SDLoc dl(Op); 3231 3232 // Convert: 3233 // 3234 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 3235 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 3236 // 3237 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 3238 const ConstantSDNode *CMOVTrue = 3239 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 3240 const ConstantSDNode *CMOVFalse = 3241 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 3242 3243 if (CMOVTrue && CMOVFalse) { 3244 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 3245 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 3246 3247 SDValue True; 3248 SDValue False; 3249 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 3250 True = SelectTrue; 3251 False = SelectFalse; 3252 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 3253 True = SelectFalse; 3254 False = SelectTrue; 3255 } 3256 3257 if (True.getNode() && False.getNode()) { 3258 EVT VT = Op.getValueType(); 3259 SDValue ARMcc = Cond.getOperand(2); 3260 SDValue CCR = Cond.getOperand(3); 3261 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 3262 assert(True.getValueType() == VT); 3263 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 3264 } 3265 } 3266 } 3267 3268 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the 3269 // undefined bits before doing a full-word comparison with zero. 3270 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, 3271 DAG.getConstant(1, Cond.getValueType())); 3272 3273 return DAG.getSelectCC(dl, Cond, 3274 DAG.getConstant(0, Cond.getValueType()), 3275 SelectTrue, SelectFalse, ISD::SETNE); 3276 } 3277 3278 static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { 3279 if (CC == ISD::SETNE) 3280 return ISD::SETEQ; 3281 return ISD::getSetCCSwappedOperands(CC); 3282 } 3283 3284 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 3285 bool &swpCmpOps, bool &swpVselOps) { 3286 // Start by selecting the GE condition code for opcodes that return true for 3287 // 'equality' 3288 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || 3289 CC == ISD::SETULE) 3290 CondCode = ARMCC::GE; 3291 3292 // and GT for opcodes that return false for 'equality'. 3293 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || 3294 CC == ISD::SETULT) 3295 CondCode = ARMCC::GT; 3296 3297 // Since we are constrained to GE/GT, if the opcode contains 'less', we need 3298 // to swap the compare operands. 
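// (E.g. "a OLE b" holds exactly when "b OGE a" does, so the compare is
// re-expressed with swapped operands to reach the GE encoding.)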
3299 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || 3300 CC == ISD::SETULT) 3301 swpCmpOps = true; 3302 3303 // Both GT and GE are ordered comparisons, and return false for 'unordered'. 3304 // If we have an unordered opcode, we need to swap the operands to the VSEL 3305 // instruction (effectively negating the condition). 3306 // 3307 // This also has the effect of swapping which one of 'less' or 'greater' 3308 // returns true, so we also swap the compare operands. It also switches 3309 // whether we return true for 'equality', so we compensate by picking the 3310 // opposite condition code to our original choice. 3311 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || 3312 CC == ISD::SETUGT) { 3313 swpCmpOps = !swpCmpOps; 3314 swpVselOps = !swpVselOps; 3315 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; 3316 } 3317 3318 // 'ordered' is 'anything but unordered', so use the VS condition code and 3319 // swap the VSEL operands. 3320 if (CC == ISD::SETO) { 3321 CondCode = ARMCC::VS; 3322 swpVselOps = true; 3323 } 3324 3325 // 'unordered or not equal' is 'anything but equal', so use the EQ condition 3326 // code and swap the VSEL operands. 3327 if (CC == ISD::SETUNE) { 3328 CondCode = ARMCC::EQ; 3329 swpVselOps = true; 3330 } 3331 } 3332 3333 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 3334 EVT VT = Op.getValueType(); 3335 SDValue LHS = Op.getOperand(0); 3336 SDValue RHS = Op.getOperand(1); 3337 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3338 SDValue TrueVal = Op.getOperand(2); 3339 SDValue FalseVal = Op.getOperand(3); 3340 SDLoc dl(Op); 3341 3342 if (LHS.getValueType() == MVT::i32) { 3343 // Try to generate VSEL on ARMv8. 3344 // The VSEL instruction can't use all the usual ARM condition 3345 // codes: it only has two bits to select the condition code, so it's 3346 // constrained to use only GE, GT, VS and EQ. 3347 // 3348 // To implement all the various ISD::SETXXX opcodes, we sometimes need to 3349 // swap the operands of the previous compare instruction (effectively 3350 // inverting the compare condition, swapping 'less' and 'greater') and 3351 // sometimes need to swap the operands to the VSEL (which inverts the 3352 // condition in the sense of firing whenever the previous condition didn't) 3353 if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || 3354 TrueVal.getValueType() == MVT::f64)) { 3355 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3356 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || 3357 CondCode == ARMCC::VC || CondCode == ARMCC::NE) { 3358 CC = getInverseCCForVSEL(CC); 3359 std::swap(TrueVal, FalseVal); 3360 } 3361 } 3362 3363 SDValue ARMcc; 3364 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3365 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3366 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, 3367 Cmp); 3368 } 3369 3370 ARMCC::CondCodes CondCode, CondCode2; 3371 FPCCToARMCC(CC, CondCode, CondCode2); 3372 3373 // Try to generate VSEL on ARMv8. 3374 if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || 3375 TrueVal.getValueType() == MVT::f64)) { 3376 // We can select VMAXNM/VMINNM from a compare followed by a select with the 3377 // same operands, as follows: 3378 // c = fcmp [ogt, olt, ugt, ult] a, b 3379 // select c, a, b 3380 // We only do this in unsafe-fp-math, because signed zeros and NaNs are 3381 // handled differently than the original code sequence. 
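// (Example of the divergence: for "c = fcmp ult a, b; select c, a, b" with
// a = NaN, the select yields NaN, since ult is true on unordered inputs,
// while VMINNM returns the non-NaN operand b, as IEEE-754(2008) minNum
// semantics require.)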
3382 if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && 3383 RHS == FalseVal) { 3384 if (CC == ISD::SETOGT || CC == ISD::SETUGT) 3385 return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); 3386 if (CC == ISD::SETOLT || CC == ISD::SETULT) 3387 return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); 3388 } 3389 3390 bool swpCmpOps = false; 3391 bool swpVselOps = false; 3392 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); 3393 3394 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || 3395 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { 3396 if (swpCmpOps) 3397 std::swap(LHS, RHS); 3398 if (swpVselOps) 3399 std::swap(TrueVal, FalseVal); 3400 } 3401 } 3402 3403 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3404 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3405 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3406 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 3407 ARMcc, CCR, Cmp); 3408 if (CondCode2 != ARMCC::AL) { 3409 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 3410 // FIXME: Needs another CMP because flag can have but one use. 3411 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 3412 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 3413 Result, TrueVal, ARMcc2, CCR, Cmp2); 3414 } 3415 return Result; 3416 } 3417 3418 /// canChangeToInt - Given the fp compare operand, return true if it is suitable 3419 /// to morph to an integer compare sequence. 3420 static bool canChangeToInt(SDValue Op, bool &SeenZero, 3421 const ARMSubtarget *Subtarget) { 3422 SDNode *N = Op.getNode(); 3423 if (!N->hasOneUse()) 3424 // Otherwise it requires moving the value from fp to integer registers. 3425 return false; 3426 if (!N->getNumValues()) 3427 return false; 3428 EVT VT = Op.getValueType(); 3429 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 3430 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 3431 // vmrs are very slow, e.g. cortex-a8. 
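// (On such cores, transferring the FPSCR flags to the core with vmrs can
// stall the pipeline for many cycles, which is what isFPBrccSlow() reports;
// on most other subtargets the f64 transformation is not worthwhile.)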
3432 return false; 3433 3434 if (isFloatingPointZero(Op)) { 3435 SeenZero = true; 3436 return true; 3437 } 3438 return ISD::isNormalLoad(N); 3439 } 3440 3441 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 3442 if (isFloatingPointZero(Op)) 3443 return DAG.getConstant(0, MVT::i32); 3444 3445 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 3446 return DAG.getLoad(MVT::i32, SDLoc(Op), 3447 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 3448 Ld->isVolatile(), Ld->isNonTemporal(), 3449 Ld->isInvariant(), Ld->getAlignment()); 3450 3451 llvm_unreachable("Unknown VFP cmp argument!"); 3452 } 3453 3454 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 3455 SDValue &RetVal1, SDValue &RetVal2) { 3456 if (isFloatingPointZero(Op)) { 3457 RetVal1 = DAG.getConstant(0, MVT::i32); 3458 RetVal2 = DAG.getConstant(0, MVT::i32); 3459 return; 3460 } 3461 3462 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 3463 SDValue Ptr = Ld->getBasePtr(); 3464 RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op), 3465 Ld->getChain(), Ptr, 3466 Ld->getPointerInfo(), 3467 Ld->isVolatile(), Ld->isNonTemporal(), 3468 Ld->isInvariant(), Ld->getAlignment()); 3469 3470 EVT PtrType = Ptr.getValueType(); 3471 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 3472 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op), 3473 PtrType, Ptr, DAG.getConstant(4, PtrType)); 3474 RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op), 3475 Ld->getChain(), NewPtr, 3476 Ld->getPointerInfo().getWithOffset(4), 3477 Ld->isVolatile(), Ld->isNonTemporal(), 3478 Ld->isInvariant(), NewAlign); 3479 return; 3480 } 3481 3482 llvm_unreachable("Unknown VFP cmp argument!"); 3483 } 3484 3485 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 3486 /// f32 and even f64 comparisons to integer ones. 3487 SDValue 3488 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 3489 SDValue Chain = Op.getOperand(0); 3490 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3491 SDValue LHS = Op.getOperand(2); 3492 SDValue RHS = Op.getOperand(3); 3493 SDValue Dest = Op.getOperand(4); 3494 SDLoc dl(Op); 3495 3496 bool LHSSeenZero = false; 3497 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 3498 bool RHSSeenZero = false; 3499 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 3500 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 3501 // If unsafe fp math optimization is enabled and there are no other uses of 3502 // the CMP operands, and the condition code is EQ or NE, we can optimize it 3503 // to an integer comparison. 
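// (The bit trick: one operand is known to be +/-0.0 here, and after ANDing
// both sides with 0x7fffffff the sign bit is gone, so the integer equality
// "bits(x) & 0x7fffffff == 0" holds exactly when x is +0.0 or -0.0. This
// sidesteps a VFP compare plus the FMSTAT flag transfer.)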
3504 if (CC == ISD::SETOEQ) 3505 CC = ISD::SETEQ; 3506 else if (CC == ISD::SETUNE) 3507 CC = ISD::SETNE; 3508 3509 SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); 3510 SDValue ARMcc; 3511 if (LHS.getValueType() == MVT::f32) { 3512 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3513 bitcastf32Toi32(LHS, DAG), Mask); 3514 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3515 bitcastf32Toi32(RHS, DAG), Mask); 3516 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3517 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3518 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3519 Chain, Dest, ARMcc, CCR, Cmp); 3520 } 3521 3522 SDValue LHS1, LHS2; 3523 SDValue RHS1, RHS2; 3524 expandf64Toi32(LHS, DAG, LHS1, LHS2); 3525 expandf64Toi32(RHS, DAG, RHS1, RHS2); 3526 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 3527 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 3528 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3529 ARMcc = DAG.getConstant(CondCode, MVT::i32); 3530 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3531 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 3532 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 3533 } 3534 3535 return SDValue(); 3536 } 3537 3538 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3539 SDValue Chain = Op.getOperand(0); 3540 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3541 SDValue LHS = Op.getOperand(2); 3542 SDValue RHS = Op.getOperand(3); 3543 SDValue Dest = Op.getOperand(4); 3544 SDLoc dl(Op); 3545 3546 if (LHS.getValueType() == MVT::i32) { 3547 SDValue ARMcc; 3548 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3549 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3550 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3551 Chain, Dest, ARMcc, CCR, Cmp); 3552 } 3553 3554 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 3555 3556 if (getTargetMachine().Options.UnsafeFPMath && 3557 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 3558 CC == ISD::SETNE || CC == ISD::SETUNE)) { 3559 SDValue Result = OptimizeVFPBrcond(Op, DAG); 3560 if (Result.getNode()) 3561 return Result; 3562 } 3563 3564 ARMCC::CondCodes CondCode, CondCode2; 3565 FPCCToARMCC(CC, CondCode, CondCode2); 3566 3567 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3568 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3569 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3570 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3571 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 3572 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3573 if (CondCode2 != ARMCC::AL) { 3574 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 3575 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 3576 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3577 } 3578 return Res; 3579 } 3580 3581 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 3582 SDValue Chain = Op.getOperand(0); 3583 SDValue Table = Op.getOperand(1); 3584 SDValue Index = Op.getOperand(2); 3585 SDLoc dl(Op); 3586 3587 EVT PTy = getPointerTy(); 3588 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 3589 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 3590 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 3591 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 3592 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 3593 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 3594 SDValue Addr = 
DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 3595 if (Subtarget->isThumb2()) { 3596 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 3597 // which does another jump to the destination. This also makes it easier 3598 // to translate it to TBB / TBH later. 3599 // FIXME: This might not work if the function is extremely large. 3600 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 3601 Addr, Op.getOperand(2), JTI, UId); 3602 } 3603 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 3604 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 3605 MachinePointerInfo::getJumpTable(), 3606 false, false, false, 0); 3607 Chain = Addr.getValue(1); 3608 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 3609 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3610 } else { 3611 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 3612 MachinePointerInfo::getJumpTable(), 3613 false, false, false, 0); 3614 Chain = Addr.getValue(1); 3615 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3616 } 3617 } 3618 3619 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3620 EVT VT = Op.getValueType(); 3621 SDLoc dl(Op); 3622 3623 if (Op.getValueType().getVectorElementType() == MVT::i32) { 3624 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 3625 return Op; 3626 return DAG.UnrollVectorOp(Op.getNode()); 3627 } 3628 3629 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 3630 "Invalid type for custom lowering!"); 3631 if (VT != MVT::v4i16) 3632 return DAG.UnrollVectorOp(Op.getNode()); 3633 3634 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 3635 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 3636 } 3637 3638 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3639 EVT VT = Op.getValueType(); 3640 if (VT.isVector()) 3641 return LowerVectorFP_TO_INT(Op, DAG); 3642 3643 SDLoc dl(Op); 3644 unsigned Opc; 3645 3646 switch (Op.getOpcode()) { 3647 default: llvm_unreachable("Invalid opcode!"); 3648 case ISD::FP_TO_SINT: 3649 Opc = ARMISD::FTOSI; 3650 break; 3651 case ISD::FP_TO_UINT: 3652 Opc = ARMISD::FTOUI; 3653 break; 3654 } 3655 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 3656 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3657 } 3658 3659 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3660 EVT VT = Op.getValueType(); 3661 SDLoc dl(Op); 3662 3663 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3664 if (VT.getVectorElementType() == MVT::f32) 3665 return Op; 3666 return DAG.UnrollVectorOp(Op.getNode()); 3667 } 3668 3669 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3670 "Invalid type for custom lowering!"); 3671 if (VT != MVT::v4f32) 3672 return DAG.UnrollVectorOp(Op.getNode()); 3673 3674 unsigned CastOpc; 3675 unsigned Opc; 3676 switch (Op.getOpcode()) { 3677 default: llvm_unreachable("Invalid opcode!"); 3678 case ISD::SINT_TO_FP: 3679 CastOpc = ISD::SIGN_EXTEND; 3680 Opc = ISD::SINT_TO_FP; 3681 break; 3682 case ISD::UINT_TO_FP: 3683 CastOpc = ISD::ZERO_EXTEND; 3684 Opc = ISD::UINT_TO_FP; 3685 break; 3686 } 3687 3688 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3689 return DAG.getNode(Opc, dl, VT, Op); 3690 } 3691 3692 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3693 EVT VT = Op.getValueType(); 3694 if (VT.isVector()) 3695 return LowerVectorINT_TO_FP(Op, DAG); 3696 3697 SDLoc dl(Op); 3698 unsigned Opc; 3699 3700 switch (Op.getOpcode()) { 3701 default: 
llvm_unreachable("Invalid opcode!"); 3702 case ISD::SINT_TO_FP: 3703 Opc = ARMISD::SITOF; 3704 break; 3705 case ISD::UINT_TO_FP: 3706 Opc = ARMISD::UITOF; 3707 break; 3708 } 3709 3710 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3711 return DAG.getNode(Opc, dl, VT, Op); 3712 } 3713 3714 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3715 // Implement fcopysign with a fabs and a conditional fneg. 3716 SDValue Tmp0 = Op.getOperand(0); 3717 SDValue Tmp1 = Op.getOperand(1); 3718 SDLoc dl(Op); 3719 EVT VT = Op.getValueType(); 3720 EVT SrcVT = Tmp1.getValueType(); 3721 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3722 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3723 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3724 3725 if (UseNEON) { 3726 // Use VBSL to copy the sign bit. 3727 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3728 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3729 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3730 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3731 if (VT == MVT::f64) 3732 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3733 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3734 DAG.getConstant(32, MVT::i32)); 3735 else /*if (VT == MVT::f32)*/ 3736 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3737 if (SrcVT == MVT::f32) { 3738 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3739 if (VT == MVT::f64) 3740 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3741 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3742 DAG.getConstant(32, MVT::i32)); 3743 } else if (VT == MVT::f32) 3744 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3745 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3746 DAG.getConstant(32, MVT::i32)); 3747 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3748 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3749 3750 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3751 MVT::i32); 3752 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3753 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3754 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3755 3756 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3757 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3758 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3759 if (VT == MVT::f32) { 3760 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3761 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3762 DAG.getConstant(0, MVT::i32)); 3763 } else { 3764 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3765 } 3766 3767 return Res; 3768 } 3769 3770 // Bitcast operand 1 to i32. 3771 if (SrcVT == MVT::f64) 3772 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3773 &Tmp1, 1).getValue(1); 3774 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3775 3776 // Or in the signbit with integer operations. 3777 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3778 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3779 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3780 if (VT == MVT::f32) { 3781 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3782 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3783 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3784 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3785 } 3786 3787 // f64: Or the high part with signbit and then combine two parts. 
3788 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3789 &Tmp0, 1); 3790 SDValue Lo = Tmp0.getValue(0); 3791 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3792 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3793 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3794 } 3795 3796 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3797 MachineFunction &MF = DAG.getMachineFunction(); 3798 MachineFrameInfo *MFI = MF.getFrameInfo(); 3799 MFI->setReturnAddressIsTaken(true); 3800 3801 EVT VT = Op.getValueType(); 3802 SDLoc dl(Op); 3803 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3804 if (Depth) { 3805 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3806 SDValue Offset = DAG.getConstant(4, MVT::i32); 3807 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3808 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3809 MachinePointerInfo(), false, false, false, 0); 3810 } 3811 3812 // Return LR, which contains the return address. Mark it an implicit live-in. 3813 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3814 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3815 } 3816 3817 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3818 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3819 MFI->setFrameAddressIsTaken(true); 3820 3821 EVT VT = Op.getValueType(); 3822 SDLoc dl(Op); // FIXME probably not meaningful 3823 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3824 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3825 ? ARM::R7 : ARM::R11; 3826 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3827 while (Depth--) 3828 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3829 MachinePointerInfo(), 3830 false, false, false, 0); 3831 return FrameAddr; 3832 } 3833 3834 /// ExpandBITCAST - If the target supports VFP, this function is called to 3835 /// expand a bit convert where either the source or destination type is i64 to 3836 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3837 /// operand type is illegal (e.g., v2f32 for a target that doesn't support 3838 /// vectors), since the legalizer won't know what to do with that. 3839 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3840 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3841 SDLoc dl(N); 3842 SDValue Op = N->getOperand(0); 3843 3844 // This function is only supposed to be called for i64 types, either as the 3845 // source or destination of the bit convert. 3846 EVT SrcVT = Op.getValueType(); 3847 EVT DstVT = N->getValueType(0); 3848 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3849 "ExpandBITCAST called for non-i64 type"); 3850 3851 // Turn i64->f64 into VMOVDRR. 3852 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3853 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3854 DAG.getConstant(0, MVT::i32)); 3855 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3856 DAG.getConstant(1, MVT::i32)); 3857 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3858 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3859 } 3860 3861 // Turn f64->i64 into VMOVRRD. 3862 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 3863 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 3864 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 3865 // Merge the pieces into a single i64 value. 
3866 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 3867 } 3868 3869 return SDValue(); 3870 } 3871 3872 /// getZeroVector - Returns a vector of the specified type with all zero 3873 /// elements. Zero vectors are used to represent vector negation and in those 3874 /// cases will be implemented with the NEON VNEG instruction. However, VNEG 3875 /// does not support i64 elements, so sometimes the zero vectors will need to 3876 /// be explicitly constructed. Regardless, use a canonical VMOV to create the 3877 /// zero vector. 3878 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { 3879 assert(VT.isVector() && "Expected a vector type"); 3880 // The canonical modified immediate encoding of a zero vector is....0! 3881 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 3882 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 3883 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 3884 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3885 } 3886 3887 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two 3888 /// i32 values and takes a 2 x i32 value to shift plus a shift amount. 3889 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 3890 SelectionDAG &DAG) const { 3891 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3892 EVT VT = Op.getValueType(); 3893 unsigned VTBits = VT.getSizeInBits(); 3894 SDLoc dl(Op); 3895 SDValue ShOpLo = Op.getOperand(0); 3896 SDValue ShOpHi = Op.getOperand(1); 3897 SDValue ShAmt = Op.getOperand(2); 3898 SDValue ARMcc; 3899 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 3900 3901 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 3902 3903 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3904 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3905 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 3906 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3907 DAG.getConstant(VTBits, MVT::i32)); 3908 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 3909 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3910 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 3911 3912 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3913 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3914 ARMcc, DAG, dl); 3915 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 3916 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 3917 CCR, Cmp); 3918 3919 SDValue Ops[2] = { Lo, Hi }; 3920 return DAG.getMergeValues(Ops, 2, dl); 3921 } 3922 3923 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 3924 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
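/// (E.g. for a 64-bit "x << n" with n < 32: Hi = (hi << n) | (lo >> (32-n))
/// and Lo = lo << n. For n >= 32, ExtraShAmt = n - 32 is non-negative, so
/// the CMOV below instead selects Hi = lo << (n-32), and ARM's
/// register-controlled LSL makes "lo << n" zero, as required.)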
3925 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 3926 SelectionDAG &DAG) const { 3927 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3928 EVT VT = Op.getValueType(); 3929 unsigned VTBits = VT.getSizeInBits(); 3930 SDLoc dl(Op); 3931 SDValue ShOpLo = Op.getOperand(0); 3932 SDValue ShOpHi = Op.getOperand(1); 3933 SDValue ShAmt = Op.getOperand(2); 3934 SDValue ARMcc; 3935 3936 assert(Op.getOpcode() == ISD::SHL_PARTS); 3937 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3938 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3939 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 3940 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3941 DAG.getConstant(VTBits, MVT::i32)); 3942 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 3943 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 3944 3945 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3946 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3947 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3948 ARMcc, DAG, dl); 3949 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 3950 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 3951 CCR, Cmp); 3952 3953 SDValue Ops[2] = { Lo, Hi }; 3954 return DAG.getMergeValues(Ops, 2, dl); 3955 } 3956 3957 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 3958 SelectionDAG &DAG) const { 3959 // The rounding mode is in bits 23:22 of the FPSCR. 3960 // The mapping from the ARM rounding mode value to FLT_ROUNDS is 0->1, 1->2, 3961 // 2->3, 3->0. The formula we use to implement this is 3962 // (((FPSCR + (1 << 22)) >> 22) & 3), so that the shift and the AND get 3963 // folded into a bitfield extract. 3963 SDLoc dl(Op); 3964 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 3965 DAG.getConstant(Intrinsic::arm_get_fpscr, 3966 MVT::i32)); 3967 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 3968 DAG.getConstant(1U << 22, MVT::i32)); 3969 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 3970 DAG.getConstant(22, MVT::i32)); 3971 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 3972 DAG.getConstant(3, MVT::i32)); 3973 } 3974 3975 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 3976 const ARMSubtarget *ST) { 3977 EVT VT = N->getValueType(0); 3978 SDLoc dl(N); 3979 3980 if (!ST->hasV6T2Ops()) 3981 return SDValue(); 3982 3983 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 3984 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 3985 } 3986 3987 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count 3988 /// for each 16-bit element from the operand, repeated. The basic idea is to 3989 /// leverage vcnt to get the 8-bit counts, gather and add the results. 3990 /// 3991 /// Trace for v4i16: 3992 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 3993 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) 3994 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) 3995 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] 3996 /// [b0 b1 b2 b3 b4 b5 b6 b7] 3997 /// +[b1 b0 b3 b2 b5 b4 b7 b6] 3998 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, 3999 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) 4000 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { 4001 EVT VT = N->getValueType(0); 4002 SDLoc DL(N); 4003 4004 EVT VT8Bit = VT.is64BitVector() ? 
MVT::v8i8 : MVT::v16i8; 4005 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); 4006 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); 4007 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); 4008 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); 4009 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); 4010 } 4011 4012 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the 4013 /// bit-count for each 16-bit element from the operand. We need slightly 4014 /// different sequencing for v4i16 and v8i16 to stay within NEON's available 4015 /// 64/128-bit registers. 4016 /// 4017 /// Trace for v4i16: 4018 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 4019 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) 4020 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] 4021 /// v4i16:Extracted = [k0 k1 k2 k3 ] 4022 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { 4023 EVT VT = N->getValueType(0); 4024 SDLoc DL(N); 4025 4026 SDValue BitCounts = getCTPOP16BitCounts(N, DAG); 4027 if (VT.is64BitVector()) { 4028 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); 4029 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, 4030 DAG.getIntPtrConstant(0)); 4031 } else { 4032 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, 4033 BitCounts, DAG.getIntPtrConstant(0)); 4034 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); 4035 } 4036 } 4037 4038 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the 4039 /// bit-count for each 32-bit element from the operand. The idea here is 4040 /// to split the vector into 16-bit elements, leverage the 16-bit count 4041 /// routine, and then combine the results. 4042 /// 4043 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): 4044 /// input = [v0 v1 ] (vi: 32-bit elements) 4045 /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) 4046 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) 4047 /// vrev: N0 = [k1 k0 k3 k2 ] 4048 /// [k0 k1 k2 k3 ] 4049 /// N1 =+[k1 k0 k3 k2 ] 4050 /// [k0 k2 k1 k3 ] 4051 /// N2 =+[k1 k3 k0 k2 ] 4052 /// [k0 k2 k1 k3 ] 4053 /// Extended =+[k1 k3 k0 k2 ] 4054 /// [k0 k2 ] 4055 /// Extracted=+[k1 k3 ] 4056 /// 4057 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { 4058 EVT VT = N->getValueType(0); 4059 SDLoc DL(N); 4060 4061 EVT VT16Bit = VT.is64BitVector() ? 
MVT::v4i16 : MVT::v8i16; 4062 4063 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); 4064 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); 4065 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); 4066 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); 4067 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); 4068 4069 if (VT.is64BitVector()) { 4070 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); 4071 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, 4072 DAG.getIntPtrConstant(0)); 4073 } else { 4074 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, 4075 DAG.getIntPtrConstant(0)); 4076 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); 4077 } 4078 } 4079 4080 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, 4081 const ARMSubtarget *ST) { 4082 EVT VT = N->getValueType(0); 4083 4084 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); 4085 assert((VT == MVT::v2i32 || VT == MVT::v4i32 || 4086 VT == MVT::v4i16 || VT == MVT::v8i16) && 4087 "Unexpected type for custom ctpop lowering"); 4088 4089 if (VT.getVectorElementType() == MVT::i32) 4090 return lowerCTPOP32BitElements(N, DAG); 4091 else 4092 return lowerCTPOP16BitElements(N, DAG); 4093 } 4094 4095 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 4096 const ARMSubtarget *ST) { 4097 EVT VT = N->getValueType(0); 4098 SDLoc dl(N); 4099 4100 if (!VT.isVector()) 4101 return SDValue(); 4102 4103 // Lower vector shifts on NEON to use VSHL. 4104 assert(ST->hasNEON() && "unexpected vector shift"); 4105 4106 // Left shifts translate directly to the vshiftu intrinsic. 4107 if (N->getOpcode() == ISD::SHL) 4108 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 4109 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 4110 N->getOperand(0), N->getOperand(1)); 4111 4112 assert((N->getOpcode() == ISD::SRA || 4113 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 4114 4115 // NEON uses the same intrinsics for both left and right shifts. For 4116 // right shifts, the shift amounts are negative, so negate the vector of 4117 // shift amounts. 4118 EVT ShiftVT = N->getOperand(1).getValueType(); 4119 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 4120 getZeroVector(ShiftVT, DAG, dl), 4121 N->getOperand(1)); 4122 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 4123 Intrinsic::arm_neon_vshifts : 4124 Intrinsic::arm_neon_vshiftu); 4125 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 4126 DAG.getConstant(vshiftInt, MVT::i32), 4127 N->getOperand(0), NegatedCount); 4128 } 4129 4130 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 4131 const ARMSubtarget *ST) { 4132 EVT VT = N->getValueType(0); 4133 SDLoc dl(N); 4134 4135 // We can get here for a node like i32 = ISD::SHL i32, i64 4136 if (VT != MVT::i64) 4137 return SDValue(); 4138 4139 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 4140 "Unknown shift to lower!"); 4141 4142 // We only lower SRA, SRL of 1 here, all others use generic lowering. 4143 if (!isa<ConstantSDNode>(N->getOperand(1)) || 4144 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 4145 return SDValue(); 4146 4147 // If we are in thumb mode, we don't have RRX. 4148 if (ST->isThumb1Only()) return SDValue(); 4149 4150 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 
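// (I.e. "x >> 1" on an i64 becomes two instructions: the high word is
// shifted right by one with a flag-setting shift so the dropped bit lands
// in the carry, then RRX rotates that carry into bit 31 of the low word.)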
4151 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 4152 DAG.getConstant(0, MVT::i32)); 4153 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 4154 DAG.getConstant(1, MVT::i32)); 4155 4156 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 4157 // captures the result into a carry flag. 4158 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 4159 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 4160 4161 // The low part is an ARMISD::RRX operand, which shifts the carry in. 4162 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 4163 4164 // Merge the pieces into a single i64 value. 4165 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4166 } 4167 4168 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 4169 SDValue TmpOp0, TmpOp1; 4170 bool Invert = false; 4171 bool Swap = false; 4172 unsigned Opc = 0; 4173 4174 SDValue Op0 = Op.getOperand(0); 4175 SDValue Op1 = Op.getOperand(1); 4176 SDValue CC = Op.getOperand(2); 4177 EVT VT = Op.getValueType(); 4178 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4179 SDLoc dl(Op); 4180 4181 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 4182 switch (SetCCOpcode) { 4183 default: llvm_unreachable("Illegal FP comparison"); 4184 case ISD::SETUNE: 4185 case ISD::SETNE: Invert = true; // Fallthrough 4186 case ISD::SETOEQ: 4187 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 4188 case ISD::SETOLT: 4189 case ISD::SETLT: Swap = true; // Fallthrough 4190 case ISD::SETOGT: 4191 case ISD::SETGT: Opc = ARMISD::VCGT; break; 4192 case ISD::SETOLE: 4193 case ISD::SETLE: Swap = true; // Fallthrough 4194 case ISD::SETOGE: 4195 case ISD::SETGE: Opc = ARMISD::VCGE; break; 4196 case ISD::SETUGE: Swap = true; // Fallthrough 4197 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 4198 case ISD::SETUGT: Swap = true; // Fallthrough 4199 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 4200 case ISD::SETUEQ: Invert = true; // Fallthrough 4201 case ISD::SETONE: 4202 // Expand this to (OLT | OGT). 4203 TmpOp0 = Op0; 4204 TmpOp1 = Op1; 4205 Opc = ISD::OR; 4206 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 4207 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 4208 break; 4209 case ISD::SETUO: Invert = true; // Fallthrough 4210 case ISD::SETO: 4211 // Expand this to (OLT | OGE). 4212 TmpOp0 = Op0; 4213 TmpOp1 = Op1; 4214 Opc = ISD::OR; 4215 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 4216 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 4217 break; 4218 } 4219 } else { 4220 // Integer comparisons. 4221 switch (SetCCOpcode) { 4222 default: llvm_unreachable("Illegal integer comparison"); 4223 case ISD::SETNE: Invert = true; 4224 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 4225 case ISD::SETLT: Swap = true; 4226 case ISD::SETGT: Opc = ARMISD::VCGT; break; 4227 case ISD::SETLE: Swap = true; 4228 case ISD::SETGE: Opc = ARMISD::VCGE; break; 4229 case ISD::SETULT: Swap = true; 4230 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 4231 case ISD::SETULE: Swap = true; 4232 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 4233 } 4234 4235 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 4236 if (Opc == ARMISD::VCEQ) { 4237 4238 SDValue AndOp; 4239 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 4240 AndOp = Op0; 4241 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 4242 AndOp = Op1; 4243 4244 // Ignore bitconvert. 
4245 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 4246 AndOp = AndOp.getOperand(0); 4247 4248 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 4249 Opc = ARMISD::VTST; 4250 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 4251 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 4252 Invert = !Invert; 4253 } 4254 } 4255 } 4256 4257 if (Swap) 4258 std::swap(Op0, Op1); 4259 4260 // If one of the operands is a constant vector zero, attempt to fold the 4261 // comparison to a specialized compare-against-zero form. 4262 SDValue SingleOp; 4263 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 4264 SingleOp = Op0; 4265 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 4266 if (Opc == ARMISD::VCGE) 4267 Opc = ARMISD::VCLEZ; 4268 else if (Opc == ARMISD::VCGT) 4269 Opc = ARMISD::VCLTZ; 4270 SingleOp = Op1; 4271 } 4272 4273 SDValue Result; 4274 if (SingleOp.getNode()) { 4275 switch (Opc) { 4276 case ARMISD::VCEQ: 4277 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 4278 case ARMISD::VCGE: 4279 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 4280 case ARMISD::VCLEZ: 4281 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 4282 case ARMISD::VCGT: 4283 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 4284 case ARMISD::VCLTZ: 4285 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 4286 default: 4287 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 4288 } 4289 } else { 4290 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 4291 } 4292 4293 if (Invert) 4294 Result = DAG.getNOT(dl, Result, VT); 4295 4296 return Result; 4297 } 4298 4299 /// isNEONModifiedImm - Check if the specified splat value corresponds to a 4300 /// valid vector constant for a NEON instruction with a "modified immediate" 4301 /// operand (e.g., VMOV). If so, return the encoded value. 4302 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 4303 unsigned SplatBitSize, SelectionDAG &DAG, 4304 EVT &VT, bool is128Bits, NEONModImmType type) { 4305 unsigned OpCmode, Imm; 4306 4307 // SplatBitSize is set to the smallest size that splats the vector, so a 4308 // zero vector will always have SplatBitSize == 8. However, NEON modified 4309 // immediate instructions other than VMOV do not support the 8-bit encoding 4310 // of a zero vector, and the default encoding of zero is supposed to be the 4311 // 32-bit version. 4312 if (SplatBits == 0) 4313 SplatBitSize = 32; 4314 4315 switch (SplatBitSize) { 4316 case 8: 4317 if (type != VMOVModImm) 4318 return SDValue(); 4319 // Any 1-byte value is OK. Op=0, Cmode=1110. 4320 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 4321 OpCmode = 0xe; 4322 Imm = SplatBits; 4323 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 4324 break; 4325 4326 case 16: 4327 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 4328 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 4329 if ((SplatBits & ~0xff) == 0) { 4330 // Value = 0x00nn: Op=x, Cmode=100x. 4331 OpCmode = 0x8; 4332 Imm = SplatBits; 4333 break; 4334 } 4335 if ((SplatBits & ~0xff00) == 0) { 4336 // Value = 0xnn00: Op=x, Cmode=101x. 4337 OpCmode = 0xa; 4338 Imm = SplatBits >> 8; 4339 break; 4340 } 4341 return SDValue(); 4342 4343 case 32: 4344 // NEON's 32-bit VMOV supports splat values where: 4345 // * only one byte is nonzero, or 4346 // * the least significant byte is 0xff and the second byte is nonzero, or 4347 // * the least significant 2 bytes are 0xff and the third is nonzero. 4348 VT = is128Bits ?
MVT::v4i32 : MVT::v2i32; 4349 if ((SplatBits & ~0xff) == 0) { 4350 // Value = 0x000000nn: Op=x, Cmode=000x. 4351 OpCmode = 0; 4352 Imm = SplatBits; 4353 break; 4354 } 4355 if ((SplatBits & ~0xff00) == 0) { 4356 // Value = 0x0000nn00: Op=x, Cmode=001x. 4357 OpCmode = 0x2; 4358 Imm = SplatBits >> 8; 4359 break; 4360 } 4361 if ((SplatBits & ~0xff0000) == 0) { 4362 // Value = 0x00nn0000: Op=x, Cmode=010x. 4363 OpCmode = 0x4; 4364 Imm = SplatBits >> 16; 4365 break; 4366 } 4367 if ((SplatBits & ~0xff000000) == 0) { 4368 // Value = 0xnn000000: Op=x, Cmode=011x. 4369 OpCmode = 0x6; 4370 Imm = SplatBits >> 24; 4371 break; 4372 } 4373 4374 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 4375 if (type == OtherModImm) return SDValue(); 4376 4377 if ((SplatBits & ~0xffff) == 0 && 4378 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 4379 // Value = 0x0000nnff: Op=x, Cmode=1100. 4380 OpCmode = 0xc; 4381 Imm = SplatBits >> 8; 4382 SplatBits |= 0xff; 4383 break; 4384 } 4385 4386 if ((SplatBits & ~0xffffff) == 0 && 4387 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 4388 // Value = 0x00nnffff: Op=x, Cmode=1101. 4389 OpCmode = 0xd; 4390 Imm = SplatBits >> 16; 4391 SplatBits |= 0xffff; 4392 break; 4393 } 4394 4395 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 4396 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 4397 // VMOV.I32. A (very) minor optimization would be to replicate the value 4398 // and fall through here to test for a valid 64-bit splat. But, then the 4399 // caller would also need to check and handle the change in size. 4400 return SDValue(); 4401 4402 case 64: { 4403 if (type != VMOVModImm) 4404 return SDValue(); 4405 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 4406 uint64_t BitMask = 0xff; 4407 uint64_t Val = 0; 4408 unsigned ImmMask = 1; 4409 Imm = 0; 4410 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 4411 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 4412 Val |= BitMask; 4413 Imm |= ImmMask; 4414 } else if ((SplatBits & BitMask) != 0) { 4415 return SDValue(); 4416 } 4417 BitMask <<= 8; 4418 ImmMask <<= 1; 4419 } 4420 // Op=1, Cmode=1110. 4421 OpCmode = 0x1e; 4422 SplatBits = Val; 4423 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 4424 break; 4425 } 4426 4427 default: 4428 llvm_unreachable("unexpected size for isNEONModifiedImm"); 4429 } 4430 4431 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 4432 return DAG.getTargetConstant(EncodedVal, MVT::i32); 4433 } 4434 4435 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, 4436 const ARMSubtarget *ST) const { 4437 if (!ST->hasVFP3()) 4438 return SDValue(); 4439 4440 bool IsDouble = Op.getValueType() == MVT::f64; 4441 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); 4442 4443 // Try splatting with a VMOV.f32... 4444 APFloat FPVal = CFP->getValueAPF(); 4445 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); 4446 4447 if (ImmVal != -1) { 4448 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { 4449 // We have code in place to select a valid ConstantFP already, no need to 4450 // do any mangling. 4451 return Op; 4452 } 4453 4454 // It's a float and we are trying to use NEON operations where 4455 // possible. Lower it to a splat followed by an extract. 
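  // Roughly: (f32 (extract_vector_elt (v2f32 (ARMISD::VMOVFPIMM imm)), 0)).
  // Lane 0 of a D register aliases an S register, so the extract costs
  // nothing once the splat has been materialized.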
4456 SDLoc DL(Op); 4457 SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); 4458 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, 4459 NewVal); 4460 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, 4461 DAG.getConstant(0, MVT::i32)); 4462 } 4463 4464 // The rest of our options are NEON-only; make sure that's allowed before 4465 // proceeding. 4466 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) 4467 return SDValue(); 4468 4469 EVT VMovVT; 4470 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); 4471 4472 // It wouldn't really be worth bothering for doubles except for one very 4473 // important value, which does happen to match: 0.0. So make sure we don't do 4474 // anything stupid. 4475 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) 4476 return SDValue(); 4477 4478 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). 4479 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT, 4480 false, VMOVModImm); 4481 if (NewVal != SDValue()) { 4482 SDLoc DL(Op); 4483 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, 4484 NewVal); 4485 if (IsDouble) 4486 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); 4487 4488 // It's a float: cast and extract a vector element. 4489 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4490 VecConstant); 4491 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4492 DAG.getConstant(0, MVT::i32)); 4493 } 4494 4495 // Finally, try a VMVN.i32 4496 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT, 4497 false, VMVNModImm); 4498 if (NewVal != SDValue()) { 4499 SDLoc DL(Op); 4500 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); 4501 4502 if (IsDouble) 4503 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); 4504 4505 // It's a float: cast and extract a vector element. 4506 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4507 VecConstant); 4508 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4509 DAG.getConstant(0, MVT::i32)); 4510 } 4511 4512 return SDValue(); 4513 } 4514 4515 // Check if a VEXT instruction can handle the shuffle mask when the 4516 // vector sources of the shuffle are the same. 4517 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { 4518 unsigned NumElts = VT.getVectorNumElements(); 4519 4520 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4521 if (M[0] < 0) 4522 return false; 4523 4524 Imm = M[0]; 4525 4526 // If this is a VEXT shuffle, the immediate value is the index of the first 4527 // element. The other shuffle indices must be the successive elements after 4528 // the first one. 4529 unsigned ExpectedElt = Imm; 4530 for (unsigned i = 1; i < NumElts; ++i) { 4531 // Increment the expected index. If it wraps around, just follow it 4532 // back to index zero and keep going. 4533 ++ExpectedElt; 4534 if (ExpectedElt == NumElts) 4535 ExpectedElt = 0; 4536 4537 if (M[i] < 0) continue; // ignore UNDEF indices 4538 if (ExpectedElt != static_cast<unsigned>(M[i])) 4539 return false; 4540 } 4541 4542 return true; 4543 } 4544 4545 4546 static bool isVEXTMask(ArrayRef<int> M, EVT VT, 4547 bool &ReverseVEXT, unsigned &Imm) { 4548 unsigned NumElts = VT.getVectorNumElements(); 4549 ReverseVEXT = false; 4550 4551 // Assume that the first shuffle index is not UNDEF. Fail if it is.
4552 if (M[0] < 0) 4553 return false; 4554 4555 Imm = M[0]; 4556 4557 // If this is a VEXT shuffle, the immediate value is the index of the first 4558 // element. The other shuffle indices must be the successive elements after 4559 // the first one. 4560 unsigned ExpectedElt = Imm; 4561 for (unsigned i = 1; i < NumElts; ++i) { 4562 // Increment the expected index. If it wraps around, it may still be 4563 // a VEXT but the source vectors must be swapped. 4564 ExpectedElt += 1; 4565 if (ExpectedElt == NumElts * 2) { 4566 ExpectedElt = 0; 4567 ReverseVEXT = true; 4568 } 4569 4570 if (M[i] < 0) continue; // ignore UNDEF indices 4571 if (ExpectedElt != static_cast<unsigned>(M[i])) 4572 return false; 4573 } 4574 4575 // Adjust the index value if the source operands will be swapped. 4576 if (ReverseVEXT) 4577 Imm -= NumElts; 4578 4579 return true; 4580 } 4581 4582 /// isVREVMask - Check if a vector shuffle corresponds to a VREV 4583 /// instruction with the specified blocksize. (The order of the elements 4584 /// within each block of the vector is reversed.) 4585 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 4586 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 4587 "Only possible block sizes for VREV are: 16, 32, 64"); 4588 4589 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4590 if (EltSz == 64) 4591 return false; 4592 4593 unsigned NumElts = VT.getVectorNumElements(); 4594 unsigned BlockElts = M[0] + 1; 4595 // If the first shuffle index is UNDEF, be optimistic. 4596 if (M[0] < 0) 4597 BlockElts = BlockSize / EltSz; 4598 4599 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 4600 return false; 4601 4602 for (unsigned i = 0; i < NumElts; ++i) { 4603 if (M[i] < 0) continue; // ignore UNDEF indices 4604 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 4605 return false; 4606 } 4607 4608 return true; 4609 } 4610 4611 static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 4612 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 4613 // range, then 0 is placed into the resulting vector. So pretty much any mask 4614 // of 8 elements can work here. 4615 return VT == MVT::v8i8 && M.size() == 8; 4616 } 4617 4618 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4619 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4620 if (EltSz == 64) 4621 return false; 4622 4623 unsigned NumElts = VT.getVectorNumElements(); 4624 WhichResult = (M[0] == 0 ? 0 : 1); 4625 for (unsigned i = 0; i < NumElts; i += 2) { 4626 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 4627 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 4628 return false; 4629 } 4630 return true; 4631 } 4632 4633 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 4634 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4635 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 4636 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4637 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4638 if (EltSz == 64) 4639 return false; 4640 4641 unsigned NumElts = VT.getVectorNumElements(); 4642 WhichResult = (M[0] == 0 ? 
0 : 1); 4643 for (unsigned i = 0; i < NumElts; i += 2) { 4644 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 4645 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 4646 return false; 4647 } 4648 return true; 4649 } 4650 4651 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4652 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4653 if (EltSz == 64) 4654 return false; 4655 4656 unsigned NumElts = VT.getVectorNumElements(); 4657 WhichResult = (M[0] == 0 ? 0 : 1); 4658 for (unsigned i = 0; i != NumElts; ++i) { 4659 if (M[i] < 0) continue; // ignore UNDEF indices 4660 if ((unsigned) M[i] != 2 * i + WhichResult) 4661 return false; 4662 } 4663 4664 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4665 if (VT.is64BitVector() && EltSz == 32) 4666 return false; 4667 4668 return true; 4669 } 4670 4671 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 4672 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4673 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>. 4674 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4675 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4676 if (EltSz == 64) 4677 return false; 4678 4679 unsigned Half = VT.getVectorNumElements() / 2; 4680 WhichResult = (M[0] == 0 ? 0 : 1); 4681 for (unsigned j = 0; j != 2; ++j) { 4682 unsigned Idx = WhichResult; 4683 for (unsigned i = 0; i != Half; ++i) { 4684 int MIdx = M[i + j * Half]; 4685 if (MIdx >= 0 && (unsigned) MIdx != Idx) 4686 return false; 4687 Idx += 2; 4688 } 4689 } 4690 4691 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4692 if (VT.is64BitVector() && EltSz == 32) 4693 return false; 4694 4695 return true; 4696 } 4697 4698 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4699 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4700 if (EltSz == 64) 4701 return false; 4702 4703 unsigned NumElts = VT.getVectorNumElements(); 4704 WhichResult = (M[0] == 0 ? 0 : 1); 4705 unsigned Idx = WhichResult * NumElts / 2; 4706 for (unsigned i = 0; i != NumElts; i += 2) { 4707 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 4708 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 4709 return false; 4710 Idx += 1; 4711 } 4712 4713 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4714 if (VT.is64BitVector() && EltSz == 32) 4715 return false; 4716 4717 return true; 4718 } 4719 4720 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 4721 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4722 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 4723 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4724 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4725 if (EltSz == 64) 4726 return false; 4727 4728 unsigned NumElts = VT.getVectorNumElements(); 4729 WhichResult = (M[0] == 0 ? 0 : 1); 4730 unsigned Idx = WhichResult * NumElts / 2; 4731 for (unsigned i = 0; i != NumElts; i += 2) { 4732 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 4733 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 4734 return false; 4735 Idx += 1; 4736 } 4737 4738 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4739 if (VT.is64BitVector() && EltSz == 32) 4740 return false; 4741 4742 return true; 4743 } 4744 4745 /// \return true if this is a reverse operation on a vector.
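/// E.g., for v4i32 the mask <3, 2, 1, 0> is a reverse; UNDEF entries are
/// treated as matching.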
4746 static bool isReverseMask(ArrayRef<int> M, EVT VT) { 4747 unsigned NumElts = VT.getVectorNumElements(); 4748 // Make sure the mask has the right size. 4749 if (NumElts != M.size()) 4750 return false; 4751 4752 // Look for <15, ..., 3, -1, 1, 0>. 4753 for (unsigned i = 0; i != NumElts; ++i) 4754 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) 4755 return false; 4756 4757 return true; 4758 } 4759 4760 // If N is an integer constant that can be moved into a register in one 4761 // instruction, return an SDValue of such a constant (will become a MOV 4762 // instruction). Otherwise return null. 4763 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 4764 const ARMSubtarget *ST, SDLoc dl) { 4765 uint64_t Val; 4766 if (!isa<ConstantSDNode>(N)) 4767 return SDValue(); 4768 Val = cast<ConstantSDNode>(N)->getZExtValue(); 4769 4770 if (ST->isThumb1Only()) { 4771 if (Val <= 255 || ~Val <= 255) 4772 return DAG.getConstant(Val, MVT::i32); 4773 } else { 4774 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 4775 return DAG.getConstant(Val, MVT::i32); 4776 } 4777 return SDValue(); 4778 } 4779 4780 // If this is a case we can't handle, return null and let the default 4781 // expansion code take care of it. 4782 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 4783 const ARMSubtarget *ST) const { 4784 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 4785 SDLoc dl(Op); 4786 EVT VT = Op.getValueType(); 4787 4788 APInt SplatBits, SplatUndef; 4789 unsigned SplatBitSize; 4790 bool HasAnyUndefs; 4791 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 4792 if (SplatBitSize <= 64) { 4793 // Check if an immediate VMOV works. 4794 EVT VmovVT; 4795 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 4796 SplatUndef.getZExtValue(), SplatBitSize, 4797 DAG, VmovVT, VT.is128BitVector(), 4798 VMOVModImm); 4799 if (Val.getNode()) { 4800 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 4801 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4802 } 4803 4804 // Try an immediate VMVN. 4805 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 4806 Val = isNEONModifiedImm(NegatedImm, 4807 SplatUndef.getZExtValue(), SplatBitSize, 4808 DAG, VmovVT, VT.is128BitVector(), 4809 VMVNModImm); 4810 if (Val.getNode()) { 4811 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 4812 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4813 } 4814 4815 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 4816 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { 4817 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 4818 if (ImmVal != -1) { 4819 SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); 4820 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 4821 } 4822 } 4823 } 4824 } 4825 4826 // Scan through the operands to see if only one value is used. 4827 // 4828 // As an optimisation, even if more than one value is used it may be more 4829 // profitable to splat with one value then change some lanes. 4830 // 4831 // Heuristically we decide to do this if the vector has a "dominant" value, 4832 // defined as splatted to more than half of the lanes. 4833 unsigned NumElts = VT.getVectorNumElements(); 4834 bool isOnlyLowElement = true; 4835 bool usesOnlyOneValue = true; 4836 bool hasDominantValue = false; 4837 bool isConstant = true; 4838 4839 // Map of the number of times a particular SDValue appears in the 4840 // element list. 
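  // UNDEF operands are skipped entirely and are never counted as a value.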
4841 DenseMap<SDValue, unsigned> ValueCounts; 4842 SDValue Value; 4843 for (unsigned i = 0; i < NumElts; ++i) { 4844 SDValue V = Op.getOperand(i); 4845 if (V.getOpcode() == ISD::UNDEF) 4846 continue; 4847 if (i > 0) 4848 isOnlyLowElement = false; 4849 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 4850 isConstant = false; 4851 4852 ValueCounts.insert(std::make_pair(V, 0)); 4853 unsigned &Count = ValueCounts[V]; 4854 4855 // Is this value dominant? (takes up more than half of the lanes) 4856 if (++Count > (NumElts / 2)) { 4857 hasDominantValue = true; 4858 Value = V; 4859 } 4860 } 4861 if (ValueCounts.size() != 1) 4862 usesOnlyOneValue = false; 4863 if (!Value.getNode() && ValueCounts.size() > 0) 4864 Value = ValueCounts.begin()->first; 4865 4866 if (ValueCounts.size() == 0) 4867 return DAG.getUNDEF(VT); 4868 4869 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. 4870 // Keep going if we are hitting this case. 4871 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) 4872 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 4873 4874 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4875 4876 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 4877 // i32 and try again. 4878 if (hasDominantValue && EltSize <= 32) { 4879 if (!isConstant) { 4880 SDValue N; 4881 4882 // If we are VDUPing a value that comes directly from a vector, that will 4883 // cause an unnecessary move to and from a GPR, where instead we could 4884 // just use VDUPLANE. We can only do this if the lane being extracted 4885 // is at a constant index, as the VDUP from lane instructions only have 4886 // constant-index forms. 4887 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 4888 isa<ConstantSDNode>(Value->getOperand(1))) { 4889 // We need to create a new undef vector to use for the VDUPLANE if the 4890 // size of the vector from which we get the value is different than the 4891 // size of the vector that we need to create. We will insert the element 4892 // such that the register coalescer will remove unnecessary copies. 4893 if (VT != Value->getOperand(0).getValueType()) { 4894 ConstantSDNode *constIndex; 4895 constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); 4896 assert(constIndex && "The index is not a constant!"); 4897 unsigned index = constIndex->getAPIntValue().getLimitedValue() % 4898 VT.getVectorNumElements(); 4899 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4900 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), 4901 Value, DAG.getConstant(index, MVT::i32)), 4902 DAG.getConstant(index, MVT::i32)); 4903 } else 4904 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4905 Value->getOperand(0), Value->getOperand(1)); 4906 } else 4907 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); 4908 4909 if (!usesOnlyOneValue) { 4910 // The dominant value was splatted as 'N', but we now have to insert 4911 // all differing elements. 
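          // E.g., for <a, a, a, b> the splat VDUP(a) is followed by a single
          // INSERT_VECTOR_ELT placing b in lane 3.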
4912 for (unsigned I = 0; I < NumElts; ++I) { 4913 if (Op.getOperand(I) == Value) 4914 continue; 4915 SmallVector<SDValue, 3> Ops; 4916 Ops.push_back(N); 4917 Ops.push_back(Op.getOperand(I)); 4918 Ops.push_back(DAG.getConstant(I, MVT::i32)); 4919 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); 4920 } 4921 } 4922 return N; 4923 } 4924 if (VT.getVectorElementType().isFloatingPoint()) { 4925 SmallVector<SDValue, 8> Ops; 4926 for (unsigned i = 0; i < NumElts; ++i) 4927 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 4928 Op.getOperand(i))); 4929 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 4930 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 4931 Val = LowerBUILD_VECTOR(Val, DAG, ST); 4932 if (Val.getNode()) 4933 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4934 } 4935 if (usesOnlyOneValue) { 4936 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 4937 if (isConstant && Val.getNode()) 4938 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 4939 } 4940 } 4941 4942 // If all elements are constants and the case above didn't get hit, fall back 4943 // to the default expansion, which will generate a load from the constant 4944 // pool. 4945 if (isConstant) 4946 return SDValue(); 4947 4948 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 4949 if (NumElts >= 4) { 4950 SDValue shuffle = ReconstructShuffle(Op, DAG); 4951 if (shuffle != SDValue()) 4952 return shuffle; 4953 } 4954 4955 // Vectors with 32- or 64-bit elements can be built by directly assigning 4956 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 4957 // will be legalized. 4958 if (EltSize >= 32) { 4959 // Do the expansion with floating-point types, since that is what the VFP 4960 // registers are defined to use, and since i64 is not legal. 4961 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4962 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4963 SmallVector<SDValue, 8> Ops; 4964 for (unsigned i = 0; i < NumElts; ++i) 4965 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 4966 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4967 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4968 } 4969 4970 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we 4971 // know the default expansion would otherwise fall back on something even 4972 // worse. For a vector with one or two non-undef values, that's 4973 // scalar_to_vector for the elements followed by a shuffle (provided the 4974 // shuffle is valid for the target) and materialization element by element 4975 // on the stack followed by a load for everything else. 4976 if (!isConstant && !usesOnlyOneValue) { 4977 SDValue Vec = DAG.getUNDEF(VT); 4978 for (unsigned i = 0 ; i < NumElts; ++i) { 4979 SDValue V = Op.getOperand(i); 4980 if (V.getOpcode() == ISD::UNDEF) 4981 continue; 4982 SDValue LaneIdx = DAG.getConstant(i, MVT::i32); 4983 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); 4984 } 4985 return Vec; 4986 } 4987 4988 return SDValue(); 4989 } 4990 4991 // Gather data to see if the operation can be modelled as a 4992 // shuffle in combination with VEXTs. 
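// E.g., a v4i16 built from lanes <1, 2, 3, 4> of a single v8i16 source can be
// modelled as a VEXT of that source's two halves with an offset of 1. (A
// sketch of the idea; the code below also copes with two distinct sources.)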
4993 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 4994 SelectionDAG &DAG) const { 4995 SDLoc dl(Op); 4996 EVT VT = Op.getValueType(); 4997 unsigned NumElts = VT.getVectorNumElements(); 4998 4999 SmallVector<SDValue, 2> SourceVecs; 5000 SmallVector<unsigned, 2> MinElts; 5001 SmallVector<unsigned, 2> MaxElts; 5002 5003 for (unsigned i = 0; i < NumElts; ++i) { 5004 SDValue V = Op.getOperand(i); 5005 if (V.getOpcode() == ISD::UNDEF) 5006 continue; 5007 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 5008 // A shuffle can only come from building a vector from various 5009 // elements of other vectors. 5010 return SDValue(); 5011 } else if (V.getOperand(0).getValueType().getVectorElementType() != 5012 VT.getVectorElementType()) { 5013 // This code doesn't know how to handle shuffles where the vector 5014 // element types do not match (this happens because type legalization 5015 // promotes the return type of EXTRACT_VECTOR_ELT). 5016 // FIXME: It might be appropriate to extend this code to handle 5017 // mismatched types. 5018 return SDValue(); 5019 } 5020 5021 // Record this extraction against the appropriate vector if possible... 5022 SDValue SourceVec = V.getOperand(0); 5023 // If the element number isn't a constant, we can't effectively 5024 // analyze what's going on. 5025 if (!isa<ConstantSDNode>(V.getOperand(1))) 5026 return SDValue(); 5027 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 5028 bool FoundSource = false; 5029 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 5030 if (SourceVecs[j] == SourceVec) { 5031 if (MinElts[j] > EltNo) 5032 MinElts[j] = EltNo; 5033 if (MaxElts[j] < EltNo) 5034 MaxElts[j] = EltNo; 5035 FoundSource = true; 5036 break; 5037 } 5038 } 5039 5040 // Or record a new source if not... 5041 if (!FoundSource) { 5042 SourceVecs.push_back(SourceVec); 5043 MinElts.push_back(EltNo); 5044 MaxElts.push_back(EltNo); 5045 } 5046 } 5047 5048 // Currently we only do something sane when at most two source vectors 5049 // are involved. 5050 if (SourceVecs.size() > 2) 5051 return SDValue(); 5052 5053 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 5054 int VEXTOffsets[2] = {0, 0}; 5055 5056 // This loop extracts the usage patterns of the source vectors 5057 // and prepares appropriate SDValues for a shuffle if possible. 5058 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 5059 if (SourceVecs[i].getValueType() == VT) { 5060 // No VEXT necessary 5061 ShuffleSrcs[i] = SourceVecs[i]; 5062 VEXTOffsets[i] = 0; 5063 continue; 5064 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 5065 // It probably isn't worth padding out a smaller vector just to 5066 // break it down again in a shuffle. 5067 return SDValue(); 5068 } 5069 5070 // Since only 64-bit and 128-bit vectors are legal on ARM and 5071 // we've eliminated the other cases...
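    // ...the source vector must have exactly twice as many elements as the
    // result, which is what the assertion below checks.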
5072 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 5073 "unexpected vector sizes in ReconstructShuffle"); 5074 5075 if (MaxElts[i] - MinElts[i] >= NumElts) { 5076 // Span too large for a VEXT to cope 5077 return SDValue(); 5078 } 5079 5080 if (MinElts[i] >= NumElts) { 5081 // The extraction can just take the second half 5082 VEXTOffsets[i] = NumElts; 5083 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5084 SourceVecs[i], 5085 DAG.getIntPtrConstant(NumElts)); 5086 } else if (MaxElts[i] < NumElts) { 5087 // The extraction can just take the first half 5088 VEXTOffsets[i] = 0; 5089 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5090 SourceVecs[i], 5091 DAG.getIntPtrConstant(0)); 5092 } else { 5093 // An actual VEXT is needed 5094 VEXTOffsets[i] = MinElts[i]; 5095 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5096 SourceVecs[i], 5097 DAG.getIntPtrConstant(0)); 5098 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5099 SourceVecs[i], 5100 DAG.getIntPtrConstant(NumElts)); 5101 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 5102 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 5103 } 5104 } 5105 5106 SmallVector<int, 8> Mask; 5107 5108 for (unsigned i = 0; i < NumElts; ++i) { 5109 SDValue Entry = Op.getOperand(i); 5110 if (Entry.getOpcode() == ISD::UNDEF) { 5111 Mask.push_back(-1); 5112 continue; 5113 } 5114 5115 SDValue ExtractVec = Entry.getOperand(0); 5116 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 5117 .getOperand(1))->getSExtValue(); 5118 if (ExtractVec == SourceVecs[0]) { 5119 Mask.push_back(ExtractElt - VEXTOffsets[0]); 5120 } else { 5121 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 5122 } 5123 } 5124 5125 // Final check before we try to produce nonsense... 5126 if (isShuffleMaskLegal(Mask, VT)) 5127 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 5128 &Mask[0]); 5129 5130 return SDValue(); 5131 } 5132 5133 /// isShuffleMaskLegal - Targets can use this to indicate that they only 5134 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 5135 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 5136 /// are assumed to be legal. 5137 bool 5138 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 5139 EVT VT) const { 5140 if (VT.getVectorNumElements() == 4 && 5141 (VT.is128BitVector() || VT.is64BitVector())) { 5142 unsigned PFIndexes[4]; 5143 for (unsigned i = 0; i != 4; ++i) { 5144 if (M[i] < 0) 5145 PFIndexes[i] = 8; 5146 else 5147 PFIndexes[i] = M[i]; 5148 } 5149 5150 // Compute the index in the perfect shuffle table. 
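    // The four lane indexes are packed as base-9 digits: eight possible source
    // lanes plus one extra value (8) meaning "undef".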
5151 unsigned PFTableIndex = 5152 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5153 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5154 unsigned Cost = (PFEntry >> 30); 5155 5156 if (Cost <= 4) 5157 return true; 5158 } 5159 5160 bool ReverseVEXT; 5161 unsigned Imm, WhichResult; 5162 5163 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5164 return (EltSize >= 32 || 5165 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 5166 isVREVMask(M, VT, 64) || 5167 isVREVMask(M, VT, 32) || 5168 isVREVMask(M, VT, 16) || 5169 isVEXTMask(M, VT, ReverseVEXT, Imm) || 5170 isVTBLMask(M, VT) || 5171 isVTRNMask(M, VT, WhichResult) || 5172 isVUZPMask(M, VT, WhichResult) || 5173 isVZIPMask(M, VT, WhichResult) || 5174 isVTRN_v_undef_Mask(M, VT, WhichResult) || 5175 isVUZP_v_undef_Mask(M, VT, WhichResult) || 5176 isVZIP_v_undef_Mask(M, VT, WhichResult) || 5177 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); 5178 } 5179 5180 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5181 /// the specified operations to build the shuffle. 5182 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5183 SDValue RHS, SelectionDAG &DAG, 5184 SDLoc dl) { 5185 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5186 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5187 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 5188 5189 enum { 5190 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5191 OP_VREV, 5192 OP_VDUP0, 5193 OP_VDUP1, 5194 OP_VDUP2, 5195 OP_VDUP3, 5196 OP_VEXT1, 5197 OP_VEXT2, 5198 OP_VEXT3, 5199 OP_VUZPL, // VUZP, left result 5200 OP_VUZPR, // VUZP, right result 5201 OP_VZIPL, // VZIP, left result 5202 OP_VZIPR, // VZIP, right result 5203 OP_VTRNL, // VTRN, left result 5204 OP_VTRNR // VTRN, right result 5205 }; 5206 5207 if (OpNum == OP_COPY) { 5208 if (LHSID == (1*9+2)*9+3) return LHS; 5209 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 5210 return RHS; 5211 } 5212 5213 SDValue OpLHS, OpRHS; 5214 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 5215 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 5216 EVT VT = OpLHS.getValueType(); 5217 5218 switch (OpNum) { 5219 default: llvm_unreachable("Unknown shuffle opcode!"); 5220 case OP_VREV: 5221 // VREV divides the vector in half and swaps within the half. 
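    // For the 4-element vectors handled here the lane permutation is always
    // <0,1,2,3> -> <1,0,3,2>, whichever VREV variant is chosen below.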
5222 if (VT.getVectorElementType() == MVT::i32 || 5223 VT.getVectorElementType() == MVT::f32) 5224 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 5225 // vrev <4 x i16> -> VREV32 5226 if (VT.getVectorElementType() == MVT::i16) 5227 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); 5228 // vrev <4 x i8> -> VREV16 5229 assert(VT.getVectorElementType() == MVT::i8); 5230 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); 5231 case OP_VDUP0: 5232 case OP_VDUP1: 5233 case OP_VDUP2: 5234 case OP_VDUP3: 5235 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 5236 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 5237 case OP_VEXT1: 5238 case OP_VEXT2: 5239 case OP_VEXT3: 5240 return DAG.getNode(ARMISD::VEXT, dl, VT, 5241 OpLHS, OpRHS, 5242 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 5243 case OP_VUZPL: 5244 case OP_VUZPR: 5245 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5246 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 5247 case OP_VZIPL: 5248 case OP_VZIPR: 5249 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5250 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 5251 case OP_VTRNL: 5252 case OP_VTRNR: 5253 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5254 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 5255 } 5256 } 5257 5258 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 5259 ArrayRef<int> ShuffleMask, 5260 SelectionDAG &DAG) { 5261 // Check to see if we can use the VTBL instruction. 5262 SDValue V1 = Op.getOperand(0); 5263 SDValue V2 = Op.getOperand(1); 5264 SDLoc DL(Op); 5265 5266 SmallVector<SDValue, 8> VTBLMask; 5267 for (ArrayRef<int>::iterator 5268 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) 5269 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); 5270 5271 if (V2.getNode()->getOpcode() == ISD::UNDEF) 5272 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, 5273 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 5274 &VTBLMask[0], 8)); 5275 5276 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, 5277 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 5278 &VTBLMask[0], 8)); 5279 } 5280 5281 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, 5282 SelectionDAG &DAG) { 5283 SDLoc DL(Op); 5284 SDValue OpLHS = Op.getOperand(0); 5285 EVT VT = OpLHS.getValueType(); 5286 5287 assert((VT == MVT::v8i16 || VT == MVT::v16i8) && 5288 "Expected a v8i16/v16i8 type"); 5289 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); 5290 // For a v16i8 type: after the VREV64, each double word of the vector has 5291 // been reversed in place, giving <7, ..., 0, 15, ..., 8>. The VEXT by 8 bytes 5292 // below then swaps the two double words, completing the reversal. The v8i16 5293 // case is similar, with a VEXT by 4 halfwords. unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; 5294 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, 5295 DAG.getConstant(ExtractNum, MVT::i32)); 5296 } 5297 5298 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 5299 SDValue V1 = Op.getOperand(0); 5300 SDValue V2 = Op.getOperand(1); 5301 SDLoc dl(Op); 5302 EVT VT = Op.getValueType(); 5303 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 5304 5305 // Convert shuffles that are directly supported on NEON to target-specific 5306 // DAG nodes, instead of keeping them as shuffles and matching them again 5307 // during code selection. This is more efficient and avoids the possibility 5308 // of inconsistencies between legalization and selection. 5309 // FIXME: floating-point vectors should be canonicalized to integer vectors 5310 // of the same size so that they get CSEd properly.
5311 ArrayRef<int> ShuffleMask = SVN->getMask(); 5312 5313 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5314 if (EltSize <= 32) { 5315 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 5316 int Lane = SVN->getSplatIndex(); 5317 // If this is undef splat, generate it via "just" vdup, if possible. 5318 if (Lane == -1) Lane = 0; 5319 5320 // Test if V1 is a SCALAR_TO_VECTOR. 5321 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5322 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 5323 } 5324 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR 5325 // (and probably will turn into a SCALAR_TO_VECTOR once legalization 5326 // reaches it). 5327 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 5328 !isa<ConstantSDNode>(V1.getOperand(0))) { 5329 bool IsScalarToVector = true; 5330 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 5331 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 5332 IsScalarToVector = false; 5333 break; 5334 } 5335 if (IsScalarToVector) 5336 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 5337 } 5338 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 5339 DAG.getConstant(Lane, MVT::i32)); 5340 } 5341 5342 bool ReverseVEXT; 5343 unsigned Imm; 5344 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 5345 if (ReverseVEXT) 5346 std::swap(V1, V2); 5347 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 5348 DAG.getConstant(Imm, MVT::i32)); 5349 } 5350 5351 if (isVREVMask(ShuffleMask, VT, 64)) 5352 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 5353 if (isVREVMask(ShuffleMask, VT, 32)) 5354 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 5355 if (isVREVMask(ShuffleMask, VT, 16)) 5356 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 5357 5358 if (V2->getOpcode() == ISD::UNDEF && 5359 isSingletonVEXTMask(ShuffleMask, VT, Imm)) { 5360 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, 5361 DAG.getConstant(Imm, MVT::i32)); 5362 } 5363 5364 // Check for Neon shuffles that modify both input vectors in place. 5365 // If both results are used, i.e., if there are two shuffles with the same 5366 // source operands and with masks corresponding to both results of one of 5367 // these operations, DAG memoization will ensure that a single node is 5368 // used for both shuffles. 5369 unsigned WhichResult; 5370 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 5371 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5372 V1, V2).getValue(WhichResult); 5373 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 5374 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5375 V1, V2).getValue(WhichResult); 5376 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 5377 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5378 V1, V2).getValue(WhichResult); 5379 5380 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5381 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5382 V1, V1).getValue(WhichResult); 5383 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5384 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5385 V1, V1).getValue(WhichResult); 5386 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5387 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5388 V1, V1).getValue(WhichResult); 5389 } 5390 5391 // If the shuffle is not directly supported and it has 4 elements, use 5392 // the PerfectShuffle-generated table to synthesize it from other shuffles. 
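  // The table has an entry for every possible 4-element mask (including
  // undefs); entries that would cost more than 4 operations are rejected and
  // fall through to the generic expansions below.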
5393 unsigned NumElts = VT.getVectorNumElements(); 5394 if (NumElts == 4) { 5395 unsigned PFIndexes[4]; 5396 for (unsigned i = 0; i != 4; ++i) { 5397 if (ShuffleMask[i] < 0) 5398 PFIndexes[i] = 8; 5399 else 5400 PFIndexes[i] = ShuffleMask[i]; 5401 } 5402 5403 // Compute the index in the perfect shuffle table. 5404 unsigned PFTableIndex = 5405 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5406 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5407 unsigned Cost = (PFEntry >> 30); 5408 5409 if (Cost <= 4) 5410 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5411 } 5412 5413 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 5414 if (EltSize >= 32) { 5415 // Do the expansion with floating-point types, since that is what the VFP 5416 // registers are defined to use, and since i64 is not legal. 5417 EVT EltVT = EVT::getFloatingPointVT(EltSize); 5418 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 5419 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 5420 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 5421 SmallVector<SDValue, 8> Ops; 5422 for (unsigned i = 0; i < NumElts; ++i) { 5423 if (ShuffleMask[i] < 0) 5424 Ops.push_back(DAG.getUNDEF(EltVT)); 5425 else 5426 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 5427 ShuffleMask[i] < (int)NumElts ? V1 : V2, 5428 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 5429 MVT::i32))); 5430 } 5431 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 5432 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 5433 } 5434 5435 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) 5436 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); 5437 5438 if (VT == MVT::v8i8) { 5439 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 5440 if (NewOp.getNode()) 5441 return NewOp; 5442 } 5443 5444 return SDValue(); 5445 } 5446 5447 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5448 // INSERT_VECTOR_ELT is legal only for immediate indexes. 5449 SDValue Lane = Op.getOperand(2); 5450 if (!isa<ConstantSDNode>(Lane)) 5451 return SDValue(); 5452 5453 return Op; 5454 } 5455 5456 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5457 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 5458 SDValue Lane = Op.getOperand(1); 5459 if (!isa<ConstantSDNode>(Lane)) 5460 return SDValue(); 5461 5462 SDValue Vec = Op.getOperand(0); 5463 if (Op.getValueType() == MVT::i32 && 5464 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 5465 SDLoc dl(Op); 5466 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 5467 } 5468 5469 return Op; 5470 } 5471 5472 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 5473 // The only time a CONCAT_VECTORS operation can have legal types is when 5474 // two 64-bit vectors are concatenated to a 128-bit vector. 
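// E.g., v4i32 = concat_vectors v2i32, v2i32. The two halves are inserted as
// the f64 lanes of a v2f64 and the result is bitcast back, so every
// intermediate type stays legal.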
5475 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 5476 "unexpected CONCAT_VECTORS"); 5477 SDLoc dl(Op); 5478 SDValue Val = DAG.getUNDEF(MVT::v2f64); 5479 SDValue Op0 = Op.getOperand(0); 5480 SDValue Op1 = Op.getOperand(1); 5481 if (Op0.getOpcode() != ISD::UNDEF) 5482 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5483 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 5484 DAG.getIntPtrConstant(0)); 5485 if (Op1.getOpcode() != ISD::UNDEF) 5486 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5487 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 5488 DAG.getIntPtrConstant(1)); 5489 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 5490 } 5491 5492 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 5493 /// element has been zero/sign-extended, depending on the isSigned parameter, 5494 /// from an integer type half its size. 5495 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 5496 bool isSigned) { 5497 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 5498 EVT VT = N->getValueType(0); 5499 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 5500 SDNode *BVN = N->getOperand(0).getNode(); 5501 if (BVN->getValueType(0) != MVT::v4i32 || 5502 BVN->getOpcode() != ISD::BUILD_VECTOR) 5503 return false; 5504 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 5505 unsigned HiElt = 1 - LoElt; 5506 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 5507 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 5508 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 5509 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 5510 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 5511 return false; 5512 if (isSigned) { 5513 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 5514 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 5515 return true; 5516 } else { 5517 if (Hi0->isNullValue() && Hi1->isNullValue()) 5518 return true; 5519 } 5520 return false; 5521 } 5522 5523 if (N->getOpcode() != ISD::BUILD_VECTOR) 5524 return false; 5525 5526 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 5527 SDNode *Elt = N->getOperand(i).getNode(); 5528 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 5529 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5530 unsigned HalfSize = EltSize / 2; 5531 if (isSigned) { 5532 if (!isIntN(HalfSize, C->getSExtValue())) 5533 return false; 5534 } else { 5535 if (!isUIntN(HalfSize, C->getZExtValue())) 5536 return false; 5537 } 5538 continue; 5539 } 5540 return false; 5541 } 5542 5543 return true; 5544 } 5545 5546 /// isSignExtended - Check if a node is a vector value that is sign-extended 5547 /// or a constant BUILD_VECTOR with sign-extended elements. 5548 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 5549 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 5550 return true; 5551 if (isExtendedBUILD_VECTOR(N, DAG, true)) 5552 return true; 5553 return false; 5554 } 5555 5556 /// isZeroExtended - Check if a node is a vector value that is zero-extended 5557 /// or a constant BUILD_VECTOR with zero-extended elements. 
5558 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 5559 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 5560 return true; 5561 if (isExtendedBUILD_VECTOR(N, DAG, false)) 5562 return true; 5563 return false; 5564 } 5565 5566 static EVT getExtensionTo64Bits(const EVT &OrigVT) { 5567 if (OrigVT.getSizeInBits() >= 64) 5568 return OrigVT; 5569 5570 assert(OrigVT.isSimple() && "Expecting a simple value type"); 5571 5572 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; 5573 switch (OrigSimpleTy) { 5574 default: llvm_unreachable("Unexpected Vector Type"); 5575 case MVT::v2i8: 5576 case MVT::v2i16: 5577 return MVT::v2i32; 5578 case MVT::v4i8: 5579 return MVT::v4i16; 5580 } 5581 } 5582 5583 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total 5584 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. 5585 /// We insert the required extension here to get the vector to fill a D register. 5586 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, 5587 const EVT &OrigTy, 5588 const EVT &ExtTy, 5589 unsigned ExtOpcode) { 5590 // The vector originally had a size of OrigTy. It was then extended to ExtTy. 5591 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than 5592 // 64-bits we need to insert a new extension so that it will be 64-bits. 5593 assert(ExtTy.is128BitVector() && "Unexpected extension size"); 5594 if (OrigTy.getSizeInBits() >= 64) 5595 return N; 5596 5597 // Must extend size to at least 64 bits to be used as an operand for VMULL. 5598 EVT NewVT = getExtensionTo64Bits(OrigTy); 5599 5600 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); 5601 } 5602 5603 /// SkipLoadExtensionForVMULL - return a load of the original vector size that 5604 /// does not do any sign/zero extension. If the original vector is less 5605 /// than 64 bits, an appropriate extension will be added after the load to 5606 /// reach a total size of 64 bits. We have to add the extension separately 5607 /// because ARM does not have a sign/zero extending load for vectors. 5608 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { 5609 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); 5610 5611 // The load already has the right type. 5612 if (ExtendedTy == LD->getMemoryVT()) 5613 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), 5614 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 5615 LD->isNonTemporal(), LD->isInvariant(), 5616 LD->getAlignment()); 5617 5618 // We need to create a zextload/sextload. We cannot just create a load 5619 // followed by a zext/sext node because LowerMUL is also run during normal 5620 // operation legalization where we can't create illegal types. 5621 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, 5622 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), 5623 LD->getMemoryVT(), LD->isVolatile(), 5624 LD->isNonTemporal(), LD->getAlignment()); 5625 } 5626 5627 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, 5628 /// extending load, or BUILD_VECTOR with extended elements, return the 5629 /// unextended value. The unextended vector should be 64 bits so that it can 5630 /// be used as an operand to a VMULL instruction. If the original vector size 5631 /// before extension is less than 64 bits we add an extension to resize 5632 /// the vector to 64 bits.
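/// E.g., (v4i32 (sign_extend (v4i16 X))) yields X directly, while a constant
/// v4i32 BUILD_VECTOR whose elements fit in 16 bits is rebuilt as a v4i16
/// BUILD_VECTOR (a sketch; see the cases below).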
5633 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { 5634 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 5635 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, 5636 N->getOperand(0)->getValueType(0), 5637 N->getValueType(0), 5638 N->getOpcode()); 5639 5640 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 5641 return SkipLoadExtensionForVMULL(LD, DAG); 5642 5643 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 5644 // have been legalized as a BITCAST from v4i32. 5645 if (N->getOpcode() == ISD::BITCAST) { 5646 SDNode *BVN = N->getOperand(0).getNode(); 5647 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 5648 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 5649 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 5650 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, 5651 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 5652 } 5653 // Construct a new BUILD_VECTOR with elements truncated to half the size. 5654 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 5655 EVT VT = N->getValueType(0); 5656 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 5657 unsigned NumElts = VT.getVectorNumElements(); 5658 MVT TruncVT = MVT::getIntegerVT(EltSize); 5659 SmallVector<SDValue, 8> Ops; 5660 for (unsigned i = 0; i != NumElts; ++i) { 5661 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 5662 const APInt &CInt = C->getAPIntValue(); 5663 // Element types smaller than 32 bits are not legal, so use i32 elements. 5664 // The values are implicitly truncated so sext vs. zext doesn't matter. 5665 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); 5666 } 5667 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), 5668 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 5669 } 5670 5671 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 5672 unsigned Opcode = N->getOpcode(); 5673 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 5674 SDNode *N0 = N->getOperand(0).getNode(); 5675 SDNode *N1 = N->getOperand(1).getNode(); 5676 return N0->hasOneUse() && N1->hasOneUse() && 5677 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 5678 } 5679 return false; 5680 } 5681 5682 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 5683 unsigned Opcode = N->getOpcode(); 5684 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 5685 SDNode *N0 = N->getOperand(0).getNode(); 5686 SDNode *N1 = N->getOperand(1).getNode(); 5687 return N0->hasOneUse() && N1->hasOneUse() && 5688 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 5689 } 5690 return false; 5691 } 5692 5693 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 5694 // Multiplications are only custom-lowered for 128-bit vectors so that 5695 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 
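// E.g., (v4i32 (mul (sext v4i16:$a), (sext v4i16:$b))) is matched here as
// (v4i32 (ARMISD::VMULLs $a, $b)), i.e. a single vmull.s16 of two D registers.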
5696 EVT VT = Op.getValueType(); 5697 assert(VT.is128BitVector() && VT.isInteger() && 5698 "unexpected type for custom-lowering ISD::MUL"); 5699 SDNode *N0 = Op.getOperand(0).getNode(); 5700 SDNode *N1 = Op.getOperand(1).getNode(); 5701 unsigned NewOpc = 0; 5702 bool isMLA = false; 5703 bool isN0SExt = isSignExtended(N0, DAG); 5704 bool isN1SExt = isSignExtended(N1, DAG); 5705 if (isN0SExt && isN1SExt) 5706 NewOpc = ARMISD::VMULLs; 5707 else { 5708 bool isN0ZExt = isZeroExtended(N0, DAG); 5709 bool isN1ZExt = isZeroExtended(N1, DAG); 5710 if (isN0ZExt && isN1ZExt) 5711 NewOpc = ARMISD::VMULLu; 5712 else if (isN1SExt || isN1ZExt) { 5713 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 5714 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 5715 if (isN1SExt && isAddSubSExt(N0, DAG)) { 5716 NewOpc = ARMISD::VMULLs; 5717 isMLA = true; 5718 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 5719 NewOpc = ARMISD::VMULLu; 5720 isMLA = true; 5721 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 5722 std::swap(N0, N1); 5723 NewOpc = ARMISD::VMULLu; 5724 isMLA = true; 5725 } 5726 } 5727 5728 if (!NewOpc) { 5729 if (VT == MVT::v2i64) 5730 // Fall through to expand this. It is not legal. 5731 return SDValue(); 5732 else 5733 // Other vector multiplications are legal. 5734 return Op; 5735 } 5736 } 5737 5738 // Legalize to a VMULL instruction. 5739 SDLoc DL(Op); 5740 SDValue Op0; 5741 SDValue Op1 = SkipExtensionForVMULL(N1, DAG); 5742 if (!isMLA) { 5743 Op0 = SkipExtensionForVMULL(N0, DAG); 5744 assert(Op0.getValueType().is64BitVector() && 5745 Op1.getValueType().is64BitVector() && 5746 "unexpected types for extended operands to VMULL"); 5747 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 5748 } 5749 5750 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 5751 // isel lowering to take advantage of no-stall back to back vmul + vmla. 5752 // vmull q0, d4, d6 5753 // vmlal q0, d5, d6 5754 // is faster than 5755 // vaddl q0, d4, d5 5756 // vmovl q1, d6 5757 // vmul q0, q0, q1 5758 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); 5759 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); 5760 EVT Op1VT = Op1.getValueType(); 5761 return DAG.getNode(N0->getOpcode(), DL, VT, 5762 DAG.getNode(NewOpc, DL, VT, 5763 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 5764 DAG.getNode(NewOpc, DL, VT, 5765 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 5766 } 5767 5768 static SDValue 5769 LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { 5770 // Convert to float 5771 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 5772 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 5773 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 5774 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 5775 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 5776 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 5777 // Get reciprocal estimate. 5778 // float4 recip = vrecpeq_f32(yf); 5779 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5780 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 5781 // Because char has a smaller range than uchar, we can actually get away 5782 // without any newton steps. This requires that we use a weird bias 5783 // of 0xb000, however (again, this has been exhaustively tested). 
5784 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
5785 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
5786 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
5787 Y = DAG.getConstant(0xb000, MVT::i32);
5788 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
5789 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
5790 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
5791 // Convert back to short.
5792 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
5793 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
5794 return X;
5795 }
5796 
5797 static SDValue
5798 LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
5799 SDValue N2;
5800 // Convert to float.
5801 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
5802 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
5803 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
5804 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
5805 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
5806 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
5807 
5808 // Use reciprocal estimate and one refinement step.
5809 // float4 recip = vrecpeq_f32(yf);
5810 // recip *= vrecpsq_f32(yf, recip);
5811 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5812 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
5813 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5814 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
5815 N1, N2);
5816 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
5817 // Because short has a smaller range than ushort, we can actually get away
5818 // with only a single Newton step. This requires that we use a weird bias
5819 // of 0x89, however (again, this has been exhaustively tested).
5820 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
5821 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
5822 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
5823 N1 = DAG.getConstant(0x89, MVT::i32);
5824 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
5825 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
5826 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
5827 // Convert back to integer and return.
5828 // return vmovn_s32(vcvt_s32_f32(result));
5829 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
5830 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
5831 return N0;
5832 }
5833 
5834 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
5835 EVT VT = Op.getValueType();
5836 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
5837 "unexpected type for custom-lowering ISD::SDIV");
5838 
5839 SDLoc dl(Op);
5840 SDValue N0 = Op.getOperand(0);
5841 SDValue N1 = Op.getOperand(1);
5842 SDValue N2, N3;
5843 
5844 if (VT == MVT::v8i8) {
5845 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
5846 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
5847 
5848 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5849 DAG.getIntPtrConstant(4));
5850 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5851 DAG.getIntPtrConstant(4));
5852 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5853 DAG.getIntPtrConstant(0));
5854 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5855 DAG.getIntPtrConstant(0));
5856 
5857 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
5858 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
5859 
5860 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
5861 N0 = LowerCONCAT_VECTORS(N0, DAG);
5862 
5863 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
5864 return N0;
5865 }
5866 return LowerSDIV_v4i16(N0, N1, dl, DAG);
5867 }
5868 
5869 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
5870 EVT VT = Op.getValueType();
5871 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
5872 "unexpected type for custom-lowering ISD::UDIV");
5873 
5874 SDLoc dl(Op);
5875 SDValue N0 = Op.getOperand(0);
5876 SDValue N1 = Op.getOperand(1);
5877 SDValue N2, N3;
5878 
5879 if (VT == MVT::v8i8) {
5880 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
5881 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
5882 
5883 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5884 DAG.getIntPtrConstant(4));
5885 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5886 DAG.getIntPtrConstant(4));
5887 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5888 DAG.getIntPtrConstant(0));
5889 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5890 DAG.getIntPtrConstant(0));
5891 
5892 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 (signed helper is safe:
5893 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 inputs were zext'd from i8)
5894 
5895 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
5896 N0 = LowerCONCAT_VECTORS(N0, DAG);
5897 
5898 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
5899 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
5900 N0);
5901 return N0;
5902 }
5903 
5904 // v4i16 udiv ... Convert to float.
5905 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
5906 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
5907 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
5908 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
5909 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
5910 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
5911 
5912 // Use reciprocal estimate and two refinement steps.
5913 // float4 recip = vrecpeq_f32(yf);
5914 // recip *= vrecpsq_f32(yf, recip);
5915 // recip *= vrecpsq_f32(yf, recip);
5916 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5917 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
5918 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5919 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
5920 BN1, N2);
5921 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
5922 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5923 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
5924 BN1, N2);
5925 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
5926 // Simply multiplying by the reciprocal estimate can leave us a few ulps
5927 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
5928 // and that it will never cause us to return an answer too large).
5929 // float4 result = as_float4(as_int4(xf*recip) + 2);
5930 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
5931 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
5932 N1 = DAG.getConstant(2, MVT::i32);
5933 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
5934 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
5935 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
5936 // Convert back to integer and return.
5937 // return vmovn_u32(vcvt_s32_f32(result));
5938 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
5939 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
5940 return N0;
5941 }
5942 
5943 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
5944 EVT VT = Op.getNode()->getValueType(0);
5945 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5946 
5947 unsigned Opc;
5948 bool ExtraOp = false;
5949 switch (Op.getOpcode()) {
5950 default: llvm_unreachable("Invalid code");
5951 case ISD::ADDC: Opc = ARMISD::ADDC; break;
5952 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
5953 case ISD::SUBC: Opc = ARMISD::SUBC; break;
5954 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
5955 }
5956 
5957 if (!ExtraOp)
5958 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
5959 Op.getOperand(1));
5960 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
5961 Op.getOperand(1), Op.getOperand(2));
5962 }
5963 
5964 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
5965 assert(Subtarget->isTargetDarwin());
5966 
5967 // For iOS, we want to call an alternative entry point: __sincos_stret,
5968 // whose return values are passed via sret.
5969 SDLoc dl(Op);
5970 SDValue Arg = Op.getOperand(0);
5971 EVT ArgVT = Arg.getValueType();
5972 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
5973 
5974 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
5975 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5976 
5977 // Pair of floats / doubles used to pass the result.
5978 StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
5979 
5980 // Create stack object for sret.
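// The callee writes { sin(arg), cos(arg) } into this slot; the two loads
// after the call read the fields back at offsets 0 and sizeof(ArgTy).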
5981 const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
5982 const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
5983 int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
5984 SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
5985 
5986 ArgListTy Args;
5987 ArgListEntry Entry;
5988 
5989 Entry.Node = SRet;
5990 Entry.Ty = RetTy->getPointerTo();
5991 Entry.isSExt = false;
5992 Entry.isZExt = false;
5993 Entry.isSRet = true;
5994 Args.push_back(Entry);
5995 
5996 Entry.Node = Arg;
5997 Entry.Ty = ArgTy;
5998 Entry.isSExt = false;
5999 Entry.isZExt = false;
6000 Args.push_back(Entry);
6001 
6002 const char *LibcallName = (ArgVT == MVT::f64)
6003 ? "__sincos_stret" : "__sincosf_stret";
6004 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
6005 
6006 TargetLowering::
6007 CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
6008 false, false, false, false, 0,
6009 CallingConv::C, /*isTailCall=*/false,
6010 /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
6011 Callee, Args, DAG, dl);
6012 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
6013 
6014 SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
6015 MachinePointerInfo(), false, false, false, 0);
6016 
6017 // Address of cos field.
6018 SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
6019 DAG.getIntPtrConstant(ArgVT.getStoreSize()));
6020 SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
6021 MachinePointerInfo(), false, false, false, 0);
6022 
6023 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
6024 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
6025 LoadSin.getValue(0), LoadCos.getValue(0));
6026 }
6027 
6028 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
6029 // Monotonic load/store is legal for all targets.
6030 if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
6031 return Op;
6032 
6033 // Acquire/Release load/store is not legal for targets without a
6034 // dmb or equivalent available.
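// Returning a null SDValue here tells the legalizer to expand the
// operation instead of treating it as legal.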
6035 return SDValue();
6036 }
6037 
6038 static void
6039 ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
6040 SelectionDAG &DAG) {
6041 SDLoc dl(Node);
6042 assert(Node->getValueType(0) == MVT::i64 &&
6043 "Only know how to expand i64 atomics");
6044 AtomicSDNode *AN = cast<AtomicSDNode>(Node);
6045 
6046 SmallVector<SDValue, 6> Ops;
6047 Ops.push_back(Node->getOperand(0)); // Chain
6048 Ops.push_back(Node->getOperand(1)); // Ptr
6049 for (unsigned i = 2; i < Node->getNumOperands(); ++i) {
6050 // Low part
6051 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
6052 Node->getOperand(i), DAG.getIntPtrConstant(0)));
6053 // High part
6054 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
6055 Node->getOperand(i), DAG.getIntPtrConstant(1)));
6056 }
6057 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
6058 SDValue Result =
6059 DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
6060 cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(),
6061 AN->getSynchScope());
6062 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
6063 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
6064 Results.push_back(Result.getValue(2));
6065 }
6066 
6067 static void ReplaceREADCYCLECOUNTER(SDNode *N,
6068 SmallVectorImpl<SDValue> &Results,
6069 SelectionDAG &DAG,
6070 const ARMSubtarget *Subtarget) {
6071 SDLoc DL(N);
6072 SDValue Cycles32, OutChain;
6073 
6074 if (Subtarget->hasPerfMon()) {
6075 // With the Performance Monitors extension, the cycle count is read via:
6076 // mrc p15, #0, <Rt>, c9, c13, #0
6077 SDValue Ops[] = { N->getOperand(0), // Chain
6078 DAG.getConstant(Intrinsic::arm_mrc, MVT::i32),
6079 DAG.getConstant(15, MVT::i32),
6080 DAG.getConstant(0, MVT::i32),
6081 DAG.getConstant(9, MVT::i32),
6082 DAG.getConstant(13, MVT::i32),
6083 DAG.getConstant(0, MVT::i32)
6084 };
6085 
6086 Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
6087 DAG.getVTList(MVT::i32, MVT::Other), &Ops[0],
6088 array_lengthof(Ops));
6089 OutChain = Cycles32.getValue(1);
6090 } else {
6091 // Intrinsic is defined to return 0 on unsupported platforms. Technically
6092 // there are older ARM CPUs that have implementation-specific ways of
6093 // obtaining this information (FIXME!).
6094 Cycles32 = DAG.getConstant(0, MVT::i32);
6095 OutChain = DAG.getEntryNode();
6096 }
6097 
6099 SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
6100 Cycles32, DAG.getConstant(0, MVT::i32));
6101 Results.push_back(Cycles64);
6102 Results.push_back(OutChain);
6103 }
6104 
6105 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
6106 switch (Op.getOpcode()) {
6107 default: llvm_unreachable("Don't know how to custom lower this!");
6108 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
6109 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
6110 case ISD::GlobalAddress:
6111 return Subtarget->isTargetDarwin() ?
LowerGlobalAddressDarwin(Op, DAG) : 6112 LowerGlobalAddressELF(Op, DAG); 6113 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 6114 case ISD::SELECT: return LowerSELECT(Op, DAG); 6115 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 6116 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 6117 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 6118 case ISD::VASTART: return LowerVASTART(Op, DAG); 6119 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 6120 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 6121 case ISD::SINT_TO_FP: 6122 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 6123 case ISD::FP_TO_SINT: 6124 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 6125 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 6126 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 6127 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 6128 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 6129 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 6130 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 6131 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 6132 Subtarget); 6133 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 6134 case ISD::SHL: 6135 case ISD::SRL: 6136 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 6137 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 6138 case ISD::SRL_PARTS: 6139 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 6140 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 6141 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); 6142 case ISD::SETCC: return LowerVSETCC(Op, DAG); 6143 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); 6144 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 6145 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 6146 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 6147 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 6148 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 6149 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 6150 case ISD::MUL: return LowerMUL(Op, DAG); 6151 case ISD::SDIV: return LowerSDIV(Op, DAG); 6152 case ISD::UDIV: return LowerUDIV(Op, DAG); 6153 case ISD::ADDC: 6154 case ISD::ADDE: 6155 case ISD::SUBC: 6156 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 6157 case ISD::ATOMIC_LOAD: 6158 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 6159 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); 6160 case ISD::SDIVREM: 6161 case ISD::UDIVREM: return LowerDivRem(Op, DAG); 6162 } 6163 } 6164 6165 /// ReplaceNodeResults - Replace the results of node with an illegal result 6166 /// type with new values built out of custom code. 
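/// Most opcodes produce a single replacement value through the switch below;
/// READCYCLECOUNTER and the 64-bit atomics push their results (including the
/// chain) themselves and return early.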
6167 void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 6168 SmallVectorImpl<SDValue>&Results, 6169 SelectionDAG &DAG) const { 6170 SDValue Res; 6171 switch (N->getOpcode()) { 6172 default: 6173 llvm_unreachable("Don't know how to custom expand this!"); 6174 case ISD::BITCAST: 6175 Res = ExpandBITCAST(N, DAG); 6176 break; 6177 case ISD::SRL: 6178 case ISD::SRA: 6179 Res = Expand64BitShift(N, DAG, Subtarget); 6180 break; 6181 case ISD::READCYCLECOUNTER: 6182 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); 6183 return; 6184 case ISD::ATOMIC_STORE: 6185 case ISD::ATOMIC_LOAD: 6186 case ISD::ATOMIC_LOAD_ADD: 6187 case ISD::ATOMIC_LOAD_AND: 6188 case ISD::ATOMIC_LOAD_NAND: 6189 case ISD::ATOMIC_LOAD_OR: 6190 case ISD::ATOMIC_LOAD_SUB: 6191 case ISD::ATOMIC_LOAD_XOR: 6192 case ISD::ATOMIC_SWAP: 6193 case ISD::ATOMIC_CMP_SWAP: 6194 case ISD::ATOMIC_LOAD_MIN: 6195 case ISD::ATOMIC_LOAD_UMIN: 6196 case ISD::ATOMIC_LOAD_MAX: 6197 case ISD::ATOMIC_LOAD_UMAX: 6198 ReplaceATOMIC_OP_64(N, Results, DAG); 6199 return; 6200 } 6201 if (Res.getNode()) 6202 Results.push_back(Res); 6203 } 6204 6205 //===----------------------------------------------------------------------===// 6206 // ARM Scheduler Hooks 6207 //===----------------------------------------------------------------------===// 6208 6209 MachineBasicBlock * 6210 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 6211 MachineBasicBlock *BB, 6212 unsigned Size) const { 6213 unsigned dest = MI->getOperand(0).getReg(); 6214 unsigned ptr = MI->getOperand(1).getReg(); 6215 unsigned oldval = MI->getOperand(2).getReg(); 6216 unsigned newval = MI->getOperand(3).getReg(); 6217 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6218 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); 6219 DebugLoc dl = MI->getDebugLoc(); 6220 bool isThumb2 = Subtarget->isThumb2(); 6221 6222 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6223 unsigned scratch = MRI.createVirtualRegister(isThumb2 ? 6224 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6225 (const TargetRegisterClass*)&ARM::GPRRegClass); 6226 6227 if (isThumb2) { 6228 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 6229 MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); 6230 MRI.constrainRegClass(newval, &ARM::rGPRRegClass); 6231 } 6232 6233 unsigned ldrOpc, strOpc; 6234 getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); 6235 6236 MachineFunction *MF = BB->getParent(); 6237 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6238 MachineFunction::iterator It = BB; 6239 ++It; // insert the new blocks after the current block 6240 6241 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 6242 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 6243 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6244 MF->insert(It, loop1MBB); 6245 MF->insert(It, loop2MBB); 6246 MF->insert(It, exitMBB); 6247 6248 // Transfer the remainder of BB and its successor edges to exitMBB. 6249 exitMBB->splice(exitMBB->begin(), BB, 6250 llvm::next(MachineBasicBlock::iterator(MI)), 6251 BB->end()); 6252 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6253 6254 // thisMBB: 6255 // ... 
6256 // fallthrough --> loop1MBB 6257 BB->addSuccessor(loop1MBB); 6258 6259 // loop1MBB: 6260 // ldrex dest, [ptr] 6261 // cmp dest, oldval 6262 // bne exitMBB 6263 BB = loop1MBB; 6264 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6265 if (ldrOpc == ARM::t2LDREX) 6266 MIB.addImm(0); 6267 AddDefaultPred(MIB); 6268 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6269 .addReg(dest).addReg(oldval)); 6270 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6271 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6272 BB->addSuccessor(loop2MBB); 6273 BB->addSuccessor(exitMBB); 6274 6275 // loop2MBB: 6276 // strex scratch, newval, [ptr] 6277 // cmp scratch, #0 6278 // bne loop1MBB 6279 BB = loop2MBB; 6280 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); 6281 if (strOpc == ARM::t2STREX) 6282 MIB.addImm(0); 6283 AddDefaultPred(MIB); 6284 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6285 .addReg(scratch).addImm(0)); 6286 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6287 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6288 BB->addSuccessor(loop1MBB); 6289 BB->addSuccessor(exitMBB); 6290 6291 // exitMBB: 6292 // ... 6293 BB = exitMBB; 6294 6295 MI->eraseFromParent(); // The instruction is gone now. 6296 6297 return BB; 6298 } 6299 6300 MachineBasicBlock * 6301 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 6302 unsigned Size, unsigned BinOpcode) const { 6303 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 6304 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6305 6306 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6307 MachineFunction *MF = BB->getParent(); 6308 MachineFunction::iterator It = BB; 6309 ++It; 6310 6311 unsigned dest = MI->getOperand(0).getReg(); 6312 unsigned ptr = MI->getOperand(1).getReg(); 6313 unsigned incr = MI->getOperand(2).getReg(); 6314 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); 6315 DebugLoc dl = MI->getDebugLoc(); 6316 bool isThumb2 = Subtarget->isThumb2(); 6317 6318 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6319 if (isThumb2) { 6320 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 6321 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6322 MRI.constrainRegClass(incr, &ARM::rGPRRegClass); 6323 } 6324 6325 unsigned ldrOpc, strOpc; 6326 getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); 6327 6328 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6329 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6330 MF->insert(It, loopMBB); 6331 MF->insert(It, exitMBB); 6332 6333 // Transfer the remainder of BB and its successor edges to exitMBB. 6334 exitMBB->splice(exitMBB->begin(), BB, 6335 llvm::next(MachineBasicBlock::iterator(MI)), 6336 BB->end()); 6337 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6338 6339 const TargetRegisterClass *TRC = isThumb2 ? 6340 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6341 (const TargetRegisterClass*)&ARM::GPRRegClass; 6342 unsigned scratch = MRI.createVirtualRegister(TRC); 6343 unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); 6344 6345 // thisMBB: 6346 // ... 
6347 // fallthrough --> loopMBB 6348 BB->addSuccessor(loopMBB); 6349 6350 // loopMBB: 6351 // ldrex dest, ptr 6352 // <binop> scratch2, dest, incr 6353 // strex scratch, scratch2, ptr 6354 // cmp scratch, #0 6355 // bne- loopMBB 6356 // fallthrough --> exitMBB 6357 BB = loopMBB; 6358 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6359 if (ldrOpc == ARM::t2LDREX) 6360 MIB.addImm(0); 6361 AddDefaultPred(MIB); 6362 if (BinOpcode) { 6363 // operand order needs to go the other way for NAND 6364 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 6365 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 6366 addReg(incr).addReg(dest)).addReg(0); 6367 else 6368 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 6369 addReg(dest).addReg(incr)).addReg(0); 6370 } 6371 6372 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 6373 if (strOpc == ARM::t2STREX) 6374 MIB.addImm(0); 6375 AddDefaultPred(MIB); 6376 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6377 .addReg(scratch).addImm(0)); 6378 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6379 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6380 6381 BB->addSuccessor(loopMBB); 6382 BB->addSuccessor(exitMBB); 6383 6384 // exitMBB: 6385 // ... 6386 BB = exitMBB; 6387 6388 MI->eraseFromParent(); // The instruction is gone now. 6389 6390 return BB; 6391 } 6392 6393 MachineBasicBlock * 6394 ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 6395 MachineBasicBlock *BB, 6396 unsigned Size, 6397 bool signExtend, 6398 ARMCC::CondCodes Cond) const { 6399 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6400 6401 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6402 MachineFunction *MF = BB->getParent(); 6403 MachineFunction::iterator It = BB; 6404 ++It; 6405 6406 unsigned dest = MI->getOperand(0).getReg(); 6407 unsigned ptr = MI->getOperand(1).getReg(); 6408 unsigned incr = MI->getOperand(2).getReg(); 6409 unsigned oldval = dest; 6410 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); 6411 DebugLoc dl = MI->getDebugLoc(); 6412 bool isThumb2 = Subtarget->isThumb2(); 6413 6414 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6415 if (isThumb2) { 6416 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 6417 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6418 MRI.constrainRegClass(incr, &ARM::rGPRRegClass); 6419 } 6420 6421 unsigned ldrOpc, strOpc, extendOpc; 6422 getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); 6423 switch (Size) { 6424 default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); 6425 case 1: 6426 extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; 6427 break; 6428 case 2: 6429 extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 6430 break; 6431 case 4: 6432 extendOpc = 0; 6433 break; 6434 } 6435 6436 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6437 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6438 MF->insert(It, loopMBB); 6439 MF->insert(It, exitMBB); 6440 6441 // Transfer the remainder of BB and its successor edges to exitMBB. 6442 exitMBB->splice(exitMBB->begin(), BB, 6443 llvm::next(MachineBasicBlock::iterator(MI)), 6444 BB->end()); 6445 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6446 6447 const TargetRegisterClass *TRC = isThumb2 ? 
6448 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6449 (const TargetRegisterClass*)&ARM::GPRRegClass; 6450 unsigned scratch = MRI.createVirtualRegister(TRC); 6451 unsigned scratch2 = MRI.createVirtualRegister(TRC); 6452 6453 // thisMBB: 6454 // ... 6455 // fallthrough --> loopMBB 6456 BB->addSuccessor(loopMBB); 6457 6458 // loopMBB: 6459 // ldrex dest, ptr 6460 // (sign extend dest, if required) 6461 // cmp dest, incr 6462 // cmov.cond scratch2, incr, dest 6463 // strex scratch, scratch2, ptr 6464 // cmp scratch, #0 6465 // bne- loopMBB 6466 // fallthrough --> exitMBB 6467 BB = loopMBB; 6468 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6469 if (ldrOpc == ARM::t2LDREX) 6470 MIB.addImm(0); 6471 AddDefaultPred(MIB); 6472 6473 // Sign extend the value, if necessary. 6474 if (signExtend && extendOpc) { 6475 oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass 6476 : &ARM::GPRnopcRegClass); 6477 if (!isThumb2) 6478 MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); 6479 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) 6480 .addReg(dest) 6481 .addImm(0)); 6482 } 6483 6484 // Build compare and cmov instructions. 6485 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6486 .addReg(oldval).addReg(incr)); 6487 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 6488 .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); 6489 6490 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 6491 if (strOpc == ARM::t2STREX) 6492 MIB.addImm(0); 6493 AddDefaultPred(MIB); 6494 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6495 .addReg(scratch).addImm(0)); 6496 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6497 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6498 6499 BB->addSuccessor(loopMBB); 6500 BB->addSuccessor(exitMBB); 6501 6502 // exitMBB: 6503 // ... 6504 BB = exitMBB; 6505 6506 MI->eraseFromParent(); // The instruction is gone now. 6507 6508 return BB; 6509 } 6510 6511 MachineBasicBlock * 6512 ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, 6513 unsigned Op1, unsigned Op2, 6514 bool NeedsCarry, bool IsCmpxchg, 6515 bool IsMinMax, ARMCC::CondCodes CC) const { 6516 // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. 6517 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6518 6519 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6520 MachineFunction *MF = BB->getParent(); 6521 MachineFunction::iterator It = BB; 6522 ++It; 6523 6524 bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); 6525 unsigned offset = (isStore ? -2 : 0); 6526 unsigned destlo = MI->getOperand(0).getReg(); 6527 unsigned desthi = MI->getOperand(1).getReg(); 6528 unsigned ptr = MI->getOperand(offset+2).getReg(); 6529 unsigned vallo = MI->getOperand(offset+3).getReg(); 6530 unsigned valhi = MI->getOperand(offset+4).getReg(); 6531 unsigned OrdIdx = offset + (IsCmpxchg ? 
7 : 5); 6532 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm()); 6533 DebugLoc dl = MI->getDebugLoc(); 6534 bool isThumb2 = Subtarget->isThumb2(); 6535 6536 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6537 if (isThumb2) { 6538 MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); 6539 MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); 6540 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6541 MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); 6542 MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); 6543 } 6544 6545 unsigned ldrOpc, strOpc; 6546 getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); 6547 6548 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6549 MachineBasicBlock *contBB = 0, *cont2BB = 0; 6550 if (IsCmpxchg || IsMinMax) 6551 contBB = MF->CreateMachineBasicBlock(LLVM_BB); 6552 if (IsCmpxchg) 6553 cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); 6554 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6555 6556 MF->insert(It, loopMBB); 6557 if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); 6558 if (IsCmpxchg) MF->insert(It, cont2BB); 6559 MF->insert(It, exitMBB); 6560 6561 // Transfer the remainder of BB and its successor edges to exitMBB. 6562 exitMBB->splice(exitMBB->begin(), BB, 6563 llvm::next(MachineBasicBlock::iterator(MI)), 6564 BB->end()); 6565 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6566 6567 const TargetRegisterClass *TRC = isThumb2 ? 6568 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6569 (const TargetRegisterClass*)&ARM::GPRRegClass; 6570 unsigned storesuccess = MRI.createVirtualRegister(TRC); 6571 6572 // thisMBB: 6573 // ... 6574 // fallthrough --> loopMBB 6575 BB->addSuccessor(loopMBB); 6576 6577 // loopMBB: 6578 // ldrexd r2, r3, ptr 6579 // <binopa> r0, r2, incr 6580 // <binopb> r1, r3, incr 6581 // strexd storesuccess, r0, r1, ptr 6582 // cmp storesuccess, #0 6583 // bne- loopMBB 6584 // fallthrough --> exitMBB 6585 BB = loopMBB; 6586 6587 if (!isStore) { 6588 // Load 6589 if (isThumb2) { 6590 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) 6591 .addReg(destlo, RegState::Define) 6592 .addReg(desthi, RegState::Define) 6593 .addReg(ptr)); 6594 } else { 6595 unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6596 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) 6597 .addReg(GPRPair0, RegState::Define).addReg(ptr)); 6598 // Copy r2/r3 into dest. (This copy will normally be coalesced.) 6599 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) 6600 .addReg(GPRPair0, 0, ARM::gsub_0); 6601 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) 6602 .addReg(GPRPair0, 0, ARM::gsub_1); 6603 } 6604 } 6605 6606 unsigned StoreLo, StoreHi; 6607 if (IsCmpxchg) { 6608 // Add early exit 6609 for (unsigned i = 0; i < 2; i++) { 6610 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : 6611 ARM::CMPrr)) 6612 .addReg(i == 0 ? destlo : desthi) 6613 .addReg(i == 0 ? vallo : valhi)); 6614 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6615 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6616 BB->addSuccessor(exitMBB); 6617 BB->addSuccessor(i == 0 ? contBB : cont2BB); 6618 BB = (i == 0 ? 
contBB : cont2BB); 6619 } 6620 6621 // Copy to physregs for strexd 6622 StoreLo = MI->getOperand(5).getReg(); 6623 StoreHi = MI->getOperand(6).getReg(); 6624 } else if (Op1) { 6625 // Perform binary operation 6626 unsigned tmpRegLo = MRI.createVirtualRegister(TRC); 6627 AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) 6628 .addReg(destlo).addReg(vallo)) 6629 .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); 6630 unsigned tmpRegHi = MRI.createVirtualRegister(TRC); 6631 AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) 6632 .addReg(desthi).addReg(valhi)) 6633 .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); 6634 6635 StoreLo = tmpRegLo; 6636 StoreHi = tmpRegHi; 6637 } else { 6638 // Copy to physregs for strexd 6639 StoreLo = vallo; 6640 StoreHi = valhi; 6641 } 6642 if (IsMinMax) { 6643 // Compare and branch to exit block. 6644 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6645 .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); 6646 BB->addSuccessor(exitMBB); 6647 BB->addSuccessor(contBB); 6648 BB = contBB; 6649 StoreLo = vallo; 6650 StoreHi = valhi; 6651 } 6652 6653 // Store 6654 if (isThumb2) { 6655 MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); 6656 MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); 6657 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) 6658 .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); 6659 } else { 6660 // Marshal a pair... 6661 unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6662 unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6663 unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6664 BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); 6665 BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) 6666 .addReg(UndefPair) 6667 .addReg(StoreLo) 6668 .addImm(ARM::gsub_0); 6669 BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) 6670 .addReg(r1) 6671 .addReg(StoreHi) 6672 .addImm(ARM::gsub_1); 6673 6674 // ...and store it 6675 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) 6676 .addReg(StorePair).addReg(ptr)); 6677 } 6678 // Cmp+jump 6679 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6680 .addReg(storesuccess).addImm(0)); 6681 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6682 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6683 6684 BB->addSuccessor(loopMBB); 6685 BB->addSuccessor(exitMBB); 6686 6687 // exitMBB: 6688 // ... 6689 BB = exitMBB; 6690 6691 MI->eraseFromParent(); // The instruction is gone now. 
6692 6693 return BB; 6694 } 6695 6696 MachineBasicBlock * 6697 ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { 6698 6699 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6700 6701 unsigned destlo = MI->getOperand(0).getReg(); 6702 unsigned desthi = MI->getOperand(1).getReg(); 6703 unsigned ptr = MI->getOperand(2).getReg(); 6704 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); 6705 DebugLoc dl = MI->getDebugLoc(); 6706 bool isThumb2 = Subtarget->isThumb2(); 6707 6708 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6709 if (isThumb2) { 6710 MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); 6711 MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); 6712 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6713 } 6714 unsigned ldrOpc, strOpc; 6715 getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); 6716 6717 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); 6718 6719 if (isThumb2) { 6720 MIB.addReg(destlo, RegState::Define) 6721 .addReg(desthi, RegState::Define) 6722 .addReg(ptr); 6723 6724 } else { 6725 unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6726 MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); 6727 6728 // Copy GPRPair0 into dest. (This copy will normally be coalesced.) 6729 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) 6730 .addReg(GPRPair0, 0, ARM::gsub_0); 6731 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) 6732 .addReg(GPRPair0, 0, ARM::gsub_1); 6733 } 6734 AddDefaultPred(MIB); 6735 6736 MI->eraseFromParent(); // The instruction is gone now. 6737 6738 return BB; 6739 } 6740 6741 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and 6742 /// registers the function context. 6743 void ARMTargetLowering:: 6744 SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, 6745 MachineBasicBlock *DispatchBB, int FI) const { 6746 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6747 DebugLoc dl = MI->getDebugLoc(); 6748 MachineFunction *MF = MBB->getParent(); 6749 MachineRegisterInfo *MRI = &MF->getRegInfo(); 6750 MachineConstantPool *MCP = MF->getConstantPool(); 6751 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 6752 const Function *F = MF->getFunction(); 6753 6754 bool isThumb = Subtarget->isThumb(); 6755 bool isThumb2 = Subtarget->isThumb2(); 6756 6757 unsigned PCLabelId = AFI->createPICLabelUId(); 6758 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; 6759 ARMConstantPoolValue *CPV = 6760 ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); 6761 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); 6762 6763 const TargetRegisterClass *TRC = isThumb ? 6764 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6765 (const TargetRegisterClass*)&ARM::GPRRegClass; 6766 6767 // Grab constant pool and fixed stack memory operands. 6768 MachineMemOperand *CPMMO = 6769 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), 6770 MachineMemOperand::MOLoad, 4, 4); 6771 6772 MachineMemOperand *FIMMOSt = 6773 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 6774 MachineMemOperand::MOStore, 4, 4); 6775 6776 // Load the address of the dispatch MBB into the jump buffer. 
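// The address goes into slot jbuf[1] (offset 36 below), which is where a
// SjLj longjmp expects to find the resume PC. In Thumb modes the low bit
// of the stored address must be set so execution resumes in Thumb state.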
6777 if (isThumb2) { 6778 // Incoming value: jbuf 6779 // ldr.n r5, LCPI1_1 6780 // orr r5, r5, #1 6781 // add r5, pc 6782 // str r5, [$jbuf, #+4] ; &jbuf[1] 6783 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6784 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) 6785 .addConstantPoolIndex(CPI) 6786 .addMemOperand(CPMMO)); 6787 // Set the low bit because of thumb mode. 6788 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6789 AddDefaultCC( 6790 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) 6791 .addReg(NewVReg1, RegState::Kill) 6792 .addImm(0x01))); 6793 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6794 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) 6795 .addReg(NewVReg2, RegState::Kill) 6796 .addImm(PCLabelId); 6797 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) 6798 .addReg(NewVReg3, RegState::Kill) 6799 .addFrameIndex(FI) 6800 .addImm(36) // &jbuf[1] :: pc 6801 .addMemOperand(FIMMOSt)); 6802 } else if (isThumb) { 6803 // Incoming value: jbuf 6804 // ldr.n r1, LCPI1_4 6805 // add r1, pc 6806 // mov r2, #1 6807 // orrs r1, r2 6808 // add r2, $jbuf, #+4 ; &jbuf[1] 6809 // str r1, [r2] 6810 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6811 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) 6812 .addConstantPoolIndex(CPI) 6813 .addMemOperand(CPMMO)); 6814 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6815 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) 6816 .addReg(NewVReg1, RegState::Kill) 6817 .addImm(PCLabelId); 6818 // Set the low bit because of thumb mode. 6819 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6820 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) 6821 .addReg(ARM::CPSR, RegState::Define) 6822 .addImm(1)); 6823 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6824 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) 6825 .addReg(ARM::CPSR, RegState::Define) 6826 .addReg(NewVReg2, RegState::Kill) 6827 .addReg(NewVReg3, RegState::Kill)); 6828 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6829 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) 6830 .addFrameIndex(FI) 6831 .addImm(36)); // &jbuf[1] :: pc 6832 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) 6833 .addReg(NewVReg4, RegState::Kill) 6834 .addReg(NewVReg5, RegState::Kill) 6835 .addImm(0) 6836 .addMemOperand(FIMMOSt)); 6837 } else { 6838 // Incoming value: jbuf 6839 // ldr r1, LCPI1_1 6840 // add r1, pc, r1 6841 // str r1, [$jbuf, #+4] ; &jbuf[1] 6842 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6843 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) 6844 .addConstantPoolIndex(CPI) 6845 .addImm(0) 6846 .addMemOperand(CPMMO)); 6847 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6848 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) 6849 .addReg(NewVReg1, RegState::Kill) 6850 .addImm(PCLabelId)); 6851 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) 6852 .addReg(NewVReg2, RegState::Kill) 6853 .addFrameIndex(FI) 6854 .addImm(36) // &jbuf[1] :: pc 6855 .addMemOperand(FIMMOSt)); 6856 } 6857 } 6858 6859 MachineBasicBlock *ARMTargetLowering:: 6860 EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { 6861 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6862 DebugLoc dl = MI->getDebugLoc(); 6863 MachineFunction *MF = MBB->getParent(); 6864 MachineRegisterInfo *MRI = &MF->getRegInfo(); 6865 ARMFunctionInfo *AFI = 
MF->getInfo<ARMFunctionInfo>();
6866 MachineFrameInfo *MFI = MF->getFrameInfo();
6867 int FI = MFI->getFunctionContextIndex();
6868 
6869 const TargetRegisterClass *TRC = Subtarget->isThumb() ?
6870 (const TargetRegisterClass*)&ARM::tGPRRegClass :
6871 (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
6872 
6873 // Get a mapping of the call site numbers to all of the landing pads they're
6874 // associated with.
6875 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
6876 unsigned MaxCSNum = 0;
6877 MachineModuleInfo &MMI = MF->getMMI();
6878 for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
6879 ++BB) {
6880 if (!BB->isLandingPad()) continue;
6881 
6882 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
6883 // pad.
6884 for (MachineBasicBlock::iterator
6885 II = BB->begin(), IE = BB->end(); II != IE; ++II) {
6886 if (!II->isEHLabel()) continue;
6887 
6888 MCSymbol *Sym = II->getOperand(0).getMCSymbol();
6889 if (!MMI.hasCallSiteLandingPad(Sym)) continue;
6890 
6891 SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
6892 for (SmallVectorImpl<unsigned>::iterator
6893 CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
6894 CSI != CSE; ++CSI) {
6895 CallSiteNumToLPad[*CSI].push_back(BB);
6896 MaxCSNum = std::max(MaxCSNum, *CSI);
6897 }
6898 break;
6899 }
6900 }
6901 
6902 // Get an ordered list of the machine basic blocks for the jump table.
6903 std::vector<MachineBasicBlock*> LPadList;
6904 SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
6905 LPadList.reserve(CallSiteNumToLPad.size());
6906 for (unsigned I = 1; I <= MaxCSNum; ++I) {
6907 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
6908 for (SmallVectorImpl<MachineBasicBlock*>::iterator
6909 II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
6910 LPadList.push_back(*II);
6911 InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
6912 }
6913 }
6914 
6915 assert(!LPadList.empty() &&
6916 "No landing pad destinations for the dispatch jump table!");
6917 
6918 // Create the jump table and associated information.
6919 MachineJumpTableInfo *JTI =
6920 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
6921 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
6922 unsigned UId = AFI->createJumpTableUId();
6923 Reloc::Model RelocM = getTargetMachine().getRelocationModel();
6924 
6925 // Create the MBBs for the dispatch code.
6926 
6927 // Shove the dispatch's address into the return slot in the function context.
6928 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
6929 DispatchBB->setIsLandingPad();
6930 
6931 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
6932 unsigned trap_opcode;
6933 if (Subtarget->isThumb())
6934 trap_opcode = ARM::tTRAP;
6935 else
6936 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
6937 
6938 BuildMI(TrapBB, dl, TII->get(trap_opcode));
6939 DispatchBB->addSuccessor(TrapBB);
6940 
6941 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
6942 DispatchBB->addSuccessor(DispContBB);
6943 
6944 // Insert the MBBs at the end of the function.
6945 MF->insert(MF->end(), DispatchBB);
6946 MF->insert(MF->end(), DispContBB);
6947 MF->insert(MF->end(), TrapBB);
6948 
6949 // Insert code into the entry block that creates and registers the function
6950 // context.
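// At run time the SjLj personality transfers control to DispatchBB with the
// active call-site number stored in the function context; the dispatch code
// below reloads that number, traps if it is out of range, and otherwise
// indexes the jump table to reach the right landing pad.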
6951 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); 6952 6953 MachineMemOperand *FIMMOLd = 6954 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 6955 MachineMemOperand::MOLoad | 6956 MachineMemOperand::MOVolatile, 4, 4); 6957 6958 MachineInstrBuilder MIB; 6959 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); 6960 6961 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); 6962 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 6963 6964 // Add a register mask with no preserved registers. This results in all 6965 // registers being marked as clobbered. 6966 MIB.addRegMask(RI.getNoPreservedMask()); 6967 6968 unsigned NumLPads = LPadList.size(); 6969 if (Subtarget->isThumb2()) { 6970 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6971 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) 6972 .addFrameIndex(FI) 6973 .addImm(4) 6974 .addMemOperand(FIMMOLd)); 6975 6976 if (NumLPads < 256) { 6977 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) 6978 .addReg(NewVReg1) 6979 .addImm(LPadList.size())); 6980 } else { 6981 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6982 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) 6983 .addImm(NumLPads & 0xFFFF)); 6984 6985 unsigned VReg2 = VReg1; 6986 if ((NumLPads & 0xFFFF0000) != 0) { 6987 VReg2 = MRI->createVirtualRegister(TRC); 6988 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) 6989 .addReg(VReg1) 6990 .addImm(NumLPads >> 16)); 6991 } 6992 6993 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) 6994 .addReg(NewVReg1) 6995 .addReg(VReg2)); 6996 } 6997 6998 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) 6999 .addMBB(TrapBB) 7000 .addImm(ARMCC::HI) 7001 .addReg(ARM::CPSR); 7002 7003 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 7004 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) 7005 .addJumpTableIndex(MJTI) 7006 .addImm(UId)); 7007 7008 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 7009 AddDefaultCC( 7010 AddDefaultPred( 7011 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) 7012 .addReg(NewVReg3, RegState::Kill) 7013 .addReg(NewVReg1) 7014 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 7015 7016 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) 7017 .addReg(NewVReg4, RegState::Kill) 7018 .addReg(NewVReg1) 7019 .addJumpTableIndex(MJTI) 7020 .addImm(UId); 7021 } else if (Subtarget->isThumb()) { 7022 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 7023 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) 7024 .addFrameIndex(FI) 7025 .addImm(1) 7026 .addMemOperand(FIMMOLd)); 7027 7028 if (NumLPads < 256) { 7029 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) 7030 .addReg(NewVReg1) 7031 .addImm(NumLPads)); 7032 } else { 7033 MachineConstantPool *ConstantPool = MF->getConstantPool(); 7034 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 7035 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 7036 7037 // MachineConstantPool wants an explicit alignment. 
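// A preferred alignment of 0 means the target expresses no preference, in
// which case the constant's alloc size is used as the alignment instead.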
7038 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 7039 if (Align == 0) 7040 Align = getDataLayout()->getTypeAllocSize(C->getType()); 7041 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 7042 7043 unsigned VReg1 = MRI->createVirtualRegister(TRC); 7044 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) 7045 .addReg(VReg1, RegState::Define) 7046 .addConstantPoolIndex(Idx)); 7047 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) 7048 .addReg(NewVReg1) 7049 .addReg(VReg1)); 7050 } 7051 7052 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) 7053 .addMBB(TrapBB) 7054 .addImm(ARMCC::HI) 7055 .addReg(ARM::CPSR); 7056 7057 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 7058 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) 7059 .addReg(ARM::CPSR, RegState::Define) 7060 .addReg(NewVReg1) 7061 .addImm(2)); 7062 7063 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 7064 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) 7065 .addJumpTableIndex(MJTI) 7066 .addImm(UId)); 7067 7068 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 7069 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) 7070 .addReg(ARM::CPSR, RegState::Define) 7071 .addReg(NewVReg2, RegState::Kill) 7072 .addReg(NewVReg3)); 7073 7074 MachineMemOperand *JTMMOLd = 7075 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 7076 MachineMemOperand::MOLoad, 4, 4); 7077 7078 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 7079 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) 7080 .addReg(NewVReg4, RegState::Kill) 7081 .addImm(0) 7082 .addMemOperand(JTMMOLd)); 7083 7084 unsigned NewVReg6 = NewVReg5; 7085 if (RelocM == Reloc::PIC_) { 7086 NewVReg6 = MRI->createVirtualRegister(TRC); 7087 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) 7088 .addReg(ARM::CPSR, RegState::Define) 7089 .addReg(NewVReg5, RegState::Kill) 7090 .addReg(NewVReg3)); 7091 } 7092 7093 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) 7094 .addReg(NewVReg6, RegState::Kill) 7095 .addJumpTableIndex(MJTI) 7096 .addImm(UId); 7097 } else { 7098 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 7099 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) 7100 .addFrameIndex(FI) 7101 .addImm(4) 7102 .addMemOperand(FIMMOLd)); 7103 7104 if (NumLPads < 256) { 7105 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) 7106 .addReg(NewVReg1) 7107 .addImm(NumLPads)); 7108 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { 7109 unsigned VReg1 = MRI->createVirtualRegister(TRC); 7110 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) 7111 .addImm(NumLPads & 0xFFFF)); 7112 7113 unsigned VReg2 = VReg1; 7114 if ((NumLPads & 0xFFFF0000) != 0) { 7115 VReg2 = MRI->createVirtualRegister(TRC); 7116 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) 7117 .addReg(VReg1) 7118 .addImm(NumLPads >> 16)); 7119 } 7120 7121 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 7122 .addReg(NewVReg1) 7123 .addReg(VReg2)); 7124 } else { 7125 MachineConstantPool *ConstantPool = MF->getConstantPool(); 7126 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 7127 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 7128 7129 // MachineConstantPool wants an explicit alignment. 
7130 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 7131 if (Align == 0) 7132 Align = getDataLayout()->getTypeAllocSize(C->getType()); 7133 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 7134 7135 unsigned VReg1 = MRI->createVirtualRegister(TRC); 7136 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) 7137 .addReg(VReg1, RegState::Define) 7138 .addConstantPoolIndex(Idx) 7139 .addImm(0)); 7140 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 7141 .addReg(NewVReg1) 7142 .addReg(VReg1, RegState::Kill)); 7143 } 7144 7145 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) 7146 .addMBB(TrapBB) 7147 .addImm(ARMCC::HI) 7148 .addReg(ARM::CPSR); 7149 7150 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 7151 AddDefaultCC( 7152 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) 7153 .addReg(NewVReg1) 7154 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 7155 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 7156 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) 7157 .addJumpTableIndex(MJTI) 7158 .addImm(UId)); 7159 7160 MachineMemOperand *JTMMOLd = 7161 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 7162 MachineMemOperand::MOLoad, 4, 4); 7163 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 7164 AddDefaultPred( 7165 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) 7166 .addReg(NewVReg3, RegState::Kill) 7167 .addReg(NewVReg4) 7168 .addImm(0) 7169 .addMemOperand(JTMMOLd)); 7170 7171 if (RelocM == Reloc::PIC_) { 7172 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) 7173 .addReg(NewVReg5, RegState::Kill) 7174 .addReg(NewVReg4) 7175 .addJumpTableIndex(MJTI) 7176 .addImm(UId); 7177 } else { 7178 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr)) 7179 .addReg(NewVReg5, RegState::Kill) 7180 .addJumpTableIndex(MJTI) 7181 .addImm(UId); 7182 } 7183 } 7184 7185 // Add the jump table entries as successors to the MBB. 7186 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; 7187 for (std::vector<MachineBasicBlock*>::iterator 7188 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { 7189 MachineBasicBlock *CurMBB = *I; 7190 if (SeenMBBs.insert(CurMBB)) 7191 DispContBB->addSuccessor(CurMBB); 7192 } 7193 7194 // N.B. the order the invoke BBs are processed in doesn't matter here. 7195 const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); 7196 SmallVector<MachineBasicBlock*, 64> MBBLPads; 7197 for (SmallPtrSet<MachineBasicBlock*, 64>::iterator 7198 I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { 7199 MachineBasicBlock *BB = *I; 7200 7201 // Remove the landing pad successor from the invoke block and replace it 7202 // with the new dispatch block. 7203 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), 7204 BB->succ_end()); 7205 while (!Successors.empty()) { 7206 MachineBasicBlock *SMBB = Successors.pop_back_val(); 7207 if (SMBB->isLandingPad()) { 7208 BB->removeSuccessor(SMBB); 7209 MBBLPads.push_back(SMBB); 7210 } 7211 } 7212 7213 BB->addSuccessor(DispatchBB); 7214 7215 // Find the invoke call and mark all of the callee-saved registers as 7216 // 'implicit defined' so that they're spilled. This prevents code from 7217 // moving instructions to before the EH block, where they will never be 7218 // executed. 
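// Registers the call instruction already mentions are left untouched; every
// other eligible callee-saved register is added as an implicit, dead def.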
7219 for (MachineBasicBlock::reverse_iterator
7220 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
7221 if (!II->isCall()) continue;
7222 
7223 DenseMap<unsigned, bool> DefRegs;
7224 for (MachineInstr::mop_iterator
7225 OI = II->operands_begin(), OE = II->operands_end();
7226 OI != OE; ++OI) {
7227 if (!OI->isReg()) continue;
7228 DefRegs[OI->getReg()] = true;
7229 }
7230 
7231 MachineInstrBuilder MIB(*MF, &*II);
7232 
7233 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
7234 unsigned Reg = SavedRegs[i];
7235 if (Subtarget->isThumb2() &&
7236 !ARM::tGPRRegClass.contains(Reg) &&
7237 !ARM::hGPRRegClass.contains(Reg))
7238 continue;
7239 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
7240 continue;
7241 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
7242 continue;
7243 if (!DefRegs[Reg])
7244 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
7245 }
7246 
7247 break;
7248 }
7249 }
7250 
7251 // Mark all former landing pads as non-landing pads. The dispatch is the only
7252 // landing pad now.
7253 for (SmallVectorImpl<MachineBasicBlock*>::iterator
7254 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
7255 (*I)->setIsLandingPad(false);
7256 
7257 // The instruction is gone now.
7258 MI->eraseFromParent();
7259 
7260 return MBB;
7261 }
7262 
7263 static
7264 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
7265 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
7266 E = MBB->succ_end(); I != E; ++I)
7267 if (*I != Succ)
7268 return *I;
7269 llvm_unreachable("Expecting a BB with two successors!");
7270 }
7271 
7272 /// Return the load opcode for a given load size. If the load size is >= 8,
7273 /// a NEON opcode is returned.
7274 static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
7275 if (LdSize >= 8)
7276 return LdSize == 16 ? ARM::VLD1q32wb_fixed
7277 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
7278 if (IsThumb1)
7279 return LdSize == 4 ? ARM::tLDRi
7280 : LdSize == 2 ? ARM::tLDRHi
7281 : LdSize == 1 ? ARM::tLDRBi : 0;
7282 if (IsThumb2)
7283 return LdSize == 4 ? ARM::t2LDR_POST
7284 : LdSize == 2 ? ARM::t2LDRH_POST
7285 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
7286 return LdSize == 4 ? ARM::LDR_POST_IMM
7287 : LdSize == 2 ? ARM::LDRH_POST
7288 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
7289 }
7290 
7291 /// Return the store opcode for a given store size. If the store size is >= 8,
7292 /// a NEON opcode is returned.
7293 static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
7294 if (StSize >= 8)
7295 return StSize == 16 ? ARM::VST1q32wb_fixed
7296 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
7297 if (IsThumb1)
7298 return StSize == 4 ? ARM::tSTRi
7299 : StSize == 2 ? ARM::tSTRHi
7300 : StSize == 1 ? ARM::tSTRBi : 0;
7301 if (IsThumb2)
7302 return StSize == 4 ? ARM::t2STR_POST
7303 : StSize == 2 ? ARM::t2STRH_POST
7304 : StSize == 1 ? ARM::t2STRB_POST : 0;
7305 return StSize == 4 ? ARM::STR_POST_IMM
7306 : StSize == 2 ? ARM::STRH_POST
7307 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
7308 }
7309 
7310 /// Emit a post-increment load operation with the given size. The instructions
7311 /// will be added to BB at Pos.
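/// AddrOut receives the post-incremented address, so a chain of copies can
/// feed it into the AddrIn of the following access.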
7312 static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, 7313 const TargetInstrInfo *TII, DebugLoc dl, 7314 unsigned LdSize, unsigned Data, unsigned AddrIn, 7315 unsigned AddrOut, bool IsThumb1, bool IsThumb2) { 7316 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); 7317 assert(LdOpc != 0 && "Should have a load opcode"); 7318 if (LdSize >= 8) { 7319 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 7320 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 7321 .addImm(0)); 7322 } else if (IsThumb1) { 7323 // load + update AddrIn 7324 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 7325 .addReg(AddrIn).addImm(0)); 7326 MachineInstrBuilder MIB = 7327 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); 7328 MIB = AddDefaultT1CC(MIB); 7329 MIB.addReg(AddrIn).addImm(LdSize); 7330 AddDefaultPred(MIB); 7331 } else if (IsThumb2) { 7332 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 7333 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 7334 .addImm(LdSize)); 7335 } else { // arm 7336 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 7337 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 7338 .addReg(0).addImm(LdSize)); 7339 } 7340 } 7341 7342 /// Emit a post-increment store operation with given size. The instructions 7343 /// will be added to BB at Pos. 7344 static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, 7345 const TargetInstrInfo *TII, DebugLoc dl, 7346 unsigned StSize, unsigned Data, unsigned AddrIn, 7347 unsigned AddrOut, bool IsThumb1, bool IsThumb2) { 7348 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); 7349 assert(StOpc != 0 && "Should have a store opcode"); 7350 if (StSize >= 8) { 7351 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 7352 .addReg(AddrIn).addImm(0).addReg(Data)); 7353 } else if (IsThumb1) { 7354 // store + update AddrIn 7355 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) 7356 .addReg(AddrIn).addImm(0)); 7357 MachineInstrBuilder MIB = 7358 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); 7359 MIB = AddDefaultT1CC(MIB); 7360 MIB.addReg(AddrIn).addImm(StSize); 7361 AddDefaultPred(MIB); 7362 } else if (IsThumb2) { 7363 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 7364 .addReg(Data).addReg(AddrIn).addImm(StSize)); 7365 } else { // arm 7366 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 7367 .addReg(Data).addReg(AddrIn).addReg(0) 7368 .addImm(StSize)); 7369 } 7370 } 7371 7372 MachineBasicBlock * 7373 ARMTargetLowering::EmitStructByval(MachineInstr *MI, 7374 MachineBasicBlock *BB) const { 7375 // This pseudo instruction has 3 operands: dst, src, size 7376 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). 7377 // Otherwise, we will generate unrolled scalar copies. 
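  // An illustrative sketch (assumed operands): a 10-byte copy with 4-byte
  // alignment and a size below the inline threshold unrolls to
  //   [scratch, srcOut] = LDR_POST(srcIn, 4)     (twice, for the 8-byte body)
  //   [destOut]         = STR_POST(scratch, destIn, 4)
  // followed by LDRB_POST/STRB_POST pairs for the 2 leftover bytes.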
7378 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 7379 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7380 MachineFunction::iterator It = BB; 7381 ++It; 7382 7383 unsigned dest = MI->getOperand(0).getReg(); 7384 unsigned src = MI->getOperand(1).getReg(); 7385 unsigned SizeVal = MI->getOperand(2).getImm(); 7386 unsigned Align = MI->getOperand(3).getImm(); 7387 DebugLoc dl = MI->getDebugLoc(); 7388 7389 MachineFunction *MF = BB->getParent(); 7390 MachineRegisterInfo &MRI = MF->getRegInfo(); 7391 unsigned UnitSize = 0; 7392 const TargetRegisterClass *TRC = 0; 7393 const TargetRegisterClass *VecTRC = 0; 7394 7395 bool IsThumb1 = Subtarget->isThumb1Only(); 7396 bool IsThumb2 = Subtarget->isThumb2(); 7397 7398 if (Align & 1) { 7399 UnitSize = 1; 7400 } else if (Align & 2) { 7401 UnitSize = 2; 7402 } else { 7403 // Check whether we can use NEON instructions. 7404 if (!MF->getFunction()->getAttributes(). 7405 hasAttribute(AttributeSet::FunctionIndex, 7406 Attribute::NoImplicitFloat) && 7407 Subtarget->hasNEON()) { 7408 if ((Align % 16 == 0) && SizeVal >= 16) 7409 UnitSize = 16; 7410 else if ((Align % 8 == 0) && SizeVal >= 8) 7411 UnitSize = 8; 7412 } 7413 // Can't use NEON instructions. 7414 if (UnitSize == 0) 7415 UnitSize = 4; 7416 } 7417 7418 // Select the correct opcode and register class for unit size load/store 7419 bool IsNeon = UnitSize >= 8; 7420 TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass 7421 : (const TargetRegisterClass *)&ARM::GPRRegClass; 7422 if (IsNeon) 7423 VecTRC = UnitSize == 16 7424 ? (const TargetRegisterClass *)&ARM::DPairRegClass 7425 : UnitSize == 8 7426 ? (const TargetRegisterClass *)&ARM::DPRRegClass 7427 : 0; 7428 7429 unsigned BytesLeft = SizeVal % UnitSize; 7430 unsigned LoopSize = SizeVal - BytesLeft; 7431 7432 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { 7433 // Use LDR and STR to copy. 7434 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) 7435 // [destOut] = STR_POST(scratch, destIn, UnitSize) 7436 unsigned srcIn = src; 7437 unsigned destIn = dest; 7438 for (unsigned i = 0; i < LoopSize; i+=UnitSize) { 7439 unsigned srcOut = MRI.createVirtualRegister(TRC); 7440 unsigned destOut = MRI.createVirtualRegister(TRC); 7441 unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); 7442 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, 7443 IsThumb1, IsThumb2); 7444 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, 7445 IsThumb1, IsThumb2); 7446 srcIn = srcOut; 7447 destIn = destOut; 7448 } 7449 7450 // Handle the leftover bytes with LDRB and STRB. 7451 // [scratch, srcOut] = LDRB_POST(srcIn, 1) 7452 // [destOut] = STRB_POST(scratch, destIn, 1) 7453 for (unsigned i = 0; i < BytesLeft; i++) { 7454 unsigned srcOut = MRI.createVirtualRegister(TRC); 7455 unsigned destOut = MRI.createVirtualRegister(TRC); 7456 unsigned scratch = MRI.createVirtualRegister(TRC); 7457 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, 7458 IsThumb1, IsThumb2); 7459 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, 7460 IsThumb1, IsThumb2); 7461 srcIn = srcOut; 7462 destIn = destOut; 7463 } 7464 MI->eraseFromParent(); // The instruction is gone now. 7465 return BB; 7466 } 7467 7468 // Expand the pseudo op to a loop. 7469 // thisMBB: 7470 // ... 
  //   movw varEnd, # --> with thumb2
  //   movt varEnd, #
  //   ldrcp varEnd, idx --> without thumb2
  //   fallthrough --> loopMBB
  // loopMBB:
  //   PHI varPhi, varEnd, varLoop
  //   PHI srcPhi, src, srcLoop
  //   PHI destPhi, dst, destLoop
  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
  //   subs varLoop, varPhi, #UnitSize
  //   bne loopMBB
  //   fallthrough --> exitMBB
  // exitMBB:
  //   epilogue to handle left-over bytes
  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
  //   [destOut] = STRB_POST(scratch, destLoop, 1)
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Load an immediate to varEnd.
  unsigned varEnd = MRI.createVirtualRegister(TRC);
  if (IsThumb2) {
    unsigned Vtmp = varEnd;
    if ((LoopSize & 0xFFFF0000) != 0)
      Vtmp = MRI.createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
                   .addImm(LoopSize & 0xFFFF));

    if ((LoopSize & 0xFFFF0000) != 0)
      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
                     .addReg(Vtmp).addImm(LoopSize >> 16));
  } else {
    MachineConstantPool *ConstantPool = MF->getConstantPool();
    Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
    const Constant *C = ConstantInt::get(Int32Ty, LoopSize);

    // MachineConstantPool wants an explicit alignment.
    unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
    if (Align == 0)
      Align = getDataLayout()->getTypeAllocSize(C->getType());
    unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

    if (IsThumb1)
      AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
          varEnd, RegState::Define).addConstantPoolIndex(Idx));
    else
      AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
          varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
  }
  BB->addSuccessor(loopMBB);

  // Generate the loop body:
  //   varPhi = PHI(varLoop, varEnd)
  //   srcPhi = PHI(srcLoop, src)
  //   destPhi = PHI(destLoop, dst)
  MachineBasicBlock *entryBB = BB;
  BB = loopMBB;
  unsigned varLoop = MRI.createVirtualRegister(TRC);
  unsigned varPhi = MRI.createVirtualRegister(TRC);
  unsigned srcLoop = MRI.createVirtualRegister(TRC);
  unsigned srcPhi = MRI.createVirtualRegister(TRC);
  unsigned destLoop = MRI.createVirtualRegister(TRC);
  unsigned destPhi = MRI.createVirtualRegister(TRC);

  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
    .addReg(varLoop).addMBB(loopMBB)
    .addReg(varEnd).addMBB(entryBB);
  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
    .addReg(srcLoop).addMBB(loopMBB)
    .addReg(src).addMBB(entryBB);
  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
    .addReg(destLoop).addMBB(loopMBB)
    .addReg(dest).addMBB(entryBB);

  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
  unsigned scratch = MRI.createVirtualRegister(IsNeon ?
VecTRC : TRC); 7557 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, 7558 IsThumb1, IsThumb2); 7559 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, 7560 IsThumb1, IsThumb2); 7561 7562 // Decrement loop variable by UnitSize. 7563 if (IsThumb1) { 7564 MachineInstrBuilder MIB = 7565 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); 7566 MIB = AddDefaultT1CC(MIB); 7567 MIB.addReg(varPhi).addImm(UnitSize); 7568 AddDefaultPred(MIB); 7569 } else { 7570 MachineInstrBuilder MIB = 7571 BuildMI(*BB, BB->end(), dl, 7572 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); 7573 AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); 7574 MIB->getOperand(5).setReg(ARM::CPSR); 7575 MIB->getOperand(5).setIsDef(true); 7576 } 7577 BuildMI(*BB, BB->end(), dl, 7578 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) 7579 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 7580 7581 // loopMBB can loop back to loopMBB or fall through to exitMBB. 7582 BB->addSuccessor(loopMBB); 7583 BB->addSuccessor(exitMBB); 7584 7585 // Add epilogue to handle BytesLeft. 7586 BB = exitMBB; 7587 MachineInstr *StartOfExit = exitMBB->begin(); 7588 7589 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 7590 // [destOut] = STRB_POST(scratch, destLoop, 1) 7591 unsigned srcIn = srcLoop; 7592 unsigned destIn = destLoop; 7593 for (unsigned i = 0; i < BytesLeft; i++) { 7594 unsigned srcOut = MRI.createVirtualRegister(TRC); 7595 unsigned destOut = MRI.createVirtualRegister(TRC); 7596 unsigned scratch = MRI.createVirtualRegister(TRC); 7597 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, 7598 IsThumb1, IsThumb2); 7599 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, 7600 IsThumb1, IsThumb2); 7601 srcIn = srcOut; 7602 destIn = destOut; 7603 } 7604 7605 MI->eraseFromParent(); // The instruction is gone now. 7606 return BB; 7607 } 7608 7609 MachineBasicBlock * 7610 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 7611 MachineBasicBlock *BB) const { 7612 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 7613 DebugLoc dl = MI->getDebugLoc(); 7614 bool isThumb2 = Subtarget->isThumb2(); 7615 switch (MI->getOpcode()) { 7616 default: { 7617 MI->dump(); 7618 llvm_unreachable("Unexpected instr type to insert"); 7619 } 7620 // The Thumb2 pre-indexed stores have the same MI operands, they just 7621 // define them differently in the .td files from the isel patterns, so 7622 // they need pseudos. 7623 case ARM::t2STR_preidx: 7624 MI->setDesc(TII->get(ARM::t2STR_PRE)); 7625 return BB; 7626 case ARM::t2STRB_preidx: 7627 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 7628 return BB; 7629 case ARM::t2STRH_preidx: 7630 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 7631 return BB; 7632 7633 case ARM::STRi_preidx: 7634 case ARM::STRBi_preidx: { 7635 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 7636 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 7637 // Decode the offset. 
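    // Rough sketch of the AM2 encoding handled here (see
    // ARMAddressingModes.h): the immediate word packs the add/sub flag, a
    // 12-bit offset, and a shift opcode, so a word built as
    // getAM2Opc(ARM_AM::sub, 8, ARM_AM::no_shift) decodes to isSub == true
    // and Offset == 8, which becomes -8 below.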
7638 unsigned Offset = MI->getOperand(4).getImm(); 7639 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 7640 Offset = ARM_AM::getAM2Offset(Offset); 7641 if (isSub) 7642 Offset = -Offset; 7643 7644 MachineMemOperand *MMO = *MI->memoperands_begin(); 7645 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 7646 .addOperand(MI->getOperand(0)) // Rn_wb 7647 .addOperand(MI->getOperand(1)) // Rt 7648 .addOperand(MI->getOperand(2)) // Rn 7649 .addImm(Offset) // offset (skip GPR==zero_reg) 7650 .addOperand(MI->getOperand(5)) // pred 7651 .addOperand(MI->getOperand(6)) 7652 .addMemOperand(MMO); 7653 MI->eraseFromParent(); 7654 return BB; 7655 } 7656 case ARM::STRr_preidx: 7657 case ARM::STRBr_preidx: 7658 case ARM::STRH_preidx: { 7659 unsigned NewOpc; 7660 switch (MI->getOpcode()) { 7661 default: llvm_unreachable("unexpected opcode!"); 7662 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 7663 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 7664 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 7665 } 7666 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 7667 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 7668 MIB.addOperand(MI->getOperand(i)); 7669 MI->eraseFromParent(); 7670 return BB; 7671 } 7672 case ARM::ATOMIC_LOAD_ADD_I8: 7673 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7674 case ARM::ATOMIC_LOAD_ADD_I16: 7675 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7676 case ARM::ATOMIC_LOAD_ADD_I32: 7677 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7678 7679 case ARM::ATOMIC_LOAD_AND_I8: 7680 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7681 case ARM::ATOMIC_LOAD_AND_I16: 7682 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7683 case ARM::ATOMIC_LOAD_AND_I32: 7684 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7685 7686 case ARM::ATOMIC_LOAD_OR_I8: 7687 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7688 case ARM::ATOMIC_LOAD_OR_I16: 7689 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7690 case ARM::ATOMIC_LOAD_OR_I32: 7691 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7692 7693 case ARM::ATOMIC_LOAD_XOR_I8: 7694 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7695 case ARM::ATOMIC_LOAD_XOR_I16: 7696 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7697 case ARM::ATOMIC_LOAD_XOR_I32: 7698 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7699 7700 case ARM::ATOMIC_LOAD_NAND_I8: 7701 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7702 case ARM::ATOMIC_LOAD_NAND_I16: 7703 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7704 case ARM::ATOMIC_LOAD_NAND_I32: 7705 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7706 7707 case ARM::ATOMIC_LOAD_SUB_I8: 7708 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 7709 case ARM::ATOMIC_LOAD_SUB_I16: 7710 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 7711 case ARM::ATOMIC_LOAD_SUB_I32: 7712 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 7713 7714 case ARM::ATOMIC_LOAD_MIN_I8: 7715 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 7716 case ARM::ATOMIC_LOAD_MIN_I16: 7717 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 7718 case ARM::ATOMIC_LOAD_MIN_I32: 7719 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 7720 7721 case ARM::ATOMIC_LOAD_MAX_I8: 7722 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 7723 case ARM::ATOMIC_LOAD_MAX_I16: 7724 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 7725 case ARM::ATOMIC_LOAD_MAX_I32: 7726 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 7727 7728 case ARM::ATOMIC_LOAD_UMIN_I8: 7729 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 7730 case ARM::ATOMIC_LOAD_UMIN_I16: 7731 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 7732 case ARM::ATOMIC_LOAD_UMIN_I32: 7733 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 7734 7735 case ARM::ATOMIC_LOAD_UMAX_I8: 7736 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 7737 case ARM::ATOMIC_LOAD_UMAX_I16: 7738 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 7739 case ARM::ATOMIC_LOAD_UMAX_I32: 7740 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 7741 7742 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 7743 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 7744 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 7745 7746 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 7747 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 7748 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 7749 7750 case ARM::ATOMIC_LOAD_I64: 7751 return EmitAtomicLoad64(MI, BB); 7752 7753 case ARM::ATOMIC_LOAD_ADD_I64: 7754 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, 7755 isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, 7756 /*NeedsCarry*/ true); 7757 case ARM::ATOMIC_LOAD_SUB_I64: 7758 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7759 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7760 /*NeedsCarry*/ true); 7761 case ARM::ATOMIC_LOAD_OR_I64: 7762 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, 7763 isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7764 case ARM::ATOMIC_LOAD_XOR_I64: 7765 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, 7766 isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7767 case ARM::ATOMIC_LOAD_AND_I64: 7768 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, 7769 isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7770 case ARM::ATOMIC_STORE_I64: 7771 case ARM::ATOMIC_SWAP_I64: 7772 return EmitAtomicBinary64(MI, BB, 0, 0, false); 7773 case ARM::ATOMIC_CMP_SWAP_I64: 7774 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7775 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7776 /*NeedsCarry*/ false, /*IsCmpxchg*/true); 7777 case ARM::ATOMIC_LOAD_MIN_I64: 7778 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7779 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7780 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7781 /*IsMinMax*/ true, ARMCC::LT); 7782 case ARM::ATOMIC_LOAD_MAX_I64: 7783 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7784 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7785 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7786 /*IsMinMax*/ true, ARMCC::GE); 7787 case ARM::ATOMIC_LOAD_UMIN_I64: 7788 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7789 isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, 7790 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7791 /*IsMinMax*/ true, ARMCC::LO); 7792 case ARM::ATOMIC_LOAD_UMAX_I64: 7793 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7794 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7795 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7796 /*IsMinMax*/ true, ARMCC::HS); 7797 7798 case ARM::tMOVCCr_pseudo: { 7799 // To "insert" a SELECT_CC instruction, we actually have to insert the 7800 // diamond control-flow pattern. The incoming instruction knows the 7801 // destination vreg to set, the condition code register to branch on, the 7802 // true/false values to select between, and a branch opcode to use. 7803 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7804 MachineFunction::iterator It = BB; 7805 ++It; 7806 7807 // thisMBB: 7808 // ... 7809 // TrueVal = ... 7810 // cmpTY ccX, r1, r2 7811 // bCC copy1MBB 7812 // fallthrough --> copy0MBB 7813 MachineBasicBlock *thisMBB = BB; 7814 MachineFunction *F = BB->getParent(); 7815 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7816 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7817 F->insert(It, copy0MBB); 7818 F->insert(It, sinkMBB); 7819 7820 // Transfer the remainder of BB and its successor edges to sinkMBB. 7821 sinkMBB->splice(sinkMBB->begin(), BB, 7822 llvm::next(MachineBasicBlock::iterator(MI)), 7823 BB->end()); 7824 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7825 7826 BB->addSuccessor(copy0MBB); 7827 BB->addSuccessor(sinkMBB); 7828 7829 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 7830 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 7831 7832 // copy0MBB: 7833 // %FalseValue = ... 7834 // # fallthrough to sinkMBB 7835 BB = copy0MBB; 7836 7837 // Update machine-CFG edges 7838 BB->addSuccessor(sinkMBB); 7839 7840 // sinkMBB: 7841 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7842 // ... 7843 BB = sinkMBB; 7844 BuildMI(*BB, BB->begin(), dl, 7845 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 7846 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 7847 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 7848 7849 MI->eraseFromParent(); // The pseudo instruction is gone now. 7850 return BB; 7851 } 7852 7853 case ARM::BCCi64: 7854 case ARM::BCCZi64: { 7855 // If there is an unconditional branch to the other successor, remove it. 7856 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 7857 7858 // Compare both parts that make up the double comparison separately for 7859 // equality. 7860 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 7861 7862 unsigned LHS1 = MI->getOperand(1).getReg(); 7863 unsigned LHS2 = MI->getOperand(2).getReg(); 7864 if (RHSisZero) { 7865 AddDefaultPred(BuildMI(BB, dl, 7866 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7867 .addReg(LHS1).addImm(0)); 7868 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7869 .addReg(LHS2).addImm(0) 7870 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7871 } else { 7872 unsigned RHS1 = MI->getOperand(3).getReg(); 7873 unsigned RHS2 = MI->getOperand(4).getReg(); 7874 AddDefaultPred(BuildMI(BB, dl, 7875 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7876 .addReg(LHS1).addReg(RHS1)); 7877 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7878 .addReg(LHS2).addReg(RHS2) 7879 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7880 } 7881 7882 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 
3 : 5).getMBB(); 7883 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 7884 if (MI->getOperand(0).getImm() == ARMCC::NE) 7885 std::swap(destMBB, exitMBB); 7886 7887 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 7888 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 7889 if (isThumb2) 7890 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 7891 else 7892 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 7893 7894 MI->eraseFromParent(); // The pseudo instruction is gone now. 7895 return BB; 7896 } 7897 7898 case ARM::Int_eh_sjlj_setjmp: 7899 case ARM::Int_eh_sjlj_setjmp_nofp: 7900 case ARM::tInt_eh_sjlj_setjmp: 7901 case ARM::t2Int_eh_sjlj_setjmp: 7902 case ARM::t2Int_eh_sjlj_setjmp_nofp: 7903 EmitSjLjDispatchBlock(MI, BB); 7904 return BB; 7905 7906 case ARM::ABS: 7907 case ARM::t2ABS: { 7908 // To insert an ABS instruction, we have to insert the 7909 // diamond control-flow pattern. The incoming instruction knows the 7910 // source vreg to test against 0, the destination vreg to set, 7911 // the condition code register to branch on, the 7912 // true/false values to select between, and a branch opcode to use. 7913 // It transforms 7914 // V1 = ABS V0 7915 // into 7916 // V2 = MOVS V0 7917 // BCC (branch to SinkBB if V0 >= 0) 7918 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 7919 // SinkBB: V1 = PHI(V2, V3) 7920 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7921 MachineFunction::iterator BBI = BB; 7922 ++BBI; 7923 MachineFunction *Fn = BB->getParent(); 7924 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7925 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7926 Fn->insert(BBI, RSBBB); 7927 Fn->insert(BBI, SinkBB); 7928 7929 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 7930 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 7931 bool isThumb2 = Subtarget->isThumb2(); 7932 MachineRegisterInfo &MRI = Fn->getRegInfo(); 7933 // In Thumb mode S must not be specified if source register is the SP or 7934 // PC and if destination register is the SP, so restrict register class 7935 unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? 7936 (const TargetRegisterClass*)&ARM::rGPRRegClass : 7937 (const TargetRegisterClass*)&ARM::GPRRegClass); 7938 7939 // Transfer the remainder of BB and its successor edges to sinkMBB. 7940 SinkBB->splice(SinkBB->begin(), BB, 7941 llvm::next(MachineBasicBlock::iterator(MI)), 7942 BB->end()); 7943 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 7944 7945 BB->addSuccessor(RSBBB); 7946 BB->addSuccessor(SinkBB); 7947 7948 // fall through to SinkMBB 7949 RSBBB->addSuccessor(SinkBB); 7950 7951 // insert a cmp at the end of BB 7952 AddDefaultPred(BuildMI(BB, dl, 7953 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7954 .addReg(ABSSrcReg).addImm(0)); 7955 7956 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 7957 BuildMI(BB, dl, 7958 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 7959 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 7960 7961 // insert rsbri in RSBBB 7962 // Note: BCC and rsbri will be converted into predicated rsbmi 7963 // by if-conversion pass 7964 BuildMI(*RSBBB, RSBBB->begin(), dl, 7965 TII->get(isThumb2 ? 
ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 7966 .addReg(ABSSrcReg, RegState::Kill) 7967 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 7968 7969 // insert PHI in SinkBB, 7970 // reuse ABSDstReg to not change uses of ABS instruction 7971 BuildMI(*SinkBB, SinkBB->begin(), dl, 7972 TII->get(ARM::PHI), ABSDstReg) 7973 .addReg(NewRsbDstReg).addMBB(RSBBB) 7974 .addReg(ABSSrcReg).addMBB(BB); 7975 7976 // remove ABS instruction 7977 MI->eraseFromParent(); 7978 7979 // return last added BB 7980 return SinkBB; 7981 } 7982 case ARM::COPY_STRUCT_BYVAL_I32: 7983 ++NumLoopByVals; 7984 return EmitStructByval(MI, BB); 7985 } 7986 } 7987 7988 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 7989 SDNode *Node) const { 7990 if (!MI->hasPostISelHook()) { 7991 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 7992 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 7993 return; 7994 } 7995 7996 const MCInstrDesc *MCID = &MI->getDesc(); 7997 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 7998 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional 7999 // operand is still set to noreg. If needed, set the optional operand's 8000 // register to CPSR, and remove the redundant implicit def. 8001 // 8002 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 8003 8004 // Rename pseudo opcodes. 8005 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 8006 if (NewOpc) { 8007 const ARMBaseInstrInfo *TII = 8008 static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); 8009 MCID = &TII->get(NewOpc); 8010 8011 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 8012 "converted opcode should be the same except for cc_out"); 8013 8014 MI->setDesc(*MCID); 8015 8016 // Add the optional cc_out operand 8017 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 8018 } 8019 unsigned ccOutIdx = MCID->getNumOperands() - 1; 8020 8021 // Any ARM instruction that sets the 's' bit should specify an optional 8022 // "cc_out" operand in the last operand position. 8023 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 8024 assert(!NewOpc && "Optional cc_out operand required"); 8025 return; 8026 } 8027 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 8028 // since we already have an optional CPSR def. 8029 bool definesCPSR = false; 8030 bool deadCPSR = false; 8031 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 8032 i != e; ++i) { 8033 const MachineOperand &MO = MI->getOperand(i); 8034 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 8035 definesCPSR = true; 8036 if (MO.isDead()) 8037 deadCPSR = true; 8038 MI->RemoveOperand(i); 8039 break; 8040 } 8041 } 8042 if (!definesCPSR) { 8043 assert(!NewOpc && "Optional cc_out operand required"); 8044 return; 8045 } 8046 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 8047 if (deadCPSR) { 8048 assert(!MI->getOperand(ccOutIdx).getReg() && 8049 "expect uninitialized optional cc_out operand"); 8050 return; 8051 } 8052 8053 // If this instruction was defined with an optional CPSR def and its dag node 8054 // had a live implicit CPSR def, then activate the optional CPSR def. 
8055 MachineOperand &MO = MI->getOperand(ccOutIdx); 8056 MO.setReg(ARM::CPSR); 8057 MO.setIsDef(true); 8058 } 8059 8060 //===----------------------------------------------------------------------===// 8061 // ARM Optimization Hooks 8062 //===----------------------------------------------------------------------===// 8063 8064 // Helper function that checks if N is a null or all ones constant. 8065 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { 8066 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); 8067 if (!C) 8068 return false; 8069 return AllOnes ? C->isAllOnesValue() : C->isNullValue(); 8070 } 8071 8072 // Return true if N is conditionally 0 or all ones. 8073 // Detects these expressions where cc is an i1 value: 8074 // 8075 // (select cc 0, y) [AllOnes=0] 8076 // (select cc y, 0) [AllOnes=0] 8077 // (zext cc) [AllOnes=0] 8078 // (sext cc) [AllOnes=0/1] 8079 // (select cc -1, y) [AllOnes=1] 8080 // (select cc y, -1) [AllOnes=1] 8081 // 8082 // Invert is set when N is the null/all ones constant when CC is false. 8083 // OtherOp is set to the alternative value of N. 8084 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, 8085 SDValue &CC, bool &Invert, 8086 SDValue &OtherOp, 8087 SelectionDAG &DAG) { 8088 switch (N->getOpcode()) { 8089 default: return false; 8090 case ISD::SELECT: { 8091 CC = N->getOperand(0); 8092 SDValue N1 = N->getOperand(1); 8093 SDValue N2 = N->getOperand(2); 8094 if (isZeroOrAllOnes(N1, AllOnes)) { 8095 Invert = false; 8096 OtherOp = N2; 8097 return true; 8098 } 8099 if (isZeroOrAllOnes(N2, AllOnes)) { 8100 Invert = true; 8101 OtherOp = N1; 8102 return true; 8103 } 8104 return false; 8105 } 8106 case ISD::ZERO_EXTEND: 8107 // (zext cc) can never be the all ones value. 8108 if (AllOnes) 8109 return false; 8110 // Fall through. 8111 case ISD::SIGN_EXTEND: { 8112 EVT VT = N->getValueType(0); 8113 CC = N->getOperand(0); 8114 if (CC.getValueType() != MVT::i1) 8115 return false; 8116 Invert = !AllOnes; 8117 if (AllOnes) 8118 // When looking for an AllOnes constant, N is an sext, and the 'other' 8119 // value is 0. 8120 OtherOp = DAG.getConstant(0, VT); 8121 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8122 // When looking for a 0 constant, N can be zext or sext. 8123 OtherOp = DAG.getConstant(1, VT); 8124 else 8125 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); 8126 return true; 8127 } 8128 } 8129 } 8130 8131 // Combine a constant select operand into its use: 8132 // 8133 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 8134 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 8135 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] 8136 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 8137 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 8138 // 8139 // The transform is rejected if the select doesn't have a constant operand that 8140 // is null, or all ones when AllOnes is set. 8141 // 8142 // Also recognize sext/zext from i1: 8143 // 8144 // (add (zext cc), x) -> (select cc (add x, 1), x) 8145 // (add (sext cc), x) -> (select cc (add x, -1), x) 8146 // 8147 // These transformations eventually create predicated instructions. 8148 // 8149 // @param N The node to transform. 8150 // @param Slct The N operand that is a select. 8151 // @param OtherOp The other N operand (x above). 8152 // @param DCI Context. 8153 // @param AllOnes Require the select constant to be all ones instead of null. 8154 // @returns The new node, or SDValue() on failure. 
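// Illustrative example: with N = (add (select cc, 0, 4), x), Slct being the
// select and OtherOp being x, the result is (select cc, x, (add x, 4)),
// which if-conversion can later turn into a single predicated add.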
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                            TargetLowering::DAGCombinerInfo &DCI,
                            bool AllOnes = false) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  SDValue NonConstantVal;
  SDValue CCOp;
  bool SwapSelectOps;
  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
                                  NonConstantVal, DAG))
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  SDValue TrueVal = OtherOp;
  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                 OtherOp, NonConstantVal);
  // Unless SwapSelectOps says CC should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     CCOp, TrueVal, FalseVal);
}

// Attempt combineSelectAndUse on each operand of a commutative operator N.
static
SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
                                       TargetLowering::DAGCombinerInfo &DCI) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
    if (Result.getNode())
      return Result;
  }
  if (N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
    if (Result.getNode())
      return Result;
  }
  return SDValue();
}

// AddCombineToVPADDL - For pair-wise add on NEON, use the vpaddl instruction
// (only after legalization).
static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {

  // Only perform the optimization if after legalize, and if NEON is
  // available. We also expect both operands to be BUILD_VECTORs.
  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
      || N0.getOpcode() != ISD::BUILD_VECTOR
      || N1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
  EVT VT = N->getValueType(0);
  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
    return SDValue();

  // Check that the vector operands are of the right form.
  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
  // operands, where N is the size of the formed vector.
  // Each EXTRACT_VECTOR should have the same input vector and an odd or even
  // index such that we have a pair-wise add pattern.

  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = N0->getOperand(0)->getOperand(0);
  SDNode *V = Vec.getNode();
  unsigned nextIndex = 0;

  // For each pair of operands of the two BUILD_VECTORs, check that each
  // operand is an EXTRACT_VECTOR_ELT of the same vector with an appropriate
  // index.
  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

      SDValue ExtVec0 = N0->getOperand(i);
      SDValue ExtVec1 = N1->getOperand(i);

      // The first operand is the vector; verify it's the same.
      if (V != ExtVec0->getOperand(0).getNode() ||
          V != ExtVec1->getOperand(0).getNode())
        return SDValue();

      // The second is the constant index; verify it's correct.
      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));

      // For the constants, we want to see all the even or all the odd
      // indices.
      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
          || C1->getZExtValue() != nextIndex+1)
        return SDValue();

      // Increment index.
      nextIndex+=2;
    } else
      return SDValue();
  }

  // Create VPADDL node.
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Build operand list.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
                                TLI.getPointerTy()));

  // Input is the vector.
  Ops.push_back(Vec);

  // Get widened type and narrowed type.
  MVT widenType;
  unsigned numElem = VT.getVectorNumElements();
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
    default:
      llvm_unreachable("Invalid vector element type for padd optimization.");
  }

  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
                            widenType, &Ops[0], Ops.size());
  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp);
}

static SDValue findMUL_LOHI(SDValue V) {
  if (V->getOpcode() == ISD::UMUL_LOHI ||
      V->getOpcode() == ISD::SMUL_LOHI)
    return V;
  return SDValue();
}

static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {

  if (Subtarget->isThumb1Only()) return SDValue();

  // Only perform the checks after legalize when the pattern is available.
  if (DCI.isBeforeLegalize()) return SDValue();

  // Look for multiply add opportunities.
  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
  // each add node consumes a value from ISD::UMUL_LOHI and there is
  // a glue link from the first add to the second add.
  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
  // an S/UMLAL instruction.
  //                  loAdd   UMUL_LOHI
  //                   \    / :lo    \ :hi
  //                    \  /          \          [no multiline comment]
  //                     ADDC         |  hiAdd
  //                      \ :glue    /  /
  //                       \        /  /
  //                        ADDE
  //
  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
  SDValue AddcOp0 = AddcNode->getOperand(0);
  SDValue AddcOp1 = AddcNode->getOperand(1);

  // Check if the two operands are from the same mul_lohi node.
  if (AddcOp0.getNode() == AddcOp1.getNode())
    return SDValue();

  assert(AddcNode->getNumValues() == 2 &&
         AddcNode->getValueType(0) == MVT::i32 &&
         "Expect ADDC with two result values. First: i32");

  // Check that we have a glued ADDC node.
  if (AddcNode->getValueType(1) != MVT::Glue)
    return SDValue();

  // Check that the ADDC adds the low result of the S/UMUL_LOHI.
  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
      AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
      AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
      AddcOp1->getOpcode() != ISD::SMUL_LOHI)
    return SDValue();

  // Look for the glued ADDE.
  SDNode* AddeNode = AddcNode->getGluedUser();
  if (AddeNode == NULL)
    return SDValue();

  // Make sure it is really an ADDE.
  if (AddeNode->getOpcode() != ISD::ADDE)
    return SDValue();

  assert(AddeNode->getNumOperands() == 3 &&
         AddeNode->getOperand(2).getValueType() == MVT::Glue &&
         "ADDE node has the wrong inputs");

  // Check for the triangle shape.
  SDValue AddeOp0 = AddeNode->getOperand(0);
  SDValue AddeOp1 = AddeNode->getOperand(1);

  // Make sure that the ADDE operands are not coming from the same node.
  if (AddeOp0.getNode() == AddeOp1.getNode())
    return SDValue();

  // Find the MUL_LOHI node walking up ADDE's operands.
  bool IsLeftOperandMUL = false;
  SDValue MULOp = findMUL_LOHI(AddeOp0);
  if (MULOp == SDValue())
    MULOp = findMUL_LOHI(AddeOp1);
  else
    IsLeftOperandMUL = true;
  if (MULOp == SDValue())
    return SDValue();

  // Figure out the right opcode.
  unsigned Opc = MULOp->getOpcode();
  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;

  // Figure out the high and low input values to the MLAL node.
  SDValue* HiMul = &MULOp;
  SDValue* HiAdd = NULL;
  SDValue* LoMul = NULL;
  SDValue* LowAdd = NULL;

  if (IsLeftOperandMUL)
    HiAdd = &AddeOp1;
  else
    HiAdd = &AddeOp0;

  if (AddcOp0->getOpcode() == Opc) {
    LoMul = &AddcOp0;
    LowAdd = &AddcOp1;
  }
  if (AddcOp1->getOpcode() == Opc) {
    LoMul = &AddcOp1;
    LowAdd = &AddcOp0;
  }

  if (LoMul == NULL)
    return SDValue();

  if (LoMul->getNode() != HiMul->getNode())
    return SDValue();

  // Create the merged node.
  SelectionDAG &DAG = DCI.DAG;

  // Build operand list.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(LoMul->getOperand(0));
  Ops.push_back(LoMul->getOperand(1));
  Ops.push_back(*LowAdd);
  Ops.push_back(*HiAdd);

  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
                                 DAG.getVTList(MVT::i32, MVT::i32),
                                 &Ops[0], Ops.size());

  // Replace the ADD nodes' uses with the MLAL node's values.
  SDValue HiMLALResult(MLALNode.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);

  SDValue LoMLALResult(MLALNode.getNode(), 0);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);

  // Return the original node to notify the driver to stop replacing.
  SDValue resNode(AddcNode, 0);
  return resNode;
}

/// PerformADDCCombine - Target-specific dag combine transform from
/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
static SDValue PerformADDCCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
}

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
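/// For instance (illustrative), (add x, (zext cc)) only matches the select
/// combine on the commuted attempt, since only the first passed operand is
/// tried as the select-like value.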
8443 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 8444 TargetLowering::DAGCombinerInfo &DCI, 8445 const ARMSubtarget *Subtarget){ 8446 8447 // Attempt to create vpaddl for this add. 8448 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); 8449 if (Result.getNode()) 8450 return Result; 8451 8452 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 8453 if (N0.getNode()->hasOneUse()) { 8454 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 8455 if (Result.getNode()) return Result; 8456 } 8457 return SDValue(); 8458 } 8459 8460 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 8461 /// 8462 static SDValue PerformADDCombine(SDNode *N, 8463 TargetLowering::DAGCombinerInfo &DCI, 8464 const ARMSubtarget *Subtarget) { 8465 SDValue N0 = N->getOperand(0); 8466 SDValue N1 = N->getOperand(1); 8467 8468 // First try with the default operand order. 8469 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); 8470 if (Result.getNode()) 8471 return Result; 8472 8473 // If that didn't work, try again with the operands commuted. 8474 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 8475 } 8476 8477 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 8478 /// 8479 static SDValue PerformSUBCombine(SDNode *N, 8480 TargetLowering::DAGCombinerInfo &DCI) { 8481 SDValue N0 = N->getOperand(0); 8482 SDValue N1 = N->getOperand(1); 8483 8484 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 8485 if (N1.getNode()->hasOneUse()) { 8486 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 8487 if (Result.getNode()) return Result; 8488 } 8489 8490 return SDValue(); 8491 } 8492 8493 /// PerformVMULCombine 8494 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 8495 /// special multiplier accumulator forwarding. 
8496 /// vmul d3, d0, d2 8497 /// vmla d3, d1, d2 8498 /// is faster than 8499 /// vadd d3, d0, d1 8500 /// vmul d3, d3, d2 8501 // However, for (A + B) * (A + B), 8502 // vadd d2, d0, d1 8503 // vmul d3, d0, d2 8504 // vmla d3, d1, d2 8505 // is slower than 8506 // vadd d2, d0, d1 8507 // vmul d3, d2, d2 8508 static SDValue PerformVMULCombine(SDNode *N, 8509 TargetLowering::DAGCombinerInfo &DCI, 8510 const ARMSubtarget *Subtarget) { 8511 if (!Subtarget->hasVMLxForwarding()) 8512 return SDValue(); 8513 8514 SelectionDAG &DAG = DCI.DAG; 8515 SDValue N0 = N->getOperand(0); 8516 SDValue N1 = N->getOperand(1); 8517 unsigned Opcode = N0.getOpcode(); 8518 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 8519 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 8520 Opcode = N1.getOpcode(); 8521 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 8522 Opcode != ISD::FADD && Opcode != ISD::FSUB) 8523 return SDValue(); 8524 std::swap(N0, N1); 8525 } 8526 8527 if (N0 == N1) 8528 return SDValue(); 8529 8530 EVT VT = N->getValueType(0); 8531 SDLoc DL(N); 8532 SDValue N00 = N0->getOperand(0); 8533 SDValue N01 = N0->getOperand(1); 8534 return DAG.getNode(Opcode, DL, VT, 8535 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 8536 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 8537 } 8538 8539 static SDValue PerformMULCombine(SDNode *N, 8540 TargetLowering::DAGCombinerInfo &DCI, 8541 const ARMSubtarget *Subtarget) { 8542 SelectionDAG &DAG = DCI.DAG; 8543 8544 if (Subtarget->isThumb1Only()) 8545 return SDValue(); 8546 8547 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 8548 return SDValue(); 8549 8550 EVT VT = N->getValueType(0); 8551 if (VT.is64BitVector() || VT.is128BitVector()) 8552 return PerformVMULCombine(N, DCI, Subtarget); 8553 if (VT != MVT::i32) 8554 return SDValue(); 8555 8556 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8557 if (!C) 8558 return SDValue(); 8559 8560 int64_t MulAmt = C->getSExtValue(); 8561 unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt); 8562 8563 ShiftAmt = ShiftAmt & (32 - 1); 8564 SDValue V = N->getOperand(0); 8565 SDLoc DL(N); 8566 8567 SDValue Res; 8568 MulAmt >>= ShiftAmt; 8569 8570 if (MulAmt >= 0) { 8571 if (isPowerOf2_32(MulAmt - 1)) { 8572 // (mul x, 2^N + 1) => (add (shl x, N), x) 8573 Res = DAG.getNode(ISD::ADD, DL, VT, 8574 V, 8575 DAG.getNode(ISD::SHL, DL, VT, 8576 V, 8577 DAG.getConstant(Log2_32(MulAmt - 1), 8578 MVT::i32))); 8579 } else if (isPowerOf2_32(MulAmt + 1)) { 8580 // (mul x, 2^N - 1) => (sub (shl x, N), x) 8581 Res = DAG.getNode(ISD::SUB, DL, VT, 8582 DAG.getNode(ISD::SHL, DL, VT, 8583 V, 8584 DAG.getConstant(Log2_32(MulAmt + 1), 8585 MVT::i32)), 8586 V); 8587 } else 8588 return SDValue(); 8589 } else { 8590 uint64_t MulAmtAbs = -MulAmt; 8591 if (isPowerOf2_32(MulAmtAbs + 1)) { 8592 // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) 8593 Res = DAG.getNode(ISD::SUB, DL, VT, 8594 V, 8595 DAG.getNode(ISD::SHL, DL, VT, 8596 V, 8597 DAG.getConstant(Log2_32(MulAmtAbs + 1), 8598 MVT::i32))); 8599 } else if (isPowerOf2_32(MulAmtAbs - 1)) { 8600 // (mul x, -(2^N + 1)) => - (add (shl x, N), x) 8601 Res = DAG.getNode(ISD::ADD, DL, VT, 8602 V, 8603 DAG.getNode(ISD::SHL, DL, VT, 8604 V, 8605 DAG.getConstant(Log2_32(MulAmtAbs-1), 8606 MVT::i32))); 8607 Res = DAG.getNode(ISD::SUB, DL, VT, 8608 DAG.getConstant(0, MVT::i32),Res); 8609 8610 } else 8611 return SDValue(); 8612 } 8613 8614 if (ShiftAmt != 0) 8615 Res = DAG.getNode(ISD::SHL, DL, VT, 8616 Res, DAG.getConstant(ShiftAmt, MVT::i32)); 8617 8618 // Do not add new nodes to DAG combiner worklist. 
8619 DCI.CombineTo(N, Res, false); 8620 return SDValue(); 8621 } 8622 8623 static SDValue PerformANDCombine(SDNode *N, 8624 TargetLowering::DAGCombinerInfo &DCI, 8625 const ARMSubtarget *Subtarget) { 8626 8627 // Attempt to use immediate-form VBIC 8628 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 8629 SDLoc dl(N); 8630 EVT VT = N->getValueType(0); 8631 SelectionDAG &DAG = DCI.DAG; 8632 8633 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8634 return SDValue(); 8635 8636 APInt SplatBits, SplatUndef; 8637 unsigned SplatBitSize; 8638 bool HasAnyUndefs; 8639 if (BVN && 8640 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 8641 if (SplatBitSize <= 64) { 8642 EVT VbicVT; 8643 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 8644 SplatUndef.getZExtValue(), SplatBitSize, 8645 DAG, VbicVT, VT.is128BitVector(), 8646 OtherModImm); 8647 if (Val.getNode()) { 8648 SDValue Input = 8649 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 8650 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 8651 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 8652 } 8653 } 8654 } 8655 8656 if (!Subtarget->isThumb1Only()) { 8657 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) 8658 SDValue Result = combineSelectAndUseCommutative(N, true, DCI); 8659 if (Result.getNode()) 8660 return Result; 8661 } 8662 8663 return SDValue(); 8664 } 8665 8666 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR 8667 static SDValue PerformORCombine(SDNode *N, 8668 TargetLowering::DAGCombinerInfo &DCI, 8669 const ARMSubtarget *Subtarget) { 8670 // Attempt to use immediate-form VORR 8671 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 8672 SDLoc dl(N); 8673 EVT VT = N->getValueType(0); 8674 SelectionDAG &DAG = DCI.DAG; 8675 8676 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8677 return SDValue(); 8678 8679 APInt SplatBits, SplatUndef; 8680 unsigned SplatBitSize; 8681 bool HasAnyUndefs; 8682 if (BVN && Subtarget->hasNEON() && 8683 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 8684 if (SplatBitSize <= 64) { 8685 EVT VorrVT; 8686 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 8687 SplatUndef.getZExtValue(), SplatBitSize, 8688 DAG, VorrVT, VT.is128BitVector(), 8689 OtherModImm); 8690 if (Val.getNode()) { 8691 SDValue Input = 8692 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 8693 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 8694 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 8695 } 8696 } 8697 } 8698 8699 if (!Subtarget->isThumb1Only()) { 8700 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 8701 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 8702 if (Result.getNode()) 8703 return Result; 8704 } 8705 8706 // The code below optimizes (or (and X, Y), Z). 8707 // The AND operand needs to have a single user to make these optimizations 8708 // profitable. 8709 SDValue N0 = N->getOperand(0); 8710 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) 8711 return SDValue(); 8712 SDValue N1 = N->getOperand(1); 8713 8714 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 
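  // An illustrative instance: with A a constant splat of 0x00FF in each
  // 16-bit lane, (or (and B, A), (and C, ~A)) takes the low byte of every
  // lane from B and the high byte from C, which is exactly one VBSL with A
  // as the mask.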
  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
    APInt SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;

    APInt SplatBits0, SplatBits1;
    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
    BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
    // Ensure that the second operands of both ANDs are constants.
    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
                                      HasAnyUndefs) && !HasAnyUndefs) {
      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
                                        HasAnyUndefs) && !HasAnyUndefs) {
        // Ensure that the bit widths of the constants are the same and that
        // the splat arguments are logical inverses as per the pattern we
        // are trying to simplify.
        if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
            SplatBits0 == ~SplatBits1) {
          // Canonicalize the vector type to make instruction selection
          // simpler.
          EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
          SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
                                       N0->getOperand(1),
                                       N0->getOperand(0),
                                       N1->getOperand(0));
          return DAG.getNode(ISD::BITCAST, dl, VT, Result);
        }
      }
    }
  }

  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
  // reasonable.

  // BFI is only available on V6T2+.
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
    return SDValue();

  SDLoc DL(N);
  // 1) or (and A, mask), val => ARMbfi A, val, mask
  //      iff (val & mask) == val
  //
  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
  //          && mask == ~mask2
  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
  //          && ~mask == mask2
  //  (i.e., copy a bitfield value into another bitfield of the same width)

  if (VT != MVT::i32)
    return SDValue();

  SDValue N00 = N0.getOperand(0);

  // The value and the mask need to be constants so we can verify this is
  // actually a bitfield set. If the mask is 0xffff, we can do better
  // via a movt instruction, so don't use BFI in that case.
  SDValue MaskOp = N0.getOperand(1);
  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
  if (!MaskC)
    return SDValue();
  unsigned Mask = MaskC->getZExtValue();
  if (Mask == 0xffff)
    return SDValue();
  SDValue Res;
  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N1C) {
    unsigned Val = N1C->getZExtValue();
    if ((Val & ~Mask) != Val)
      return SDValue();

    if (ARM::isBitFieldInvertedMask(Mask)) {
      Val >>= countTrailingZeros(~Mask);

      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
                        DAG.getConstant(Val, MVT::i32),
                        DAG.getConstant(Mask, MVT::i32));

      // Do not add new nodes to DAG combiner worklist.
8796 DCI.CombineTo(N, Res, false); 8797 return SDValue(); 8798 } 8799 } else if (N1.getOpcode() == ISD::AND) { 8800 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 8801 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8802 if (!N11C) 8803 return SDValue(); 8804 unsigned Mask2 = N11C->getZExtValue(); 8805 8806 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 8807 // as is to match. 8808 if (ARM::isBitFieldInvertedMask(Mask) && 8809 (Mask == ~Mask2)) { 8810 // The pack halfword instruction works better for masks that fit it, 8811 // so use that when it's available. 8812 if (Subtarget->hasT2ExtractPack() && 8813 (Mask == 0xffff || Mask == 0xffff0000)) 8814 return SDValue(); 8815 // 2a 8816 unsigned amt = countTrailingZeros(Mask2); 8817 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 8818 DAG.getConstant(amt, MVT::i32)); 8819 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 8820 DAG.getConstant(Mask, MVT::i32)); 8821 // Do not add new nodes to DAG combiner worklist. 8822 DCI.CombineTo(N, Res, false); 8823 return SDValue(); 8824 } else if (ARM::isBitFieldInvertedMask(~Mask) && 8825 (~Mask == Mask2)) { 8826 // The pack halfword instruction works better for masks that fit it, 8827 // so use that when it's available. 8828 if (Subtarget->hasT2ExtractPack() && 8829 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 8830 return SDValue(); 8831 // 2b 8832 unsigned lsb = countTrailingZeros(Mask); 8833 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 8834 DAG.getConstant(lsb, MVT::i32)); 8835 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 8836 DAG.getConstant(Mask2, MVT::i32)); 8837 // Do not add new nodes to DAG combiner worklist. 8838 DCI.CombineTo(N, Res, false); 8839 return SDValue(); 8840 } 8841 } 8842 8843 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 8844 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 8845 ARM::isBitFieldInvertedMask(~Mask)) { 8846 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 8847 // where lsb(mask) == #shamt and masked bits of B are known zero. 8848 SDValue ShAmt = N00.getOperand(1); 8849 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 8850 unsigned LSB = countTrailingZeros(Mask); 8851 if (ShAmtC != LSB) 8852 return SDValue(); 8853 8854 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 8855 DAG.getConstant(~Mask, MVT::i32)); 8856 8857 // Do not add new nodes to DAG combiner worklist. 8858 DCI.CombineTo(N, Res, false); 8859 } 8860 8861 return SDValue(); 8862 } 8863 8864 static SDValue PerformXORCombine(SDNode *N, 8865 TargetLowering::DAGCombinerInfo &DCI, 8866 const ARMSubtarget *Subtarget) { 8867 EVT VT = N->getValueType(0); 8868 SelectionDAG &DAG = DCI.DAG; 8869 8870 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8871 return SDValue(); 8872 8873 if (!Subtarget->isThumb1Only()) { 8874 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 8875 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 8876 if (Result.getNode()) 8877 return Result; 8878 } 8879 8880 return SDValue(); 8881 } 8882 8883 /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 8884 /// the bits being cleared by the AND are not demanded by the BFI. 
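/// Illustrative example: with Mask2 == 0xFFFF00FF the BFI writes bits [15:8]
/// and demands only bits [7:0] of B, so an AND with Mask1 == 0xFF is
/// redundant and is dropped, while Mask1 == 0x7F would be kept.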
8885 static SDValue PerformBFICombine(SDNode *N, 8886 TargetLowering::DAGCombinerInfo &DCI) { 8887 SDValue N1 = N->getOperand(1); 8888 if (N1.getOpcode() == ISD::AND) { 8889 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8890 if (!N11C) 8891 return SDValue(); 8892 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 8893 unsigned LSB = countTrailingZeros(~InvMask); 8894 unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; 8895 unsigned Mask = (1 << Width)-1; 8896 unsigned Mask2 = N11C->getZExtValue(); 8897 if ((Mask & (~Mask2)) == 0) 8898 return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), 8899 N->getOperand(0), N1.getOperand(0), 8900 N->getOperand(2)); 8901 } 8902 return SDValue(); 8903 } 8904 8905 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for 8906 /// ARMISD::VMOVRRD. 8907 static SDValue PerformVMOVRRDCombine(SDNode *N, 8908 TargetLowering::DAGCombinerInfo &DCI) { 8909 // vmovrrd(vmovdrr x, y) -> x,y 8910 SDValue InDouble = N->getOperand(0); 8911 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 8912 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 8913 8914 // vmovrrd(load f64) -> (load i32), (load i32) 8915 SDNode *InNode = InDouble.getNode(); 8916 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 8917 InNode->getValueType(0) == MVT::f64 && 8918 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 8919 !cast<LoadSDNode>(InNode)->isVolatile()) { 8920 // TODO: Should this be done for non-FrameIndex operands? 8921 LoadSDNode *LD = cast<LoadSDNode>(InNode); 8922 8923 SelectionDAG &DAG = DCI.DAG; 8924 SDLoc DL(LD); 8925 SDValue BasePtr = LD->getBasePtr(); 8926 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 8927 LD->getPointerInfo(), LD->isVolatile(), 8928 LD->isNonTemporal(), LD->isInvariant(), 8929 LD->getAlignment()); 8930 8931 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 8932 DAG.getConstant(4, MVT::i32)); 8933 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 8934 LD->getPointerInfo(), LD->isVolatile(), 8935 LD->isNonTemporal(), LD->isInvariant(), 8936 std::min(4U, LD->getAlignment() / 2)); 8937 8938 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 8939 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 8940 DCI.RemoveFromWorklist(LD); 8941 DAG.DeleteNode(LD); 8942 return Result; 8943 } 8944 8945 return SDValue(); 8946 } 8947 8948 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for 8949 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 8950 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 8951 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 8952 SDValue Op0 = N->getOperand(0); 8953 SDValue Op1 = N->getOperand(1); 8954 if (Op0.getOpcode() == ISD::BITCAST) 8955 Op0 = Op0.getOperand(0); 8956 if (Op1.getOpcode() == ISD::BITCAST) 8957 Op1 = Op1.getOperand(0); 8958 if (Op0.getOpcode() == ARMISD::VMOVRRD && 8959 Op0.getNode() == Op1.getNode() && 8960 Op0.getResNo() == 0 && Op1.getResNo() == 1) 8961 return DAG.getNode(ISD::BITCAST, SDLoc(N), 8962 N->getValueType(0), Op0.getOperand(0)); 8963 return SDValue(); 8964 } 8965 8966 /// PerformSTORECombine - Target-specific dag combine xforms for 8967 /// ISD::STORE. 
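/// For example, a truncating vector store can be rewritten as a shuffle that
/// packs the narrowed elements into the low lanes followed by a small number
/// of integer stores, and a store of a VMOVDRR pair is split into two i32
/// stores so NEON and ARM stores are not mixed on the same cache line.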
8968 static SDValue PerformSTORECombine(SDNode *N,
8969                                    TargetLowering::DAGCombinerInfo &DCI) {
8970   StoreSDNode *St = cast<StoreSDNode>(N);
8971   if (St->isVolatile())
8972     return SDValue();
8973 
8974   // Optimize trunc store (of multiple scalars) to shuffle and store. First,
8975   // pack all of the elements in one place. Next, store to memory in fewer
8976   // chunks.
8977   SDValue StVal = St->getValue();
8978   EVT VT = StVal.getValueType();
8979   if (St->isTruncatingStore() && VT.isVector()) {
8980     SelectionDAG &DAG = DCI.DAG;
8981     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8982     EVT StVT = St->getMemoryVT();
8983     unsigned NumElems = VT.getVectorNumElements();
8984     assert(StVT != VT && "Cannot truncate to the same type");
8985     unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
8986     unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
8987 
8988     // From/To element sizes and the element count must all be powers of two.
8989     if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
8990 
8991     // We are going to use the original vector elements for storing.
8992     // The accumulated smaller vector elements must be a multiple of the store size.
8993     if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
8994 
8995     unsigned SizeRatio = FromEltSz / ToEltSz;
8996     assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
8997 
8998     // Create a type on which we perform the shuffle.
8999     EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
9000                                      NumElems*SizeRatio);
9001     assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
9002 
9003     SDLoc DL(St);
9004     SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
9005     SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
9006     for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
9007 
9008     // Can't shuffle using an illegal type.
9009     if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
9010 
9011     SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
9012                                 DAG.getUNDEF(WideVec.getValueType()),
9013                                 ShuffleVec.data());
9014     // At this point all of the data is stored at the bottom of the
9015     // register. We now need to save it to memory.
9016 
9017     // Find the largest store unit.
9018     MVT StoreType = MVT::i8;
9019     for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
9020          tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
9021       MVT Tp = (MVT::SimpleValueType)tp;
9022       if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
9023         StoreType = Tp;
9024     }
9025     // Didn't find a legal store type.
9026     if (!TLI.isTypeLegal(StoreType))
9027       return SDValue();
9028 
9029     // Bitcast the original vector into a vector of store-size units.
9030     EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
9031             StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
9032     assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
9033     SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
9034     SmallVector<SDValue, 8> Chains;
9035     SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
9036                                         TLI.getPointerTy());
9037     SDValue BasePtr = St->getBasePtr();
9038 
9039     // Perform one or more big stores into memory.
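    // E.g. (hypothetical) truncating a <8 x i16> value for a <8 x i8> store
    // packs the bytes into the low half of the shuffled vector; i32 is then
    // the widest legal store unit, giving two i32 stores below.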
9040 unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); 9041 for (unsigned I = 0; I < E; I++) { 9042 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 9043 StoreType, ShuffWide, 9044 DAG.getIntPtrConstant(I)); 9045 SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, 9046 St->getPointerInfo(), St->isVolatile(), 9047 St->isNonTemporal(), St->getAlignment()); 9048 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 9049 Increment); 9050 Chains.push_back(Ch); 9051 } 9052 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], 9053 Chains.size()); 9054 } 9055 9056 if (!ISD::isNormalStore(St)) 9057 return SDValue(); 9058 9059 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and 9060 // ARM stores of arguments in the same cache line. 9061 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 9062 StVal.getNode()->hasOneUse()) { 9063 SelectionDAG &DAG = DCI.DAG; 9064 SDLoc DL(St); 9065 SDValue BasePtr = St->getBasePtr(); 9066 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 9067 StVal.getNode()->getOperand(0), BasePtr, 9068 St->getPointerInfo(), St->isVolatile(), 9069 St->isNonTemporal(), St->getAlignment()); 9070 9071 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 9072 DAG.getConstant(4, MVT::i32)); 9073 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 9074 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 9075 St->isNonTemporal(), 9076 std::min(4U, St->getAlignment() / 2)); 9077 } 9078 9079 if (StVal.getValueType() != MVT::i64 || 9080 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 9081 return SDValue(); 9082 9083 // Bitcast an i64 store extracted from a vector to f64. 9084 // Otherwise, the i64 value will be legalized to a pair of i32 values. 9085 SelectionDAG &DAG = DCI.DAG; 9086 SDLoc dl(StVal); 9087 SDValue IntVec = StVal.getOperand(0); 9088 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 9089 IntVec.getValueType().getVectorNumElements()); 9090 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 9091 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 9092 Vec, StVal.getOperand(1)); 9093 dl = SDLoc(N); 9094 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 9095 // Make the DAGCombiner fold the bitcasts. 9096 DCI.AddToWorklist(Vec.getNode()); 9097 DCI.AddToWorklist(ExtElt.getNode()); 9098 DCI.AddToWorklist(V.getNode()); 9099 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 9100 St->getPointerInfo(), St->isVolatile(), 9101 St->isNonTemporal(), St->getAlignment(), 9102 St->getTBAAInfo()); 9103 } 9104 9105 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 9106 /// are normal, non-volatile loads. If so, it is profitable to bitcast an 9107 /// i64 vector to have f64 elements, since the value can then be loaded 9108 /// directly into a VFP register. 9109 static bool hasNormalLoadOperand(SDNode *N) { 9110 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 9111 for (unsigned i = 0; i < NumElts; ++i) { 9112 SDNode *Elt = N->getOperand(i).getNode(); 9113 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 9114 return true; 9115 } 9116 return false; 9117 } 9118 9119 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 9120 /// ISD::BUILD_VECTOR. 
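/// For example, a v2i64 built from two non-volatile i64 loads is rewritten to
/// load the elements as f64 values, so type legalization does not split each
/// i64 into a pair of i32 values.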
9121 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
9122                                           TargetLowering::DAGCombinerInfo &DCI) {
9123   // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
9124   // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
9125   // into a pair of GPRs, which is fine when the value is used as a scalar,
9126   // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
9127   SelectionDAG &DAG = DCI.DAG;
9128   if (N->getNumOperands() == 2) {
9129     SDValue RV = PerformVMOVDRRCombine(N, DAG);
9130     if (RV.getNode())
9131       return RV;
9132   }
9133 
9134   // Load i64 elements as f64 values so that type legalization does not split
9135   // them up into i32 values.
9136   EVT VT = N->getValueType(0);
9137   if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
9138     return SDValue();
9139   SDLoc dl(N);
9140   SmallVector<SDValue, 8> Ops;
9141   unsigned NumElts = VT.getVectorNumElements();
9142   for (unsigned i = 0; i < NumElts; ++i) {
9143     SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
9144     Ops.push_back(V);
9145     // Make the DAGCombiner fold the bitcast.
9146     DCI.AddToWorklist(V.getNode());
9147   }
9148   EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
9149   SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
9150   return DAG.getNode(ISD::BITCAST, dl, VT, BV);
9151 }
9152 
9153 /// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
9154 static SDValue
9155 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
9156   // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
9157   // At that time, we may have inserted bitcasts from integer to float.
9158   // If these bitcasts have survived DAGCombine, change the lowering of this
9159   // BUILD_VECTOR into something more vector friendly, i.e., something that
9160   // does not force the use of floating point types.
9161 
9162   // Make sure we can change the type of the vector.
9163   // This is possible iff:
9164   // 1. The vector is only used in a bitcast to an integer type. I.e.,
9165   //    1.1. Vector is used only once.
9166   //    1.2. Use is a bit convert to an integer type.
9167   // 2. Its operands are 32 bits in size (64-bit operands are not legal).
9168   EVT VT = N->getValueType(0);
9169   EVT EltVT = VT.getVectorElementType();
9170 
9171   // Check 1.1. and 2.
9172   if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
9173     return SDValue();
9174 
9175   // By construction, the input type must be float.
9176   assert(EltVT == MVT::f32 && "Unexpected type!");
9177 
9178   // Check 1.2.
9179   SDNode *Use = *N->use_begin();
9180   if (Use->getOpcode() != ISD::BITCAST ||
9181       Use->getValueType(0).isFloatingPoint())
9182     return SDValue();
9183 
9184   // Check profitability.
9185   // The model is: if more than half of the relevant operands are bitcast
9186   // from i32, turn the build_vector into a sequence of insert_vector_elt.
9187   // Relevant operands are everything that is not statically
9188   // (i.e., at compile time) bitcasted.
9189   unsigned NumOfBitCastedElts = 0;
9190   unsigned NumElts = VT.getVectorNumElements();
9191   unsigned NumOfRelevantElts = NumElts;
9192   for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
9193     SDValue Elt = N->getOperand(Idx);
9194     if (Elt->getOpcode() == ISD::BITCAST) {
9195       // Assume only bitcasts from i32 will fold away.
9196 if (Elt->getOperand(0).getValueType() == MVT::i32) 9197 ++NumOfBitCastedElts; 9198 } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt)) 9199 // Constants are statically casted, thus do not count them as 9200 // relevant operands. 9201 --NumOfRelevantElts; 9202 } 9203 9204 // Check if more than half of the elements require a non-free bitcast. 9205 if (NumOfBitCastedElts <= NumOfRelevantElts / 2) 9206 return SDValue(); 9207 9208 SelectionDAG &DAG = DCI.DAG; 9209 // Create the new vector type. 9210 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 9211 // Check if the type is legal. 9212 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9213 if (!TLI.isTypeLegal(VecVT)) 9214 return SDValue(); 9215 9216 // Combine: 9217 // ARMISD::BUILD_VECTOR E1, E2, ..., EN. 9218 // => BITCAST INSERT_VECTOR_ELT 9219 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), 9220 // (BITCAST EN), N. 9221 SDValue Vec = DAG.getUNDEF(VecVT); 9222 SDLoc dl(N); 9223 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { 9224 SDValue V = N->getOperand(Idx); 9225 if (V.getOpcode() == ISD::UNDEF) 9226 continue; 9227 if (V.getOpcode() == ISD::BITCAST && 9228 V->getOperand(0).getValueType() == MVT::i32) 9229 // Fold obvious case. 9230 V = V.getOperand(0); 9231 else { 9232 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); 9233 // Make the DAGCombiner fold the bitcasts. 9234 DCI.AddToWorklist(V.getNode()); 9235 } 9236 SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32); 9237 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); 9238 } 9239 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); 9240 // Make the DAGCombiner fold the bitcasts. 9241 DCI.AddToWorklist(Vec.getNode()); 9242 return Vec; 9243 } 9244 9245 /// PerformInsertEltCombine - Target-specific dag combine xforms for 9246 /// ISD::INSERT_VECTOR_ELT. 9247 static SDValue PerformInsertEltCombine(SDNode *N, 9248 TargetLowering::DAGCombinerInfo &DCI) { 9249 // Bitcast an i64 load inserted into a vector to f64. 9250 // Otherwise, the i64 value will be legalized to a pair of i32 values. 9251 EVT VT = N->getValueType(0); 9252 SDNode *Elt = N->getOperand(1).getNode(); 9253 if (VT.getVectorElementType() != MVT::i64 || 9254 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 9255 return SDValue(); 9256 9257 SelectionDAG &DAG = DCI.DAG; 9258 SDLoc dl(N); 9259 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 9260 VT.getVectorNumElements()); 9261 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 9262 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 9263 // Make the DAGCombiner fold the bitcasts. 9264 DCI.AddToWorklist(Vec.getNode()); 9265 DCI.AddToWorklist(V.getNode()); 9266 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 9267 Vec, V, N->getOperand(2)); 9268 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 9269 } 9270 9271 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 9272 /// ISD::VECTOR_SHUFFLE. 9273 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 9274 // The LLVM shufflevector instruction does not require the shuffle mask 9275 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 9276 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 9277 // operands do not match the mask length, they are extended by concatenating 9278 // them with undef vectors. 
That is probably the right thing for other 9279 // targets, but for NEON it is better to concatenate two double-register 9280 // size vector operands into a single quad-register size vector. Do that 9281 // transformation here: 9282 // shuffle(concat(v1, undef), concat(v2, undef)) -> 9283 // shuffle(concat(v1, v2), undef) 9284 SDValue Op0 = N->getOperand(0); 9285 SDValue Op1 = N->getOperand(1); 9286 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 9287 Op1.getOpcode() != ISD::CONCAT_VECTORS || 9288 Op0.getNumOperands() != 2 || 9289 Op1.getNumOperands() != 2) 9290 return SDValue(); 9291 SDValue Concat0Op1 = Op0.getOperand(1); 9292 SDValue Concat1Op1 = Op1.getOperand(1); 9293 if (Concat0Op1.getOpcode() != ISD::UNDEF || 9294 Concat1Op1.getOpcode() != ISD::UNDEF) 9295 return SDValue(); 9296 // Skip the transformation if any of the types are illegal. 9297 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9298 EVT VT = N->getValueType(0); 9299 if (!TLI.isTypeLegal(VT) || 9300 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 9301 !TLI.isTypeLegal(Concat1Op1.getValueType())) 9302 return SDValue(); 9303 9304 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 9305 Op0.getOperand(0), Op1.getOperand(0)); 9306 // Translate the shuffle mask. 9307 SmallVector<int, 16> NewMask; 9308 unsigned NumElts = VT.getVectorNumElements(); 9309 unsigned HalfElts = NumElts/2; 9310 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 9311 for (unsigned n = 0; n < NumElts; ++n) { 9312 int MaskElt = SVN->getMaskElt(n); 9313 int NewElt = -1; 9314 if (MaskElt < (int)HalfElts) 9315 NewElt = MaskElt; 9316 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 9317 NewElt = HalfElts + MaskElt - NumElts; 9318 NewMask.push_back(NewElt); 9319 } 9320 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, 9321 DAG.getUNDEF(VT), NewMask.data()); 9322 } 9323 9324 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 9325 /// NEON load/store intrinsics to merge base address updates. 9326 static SDValue CombineBaseUpdate(SDNode *N, 9327 TargetLowering::DAGCombinerInfo &DCI) { 9328 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 9329 return SDValue(); 9330 9331 SelectionDAG &DAG = DCI.DAG; 9332 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 9333 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 9334 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 9335 SDValue Addr = N->getOperand(AddrOpIdx); 9336 9337 // Search for a use of the address operand that is an increment. 9338 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 9339 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 9340 SDNode *User = *UI; 9341 if (User->getOpcode() != ISD::ADD || 9342 UI.getUse().getResNo() != Addr.getResNo()) 9343 continue; 9344 9345 // Check that the add is independent of the load/store. Otherwise, folding 9346 // it would create a cycle. 9347 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 9348 continue; 9349 9350 // Find the new opcode for the updating load/store. 
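    // E.g. arm_neon_vld1 becomes VLD1_UPD and arm_neon_vst2lane becomes
    // VST2LN_UPD; each _UPD form also produces the incremented base address
    // as an extra result.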
9351 bool isLoad = true; 9352 bool isLaneOp = false; 9353 unsigned NewOpc = 0; 9354 unsigned NumVecs = 0; 9355 if (isIntrinsic) { 9356 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 9357 switch (IntNo) { 9358 default: llvm_unreachable("unexpected intrinsic for Neon base update"); 9359 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 9360 NumVecs = 1; break; 9361 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 9362 NumVecs = 2; break; 9363 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 9364 NumVecs = 3; break; 9365 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 9366 NumVecs = 4; break; 9367 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 9368 NumVecs = 2; isLaneOp = true; break; 9369 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 9370 NumVecs = 3; isLaneOp = true; break; 9371 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 9372 NumVecs = 4; isLaneOp = true; break; 9373 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 9374 NumVecs = 1; isLoad = false; break; 9375 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 9376 NumVecs = 2; isLoad = false; break; 9377 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 9378 NumVecs = 3; isLoad = false; break; 9379 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 9380 NumVecs = 4; isLoad = false; break; 9381 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 9382 NumVecs = 2; isLoad = false; isLaneOp = true; break; 9383 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 9384 NumVecs = 3; isLoad = false; isLaneOp = true; break; 9385 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 9386 NumVecs = 4; isLoad = false; isLaneOp = true; break; 9387 } 9388 } else { 9389 isLaneOp = true; 9390 switch (N->getOpcode()) { 9391 default: llvm_unreachable("unexpected opcode for Neon base update"); 9392 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 9393 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 9394 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 9395 } 9396 } 9397 9398 // Find the size of memory referenced by the load/store. 9399 EVT VecTy; 9400 if (isLoad) 9401 VecTy = N->getValueType(0); 9402 else 9403 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 9404 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 9405 if (isLaneOp) 9406 NumBytes /= VecTy.getVectorNumElements(); 9407 9408 // If the increment is a constant, it must match the memory ref size. 9409 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 9410 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 9411 uint64_t IncVal = CInc->getZExtValue(); 9412 if (IncVal != NumBytes) 9413 continue; 9414 } else if (NumBytes >= 3 * 16) { 9415 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 9416 // separate instructions that make it harder to use a non-constant update. 9417 continue; 9418 } 9419 9420 // Create the new updating load/store node. 9421 EVT Tys[6]; 9422 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 9423 unsigned n; 9424 for (n = 0; n < NumResultVecs; ++n) 9425 Tys[n] = VecTy; 9426 Tys[n++] = MVT::i32; 9427 Tys[n] = MVT::Other; 9428 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 9429 SmallVector<SDValue, 8> Ops; 9430 Ops.push_back(N->getOperand(0)); // incoming chain 9431 Ops.push_back(N->getOperand(AddrOpIdx)); 9432 Ops.push_back(Inc); 9433 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 9434 Ops.push_back(N->getOperand(i)); 9435 } 9436 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 9437 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, 9438 Ops.data(), Ops.size(), 9439 MemInt->getMemoryVT(), 9440 MemInt->getMemOperand()); 9441 9442 // Update the uses. 9443 std::vector<SDValue> NewResults; 9444 for (unsigned i = 0; i < NumResultVecs; ++i) { 9445 NewResults.push_back(SDValue(UpdN.getNode(), i)); 9446 } 9447 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 9448 DCI.CombineTo(N, NewResults); 9449 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 9450 9451 break; 9452 } 9453 return SDValue(); 9454 } 9455 9456 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 9457 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 9458 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 9459 /// return true. 9460 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 9461 SelectionDAG &DAG = DCI.DAG; 9462 EVT VT = N->getValueType(0); 9463 // vldN-dup instructions only support 64-bit vectors for N > 1. 9464 if (!VT.is64BitVector()) 9465 return false; 9466 9467 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 9468 SDNode *VLD = N->getOperand(0).getNode(); 9469 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 9470 return false; 9471 unsigned NumVecs = 0; 9472 unsigned NewOpc = 0; 9473 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 9474 if (IntNo == Intrinsic::arm_neon_vld2lane) { 9475 NumVecs = 2; 9476 NewOpc = ARMISD::VLD2DUP; 9477 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 9478 NumVecs = 3; 9479 NewOpc = ARMISD::VLD3DUP; 9480 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 9481 NumVecs = 4; 9482 NewOpc = ARMISD::VLD4DUP; 9483 } else { 9484 return false; 9485 } 9486 9487 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 9488 // numbers match the load. 9489 unsigned VLDLaneNo = 9490 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 9491 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 9492 UI != UE; ++UI) { 9493 // Ignore uses of the chain result. 9494 if (UI.getUse().getResNo() == NumVecs) 9495 continue; 9496 SDNode *User = *UI; 9497 if (User->getOpcode() != ARMISD::VDUPLANE || 9498 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 9499 return false; 9500 } 9501 9502 // Create the vldN-dup node. 9503 EVT Tys[5]; 9504 unsigned n; 9505 for (n = 0; n < NumVecs; ++n) 9506 Tys[n] = VT; 9507 Tys[n] = MVT::Other; 9508 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 9509 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 9510 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 9511 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, 9512 Ops, 2, VLDMemInt->getMemoryVT(), 9513 VLDMemInt->getMemOperand()); 9514 9515 // Update the uses. 
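  // Each VDUPLANE user of vector result ResNo is rewired to result ResNo of
  // the new node, which already duplicates the requested lane.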
9516   for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
9517        UI != UE; ++UI) {
9518     unsigned ResNo = UI.getUse().getResNo();
9519     // Ignore uses of the chain result.
9520     if (ResNo == NumVecs)
9521       continue;
9522     SDNode *User = *UI;
9523     DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
9524   }
9525 
9526   // Now the vldN-lane intrinsic is dead except for its chain result.
9527   // Update uses of the chain.
9528   std::vector<SDValue> VLDDupResults;
9529   for (unsigned n = 0; n < NumVecs; ++n)
9530     VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
9531   VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
9532   DCI.CombineTo(VLD, VLDDupResults);
9533 
9534   return true;
9535 }
9536 
9537 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
9538 /// ARMISD::VDUPLANE.
9539 static SDValue PerformVDUPLANECombine(SDNode *N,
9540                                       TargetLowering::DAGCombinerInfo &DCI) {
9541   SDValue Op = N->getOperand(0);
9542 
9543   // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
9544   // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
9545   if (CombineVLDDUP(N, DCI))
9546     return SDValue(N, 0);
9547 
9548   // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
9549   // redundant. Ignore bit_converts for now; element sizes are checked below.
9550   while (Op.getOpcode() == ISD::BITCAST)
9551     Op = Op.getOperand(0);
9552   if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
9553     return SDValue();
9554 
9555   // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
9556   unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
9557   // The canonical VMOV for a zero vector uses a 32-bit element size.
9558   unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9559   unsigned EltBits;
9560   if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
9561     EltSize = 8;
9562   EVT VT = N->getValueType(0);
9563   if (EltSize > VT.getVectorElementType().getSizeInBits())
9564     return SDValue();
9565 
9566   return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
9567 }
9568 
9569 // isConstVecPow2 - Return true if each vector element is a power of 2, all
9570 // elements are the same constant, C, and Log2(C) ranges from 1 to 32.
9571 static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
9572 {
9573   integerPart cN;
9574   integerPart c0 = 0;
9575   for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
9576        I != E; I++) {
9577     ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
9578     if (!CFP)
9579       return false;
9580 
9581     bool isExact;
9582     APFloat APF = CFP->getValueAPF();
9583     if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
9584         != APFloat::opOK || !isExact)
9585       return false;
9586 
9587     c0 = (I == 0) ? cN : c0;
9588     if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
9589       return false;
9590   }
9591   C = c0;
9592   return true;
9593 }
9594 
9595 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
9596 /// can replace combinations of VMUL and VCVT (floating-point to integer)
9597 /// when the VMUL has a constant operand that is a power of 2.
9598 /// 9599 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 9600 /// vmul.f32 d16, d17, d16 9601 /// vcvt.s32.f32 d16, d16 9602 /// becomes: 9603 /// vcvt.s32.f32 d16, d16, #3 9604 static SDValue PerformVCVTCombine(SDNode *N, 9605 TargetLowering::DAGCombinerInfo &DCI, 9606 const ARMSubtarget *Subtarget) { 9607 SelectionDAG &DAG = DCI.DAG; 9608 SDValue Op = N->getOperand(0); 9609 9610 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 9611 Op.getOpcode() != ISD::FMUL) 9612 return SDValue(); 9613 9614 uint64_t C; 9615 SDValue N0 = Op->getOperand(0); 9616 SDValue ConstVec = Op->getOperand(1); 9617 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 9618 9619 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 9620 !isConstVecPow2(ConstVec, isSigned, C)) 9621 return SDValue(); 9622 9623 MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); 9624 MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); 9625 if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { 9626 // These instructions only exist converting from f32 to i32. We can handle 9627 // smaller integers by generating an extra truncate, but larger ones would 9628 // be lossy. 9629 return SDValue(); 9630 } 9631 9632 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : 9633 Intrinsic::arm_neon_vcvtfp2fxu; 9634 unsigned NumLanes = Op.getValueType().getVectorNumElements(); 9635 SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), 9636 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, 9637 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, 9638 DAG.getConstant(Log2_64(C), MVT::i32)); 9639 9640 if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) 9641 FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv); 9642 9643 return FixConv; 9644 } 9645 9646 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 9647 /// can replace combinations of VCVT (integer to floating-point) and VDIV 9648 /// when the VDIV has a constant operand that is a power of 2. 9649 /// 9650 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 9651 /// vcvt.f32.s32 d16, d16 9652 /// vdiv.f32 d16, d17, d16 9653 /// becomes: 9654 /// vcvt.f32.s32 d16, d16, #3 9655 static SDValue PerformVDIVCombine(SDNode *N, 9656 TargetLowering::DAGCombinerInfo &DCI, 9657 const ARMSubtarget *Subtarget) { 9658 SelectionDAG &DAG = DCI.DAG; 9659 SDValue Op = N->getOperand(0); 9660 unsigned OpOpcode = Op.getNode()->getOpcode(); 9661 9662 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 9663 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 9664 return SDValue(); 9665 9666 uint64_t C; 9667 SDValue ConstVec = N->getOperand(1); 9668 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 9669 9670 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 9671 !isConstVecPow2(ConstVec, isSigned, C)) 9672 return SDValue(); 9673 9674 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); 9675 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); 9676 if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { 9677 // These instructions only exist converting from i32 to f32. We can handle 9678 // smaller integers by generating an extra extend, but larger ones would 9679 // be lossy. 9680 return SDValue(); 9681 } 9682 9683 SDValue ConvInput = Op.getOperand(0); 9684 unsigned NumLanes = Op.getValueType().getVectorNumElements(); 9685 if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) 9686 ConvInput = DAG.getNode(isSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
9687                             SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
9688                             ConvInput);
9689 
9690   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
9691     Intrinsic::arm_neon_vcvtfxu2fp;
9692   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
9693                      Op.getValueType(),
9694                      DAG.getConstant(IntrinsicOpcode, MVT::i32),
9695                      ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));
9696 }
9697 
9698 /// getVShiftImm - Check if this is a valid build_vector for the immediate
9699 /// operand of a vector shift operation, where all the elements of the
9700 /// build_vector must have the same constant integer value.
9701 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
9702   // Ignore bit_converts.
9703   while (Op.getOpcode() == ISD::BITCAST)
9704     Op = Op.getOperand(0);
9705   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9706   APInt SplatBits, SplatUndef;
9707   unsigned SplatBitSize;
9708   bool HasAnyUndefs;
9709   if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
9710                                     HasAnyUndefs, ElementBits) ||
9711       SplatBitSize > ElementBits)
9712     return false;
9713   Cnt = SplatBits.getSExtValue();
9714   return true;
9715 }
9716 
9717 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
9718 /// operand of a vector shift left operation. That value must be in the range:
9719 ///   0 <= Value < ElementBits for a left shift; or
9720 ///   0 <= Value <= ElementBits for a long left shift.
9721 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
9722   assert(VT.isVector() && "vector shift count is not a vector type");
9723   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
9724   if (!getVShiftImm(Op, ElementBits, Cnt))
9725     return false;
9726   return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
9727 }
9728 
9729 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
9730 /// operand of a vector shift right operation. For a shift opcode, the value
9731 /// is positive, but for an intrinsic the value must be negative. The
9732 /// absolute value must be in the range:
9733 ///   1 <= |Value| <= ElementBits for a right shift; or
9734 ///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
9735 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
9736                          int64_t &Cnt) {
9737   assert(VT.isVector() && "vector shift count is not a vector type");
9738   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
9739   if (!getVShiftImm(Op, ElementBits, Cnt))
9740     return false;
9741   if (isIntrinsic)
9742     Cnt = -Cnt;
9743   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
9744 }
9745 
9746 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
9747 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
9748   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9749   switch (IntNo) {
9750   default:
9751     // Don't do anything for most intrinsics.
9752     break;
9753 
9754   // Vector shifts: check for immediate versions and lower them.
9755   // Note: This is done during DAG combining instead of DAG legalizing because
9756   // the build_vectors for 64-bit vector element shift counts are generally
9757   // not legal, and it is hard to see their values after they get legalized to
9758   // loads from a constant pool.
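  // E.g. (arm_neon_vshifts X, <splat 3>) becomes (ARMISD::VSHL X, 3), while a
  // splat of -3 requests an immediate right shift and becomes
  // (ARMISD::VSHRs X, 3).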
9759 case Intrinsic::arm_neon_vshifts: 9760 case Intrinsic::arm_neon_vshiftu: 9761 case Intrinsic::arm_neon_vshiftls: 9762 case Intrinsic::arm_neon_vshiftlu: 9763 case Intrinsic::arm_neon_vshiftn: 9764 case Intrinsic::arm_neon_vrshifts: 9765 case Intrinsic::arm_neon_vrshiftu: 9766 case Intrinsic::arm_neon_vrshiftn: 9767 case Intrinsic::arm_neon_vqshifts: 9768 case Intrinsic::arm_neon_vqshiftu: 9769 case Intrinsic::arm_neon_vqshiftsu: 9770 case Intrinsic::arm_neon_vqshiftns: 9771 case Intrinsic::arm_neon_vqshiftnu: 9772 case Intrinsic::arm_neon_vqshiftnsu: 9773 case Intrinsic::arm_neon_vqrshiftns: 9774 case Intrinsic::arm_neon_vqrshiftnu: 9775 case Intrinsic::arm_neon_vqrshiftnsu: { 9776 EVT VT = N->getOperand(1).getValueType(); 9777 int64_t Cnt; 9778 unsigned VShiftOpc = 0; 9779 9780 switch (IntNo) { 9781 case Intrinsic::arm_neon_vshifts: 9782 case Intrinsic::arm_neon_vshiftu: 9783 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 9784 VShiftOpc = ARMISD::VSHL; 9785 break; 9786 } 9787 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 9788 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 9789 ARMISD::VSHRs : ARMISD::VSHRu); 9790 break; 9791 } 9792 return SDValue(); 9793 9794 case Intrinsic::arm_neon_vshiftls: 9795 case Intrinsic::arm_neon_vshiftlu: 9796 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 9797 break; 9798 llvm_unreachable("invalid shift count for vshll intrinsic"); 9799 9800 case Intrinsic::arm_neon_vrshifts: 9801 case Intrinsic::arm_neon_vrshiftu: 9802 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 9803 break; 9804 return SDValue(); 9805 9806 case Intrinsic::arm_neon_vqshifts: 9807 case Intrinsic::arm_neon_vqshiftu: 9808 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9809 break; 9810 return SDValue(); 9811 9812 case Intrinsic::arm_neon_vqshiftsu: 9813 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9814 break; 9815 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 9816 9817 case Intrinsic::arm_neon_vshiftn: 9818 case Intrinsic::arm_neon_vrshiftn: 9819 case Intrinsic::arm_neon_vqshiftns: 9820 case Intrinsic::arm_neon_vqshiftnu: 9821 case Intrinsic::arm_neon_vqshiftnsu: 9822 case Intrinsic::arm_neon_vqrshiftns: 9823 case Intrinsic::arm_neon_vqrshiftnu: 9824 case Intrinsic::arm_neon_vqrshiftnsu: 9825 // Narrowing shifts require an immediate right shift. 9826 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 9827 break; 9828 llvm_unreachable("invalid shift count for narrowing vector shift " 9829 "intrinsic"); 9830 9831 default: 9832 llvm_unreachable("unhandled vector shift"); 9833 } 9834 9835 switch (IntNo) { 9836 case Intrinsic::arm_neon_vshifts: 9837 case Intrinsic::arm_neon_vshiftu: 9838 // Opcode already set above. 9839 break; 9840 case Intrinsic::arm_neon_vshiftls: 9841 case Intrinsic::arm_neon_vshiftlu: 9842 if (Cnt == VT.getVectorElementType().getSizeInBits()) 9843 VShiftOpc = ARMISD::VSHLLi; 9844 else 9845 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
9846 ARMISD::VSHLLs : ARMISD::VSHLLu); 9847 break; 9848 case Intrinsic::arm_neon_vshiftn: 9849 VShiftOpc = ARMISD::VSHRN; break; 9850 case Intrinsic::arm_neon_vrshifts: 9851 VShiftOpc = ARMISD::VRSHRs; break; 9852 case Intrinsic::arm_neon_vrshiftu: 9853 VShiftOpc = ARMISD::VRSHRu; break; 9854 case Intrinsic::arm_neon_vrshiftn: 9855 VShiftOpc = ARMISD::VRSHRN; break; 9856 case Intrinsic::arm_neon_vqshifts: 9857 VShiftOpc = ARMISD::VQSHLs; break; 9858 case Intrinsic::arm_neon_vqshiftu: 9859 VShiftOpc = ARMISD::VQSHLu; break; 9860 case Intrinsic::arm_neon_vqshiftsu: 9861 VShiftOpc = ARMISD::VQSHLsu; break; 9862 case Intrinsic::arm_neon_vqshiftns: 9863 VShiftOpc = ARMISD::VQSHRNs; break; 9864 case Intrinsic::arm_neon_vqshiftnu: 9865 VShiftOpc = ARMISD::VQSHRNu; break; 9866 case Intrinsic::arm_neon_vqshiftnsu: 9867 VShiftOpc = ARMISD::VQSHRNsu; break; 9868 case Intrinsic::arm_neon_vqrshiftns: 9869 VShiftOpc = ARMISD::VQRSHRNs; break; 9870 case Intrinsic::arm_neon_vqrshiftnu: 9871 VShiftOpc = ARMISD::VQRSHRNu; break; 9872 case Intrinsic::arm_neon_vqrshiftnsu: 9873 VShiftOpc = ARMISD::VQRSHRNsu; break; 9874 } 9875 9876 return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), 9877 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 9878 } 9879 9880 case Intrinsic::arm_neon_vshiftins: { 9881 EVT VT = N->getOperand(1).getValueType(); 9882 int64_t Cnt; 9883 unsigned VShiftOpc = 0; 9884 9885 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 9886 VShiftOpc = ARMISD::VSLI; 9887 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 9888 VShiftOpc = ARMISD::VSRI; 9889 else { 9890 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 9891 } 9892 9893 return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), 9894 N->getOperand(1), N->getOperand(2), 9895 DAG.getConstant(Cnt, MVT::i32)); 9896 } 9897 9898 case Intrinsic::arm_neon_vqrshifts: 9899 case Intrinsic::arm_neon_vqrshiftu: 9900 // No immediate versions of these to check for. 9901 break; 9902 } 9903 9904 return SDValue(); 9905 } 9906 9907 /// PerformShiftCombine - Checks for immediate versions of vector shifts and 9908 /// lowers them. As with the vector shift intrinsics, this is done during DAG 9909 /// combining instead of DAG legalizing because the build_vectors for 64-bit 9910 /// vector element shift counts are generally not legal, and it is hard to see 9911 /// their values after they get legalized to loads from a constant pool. 9912 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 9913 const ARMSubtarget *ST) { 9914 EVT VT = N->getValueType(0); 9915 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { 9916 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 9917 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 9918 SDValue N1 = N->getOperand(1); 9919 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 9920 SDValue N0 = N->getOperand(0); 9921 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && 9922 DAG.MaskedValueIsZero(N0.getOperand(0), 9923 APInt::getHighBitsSet(32, 16))) 9924 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); 9925 } 9926 } 9927 9928 // Nothing to be done for scalar shifts. 
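  // For legal vector types, e.g. (srl v8i16:X, <splat 5>) becomes
  // (ARMISD::VSHRu X, 5) below.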
9929 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9930 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 9931 return SDValue(); 9932 9933 assert(ST->hasNEON() && "unexpected vector shift"); 9934 int64_t Cnt; 9935 9936 switch (N->getOpcode()) { 9937 default: llvm_unreachable("unexpected shift opcode"); 9938 9939 case ISD::SHL: 9940 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 9941 return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0), 9942 DAG.getConstant(Cnt, MVT::i32)); 9943 break; 9944 9945 case ISD::SRA: 9946 case ISD::SRL: 9947 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 9948 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 9949 ARMISD::VSHRs : ARMISD::VSHRu); 9950 return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0), 9951 DAG.getConstant(Cnt, MVT::i32)); 9952 } 9953 } 9954 return SDValue(); 9955 } 9956 9957 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 9958 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 9959 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 9960 const ARMSubtarget *ST) { 9961 SDValue N0 = N->getOperand(0); 9962 9963 // Check for sign- and zero-extensions of vector extract operations of 8- 9964 // and 16-bit vector elements. NEON supports these directly. They are 9965 // handled during DAG combining because type legalization will promote them 9966 // to 32-bit types and it is messy to recognize the operations after that. 9967 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 9968 SDValue Vec = N0.getOperand(0); 9969 SDValue Lane = N0.getOperand(1); 9970 EVT VT = N->getValueType(0); 9971 EVT EltVT = N0.getValueType(); 9972 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9973 9974 if (VT == MVT::i32 && 9975 (EltVT == MVT::i8 || EltVT == MVT::i16) && 9976 TLI.isTypeLegal(Vec.getValueType()) && 9977 isa<ConstantSDNode>(Lane)) { 9978 9979 unsigned Opc = 0; 9980 switch (N->getOpcode()) { 9981 default: llvm_unreachable("unexpected opcode"); 9982 case ISD::SIGN_EXTEND: 9983 Opc = ARMISD::VGETLANEs; 9984 break; 9985 case ISD::ZERO_EXTEND: 9986 case ISD::ANY_EXTEND: 9987 Opc = ARMISD::VGETLANEu; 9988 break; 9989 } 9990 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); 9991 } 9992 } 9993 9994 return SDValue(); 9995 } 9996 9997 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 9998 /// to match f32 max/min patterns to use NEON vmax/vmin instructions. 9999 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 10000 const ARMSubtarget *ST) { 10001 // If the target supports NEON, try to use vmax/vmin instructions for f32 10002 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 10003 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 10004 // a NaN; only do the transformation when it matches that behavior. 10005 10006 // For now only do this when using NEON for FP operations; if using VFP, it 10007 // is not obvious that the benefit outweighs the cost of switching to the 10008 // NEON pipeline. 
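  // E.g. (select_cc x, y, x, y, setolt) -> (ARMISD::FMIN x, y), i.e. vmin.f32,
  // but only when x is known never to be NaN: vmin(NaN, y) is NaN, whereas the
  // select would return y.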
10009 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 10010 N->getValueType(0) != MVT::f32) 10011 return SDValue(); 10012 10013 SDValue CondLHS = N->getOperand(0); 10014 SDValue CondRHS = N->getOperand(1); 10015 SDValue LHS = N->getOperand(2); 10016 SDValue RHS = N->getOperand(3); 10017 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 10018 10019 unsigned Opcode = 0; 10020 bool IsReversed; 10021 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 10022 IsReversed = false; // x CC y ? x : y 10023 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 10024 IsReversed = true ; // x CC y ? y : x 10025 } else { 10026 return SDValue(); 10027 } 10028 10029 bool IsUnordered; 10030 switch (CC) { 10031 default: break; 10032 case ISD::SETOLT: 10033 case ISD::SETOLE: 10034 case ISD::SETLT: 10035 case ISD::SETLE: 10036 case ISD::SETULT: 10037 case ISD::SETULE: 10038 // If LHS is NaN, an ordered comparison will be false and the result will 10039 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 10040 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 10041 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 10042 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 10043 break; 10044 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 10045 // will return -0, so vmin can only be used for unsafe math or if one of 10046 // the operands is known to be nonzero. 10047 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 10048 !DAG.getTarget().Options.UnsafeFPMath && 10049 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 10050 break; 10051 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 10052 break; 10053 10054 case ISD::SETOGT: 10055 case ISD::SETOGE: 10056 case ISD::SETGT: 10057 case ISD::SETGE: 10058 case ISD::SETUGT: 10059 case ISD::SETUGE: 10060 // If LHS is NaN, an ordered comparison will be false and the result will 10061 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 10062 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 10063 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 10064 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 10065 break; 10066 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 10067 // will return +0, so vmax can only be used for unsafe math or if one of 10068 // the operands is known to be nonzero. 10069 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 10070 !DAG.getTarget().Options.UnsafeFPMath && 10071 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 10072 break; 10073 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 10074 break; 10075 } 10076 10077 if (!Opcode) 10078 return SDValue(); 10079 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); 10080 } 10081 10082 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 10083 SDValue 10084 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 10085 SDValue Cmp = N->getOperand(4); 10086 if (Cmp.getOpcode() != ARMISD::CMPZ) 10087 // Only looking at EQ and NE cases. 
10088 return SDValue(); 10089 10090 EVT VT = N->getValueType(0); 10091 SDLoc dl(N); 10092 SDValue LHS = Cmp.getOperand(0); 10093 SDValue RHS = Cmp.getOperand(1); 10094 SDValue FalseVal = N->getOperand(0); 10095 SDValue TrueVal = N->getOperand(1); 10096 SDValue ARMcc = N->getOperand(2); 10097 ARMCC::CondCodes CC = 10098 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 10099 10100 // Simplify 10101 // mov r1, r0 10102 // cmp r1, x 10103 // mov r0, y 10104 // moveq r0, x 10105 // to 10106 // cmp r0, x 10107 // movne r0, y 10108 // 10109 // mov r1, r0 10110 // cmp r1, x 10111 // mov r0, x 10112 // movne r0, y 10113 // to 10114 // cmp r0, x 10115 // movne r0, y 10116 /// FIXME: Turn this into a target neutral optimization? 10117 SDValue Res; 10118 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 10119 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 10120 N->getOperand(3), Cmp); 10121 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 10122 SDValue ARMcc; 10123 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 10124 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 10125 N->getOperand(3), NewCmp); 10126 } 10127 10128 if (Res.getNode()) { 10129 APInt KnownZero, KnownOne; 10130 DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); 10131 // Capture demanded bits information that would be otherwise lost. 10132 if (KnownZero == 0xfffffffe) 10133 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 10134 DAG.getValueType(MVT::i1)); 10135 else if (KnownZero == 0xffffff00) 10136 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 10137 DAG.getValueType(MVT::i8)); 10138 else if (KnownZero == 0xffff0000) 10139 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 10140 DAG.getValueType(MVT::i16)); 10141 } 10142 10143 return Res; 10144 } 10145 10146 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 10147 DAGCombinerInfo &DCI) const { 10148 switch (N->getOpcode()) { 10149 default: break; 10150 case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); 10151 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 10152 case ISD::SUB: return PerformSUBCombine(N, DCI); 10153 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 10154 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 10155 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); 10156 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); 10157 case ARMISD::BFI: return PerformBFICombine(N, DCI); 10158 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 10159 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 10160 case ISD::STORE: return PerformSTORECombine(N, DCI); 10161 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 10162 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 10163 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 10164 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 10165 case ISD::FP_TO_SINT: 10166 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 10167 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 10168 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 10169 case ISD::SHL: 10170 case ISD::SRA: 10171 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 10172 case ISD::SIGN_EXTEND: 10173 case ISD::ZERO_EXTEND: 10174 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 10175 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, 
Subtarget);
10176   case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
10177   case ARMISD::VLD2DUP:
10178   case ARMISD::VLD3DUP:
10179   case ARMISD::VLD4DUP:
10180     return CombineBaseUpdate(N, DCI);
10181   case ARMISD::BUILD_VECTOR:
10182     return PerformARMBUILD_VECTORCombine(N, DCI);
10183   case ISD::INTRINSIC_VOID:
10184   case ISD::INTRINSIC_W_CHAIN:
10185     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10186     case Intrinsic::arm_neon_vld1:
10187     case Intrinsic::arm_neon_vld2:
10188     case Intrinsic::arm_neon_vld3:
10189     case Intrinsic::arm_neon_vld4:
10190     case Intrinsic::arm_neon_vld2lane:
10191     case Intrinsic::arm_neon_vld3lane:
10192     case Intrinsic::arm_neon_vld4lane:
10193     case Intrinsic::arm_neon_vst1:
10194     case Intrinsic::arm_neon_vst2:
10195     case Intrinsic::arm_neon_vst3:
10196     case Intrinsic::arm_neon_vst4:
10197     case Intrinsic::arm_neon_vst2lane:
10198     case Intrinsic::arm_neon_vst3lane:
10199     case Intrinsic::arm_neon_vst4lane:
10200       return CombineBaseUpdate(N, DCI);
10201     default: break;
10202     }
10203     break;
10204   }
10205   return SDValue();
10206 }
10207 
10208 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
10209                                                           EVT VT) const {
10210   return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
10211 }
10212 
10213 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
10214   // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
10215   bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
10216 
10217   switch (VT.getSimpleVT().SimpleTy) {
10218   default:
10219     return false;
10220   case MVT::i8:
10221   case MVT::i16:
10222   case MVT::i32: {
10223     // Unaligned access can use (for example) LDRB, LDRH, LDR.
10224     if (AllowsUnaligned) {
10225       if (Fast)
10226         *Fast = Subtarget->hasV7Ops();
10227       return true;
10228     }
10229     return false;
10230   }
10231   case MVT::f64:
10232   case MVT::v2f64: {
10233     // For any little-endian targets with NEON, we can support unaligned ld/st
10234     // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
10235     // A big-endian target may also explicitly support unaligned accesses.
10236     if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
10237       if (Fast)
10238         *Fast = true;
10239       return true;
10240     }
10241     return false;
10242   }
10243   }
10244 }
10245 
10246 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
10247                        unsigned AlignCheck) {
10248   return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
10249           (DstAlign == 0 || DstAlign % AlignCheck == 0));
10250 }
10251 
10252 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
10253                                            unsigned DstAlign, unsigned SrcAlign,
10254                                            bool IsMemset, bool ZeroMemset,
10255                                            bool MemcpyStrSrc,
10256                                            MachineFunction &MF) const {
10257   const Function *F = MF.getFunction();
10258 
10259   // See if we can use NEON instructions for this...
10260   if ((!IsMemset || ZeroMemset) &&
10261       Subtarget->hasNEON() &&
10262       !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
10263                                        Attribute::NoImplicitFloat)) {
10264     bool Fast;
10265     if (Size >= 16 &&
10266         (memOpAlign(SrcAlign, DstAlign, 16) ||
10267          (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
10268       return MVT::v2f64;
10269     } else if (Size >= 8 &&
10270                (memOpAlign(SrcAlign, DstAlign, 8) ||
10271                 (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
10272       return MVT::f64;
10273     }
10274   }
10275 
10276   // Lower to i32/i16 if the size permits.
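  // E.g. a 4-byte copy returns MVT::i32 here and a 2-byte copy MVT::i16;
  // anything smaller falls back to the target-independent logic.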
10277 if (Size >= 4) 10278 return MVT::i32; 10279 else if (Size >= 2) 10280 return MVT::i16; 10281 10282 // Let the target-independent logic figure it out. 10283 return MVT::Other; 10284 } 10285 10286 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 10287 if (Val.getOpcode() != ISD::LOAD) 10288 return false; 10289 10290 EVT VT1 = Val.getValueType(); 10291 if (!VT1.isSimple() || !VT1.isInteger() || 10292 !VT2.isSimple() || !VT2.isInteger()) 10293 return false; 10294 10295 switch (VT1.getSimpleVT().SimpleTy) { 10296 default: break; 10297 case MVT::i1: 10298 case MVT::i8: 10299 case MVT::i16: 10300 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. 10301 return true; 10302 } 10303 10304 return false; 10305 } 10306 10307 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { 10308 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 10309 return false; 10310 10311 if (!isTypeLegal(EVT::getEVT(Ty1))) 10312 return false; 10313 10314 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); 10315 10316 // Assuming the caller doesn't have a zeroext or signext return parameter, 10317 // truncation all the way down to i1 is valid. 10318 return true; 10319 } 10320 10321 10322 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 10323 if (V < 0) 10324 return false; 10325 10326 unsigned Scale = 1; 10327 switch (VT.getSimpleVT().SimpleTy) { 10328 default: return false; 10329 case MVT::i1: 10330 case MVT::i8: 10331 // Scale == 1; 10332 break; 10333 case MVT::i16: 10334 // Scale == 2; 10335 Scale = 2; 10336 break; 10337 case MVT::i32: 10338 // Scale == 4; 10339 Scale = 4; 10340 break; 10341 } 10342 10343 if ((V & (Scale - 1)) != 0) 10344 return false; 10345 V /= Scale; 10346 return V == (V & ((1LL << 5) - 1)); 10347 } 10348 10349 static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 10350 const ARMSubtarget *Subtarget) { 10351 bool isNeg = false; 10352 if (V < 0) { 10353 isNeg = true; 10354 V = - V; 10355 } 10356 10357 switch (VT.getSimpleVT().SimpleTy) { 10358 default: return false; 10359 case MVT::i1: 10360 case MVT::i8: 10361 case MVT::i16: 10362 case MVT::i32: 10363 // + imm12 or - imm8 10364 if (isNeg) 10365 return V == (V & ((1LL << 8) - 1)); 10366 return V == (V & ((1LL << 12) - 1)); 10367 case MVT::f32: 10368 case MVT::f64: 10369 // Same as ARM mode. FIXME: NEON? 10370 if (!Subtarget->hasVFP2()) 10371 return false; 10372 if ((V & 3) != 0) 10373 return false; 10374 V >>= 2; 10375 return V == (V & ((1LL << 8) - 1)); 10376 } 10377 } 10378 10379 /// isLegalAddressImmediate - Return true if the integer value can be used 10380 /// as the offset of the target addressing mode for load / store of the 10381 /// given type. 10382 static bool isLegalAddressImmediate(int64_t V, EVT VT, 10383 const ARMSubtarget *Subtarget) { 10384 if (V == 0) 10385 return true; 10386 10387 if (!VT.isSimple()) 10388 return false; 10389 10390 if (Subtarget->isThumb1Only()) 10391 return isLegalT1AddressImmediate(V, VT); 10392 else if (Subtarget->isThumb2()) 10393 return isLegalT2AddressImmediate(V, VT, Subtarget); 10394 10395 // ARM mode. 10396 if (V < 0) 10397 V = - V; 10398 switch (VT.getSimpleVT().SimpleTy) { 10399 default: return false; 10400 case MVT::i1: 10401 case MVT::i8: 10402 case MVT::i32: 10403 // +- imm12 10404 return V == (V & ((1LL << 12) - 1)); 10405 case MVT::i16: 10406 // +- imm8 10407 return V == (V & ((1LL << 8) - 1)); 10408 case MVT::f32: 10409 case MVT::f64: 10410 if (!Subtarget->hasVFP2()) // FIXME: NEON? 
bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
                                                      EVT VT) const {
  int Scale = AM.Scale;
  if (Scale < 0)
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    if (Scale == 1)
      return true;
    // r + r << imm
    Scale = Scale & ~1;
    return Scale == 2 || Scale == 4 || Scale == 8;
  case MVT::i64:
    // r + r
    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
      return true;
    return false;
  case MVT::isVoid:
    // Note, we allow "void" uses (basically, uses that aren't loads or
    // stores), because ARM allows folding a scale into many arithmetic
    // operations. This should be made more precise and revisited later.

    // Allow r << imm, but the imm has to be a multiple of two.
    if (Scale & 1) return false;
    return isPowerOf2_32(Scale);
  }
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty) const {
  EVT VT = getValueType(Ty, true);
  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
    return false;

  // Can never fold addr of global into load/store.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0:  // no scale reg, must be "r+i" or "r", or "i".
    break;
  case 1:
    if (Subtarget->isThumb1Only())
      return false;
    // FALL THROUGH.
  default:
    // ARM doesn't support any R+R*scale+imm addr modes.
    if (AM.BaseOffs)
      return false;

    if (!VT.isSimple())
      return false;

    if (Subtarget->isThumb2())
      return isLegalT2ScaledAddressingMode(AM, VT);

    int Scale = AM.Scale;
    switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i32:
      if (Scale < 0) Scale = -Scale;
      if (Scale == 1)
        return true;
      // r + r << imm
      return isPowerOf2_32(Scale & ~1);
    case MVT::i16:
    case MVT::i64:
      // r + r
      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
        return true;
      return false;

    case MVT::isVoid:
      // Note, we allow "void" uses (basically, uses that aren't loads or
      // stores), because ARM allows folding a scale into many arithmetic
      // operations. This should be made more precise and revisited later.

      // Allow r << imm, but the imm has to be a multiple of two.
      if (Scale & 1) return false;
      return isPowerOf2_32(Scale);
    }
  }
  return true;
}

/// isLegalICmpImmediate - Return true if the specified immediate is a legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // Thumb2 and ARM modes can use cmn for negative immediates.
  if (!Subtarget->isThumb())
    return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
  if (Subtarget->isThumb2())
    return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
  // Thumb1 doesn't have cmn, and only 8-bit immediates.
  return Imm >= 0 && Imm <= 255;
}
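// Illustrative note (not from the original source): taking the absolute value
// above relies on the compare / compare-negative pairing, e.g.:
//
//   cmp r0, #255    @ immediate encodable as-is
//   cmn r0, #255    @ compares r0 against -255 without materializing it
//
// so outside Thumb1 an icmp with -255 is as cheap as one with +255.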
/// isLegalAddImmediate - Return true if the specified immediate is a legal add
/// *or sub* immediate, that is the target has add or sub instructions which can
/// add a register with the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // Same encoding for add/sub, just flip the sign.
  int64_t AbsImm = llvm::abs64(Imm);
  if (!Subtarget->isThumb())
    return ARM_AM::getSOImmVal(AbsImm) != -1;
  if (Subtarget->isThumb2())
    return ARM_AM::getT2SOImmVal(AbsImm) != -1;
  // Thumb1 only has 8-bit unsigned immediates.
  return AbsImm >= 0 && AbsImm <= 255;
}
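// Illustrative note (not from the original source): "add r0, r1, #-8" has no
// encoding, but because add and sub share the same immediate ranges the
// negative case is simply emitted as "sub r0, r1, #8"; that is why only the
// absolute value is checked above.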
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
                                      bool isSEXTLoad, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
    // AddressingMode 3
    Base = Ptr->getOperand(0);
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -256) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        return true;
      }
    }
    isInc = (Ptr->getOpcode() == ISD::ADD);
    Offset = Ptr->getOperand(1);
    return true;
  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
    // AddressingMode 2
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -0x1000) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        Base = Ptr->getOperand(0);
        return true;
      }
    }

    if (Ptr->getOpcode() == ISD::ADD) {
      isInc = true;
      ARM_AM::ShiftOpc ShOpcVal =
        ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
      if (ShOpcVal != ARM_AM::no_shift) {
        Base = Ptr->getOperand(1);
        Offset = Ptr->getOperand(0);
      } else {
        Base = Ptr->getOperand(0);
        Offset = Ptr->getOperand(1);
      }
      return true;
    }

    isInc = (Ptr->getOpcode() == ISD::ADD);
    Base = Ptr->getOperand(0);
    Offset = Ptr->getOperand(1);
    return true;
  }

  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
  return false;
}

static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
                                     bool isSEXTLoad, SDValue &Base,
                                     SDValue &Offset, bool &isInc,
                                     SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  Base = Ptr->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bits, no zero.
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
      return true;
    }
  }

  return false;
}

/// getPreIndexedAddressParts - Returns true (by value) and sets the base
/// pointer, the offset, and the addressing mode (by reference) if the node's
/// address can be legally represented as a pre-indexed load / store address.
bool
ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                             SDValue &Offset,
                                             ISD::MemIndexedMode &AM,
                                             SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                       Offset, isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                        Offset, isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}

/// getPostIndexedAddressParts - Returns true (by value) and sets the base
/// pointer, the offset, and the addressing mode (by reference) if this node
/// can be combined with a load / store to form a post-indexed load / store.
bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                       isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                        isInc, DAG);
  if (!isLegal)
    return false;

  if (Ptr != Base) {
    // Swap base ptr and offset to catch more post-indexed load / store when
    // it's legal. In Thumb2 mode, offset must be an immediate.
    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
        !Subtarget->isThumb2())
      std::swap(Base, Offset);

    // Post-indexed load / store update the base pointer.
    if (Ptr != Base)
      return false;
  }

  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}
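// Illustrative note (not from the original source): the two hooks above let
// the DAG combiner fold an address update into the memory access itself:
//
//   ldr r0, [r1, #4]!   @ pre-indexed:  r1 += 4, then load from r1
//   ldr r0, [r1], #4    @ post-indexed: load from r1, then r1 += 4
//
// which saves a separate add/sub when walking through an array.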
void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned BitWidth = KnownOne.getBitWidth();
  KnownZero = KnownOne = APInt(BitWidth, 0);
  switch (Op.getOpcode()) {
  default: break;
  case ARMISD::ADDC:
  case ARMISD::ADDE:
  case ARMISD::SUBC:
  case ARMISD::SUBE:
    // These nodes' second result is a boolean.
    if (Op.getResNo() == 0)
      break;
    KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
    break;
  case ARMISD::CMOV: {
    // Bits are known zero/one if known on the LHS and RHS.
    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
    if (KnownZero == 0 && KnownOne == 0) return;

    APInt KnownZeroRHS, KnownOneRHS;
    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
    KnownZero &= KnownZeroRHS;
    KnownOne &= KnownOneRHS;
    return;
  }
  }
}

//===----------------------------------------------------------------------===//
//                          ARM Inline Assembly Support
//===----------------------------------------------------------------------===//

bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
  // Looking for "rev" which is V6+.
  if (!Subtarget->hasV6Ops())
    return false;

  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
  std::string AsmStr = IA->getAsmString();
  SmallVector<StringRef, 4> AsmPieces;
  SplitString(AsmStr, AsmPieces, ";\n");

  switch (AsmPieces.size()) {
  default: return false;
  case 1:
    AsmStr = AsmPieces[0];
    AsmPieces.clear();
    SplitString(AsmStr, AsmPieces, " \t,");

    // rev $0, $1
    if (AsmPieces.size() == 3 &&
        AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
        IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
      IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
      if (Ty && Ty->getBitWidth() == 32)
        return IntrinsicLowering::LowerToByteSwap(CI);
    }
    break;
  }

  return false;
}
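// Illustrative note (not from the original source): ExpandInlineAsm turns the
// common byte-swap idiom
//
//   asm("rev %0, %1" : "=l"(out) : "l"(in));
//
// into a call to @llvm.bswap.i32, so the optimizer can reason about it like
// any other intrinsic instead of treating the asm block as opaque.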
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:  break;
    case 'l': return C_RegisterClass;
    case 'w': return C_RegisterClass;
    case 'h': return C_RegisterClass;
    case 'x': return C_RegisterClass;
    case 't': return C_RegisterClass;
    case 'j': return C_Other; // Constant for movw.
    // An address with a single base register. Due to the way we
    // currently handle addresses it is the same as an 'r' memory constraint.
    case 'Q': return C_Memory;
    }
  } else if (Constraint.size() == 2) {
    switch (Constraint[0]) {
    default: break;
    // All 'U+' constraints are addresses.
    case 'U': return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
ARMTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'l':
    if (type->isIntegerTy()) {
      if (Subtarget->isThumb())
        weight = CW_SpecificReg;
      else
        weight = CW_Register;
    }
    break;
  case 'w':
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;
  }
  return weight;
}

typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC ARM Constraint Letters
    switch (Constraint[0]) {
    case 'l': // Low regs or general regs.
      if (Subtarget->isThumb())
        return RCPair(0U, &ARM::tGPRRegClass);
      return RCPair(0U, &ARM::GPRRegClass);
    case 'h': // High regs or no regs.
      if (Subtarget->isThumb())
        return RCPair(0U, &ARM::hGPRRegClass);
      break;
    case 'r':
      return RCPair(0U, &ARM::GPRRegClass);
    case 'w':
      if (VT == MVT::Other)
        break;
      if (VT == MVT::f32)
        return RCPair(0U, &ARM::SPRRegClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, &ARM::DPRRegClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, &ARM::QPRRegClass);
      break;
    case 'x':
      if (VT == MVT::Other)
        break;
      if (VT == MVT::f32)
        return RCPair(0U, &ARM::SPR_8RegClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, &ARM::DPR_8RegClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, &ARM::QPR_8RegClass);
      break;
    case 't':
      if (VT == MVT::f32)
        return RCPair(0U, &ARM::SPRRegClass);
      break;
    }
  }
  if (StringRef("{cc}").equals_lower(Constraint))
    return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
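// Illustrative note (not from the original source): a rough summary of the
// register-class constraints handled above:
//
//   'l' -> r0-r7 in Thumb (tGPR), any GPR in ARM mode
//   'h' -> high registers in Thumb (hGPR)
//   'w' -> VFP/NEON registers, picked by operand width (S, D, or Q)
//   't' -> single-precision VFP registers only (S)
//
// so asm("vadd.f32 %0, %1, %2" : "=w"(d) : "w"(a), "w"(b)) constrains all
// three operands to floating-point registers of the operand's width.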
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  // Currently only support length 1 constraints.
  if (Constraint.length() != 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'j':
  case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O':
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C)
      return;

    int64_t CVal64 = C->getSExtValue();
    int CVal = (int) CVal64;
    // None of these constraints allow values larger than 32 bits. Check
    // that the value fits in an int.
    if (CVal != CVal64)
      return;

    switch (ConstraintLetter) {
      case 'j':
        // Constant suitable for movw, must be between 0 and
        // 65535.
        if (Subtarget->hasV6T2Ops())
          if (CVal >= 0 && CVal <= 65535)
            break;
        return;
      case 'I':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 255, for ADD
          // immediates.
          if (CVal >= 0 && CVal <= 255)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getT2SOImmVal(CVal) != -1)
            break;
        } else {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getSOImmVal(CVal) != -1)
            break;
        }
        return;

      case 'J':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a constant between -255 and -1, for negated ADD
          // immediates. This can be used in GCC with an "n" modifier that
          // prints the negated value, for use with SUB instructions. It is
          // not useful otherwise but is implemented for compatibility.
          if (CVal >= -255 && CVal <= -1)
            break;
        } else {
          // This must be a constant between -4095 and 4095. It is not clear
          // what this constraint is intended for. Implemented for
          // compatibility with GCC.
          if (CVal >= -4095 && CVal <= 4095)
            break;
        }
        return;

      case 'K':
        if (Subtarget->isThumb1Only()) {
          // A 32-bit value where only one byte has a nonzero value. Exclude
          // zero to match GCC. This constraint is used by GCC internally for
          // constants that can be loaded with a move/shift combination.
          // It is not useful otherwise but is implemented for compatibility.
          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
            break;
        } else {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getSOImmVal(~CVal) != -1)
            break;
        }
        return;

      case 'L':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -7 and 7,
          // for 3-operand ADD/SUB immediate instructions.
          if (CVal >= -7 && CVal < 7)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
            break;
        } else {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getSOImmVal(-CVal) != -1)
            break;
        }
        return;

      case 'M':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32. This is used in
          // GCC for the shift amount on shifted register operands, but it is
          // useful in general for any shift amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;

      case 'N':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;

      case 'O':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }
    Result = DAG.getTargetConstant(CVal, Op.getValueType());
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
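// Illustrative note (not from the original source): with the validation above,
// something like
//
//   asm("add %0, %1, %2" : "=r"(out) : "r"(in), "I"(200));
//
// succeeds because 200 is an encodable data-processing immediate, while an
// unencodable value (e.g. "I"(257) in ARM mode) is rejected here and falls
// back to the generic constraint handling, typically ending in a diagnostic.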
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
  unsigned Opcode = Op->getOpcode();
  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
         "Invalid opcode for Div/Rem lowering");
  bool isSigned = (Opcode == ISD::SDIVREM);
  EVT VT = Op->getValueType(0);
  Type *Ty = VT.getTypeForEVT(*DAG.getContext());

  RTLIB::Libcall LC;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected request for libcall!");
  case MVT::i8:  LC = isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
  }

  SDValue InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
    EVT ArgVT = Op->getOperand(i).getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Node = Op->getOperand(i);
    Entry.Ty = ArgTy;
    Entry.isSExt = isSigned;
    Entry.isZExt = !isSigned;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy());

  Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);

  SDLoc dl(Op);
  TargetLowering::
    CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
                         0, getLibcallCallingConv(LC), /*isTailCall=*/false,
                         /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
                         Callee, Args, DAG, dl);
  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);

  return CallInfo.first;
}
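// Illustrative note (not from the original source): on AEABI targets the
// divrem libcalls return both results in registers, e.g. __aeabi_idivmod
// leaves the quotient in r0 and the remainder in r1, which is why the return
// type above is modeled as a two-element struct rather than a single integer.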
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The ARM target isn't yet aware of offsets.
  return false;
}

bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return false;

  // There can be 1's on either or both "outsides"; all the "inside"
  // bits must be 0's.
  unsigned TO = CountTrailingOnes_32(v);
  unsigned LO = CountLeadingOnes_32(v);
  v = (v >> TO) << TO;
  v = (v << LO) >> LO;
  return v == 0;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM_AM::getFP32Imm(Imm) != -1;
  if (VT == MVT::f64)
    return ARM_AM::getFP64Imm(Imm) != -1;
  return false;
}
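// Illustrative note (not from the original source): isBitFieldInvertedMask
// accepts masks like 0xffff00ff (a contiguous run of 0's framed by 1's),
// which is exactly the shape BFC/BFI can clear or insert in one instruction.
// Similarly, isFPImmLegal admits only the small VFPv3-encodable set, e.g.
// "vmov.f32 s0, #1.0" is selected directly, while a value like 0.1 must be
// loaded from a constant pool.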
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile loads with NEON intrinsics not supported
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    unsigned NumElts = 0;
    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile stores with NEON intrinsics not supported
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_ldrex: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
    Info.vol = true;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::arm_strex: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
    Info.vol = true;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_strexd: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = 8;
    Info.vol = true;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_ldrexd: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 8;
    Info.vol = true;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  default:
    break;
  }

  return false;
}
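// Illustrative note (not from the original source): modeling arm_ldrex /
// arm_strex as memory intrinsics keeps the backend from reordering memory
// operations across an exclusive-access loop such as:
//
//   1: ldrex   r0, [r1]        @ load-exclusive
//      add     r0, r0, #1
//      strex   r2, r0, [r1]    @ store-exclusive, r2 = 0 on success
//      cmp     r2, #0
//      bne     1b
//
// i.e. the usual atomic read-modify-write pattern these intrinsics exist for.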