1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the ARMSelectionDAGInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ARMTargetMachine.h" 15 #include "llvm/CodeGen/SelectionDAG.h" 16 #include "llvm/IR/DerivedTypes.h" 17 using namespace llvm; 18 19 #define DEBUG_TYPE "arm-selectiondag-info" 20 21 // Emit, if possible, a specialized version of the given Libcall. Typically this 22 // means selecting the appropriately aligned version, but we also convert memset 23 // of 0 into memclr. 24 SDValue ARMSelectionDAGInfo:: 25 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, 26 SDValue Chain, 27 SDValue Dst, SDValue Src, 28 SDValue Size, unsigned Align, 29 RTLIB::Libcall LC) const { 30 const ARMSubtarget &Subtarget = 31 DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 32 const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); 33 34 // Only use a specialized AEABI function if the default version of this 35 // Libcall is an AEABI function. 36 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) 37 return SDValue(); 38 39 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be 40 // able to translate memset to memclr and use the value to index the function 41 // name array. 42 enum { 43 AEABI_MEMCPY = 0, 44 AEABI_MEMMOVE, 45 AEABI_MEMSET, 46 AEABI_MEMCLR 47 } AEABILibcall; 48 switch (LC) { 49 case RTLIB::MEMCPY: 50 AEABILibcall = AEABI_MEMCPY; 51 break; 52 case RTLIB::MEMMOVE: 53 AEABILibcall = AEABI_MEMMOVE; 54 break; 55 case RTLIB::MEMSET: 56 AEABILibcall = AEABI_MEMSET; 57 if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) 58 if (ConstantSrc->getZExtValue() == 0) 59 AEABILibcall = AEABI_MEMCLR; 60 break; 61 default: 62 return SDValue(); 63 } 64 65 // Choose the most-aligned libcall variant that we can 66 enum { 67 ALIGN1 = 0, 68 ALIGN4, 69 ALIGN8 70 } AlignVariant; 71 if ((Align & 7) == 0) 72 AlignVariant = ALIGN8; 73 else if ((Align & 3) == 0) 74 AlignVariant = ALIGN4; 75 else 76 AlignVariant = ALIGN1; 77 78 TargetLowering::ArgListTy Args; 79 TargetLowering::ArgListEntry Entry; 80 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 81 Entry.Node = Dst; 82 Args.push_back(Entry); 83 if (AEABILibcall == AEABI_MEMCLR) { 84 Entry.Node = Size; 85 Args.push_back(Entry); 86 } else if (AEABILibcall == AEABI_MEMSET) { 87 // Adjust parameters for memset, EABI uses format (ptr, size, value), 88 // GNU library uses (ptr, value, size) 89 // See RTABI section 4.3.4 90 Entry.Node = Size; 91 Args.push_back(Entry); 92 93 // Extend or truncate the argument to be an i32 value for the call. 94 if (Src.getValueType().bitsGT(MVT::i32)) 95 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); 96 else if (Src.getValueType().bitsLT(MVT::i32)) 97 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); 98 99 Entry.Node = Src; 100 Entry.Ty = Type::getInt32Ty(*DAG.getContext()); 101 Entry.isSExt = false; 102 Args.push_back(Entry); 103 } else { 104 Entry.Node = Src; 105 Args.push_back(Entry); 106 107 Entry.Node = Size; 108 Args.push_back(Entry); 109 } 110 111 char const *FunctionNames[4][3] = { 112 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, 113 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, 114 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, 115 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } 116 }; 117 TargetLowering::CallLoweringInfo CLI(DAG); 118 CLI.setDebugLoc(dl) 119 .setChain(Chain) 120 .setCallee( 121 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), 122 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], 123 TLI->getPointerTy(DAG.getDataLayout())), 124 std::move(Args), 0) 125 .setDiscardResult(); 126 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 127 128 return CallResult.second; 129 } 130 131 SDValue 132 ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, 133 SDValue Chain, 134 SDValue Dst, SDValue Src, 135 SDValue Size, unsigned Align, 136 bool isVolatile, bool AlwaysInline, 137 MachinePointerInfo DstPtrInfo, 138 MachinePointerInfo SrcPtrInfo) const { 139 const ARMSubtarget &Subtarget = 140 DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 141 // Do repeated 4-byte loads and stores. To be improved. 142 // This requires 4-byte alignment. 143 if ((Align & 3) != 0) 144 return SDValue(); 145 // This requires the copy size to be a constant, preferably 146 // within a subtarget-specific limit. 147 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 148 if (!ConstantSize) 149 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 150 RTLIB::MEMCPY); 151 uint64_t SizeVal = ConstantSize->getZExtValue(); 152 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) 153 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 154 RTLIB::MEMCPY); 155 156 unsigned BytesLeft = SizeVal & 3; 157 unsigned NumMemOps = SizeVal >> 2; 158 unsigned EmittedNumMemOps = 0; 159 EVT VT = MVT::i32; 160 unsigned VTSize = 4; 161 unsigned i = 0; 162 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers 163 const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6; 164 SDValue TFOps[6]; 165 SDValue Loads[6]; 166 uint64_t SrcOff = 0, DstOff = 0; 167 168 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the 169 // same number of stores. The loads and stores will get combined into 170 // ldm/stm later on. 171 while (EmittedNumMemOps < NumMemOps) { 172 for (i = 0; 173 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 174 Loads[i] = DAG.getLoad(VT, dl, Chain, 175 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 176 DAG.getConstant(SrcOff, dl, MVT::i32)), 177 SrcPtrInfo.getWithOffset(SrcOff), isVolatile, 178 false, false, 0); 179 TFOps[i] = Loads[i].getValue(1); 180 SrcOff += VTSize; 181 } 182 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 183 makeArrayRef(TFOps, i)); 184 185 for (i = 0; 186 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 187 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 188 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 189 DAG.getConstant(DstOff, dl, MVT::i32)), 190 DstPtrInfo.getWithOffset(DstOff), 191 isVolatile, false, 0); 192 DstOff += VTSize; 193 } 194 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 195 makeArrayRef(TFOps, i)); 196 197 EmittedNumMemOps += i; 198 } 199 200 if (BytesLeft == 0) 201 return Chain; 202 203 // Issue loads / stores for the trailing (1 - 3) bytes. 204 unsigned BytesLeftSave = BytesLeft; 205 i = 0; 206 while (BytesLeft) { 207 if (BytesLeft >= 2) { 208 VT = MVT::i16; 209 VTSize = 2; 210 } else { 211 VT = MVT::i8; 212 VTSize = 1; 213 } 214 215 Loads[i] = DAG.getLoad(VT, dl, Chain, 216 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 217 DAG.getConstant(SrcOff, dl, MVT::i32)), 218 SrcPtrInfo.getWithOffset(SrcOff), 219 false, false, false, 0); 220 TFOps[i] = Loads[i].getValue(1); 221 ++i; 222 SrcOff += VTSize; 223 BytesLeft -= VTSize; 224 } 225 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 226 makeArrayRef(TFOps, i)); 227 228 i = 0; 229 BytesLeft = BytesLeftSave; 230 while (BytesLeft) { 231 if (BytesLeft >= 2) { 232 VT = MVT::i16; 233 VTSize = 2; 234 } else { 235 VT = MVT::i8; 236 VTSize = 1; 237 } 238 239 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 240 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 241 DAG.getConstant(DstOff, dl, MVT::i32)), 242 DstPtrInfo.getWithOffset(DstOff), false, false, 0); 243 ++i; 244 DstOff += VTSize; 245 BytesLeft -= VTSize; 246 } 247 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 248 makeArrayRef(TFOps, i)); 249 } 250 251 252 SDValue ARMSelectionDAGInfo:: 253 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, 254 SDValue Chain, 255 SDValue Dst, SDValue Src, 256 SDValue Size, unsigned Align, 257 bool isVolatile, 258 MachinePointerInfo DstPtrInfo, 259 MachinePointerInfo SrcPtrInfo) const { 260 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 261 RTLIB::MEMMOVE); 262 } 263 264 265 SDValue ARMSelectionDAGInfo:: 266 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 267 SDValue Chain, SDValue Dst, 268 SDValue Src, SDValue Size, 269 unsigned Align, bool isVolatile, 270 MachinePointerInfo DstPtrInfo) const { 271 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 272 RTLIB::MEMSET); 273 } 274