1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the ARMSelectionDAGInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ARMTargetMachine.h" 15 #include "llvm/CodeGen/SelectionDAG.h" 16 #include "llvm/IR/DerivedTypes.h" 17 using namespace llvm; 18 19 #define DEBUG_TYPE "arm-selectiondag-info" 20 21 // Emit, if possible, a specialized version of the given Libcall. Typically this 22 // means selecting the appropriately aligned version, but we also convert memset 23 // of 0 into memclr. 24 SDValue ARMSelectionDAGInfo:: 25 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, 26 SDValue Chain, 27 SDValue Dst, SDValue Src, 28 SDValue Size, unsigned Align, 29 RTLIB::Libcall LC) const { 30 const ARMSubtarget &Subtarget = 31 DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 32 const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); 33 34 // Only use a specialized AEABI function if the default version of this 35 // Libcall is an AEABI function. 36 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) 37 return SDValue(); 38 39 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be 40 // able to translate memset to memclr and use the value to index the function 41 // name array. 42 enum { 43 AEABI_MEMCPY = 0, 44 AEABI_MEMMOVE, 45 AEABI_MEMSET, 46 AEABI_MEMCLR 47 } AEABILibcall; 48 switch (LC) { 49 case RTLIB::MEMCPY: 50 AEABILibcall = AEABI_MEMCPY; 51 break; 52 case RTLIB::MEMMOVE: 53 AEABILibcall = AEABI_MEMMOVE; 54 break; 55 case RTLIB::MEMSET: 56 AEABILibcall = AEABI_MEMSET; 57 if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) 58 if (ConstantSrc->getZExtValue() == 0) 59 AEABILibcall = AEABI_MEMCLR; 60 break; 61 default: 62 return SDValue(); 63 } 64 65 // Choose the most-aligned libcall variant that we can 66 enum { 67 ALIGN1 = 0, 68 ALIGN4, 69 ALIGN8 70 } AlignVariant; 71 if ((Align & 7) == 0) 72 AlignVariant = ALIGN8; 73 else if ((Align & 3) == 0) 74 AlignVariant = ALIGN4; 75 else 76 AlignVariant = ALIGN1; 77 78 TargetLowering::ArgListTy Args; 79 TargetLowering::ArgListEntry Entry; 80 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 81 Entry.Node = Dst; 82 Args.push_back(Entry); 83 if (AEABILibcall == AEABI_MEMCLR) { 84 Entry.Node = Size; 85 Args.push_back(Entry); 86 } else if (AEABILibcall == AEABI_MEMSET) { 87 // Adjust parameters for memset, EABI uses format (ptr, size, value), 88 // GNU library uses (ptr, value, size) 89 // See RTABI section 4.3.4 90 Entry.Node = Size; 91 Args.push_back(Entry); 92 93 // Extend or truncate the argument to be an i32 value for the call. 94 if (Src.getValueType().bitsGT(MVT::i32)) 95 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); 96 else if (Src.getValueType().bitsLT(MVT::i32)) 97 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); 98 99 Entry.Node = Src; 100 Entry.Ty = Type::getInt32Ty(*DAG.getContext()); 101 Entry.isSExt = false; 102 Args.push_back(Entry); 103 } else { 104 Entry.Node = Src; 105 Args.push_back(Entry); 106 107 Entry.Node = Size; 108 Args.push_back(Entry); 109 } 110 111 char const *FunctionNames[4][3] = { 112 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, 113 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, 114 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, 115 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } 116 }; 117 TargetLowering::CallLoweringInfo CLI(DAG); 118 CLI.setDebugLoc(dl) 119 .setChain(Chain) 120 .setCallee( 121 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), 122 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], 123 TLI->getPointerTy(DAG.getDataLayout())), 124 std::move(Args), 0) 125 .setDiscardResult(); 126 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 127 128 return CallResult.second; 129 } 130 131 SDValue 132 ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, 133 SDValue Chain, 134 SDValue Dst, SDValue Src, 135 SDValue Size, unsigned Align, 136 bool isVolatile, bool AlwaysInline, 137 MachinePointerInfo DstPtrInfo, 138 MachinePointerInfo SrcPtrInfo) const { 139 const ARMSubtarget &Subtarget = 140 DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 141 // Do repeated 4-byte loads and stores. To be improved. 142 // This requires 4-byte alignment. 143 if ((Align & 3) != 0) 144 return SDValue(); 145 // This requires the copy size to be a constant, preferably 146 // within a subtarget-specific limit. 147 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 148 if (!ConstantSize) 149 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 150 RTLIB::MEMCPY); 151 uint64_t SizeVal = ConstantSize->getZExtValue(); 152 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) 153 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 154 RTLIB::MEMCPY); 155 156 unsigned BytesLeft = SizeVal & 3; 157 unsigned NumMemOps = SizeVal >> 2; 158 unsigned EmittedNumMemOps = 0; 159 EVT VT = MVT::i32; 160 unsigned VTSize = 4; 161 unsigned i = 0; 162 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers 163 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; 164 SDValue TFOps[6]; 165 SDValue Loads[6]; 166 uint64_t SrcOff = 0, DstOff = 0; 167 168 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to 169 // VLDM/VSTM and make this code emit it when appropriate. This would reduce 170 // pressure on the general purpose registers. However this seems harder to map 171 // onto the register allocator's view of the world. 172 173 // The number of MEMCPY pseudo-instructions to emit. We use up to 174 // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm 175 // later on. This is a lower bound on the number of MEMCPY operations we must 176 // emit. 177 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; 178 179 // Code size optimisation: do not inline memcpy if expansion results in 180 // more instructions than the libary call. 181 if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) { 182 return SDValue(); 183 } 184 185 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); 186 187 for (unsigned I = 0; I != NumMEMCPYs; ++I) { 188 // Evenly distribute registers among MEMCPY operations to reduce register 189 // pressure. 190 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; 191 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; 192 193 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, 194 DAG.getConstant(NumRegs, dl, MVT::i32)); 195 Src = Dst.getValue(1); 196 Chain = Dst.getValue(2); 197 198 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); 199 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); 200 201 EmittedNumMemOps = NextEmittedNumMemOps; 202 } 203 204 if (BytesLeft == 0) 205 return Chain; 206 207 // Issue loads / stores for the trailing (1 - 3) bytes. 208 unsigned BytesLeftSave = BytesLeft; 209 i = 0; 210 while (BytesLeft) { 211 if (BytesLeft >= 2) { 212 VT = MVT::i16; 213 VTSize = 2; 214 } else { 215 VT = MVT::i8; 216 VTSize = 1; 217 } 218 219 Loads[i] = DAG.getLoad(VT, dl, Chain, 220 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 221 DAG.getConstant(SrcOff, dl, MVT::i32)), 222 SrcPtrInfo.getWithOffset(SrcOff), 223 false, false, false, 0); 224 TFOps[i] = Loads[i].getValue(1); 225 ++i; 226 SrcOff += VTSize; 227 BytesLeft -= VTSize; 228 } 229 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 230 makeArrayRef(TFOps, i)); 231 232 i = 0; 233 BytesLeft = BytesLeftSave; 234 while (BytesLeft) { 235 if (BytesLeft >= 2) { 236 VT = MVT::i16; 237 VTSize = 2; 238 } else { 239 VT = MVT::i8; 240 VTSize = 1; 241 } 242 243 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 244 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 245 DAG.getConstant(DstOff, dl, MVT::i32)), 246 DstPtrInfo.getWithOffset(DstOff), false, false, 0); 247 ++i; 248 DstOff += VTSize; 249 BytesLeft -= VTSize; 250 } 251 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 252 makeArrayRef(TFOps, i)); 253 } 254 255 256 SDValue ARMSelectionDAGInfo:: 257 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, 258 SDValue Chain, 259 SDValue Dst, SDValue Src, 260 SDValue Size, unsigned Align, 261 bool isVolatile, 262 MachinePointerInfo DstPtrInfo, 263 MachinePointerInfo SrcPtrInfo) const { 264 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 265 RTLIB::MEMMOVE); 266 } 267 268 269 SDValue ARMSelectionDAGInfo:: 270 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 271 SDValue Chain, SDValue Dst, 272 SDValue Src, SDValue Size, 273 unsigned Align, bool isVolatile, 274 MachinePointerInfo DstPtrInfo) const { 275 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 276 RTLIB::MEMSET); 277 } 278