1 //===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the SystemZSelectionDAGInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "SystemZTargetMachine.h" 15 #include "llvm/CodeGen/SelectionDAG.h" 16 17 using namespace llvm; 18 19 #define DEBUG_TYPE "systemz-selectiondag-info" 20 21 SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL) 22 : TargetSelectionDAGInfo(&DL) {} 23 24 SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { 25 } 26 27 // Decide whether it is best to use a loop or straight-line code for 28 // a block operation of Size bytes with source address Src and destination 29 // address Dest. Sequence is the opcode to use for straight-line code 30 // (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). 31 // Return the chain for the completed operation. 32 static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, 33 unsigned Loop, SDValue Chain, SDValue Dst, 34 SDValue Src, uint64_t Size) { 35 EVT PtrVT = Src.getValueType(); 36 // The heuristic we use is to prefer loops for anything that would 37 // require 7 or more MVCs. With these kinds of sizes there isn't 38 // much to choose between straight-line code and looping code, 39 // since the time will be dominated by the MVCs themselves. 40 // However, the loop has 4 or 5 instructions (depending on whether 41 // the base addresses can be proved equal), so there doesn't seem 42 // much point using a loop for 5 * 256 bytes or fewer. Anything in 43 // the range (5 * 256, 6 * 256) will need another instruction after 44 // the loop, so it doesn't seem worth using a loop then either. 45 // The next value up, 6 * 256, can be implemented in the same 46 // number of straight-line MVCs as 6 * 256 - 1. 47 if (Size > 6 * 256) 48 return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, 49 DAG.getConstant(Size, DL, PtrVT), 50 DAG.getConstant(Size / 256, DL, PtrVT)); 51 return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, 52 DAG.getConstant(Size, DL, PtrVT)); 53 } 54 55 SDValue SystemZSelectionDAGInfo:: 56 EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 57 SDValue Dst, SDValue Src, SDValue Size, unsigned Align, 58 bool IsVolatile, bool AlwaysInline, 59 MachinePointerInfo DstPtrInfo, 60 MachinePointerInfo SrcPtrInfo) const { 61 if (IsVolatile) 62 return SDValue(); 63 64 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) 65 return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, 66 Chain, Dst, Src, CSize->getZExtValue()); 67 return SDValue(); 68 } 69 70 // Handle a memset of 1, 2, 4 or 8 bytes with the operands given by 71 // Chain, Dst, ByteVal and Size. These cases are expected to use 72 // MVI, MVHHI, MVHI and MVGHI respectively. 73 static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 74 SDValue Dst, uint64_t ByteVal, uint64_t Size, 75 unsigned Align, 76 MachinePointerInfo DstPtrInfo) { 77 uint64_t StoreVal = ByteVal; 78 for (unsigned I = 1; I < Size; ++I) 79 StoreVal |= ByteVal << (I * 8); 80 return DAG.getStore(Chain, DL, 81 DAG.getConstant(StoreVal, DL, 82 MVT::getIntegerVT(Size * 8)), 83 Dst, DstPtrInfo, false, false, Align); 84 } 85 86 SDValue SystemZSelectionDAGInfo:: 87 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 88 SDValue Dst, SDValue Byte, SDValue Size, 89 unsigned Align, bool IsVolatile, 90 MachinePointerInfo DstPtrInfo) const { 91 EVT PtrVT = Dst.getValueType(); 92 93 if (IsVolatile) 94 return SDValue(); 95 96 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { 97 uint64_t Bytes = CSize->getZExtValue(); 98 if (Bytes == 0) 99 return SDValue(); 100 if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) { 101 // Handle cases that can be done using at most two of 102 // MVI, MVHI, MVHHI and MVGHI. The latter two can only be 103 // used if ByteVal is all zeros or all ones; in other casees, 104 // we can move at most 2 halfwords. 105 uint64_t ByteVal = CByte->getZExtValue(); 106 if (ByteVal == 0 || ByteVal == 255 ? 107 Bytes <= 16 && countPopulation(Bytes) <= 2 : 108 Bytes <= 4) { 109 unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); 110 unsigned Size2 = Bytes - Size1; 111 SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, 112 Align, DstPtrInfo); 113 if (Size2 == 0) 114 return Chain1; 115 Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 116 DAG.getConstant(Size1, DL, PtrVT)); 117 DstPtrInfo = DstPtrInfo.getWithOffset(Size1); 118 SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, 119 std::min(Align, Size1), DstPtrInfo); 120 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); 121 } 122 } else { 123 // Handle one and two bytes using STC. 124 if (Bytes <= 2) { 125 SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, 126 false, false, Align); 127 if (Bytes == 1) 128 return Chain1; 129 SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 130 DAG.getConstant(1, DL, PtrVT)); 131 SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, 132 DstPtrInfo.getWithOffset(1), 133 false, false, 1); 134 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); 135 } 136 } 137 assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); 138 139 // Handle the special case of a memset of 0, which can use XC. 140 auto *CByte = dyn_cast<ConstantSDNode>(Byte); 141 if (CByte && CByte->getZExtValue() == 0) 142 return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, 143 Chain, Dst, Dst, Bytes); 144 145 // Copy the byte to the first location and then use MVC to copy 146 // it to the rest. 147 Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, 148 false, false, Align); 149 SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 150 DAG.getConstant(1, DL, PtrVT)); 151 return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, 152 Chain, DstPlus1, Dst, Bytes - 1); 153 } 154 return SDValue(); 155 } 156 157 // Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), 158 // deciding whether to use a loop or straight-line code. 159 static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 160 SDValue Src1, SDValue Src2, uint64_t Size) { 161 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 162 EVT PtrVT = Src1.getValueType(); 163 // A two-CLC sequence is a clear win over a loop, not least because it 164 // needs only one branch. A three-CLC sequence needs the same number 165 // of branches as a loop (i.e. 2), but is shorter. That brings us to 166 // lengths greater than 768 bytes. It seems relatively likely that 167 // a difference will be found within the first 768 bytes, so we just 168 // optimize for the smallest number of branch instructions, in order 169 // to avoid polluting the prediction buffer too much. A loop only ever 170 // needs 2 branches, whereas a straight-line sequence would need 3 or more. 171 if (Size > 3 * 256) 172 return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, 173 DAG.getConstant(Size, DL, PtrVT), 174 DAG.getConstant(Size / 256, DL, PtrVT)); 175 return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, 176 DAG.getConstant(Size, DL, PtrVT)); 177 } 178 179 // Convert the current CC value into an integer that is 0 if CC == 0, 180 // less than zero if CC == 1 and greater than zero if CC >= 2. 181 // The sequence starts with IPM, which puts CC into bits 29 and 28 182 // of an integer and clears bits 30 and 31. 183 static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { 184 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); 185 SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 186 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); 187 SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, 188 DAG.getConstant(31, DL, MVT::i32)); 189 return ROTL; 190 } 191 192 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 193 EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 194 SDValue Src1, SDValue Src2, SDValue Size, 195 MachinePointerInfo Op1PtrInfo, 196 MachinePointerInfo Op2PtrInfo) const { 197 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { 198 uint64_t Bytes = CSize->getZExtValue(); 199 assert(Bytes > 0 && "Caller should have handled 0-size case"); 200 Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); 201 SDValue Glue = Chain.getValue(1); 202 return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); 203 } 204 return std::make_pair(SDValue(), SDValue()); 205 } 206 207 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 208 EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 209 SDValue Src, SDValue Char, SDValue Length, 210 MachinePointerInfo SrcPtrInfo) const { 211 // Use SRST to find the character. End is its address on success. 212 EVT PtrVT = Src.getValueType(); 213 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); 214 Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); 215 Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); 216 Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, 217 DAG.getConstant(255, DL, MVT::i32)); 218 SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); 219 SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, 220 Limit, Src, Char); 221 Chain = End.getValue(1); 222 SDValue Glue = End.getValue(2); 223 224 // Now select between End and null, depending on whether the character 225 // was found. 226 SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), 227 DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), 228 DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), 229 Glue}; 230 VTs = DAG.getVTList(PtrVT, MVT::Glue); 231 End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); 232 return std::make_pair(End, Chain); 233 } 234 235 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 236 EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 237 SDValue Dest, SDValue Src, 238 MachinePointerInfo DestPtrInfo, 239 MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { 240 SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); 241 SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, 242 DAG.getConstant(0, DL, MVT::i32)); 243 return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); 244 } 245 246 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 247 EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 248 SDValue Src1, SDValue Src2, 249 MachinePointerInfo Op1PtrInfo, 250 MachinePointerInfo Op2PtrInfo) const { 251 SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); 252 SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, 253 DAG.getConstant(0, DL, MVT::i32)); 254 Chain = Unused.getValue(1); 255 SDValue Glue = Chain.getValue(2); 256 return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); 257 } 258 259 // Search from Src for a null character, stopping once Src reaches Limit. 260 // Return a pair of values, the first being the number of nonnull characters 261 // and the second being the out chain. 262 // 263 // This can be used for strlen by setting Limit to 0. 264 static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, 265 SDValue Chain, SDValue Src, 266 SDValue Limit) { 267 EVT PtrVT = Src.getValueType(); 268 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); 269 SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, 270 Limit, Src, DAG.getConstant(0, DL, MVT::i32)); 271 Chain = End.getValue(1); 272 SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); 273 return std::make_pair(Len, Chain); 274 } 275 276 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 277 EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 278 SDValue Src, MachinePointerInfo SrcPtrInfo) const { 279 EVT PtrVT = Src.getValueType(); 280 return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT)); 281 } 282 283 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 284 EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 285 SDValue Src, SDValue MaxLength, 286 MachinePointerInfo SrcPtrInfo) const { 287 EVT PtrVT = Src.getValueType(); 288 MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); 289 SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); 290 return getBoundedStrlen(DAG, DL, Chain, Src, Limit); 291 } 292