//===-- Generate random but valid function descriptors -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "automemcpy/RandomFunctionGenerator.h"

#include <llvm/ADT/None.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/raw_ostream.h>

#include <set>

namespace llvm {
namespace automemcpy {

// Exploration parameters
// ----------------------
// Here we define a set of values that will constrain the exploration and
// limit combinatorial explosion.

// We limit the number of cases for individual sizes to sizes up to 4.
// More individual sizes don't bring much over the overlapping strategy.
static constexpr int kMaxIndividualSize = 4;

// We limit Overlapping Strategy to sizes up to 256.
// An overlap of 256B means accessing 128B at once which is usually not
// feasible by current CPUs. We rely on the compiler to generate multiple
// loads/stores if needed but higher sizes are unlikely to benefit from hardware
// acceleration.
static constexpr int kMaxOverlapSize = 256;

// For the loop strategies, we make sure that they iterate at least a certain
// number of times to amortize the cost of looping.
static constexpr int kLoopMinIter = 3;
static constexpr int kAlignedLoopMinIter = 2;

// We restrict the size of the block of data to handle in a loop.
// Generally speaking, a block size <= 16 performs poorly.
static constexpr int kLoopBlockSize[] = {16, 32, 64};

// We restrict alignment to the following values.
static constexpr int kLoopAlignments[] = {16, 32, 64};

// We make sure that the region bounds are one of the following values.
// Candidate boundary values for every strategy region (kMaxSize is declared
// elsewhere in the project headers).
static constexpr int kAnchors[] = {0,  1,  2,   4,   8,    16,   32, 48,
                                   64, 96, 128, 256, 512, 1024, kMaxSize};

// We also allow disabling loops, aligned loops and accelerators.
static constexpr bool kDisableLoop = false;
static constexpr bool kDisableAlignedLoop = false;
static constexpr bool kDisableAccelerator = false;

// For memcpy, we can also explore whether aligning on source or destination has
// an effect.
static constexpr bool kExploreAlignmentArg = true;

// The functions we generate code for.
// BCMP is specifically disabled for now.
static constexpr int kFunctionTypes[] = {
    (int)FunctionType::MEMCPY,
    (int)FunctionType::MEMCMP,
    // (int)FunctionType::BCMP,
    (int)FunctionType::MEMSET,
    (int)FunctionType::BZERO,
};

// The actual implementation of each function can be handled via primitive types
// (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN).
// We want to move toward delegating the code generation entirely to the
// compiler but for now we have to make use of -per microarchitecture- custom
// implementations. Scalar being more portable but also less performant, we
// remove it as well.
static constexpr int kElementClasses[] = {
    // (int)ElementTypeClass::SCALAR,
    (int)ElementTypeClass::NATIVE,
    // (int)ElementTypeClass::BUILTIN
};

// Declares one Z3 integer variable per field of the function descriptor and
// asserts the structural constraints that any valid descriptor must satisfy.
// Successive calls to `next()` then enumerate the satisfying models.
RandomFunctionGenerator::RandomFunctionGenerator()
    : Solver(Context), Type(Context.int_const("Type")),
      ContiguousBegin(Context.int_const("ContiguousBegin")),
      ContiguousEnd(Context.int_const("ContiguousEnd")),
      OverlapBegin(Context.int_const("OverlapBegin")),
      OverlapEnd(Context.int_const("OverlapEnd")),
      LoopBegin(Context.int_const("LoopBegin")),
      LoopEnd(Context.int_const("LoopEnd")),
      LoopBlockSize(Context.int_const("LoopBlockSize")),
      AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
      AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
      AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
      AlignedAlignment(Context.int_const("AlignedAlignment")),
      AlignedArg(Context.int_const("AlignedArg")),
      AcceleratorBegin(Context.int_const("AcceleratorBegin")),
      AcceleratorEnd(Context.int_const("AcceleratorEnd")),
      ElementClass(Context.int_const("ElementClass")) {
  // All possible functions.
  Solver.add(inSetConstraint(Type, kFunctionTypes));

  // Add constraints for region bounds (anchored values, 0 <= Begin <= End <=
  // kMaxSize).
  addBoundsAndAnchors(ContiguousBegin, ContiguousEnd);
  addBoundsAndAnchors(OverlapBegin, OverlapEnd);
  addBoundsAndAnchors(LoopBegin, LoopEnd);
  addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd);
  addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd);
  // We always consider strategies in this order, and we
  // always end with the `Accelerator` strategy, as it's typically more
  // efficient for large sizes. Regions are chained: each strategy's end is the
  // next strategy's begin (an empty region disables that strategy).
  // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
  Solver.add(ContiguousEnd == OverlapBegin);
  Solver.add(OverlapEnd == LoopBegin);
  Solver.add(LoopEnd == AlignedLoopBegin);
  Solver.add(AlignedLoopEnd == AcceleratorBegin);
  // Fix endpoints: The minimum size that we want to copy is 0, and we always
  // start with the `Contiguous` strategy. The max size is `kMaxSize`.
  Solver.add(ContiguousBegin == 0);
  Solver.add(AcceleratorEnd == kMaxSize);
  // Contiguous (the +1 accounts for the exclusive upper bound).
  Solver.add(ContiguousEnd <= kMaxIndividualSize + 1);
  // Overlap (same exclusive upper bound convention).
  Solver.add(OverlapEnd <= kMaxOverlapSize + 1);
  // Overlap only ever makes sense when accessing multiple bytes at a time.
  // i.e. Overlap<1> is useless.
  Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2);
  // Loop
  addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter);
  // Aligned Loop: same loop constraints, plus an alignment value, and the
  // aligned loop is only considered from 64 bytes onward.
  addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
                     kAlignedLoopMinIter);
  Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments));
  Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64);
  Solver.add(AlignedLoopBlockSize >= AlignedAlignment);
  Solver.add(AlignedLoopBlockSize >= LoopBlockSize);
  // Only memcpy has two pointer arguments to choose the alignment from.
  z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY;
  z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg;
  Solver.add(
      (ExploreAlignment &&
       inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) ||
      (!ExploreAlignment && AlignedArg == (int)AlignArg::_1));
  // Accelerator
  Solver.add(IsMemcpy ||
             (AcceleratorBegin ==
              AcceleratorEnd)); // Only Memcpy has accelerator for now.
  // Element classes
  Solver.add(inSetConstraint(ElementClass, kElementClasses));

  // Optional kill switches defined above force the corresponding region empty.
  if (kDisableLoop)
    Solver.add(LoopBegin == LoopEnd);
  if (kDisableAlignedLoop)
    Solver.add(AlignedLoopBegin == AlignedLoopEnd);
  if (kDisableAccelerator)
    Solver.add(AcceleratorBegin == AcceleratorEnd);
}

// Creates SizeSpan from Begin/End values.
// Returns llvm::None if Begin==End (i.e. the region is empty).
static Optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) {
  if (Begin == End)
    return None;
  SizeSpan SS;
  SS.Begin = Begin;
  SS.End = End;
  return SS;
}

// Generic method to create a `Region` struct with a Span or None if span is
// empty.
template <typename Region>
static Optional<Region> As(size_t Begin, size_t End) {
  if (auto Span = AsSizeSpan(Begin, End)) {
    Region Output;
    Output.Span = *Span;
    return Output;
  }
  return None;
}

// Returns a Loop struct or None if span is empty.
static Optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) {
  if (auto Span = AsSizeSpan(Begin, End)) {
    Loop Output;
    Output.Span = *Span;
    Output.BlockSize = BlockSize;
    return Output;
  }
  return None;
}

// Returns an AlignedLoop struct or None if span is empty.
static Optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End,
                                           size_t BlockSize, size_t Alignment,
                                           AlignArg AlignTo) {
  if (auto Loop = AsLoop(Begin, End, BlockSize)) {
    AlignedLoop Output;
    Output.Loop = *Loop;
    Output.Alignment = Alignment;
    Output.AlignTo = AlignTo;
    return Output;
  }
  return None;
}

// Returns the next, not-yet-seen, valid function descriptor, or None once the
// constraint system has no further solutions.
Optional<FunctionDescriptor> RandomFunctionGenerator::next() {
  if (Solver.check() != z3::sat)
    return {};

  z3::model m = Solver.get_model();

  // Helper method to get the current numerical value of a z3::expr.
  const auto E = [&m](z3::expr &V) -> int {
    return m.eval(V).get_numeral_int();
  };

  // R is the function descriptor to return, filled from the current model.
  FunctionDescriptor R;
  R.Type = FunctionType(E(Type));
  R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd));
  R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd));
  R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize));
  R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd),
                                E(AlignedLoopBlockSize), E(AlignedAlignment),
                                AlignArg(E(AlignedArg)));
  R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd));
  R.ElementClass = ElementTypeClass(E(ElementClass));

  // Express current state as a set of constraints.
  z3::expr CurrentLayout =
      (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) &&
      (ContiguousEnd == E(ContiguousEnd)) &&
      (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) &&
      (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) &&
      (LoopBlockSize == E(LoopBlockSize)) &&
      (AlignedLoopBegin == E(AlignedLoopBegin)) &&
      (AlignedLoopEnd == E(AlignedLoopEnd)) &&
      (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) &&
      (AlignedAlignment == E(AlignedAlignment)) &&
      (AlignedArg == E(AlignedArg)) &&
      (AcceleratorBegin == E(AcceleratorBegin)) &&
      (AcceleratorEnd == E(AcceleratorEnd)) &&
      (ElementClass == E(ElementClass));

  // Ask solver to never show this configuration ever again, so the next call
  // yields a different model.
  Solver.add(!CurrentLayout);
  return R;
}

// Make sure `Variable` is one of the provided values.
251 z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable, 252 ArrayRef<int> Values) const { 253 z3::expr_vector Args(Variable.ctx()); 254 for (int Value : Values) 255 Args.push_back(Variable == Value); 256 return z3::mk_or(Args); 257 } 258 259 void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin, 260 z3::expr &End) { 261 // Begin and End are picked amongst a set of predefined values. 262 Solver.add(inSetConstraint(Begin, kAnchors)); 263 Solver.add(inSetConstraint(End, kAnchors)); 264 Solver.add(Begin >= 0); 265 Solver.add(Begin <= End); 266 Solver.add(End <= kMaxSize); 267 } 268 269 void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin, 270 const z3::expr &LoopEnd, 271 z3::expr &LoopBlockSize, 272 int LoopMinIter) { 273 Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize)); 274 Solver.add(LoopBegin == LoopEnd || 275 (LoopBegin > (LoopMinIter * LoopBlockSize))); 276 } 277 278 } // namespace automemcpy 279 } // namespace llvm 280