1*00c943a5SGuillaume Chatelet //===-- Generate random but valid function descriptors -------------------===//
2*00c943a5SGuillaume Chatelet //
3*00c943a5SGuillaume Chatelet // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*00c943a5SGuillaume Chatelet // See https://llvm.org/LICENSE.txt for license information.
5*00c943a5SGuillaume Chatelet // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*00c943a5SGuillaume Chatelet //
7*00c943a5SGuillaume Chatelet //===----------------------------------------------------------------------===//
8*00c943a5SGuillaume Chatelet
9*00c943a5SGuillaume Chatelet #include "automemcpy/RandomFunctionGenerator.h"
10*00c943a5SGuillaume Chatelet
11*00c943a5SGuillaume Chatelet #include <llvm/ADT/None.h>
12*00c943a5SGuillaume Chatelet #include <llvm/ADT/StringRef.h>
13*00c943a5SGuillaume Chatelet #include <llvm/Support/raw_ostream.h>
14*00c943a5SGuillaume Chatelet
15*00c943a5SGuillaume Chatelet #include <set>
16*00c943a5SGuillaume Chatelet
17*00c943a5SGuillaume Chatelet namespace llvm {
18*00c943a5SGuillaume Chatelet namespace automemcpy {
19*00c943a5SGuillaume Chatelet
20*00c943a5SGuillaume Chatelet // Exploration parameters
21*00c943a5SGuillaume Chatelet // ----------------------
22*00c943a5SGuillaume Chatelet // Here we define a set of values that will contraint the exploration and
23*00c943a5SGuillaume Chatelet // limit combinatorial explosion.
24*00c943a5SGuillaume Chatelet
25*00c943a5SGuillaume Chatelet // We limit the number of cases for individual sizes to sizes up to 4.
26*00c943a5SGuillaume Chatelet // More individual sizes don't bring much over the overlapping strategy.
27*00c943a5SGuillaume Chatelet static constexpr int kMaxIndividualSize = 4;
28*00c943a5SGuillaume Chatelet
29*00c943a5SGuillaume Chatelet // We limit Overlapping Strategy to sizes up to 256.
30*00c943a5SGuillaume Chatelet // An overlap of 256B means accessing 128B at once which is usually not
31*00c943a5SGuillaume Chatelet // feasible by current CPUs. We rely on the compiler to generate multiple
32*00c943a5SGuillaume Chatelet // loads/stores if needed but higher sizes are unlikely to benefit from hardware
33*00c943a5SGuillaume Chatelet // acceleration.
34*00c943a5SGuillaume Chatelet static constexpr int kMaxOverlapSize = 256;
35*00c943a5SGuillaume Chatelet
36*00c943a5SGuillaume Chatelet // For the loop strategies, we make sure that they iterate at least a certain
37*00c943a5SGuillaume Chatelet // number of times to amortize the cost of looping.
38*00c943a5SGuillaume Chatelet static constexpr int kLoopMinIter = 3;
39*00c943a5SGuillaume Chatelet static constexpr int kAlignedLoopMinIter = 2;
40*00c943a5SGuillaume Chatelet
41*00c943a5SGuillaume Chatelet // We restrict the size of the block of data to handle in a loop.
42*00c943a5SGuillaume Chatelet // Generally speaking block size <= 16 perform poorly.
43*00c943a5SGuillaume Chatelet static constexpr int kLoopBlockSize[] = {16, 32, 64};
44*00c943a5SGuillaume Chatelet
45*00c943a5SGuillaume Chatelet // We restrict alignment to the following values.
46*00c943a5SGuillaume Chatelet static constexpr int kLoopAlignments[] = {16, 32, 64};
47*00c943a5SGuillaume Chatelet
48*00c943a5SGuillaume Chatelet // We make sure that the region bounds are one of the following values.
49*00c943a5SGuillaume Chatelet static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48,
50*00c943a5SGuillaume Chatelet 64, 96, 128, 256, 512, 1024, kMaxSize};
51*00c943a5SGuillaume Chatelet
52*00c943a5SGuillaume Chatelet // We also allow disabling loops, aligned loops and accelerators.
53*00c943a5SGuillaume Chatelet static constexpr bool kDisableLoop = false;
54*00c943a5SGuillaume Chatelet static constexpr bool kDisableAlignedLoop = false;
55*00c943a5SGuillaume Chatelet static constexpr bool kDisableAccelerator = false;
56*00c943a5SGuillaume Chatelet
57*00c943a5SGuillaume Chatelet // For memcpy, we can also explore whether aligning on source or destination has
58*00c943a5SGuillaume Chatelet // an effect.
59*00c943a5SGuillaume Chatelet static constexpr bool kExploreAlignmentArg = true;
60*00c943a5SGuillaume Chatelet
61*00c943a5SGuillaume Chatelet // The function we generate code for.
62*00c943a5SGuillaume Chatelet // BCMP is specifically disabled for now.
63*00c943a5SGuillaume Chatelet static constexpr int kFunctionTypes[] = {
64*00c943a5SGuillaume Chatelet (int)FunctionType::MEMCPY,
65*00c943a5SGuillaume Chatelet (int)FunctionType::MEMCMP,
66*00c943a5SGuillaume Chatelet // (int)FunctionType::BCMP,
67*00c943a5SGuillaume Chatelet (int)FunctionType::MEMSET,
68*00c943a5SGuillaume Chatelet (int)FunctionType::BZERO,
69*00c943a5SGuillaume Chatelet };
70*00c943a5SGuillaume Chatelet
71*00c943a5SGuillaume Chatelet // The actual implementation of each function can be handled via primitive types
72*00c943a5SGuillaume Chatelet // (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN).
73*00c943a5SGuillaume Chatelet // We want to move toward delegating the code generation entirely to the
74*00c943a5SGuillaume Chatelet // compiler but for now we have to make use of -per microarchitecture- custom
75*00c943a5SGuillaume Chatelet // implementations. Scalar being more portable but also less performant, we
76*00c943a5SGuillaume Chatelet // remove it as well.
77*00c943a5SGuillaume Chatelet static constexpr int kElementClasses[] = {
78*00c943a5SGuillaume Chatelet // (int)ElementTypeClass::SCALAR,
79*00c943a5SGuillaume Chatelet (int)ElementTypeClass::NATIVE,
80*00c943a5SGuillaume Chatelet // (int)ElementTypeClass::BUILTIN
81*00c943a5SGuillaume Chatelet };
82*00c943a5SGuillaume Chatelet
RandomFunctionGenerator()83*00c943a5SGuillaume Chatelet RandomFunctionGenerator::RandomFunctionGenerator()
84*00c943a5SGuillaume Chatelet : Solver(Context), Type(Context.int_const("Type")),
85*00c943a5SGuillaume Chatelet ContiguousBegin(Context.int_const("ContiguousBegin")),
86*00c943a5SGuillaume Chatelet ContiguousEnd(Context.int_const("ContiguousEnd")),
87*00c943a5SGuillaume Chatelet OverlapBegin(Context.int_const("OverlapBegin")),
88*00c943a5SGuillaume Chatelet OverlapEnd(Context.int_const("OverlapEnd")),
89*00c943a5SGuillaume Chatelet LoopBegin(Context.int_const("LoopBegin")),
90*00c943a5SGuillaume Chatelet LoopEnd(Context.int_const("LoopEnd")),
91*00c943a5SGuillaume Chatelet LoopBlockSize(Context.int_const("LoopBlockSize")),
92*00c943a5SGuillaume Chatelet AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
93*00c943a5SGuillaume Chatelet AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
94*00c943a5SGuillaume Chatelet AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
95*00c943a5SGuillaume Chatelet AlignedAlignment(Context.int_const("AlignedAlignment")),
96*00c943a5SGuillaume Chatelet AlignedArg(Context.int_const("AlignedArg")),
97*00c943a5SGuillaume Chatelet AcceleratorBegin(Context.int_const("AcceleratorBegin")),
98*00c943a5SGuillaume Chatelet AcceleratorEnd(Context.int_const("AcceleratorEnd")),
99*00c943a5SGuillaume Chatelet ElementClass(Context.int_const("ElementClass")) {
100*00c943a5SGuillaume Chatelet // All possible functions.
101*00c943a5SGuillaume Chatelet Solver.add(inSetConstraint(Type, kFunctionTypes));
102*00c943a5SGuillaume Chatelet
103*00c943a5SGuillaume Chatelet // Add constraints for region bounds.
104*00c943a5SGuillaume Chatelet addBoundsAndAnchors(ContiguousBegin, ContiguousEnd);
105*00c943a5SGuillaume Chatelet addBoundsAndAnchors(OverlapBegin, OverlapEnd);
106*00c943a5SGuillaume Chatelet addBoundsAndAnchors(LoopBegin, LoopEnd);
107*00c943a5SGuillaume Chatelet addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd);
108*00c943a5SGuillaume Chatelet addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd);
109*00c943a5SGuillaume Chatelet // We always consider strategies in this order, and we
110*00c943a5SGuillaume Chatelet // always end with the `Accelerator` strategy, as it's typically more
111*00c943a5SGuillaume Chatelet // efficient for large sizes.
112*00c943a5SGuillaume Chatelet // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
113*00c943a5SGuillaume Chatelet Solver.add(ContiguousEnd == OverlapBegin);
114*00c943a5SGuillaume Chatelet Solver.add(OverlapEnd == LoopBegin);
115*00c943a5SGuillaume Chatelet Solver.add(LoopEnd == AlignedLoopBegin);
116*00c943a5SGuillaume Chatelet Solver.add(AlignedLoopEnd == AcceleratorBegin);
117*00c943a5SGuillaume Chatelet // Fix endpoints: The minimum size that we want to copy is 0, and we always
118*00c943a5SGuillaume Chatelet // start with the `Contiguous` strategy. The max size is `kMaxSize`.
119*00c943a5SGuillaume Chatelet Solver.add(ContiguousBegin == 0);
120*00c943a5SGuillaume Chatelet Solver.add(AcceleratorEnd == kMaxSize);
121*00c943a5SGuillaume Chatelet // Contiguous
122*00c943a5SGuillaume Chatelet Solver.add(ContiguousEnd <= kMaxIndividualSize + 1);
123*00c943a5SGuillaume Chatelet // Overlap
124*00c943a5SGuillaume Chatelet Solver.add(OverlapEnd <= kMaxOverlapSize + 1);
125*00c943a5SGuillaume Chatelet // Overlap only ever makes sense when accessing multiple bytes at a time.
126*00c943a5SGuillaume Chatelet // i.e. Overlap<1> is useless.
127*00c943a5SGuillaume Chatelet Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2);
128*00c943a5SGuillaume Chatelet // Loop
129*00c943a5SGuillaume Chatelet addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter);
130*00c943a5SGuillaume Chatelet // Aligned Loop
131*00c943a5SGuillaume Chatelet addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
132*00c943a5SGuillaume Chatelet kAlignedLoopMinIter);
133*00c943a5SGuillaume Chatelet Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments));
134*00c943a5SGuillaume Chatelet Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64);
135*00c943a5SGuillaume Chatelet Solver.add(AlignedLoopBlockSize >= AlignedAlignment);
136*00c943a5SGuillaume Chatelet Solver.add(AlignedLoopBlockSize >= LoopBlockSize);
137*00c943a5SGuillaume Chatelet z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY;
138*00c943a5SGuillaume Chatelet z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg;
139*00c943a5SGuillaume Chatelet Solver.add(
140*00c943a5SGuillaume Chatelet (ExploreAlignment &&
141*00c943a5SGuillaume Chatelet inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) ||
142*00c943a5SGuillaume Chatelet (!ExploreAlignment && AlignedArg == (int)AlignArg::_1));
143*00c943a5SGuillaume Chatelet // Accelerator
144*00c943a5SGuillaume Chatelet Solver.add(IsMemcpy ||
145*00c943a5SGuillaume Chatelet (AcceleratorBegin ==
146*00c943a5SGuillaume Chatelet AcceleratorEnd)); // Only Memcpy has accelerator for now.
147*00c943a5SGuillaume Chatelet // Element classes
148*00c943a5SGuillaume Chatelet Solver.add(inSetConstraint(ElementClass, kElementClasses));
149*00c943a5SGuillaume Chatelet
150*00c943a5SGuillaume Chatelet if (kDisableLoop)
151*00c943a5SGuillaume Chatelet Solver.add(LoopBegin == LoopEnd);
152*00c943a5SGuillaume Chatelet if (kDisableAlignedLoop)
153*00c943a5SGuillaume Chatelet Solver.add(AlignedLoopBegin == AlignedLoopEnd);
154*00c943a5SGuillaume Chatelet if (kDisableAccelerator)
155*00c943a5SGuillaume Chatelet Solver.add(AcceleratorBegin == AcceleratorEnd);
156*00c943a5SGuillaume Chatelet }
157*00c943a5SGuillaume Chatelet
158*00c943a5SGuillaume Chatelet // Creates SizeSpan from Begin/End values.
159*00c943a5SGuillaume Chatelet // Returns llvm::None if Begin==End.
AsSizeSpan(size_t Begin,size_t End)160*00c943a5SGuillaume Chatelet static Optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) {
161*00c943a5SGuillaume Chatelet if (Begin == End)
162*00c943a5SGuillaume Chatelet return None;
163*00c943a5SGuillaume Chatelet SizeSpan SS;
164*00c943a5SGuillaume Chatelet SS.Begin = Begin;
165*00c943a5SGuillaume Chatelet SS.End = End;
166*00c943a5SGuillaume Chatelet return SS;
167*00c943a5SGuillaume Chatelet }
168*00c943a5SGuillaume Chatelet
169*00c943a5SGuillaume Chatelet // Generic method to create a `Region` struct with a Span or None if span is
170*00c943a5SGuillaume Chatelet // empty.
171*00c943a5SGuillaume Chatelet template <typename Region>
As(size_t Begin,size_t End)172*00c943a5SGuillaume Chatelet static Optional<Region> As(size_t Begin, size_t End) {
173*00c943a5SGuillaume Chatelet if (auto Span = AsSizeSpan(Begin, End)) {
174*00c943a5SGuillaume Chatelet Region Output;
175*00c943a5SGuillaume Chatelet Output.Span = *Span;
176*00c943a5SGuillaume Chatelet return Output;
177*00c943a5SGuillaume Chatelet }
178*00c943a5SGuillaume Chatelet return None;
179*00c943a5SGuillaume Chatelet }
180*00c943a5SGuillaume Chatelet
181*00c943a5SGuillaume Chatelet // Returns a Loop struct or None if span is empty.
AsLoop(size_t Begin,size_t End,size_t BlockSize)182*00c943a5SGuillaume Chatelet static Optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) {
183*00c943a5SGuillaume Chatelet if (auto Span = AsSizeSpan(Begin, End)) {
184*00c943a5SGuillaume Chatelet Loop Output;
185*00c943a5SGuillaume Chatelet Output.Span = *Span;
186*00c943a5SGuillaume Chatelet Output.BlockSize = BlockSize;
187*00c943a5SGuillaume Chatelet return Output;
188*00c943a5SGuillaume Chatelet }
189*00c943a5SGuillaume Chatelet return None;
190*00c943a5SGuillaume Chatelet }
191*00c943a5SGuillaume Chatelet
192*00c943a5SGuillaume Chatelet // Returns an AlignedLoop struct or None if span is empty.
AsAlignedLoop(size_t Begin,size_t End,size_t BlockSize,size_t Alignment,AlignArg AlignTo)193*00c943a5SGuillaume Chatelet static Optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End,
194*00c943a5SGuillaume Chatelet size_t BlockSize, size_t Alignment,
195*00c943a5SGuillaume Chatelet AlignArg AlignTo) {
196*00c943a5SGuillaume Chatelet if (auto Loop = AsLoop(Begin, End, BlockSize)) {
197*00c943a5SGuillaume Chatelet AlignedLoop Output;
198*00c943a5SGuillaume Chatelet Output.Loop = *Loop;
199*00c943a5SGuillaume Chatelet Output.Alignment = Alignment;
200*00c943a5SGuillaume Chatelet Output.AlignTo = AlignTo;
201*00c943a5SGuillaume Chatelet return Output;
202*00c943a5SGuillaume Chatelet }
203*00c943a5SGuillaume Chatelet return None;
204*00c943a5SGuillaume Chatelet }
205*00c943a5SGuillaume Chatelet
next()206*00c943a5SGuillaume Chatelet Optional<FunctionDescriptor> RandomFunctionGenerator::next() {
207*00c943a5SGuillaume Chatelet if (Solver.check() != z3::sat)
208*00c943a5SGuillaume Chatelet return {};
209*00c943a5SGuillaume Chatelet
210*00c943a5SGuillaume Chatelet z3::model m = Solver.get_model();
211*00c943a5SGuillaume Chatelet
212*00c943a5SGuillaume Chatelet // Helper method to get the current numerical value of a z3::expr.
213*00c943a5SGuillaume Chatelet const auto E = [&m](z3::expr &V) -> int {
214*00c943a5SGuillaume Chatelet return m.eval(V).get_numeral_int();
215*00c943a5SGuillaume Chatelet };
216*00c943a5SGuillaume Chatelet
217*00c943a5SGuillaume Chatelet // Fill is the function descriptor to return.
218*00c943a5SGuillaume Chatelet FunctionDescriptor R;
219*00c943a5SGuillaume Chatelet R.Type = FunctionType(E(Type));
220*00c943a5SGuillaume Chatelet R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd));
221*00c943a5SGuillaume Chatelet R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd));
222*00c943a5SGuillaume Chatelet R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize));
223*00c943a5SGuillaume Chatelet R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd),
224*00c943a5SGuillaume Chatelet E(AlignedLoopBlockSize), E(AlignedAlignment),
225*00c943a5SGuillaume Chatelet AlignArg(E(AlignedArg)));
226*00c943a5SGuillaume Chatelet R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd));
227*00c943a5SGuillaume Chatelet R.ElementClass = ElementTypeClass(E(ElementClass));
228*00c943a5SGuillaume Chatelet
229*00c943a5SGuillaume Chatelet // Express current state as a set of constraints.
230*00c943a5SGuillaume Chatelet z3::expr CurrentLayout =
231*00c943a5SGuillaume Chatelet (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) &&
232*00c943a5SGuillaume Chatelet (ContiguousEnd == E(ContiguousEnd)) &&
233*00c943a5SGuillaume Chatelet (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) &&
234*00c943a5SGuillaume Chatelet (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) &&
235*00c943a5SGuillaume Chatelet (LoopBlockSize == E(LoopBlockSize)) &&
236*00c943a5SGuillaume Chatelet (AlignedLoopBegin == E(AlignedLoopBegin)) &&
237*00c943a5SGuillaume Chatelet (AlignedLoopEnd == E(AlignedLoopEnd)) &&
238*00c943a5SGuillaume Chatelet (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) &&
239*00c943a5SGuillaume Chatelet (AlignedAlignment == E(AlignedAlignment)) &&
240*00c943a5SGuillaume Chatelet (AlignedArg == E(AlignedArg)) &&
241*00c943a5SGuillaume Chatelet (AcceleratorBegin == E(AcceleratorBegin)) &&
242*00c943a5SGuillaume Chatelet (AcceleratorEnd == E(AcceleratorEnd)) &&
243*00c943a5SGuillaume Chatelet (ElementClass == E(ElementClass));
244*00c943a5SGuillaume Chatelet
245*00c943a5SGuillaume Chatelet // Ask solver to never show this configuration ever again.
246*00c943a5SGuillaume Chatelet Solver.add(!CurrentLayout);
247*00c943a5SGuillaume Chatelet return R;
248*00c943a5SGuillaume Chatelet }
249*00c943a5SGuillaume Chatelet
250*00c943a5SGuillaume Chatelet // Make sure `Variable` is one of the provided values.
inSetConstraint(z3::expr & Variable,ArrayRef<int> Values) const251*00c943a5SGuillaume Chatelet z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable,
252*00c943a5SGuillaume Chatelet ArrayRef<int> Values) const {
253*00c943a5SGuillaume Chatelet z3::expr_vector Args(Variable.ctx());
254*00c943a5SGuillaume Chatelet for (int Value : Values)
255*00c943a5SGuillaume Chatelet Args.push_back(Variable == Value);
256*00c943a5SGuillaume Chatelet return z3::mk_or(Args);
257*00c943a5SGuillaume Chatelet }
258*00c943a5SGuillaume Chatelet
addBoundsAndAnchors(z3::expr & Begin,z3::expr & End)259*00c943a5SGuillaume Chatelet void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin,
260*00c943a5SGuillaume Chatelet z3::expr &End) {
261*00c943a5SGuillaume Chatelet // Begin and End are picked amongst a set of predefined values.
262*00c943a5SGuillaume Chatelet Solver.add(inSetConstraint(Begin, kAnchors));
263*00c943a5SGuillaume Chatelet Solver.add(inSetConstraint(End, kAnchors));
264*00c943a5SGuillaume Chatelet Solver.add(Begin >= 0);
265*00c943a5SGuillaume Chatelet Solver.add(Begin <= End);
266*00c943a5SGuillaume Chatelet Solver.add(End <= kMaxSize);
267*00c943a5SGuillaume Chatelet }
268*00c943a5SGuillaume Chatelet
addLoopConstraints(const z3::expr & LoopBegin,const z3::expr & LoopEnd,z3::expr & LoopBlockSize,int LoopMinIter)269*00c943a5SGuillaume Chatelet void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin,
270*00c943a5SGuillaume Chatelet const z3::expr &LoopEnd,
271*00c943a5SGuillaume Chatelet z3::expr &LoopBlockSize,
272*00c943a5SGuillaume Chatelet int LoopMinIter) {
273*00c943a5SGuillaume Chatelet Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize));
274*00c943a5SGuillaume Chatelet Solver.add(LoopBegin == LoopEnd ||
275*00c943a5SGuillaume Chatelet (LoopBegin > (LoopMinIter * LoopBlockSize)));
276*00c943a5SGuillaume Chatelet }
277*00c943a5SGuillaume Chatelet
278*00c943a5SGuillaume Chatelet } // namespace automemcpy
279*00c943a5SGuillaume Chatelet } // namespace llvm
280