//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));

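// For reference, a development-mode run typically combines the flags above;
// for example (paths are placeholders, and the development-mode advisor itself
// is selected elsewhere, e.g. via -enable-ml-inliner=development where that
// option is available):
//   -training-log=<path/to/log.txt>
//   -ml-inliner-model-under-training=<path/to/saved_model>
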
namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise, it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
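///
/// For illustration, a log containing a single column named "some_feature"
/// (the name here is just an example) and one recorded event would be printed
/// roughly as:
/// \code
///   feature_lists: {
///     feature_list: {
///       key: "some_feature" value: {
///         feature: { int64_list: { value: [1] } }
///       }
///     }
///   }
/// \endcode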
class TrainingLogger final {
public:
  TrainingLogger() {
    for (size_t I = 0; I < NumberOfFeatures; ++I) {
      Features.push_back(InlineFeatures());
    }
  }

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner) {
    for (size_t I = 0; I < NumberOfFeatures; ++I) {
      Features[I].push_back(ModelRunner.getFeature(I));
    }
    Decisions.push_back(Event.AdvisedDecision);
    Effects.push_back(Event.Effect);
    Rewards.push_back(Event.Reward);
    DefaultDecisions.push_back(Event.DefaultDecision);
  }

  void printTensor(raw_fd_ostream &OutFile) {
    if (DefaultDecisions.empty())
      return;
    OutFile << "feature_lists: {\n";

    for (size_t I = 0; I < Features.size(); I++) {
      writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
    }
    writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
    writeTensor(OutFile, DecisionName, Decisions);
    writeTensor(OutFile, RewardName, Rewards);

    OutFile << "}\n";
  }

private:
  template <typename T>
  void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                   const std::vector<T> &Tensor) {
    OutFile << "  feature_list: {\n";
    OutFile << "    key: "
            << "\"" << TensorName << "\" ";
    OutFile << "value: {\n";
    for (const auto &Feature : Tensor) {
      OutFile << "      feature: { int64_list: { value: [" << Feature
              << "] } }\n";
    }
    OutFile << "    }\n";
    OutFile << "  }\n";
  }

  std::vector<InlineFeatures> Features;
  std::vector<bool> DefaultDecisions;
  std::vector<bool> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, even though compilation will be slower, as a one-off. Once
/// the model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
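///
/// In terms of the flags above: mode 1) corresponds to -training-log without
/// -ml-inliner-model-under-training, mode 2) to both flags being set, and
/// mode 3) to -ml-inliner-model-under-training without -training-log.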
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !TrainingLog.empty(); }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  TrainingLogger Logger;
  const bool IsDoingInference;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
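      // The reward is the estimated native size delta caused by this inlining.
      // The callee's pre-inlining size is added back because the callee body
      // still exists here (it was not deleted). Illustrative numbers only:
      // caller 100 and callee 40 before, caller 120 after =>
      // reward = (120 + 40) - (100 + 40) = 20.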
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
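      // Unlike recordInliningImpl above, the callee has been deleted, so its
      // pre-inlining size is not added back to the post-inlining estimate.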
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features that just need to
  // be set to 0.
  struct TensorSpec {
    std::string Name;
    std::function<void(TFModelEvaluator *, size_t Index,
                       const std::vector<int64_t> &Dim)>
        Initializer;
  };

  const std::vector<TensorSpec> TrainingOnlyFeatures{
      {"inlining_default",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<int64_t>(Index, Dim);
       }},
      {"discount",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<float>(Index, Dim);
       }},
      {"reward",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<float>(Index, Dim);
       }},
      {"step_type", [](TFModelEvaluator *Evaluator, size_t Index,
                       const std::vector<int64_t> &Dim) {
         Evaluator->initInput<int32_t>(Index, Dim);
       }}};
};
} // namespace

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  Logger.printTensor(OutFile);
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

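  // When logging, we need the default policy's decision for the log; if a
  // model was provided we follow its recommendation, otherwise the default
  // advice is also what we recommend.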
  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
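  // Feed names are formed by prepending TFFeedPrefix ("action_" by default) to
  // each feature name in FeatureNameMap, followed by the training-only
  // features below; the decision is fetched from the output op named by
  // TFDecisionName.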
  std::vector<std::string> InputNames;
  std::vector<std::string> OutputNames;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputNames.push_back(TFFeedPrefix + FeatureNameMap[I]);
  for (size_t I = 0; I < TrainingOnlyFeatures.size(); ++I)
    InputNames.push_back(TFFeedPrefix + TrainingOnlyFeatures[I].Name);
  OutputNames.push_back(TFDecisionName);

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputNames, OutputNames);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }

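  // All model inputs are single-element tensors, hence a shape of {1}.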
  static const std::vector<int64_t> Dim{1};

  size_t InputIndex = 0;
  for (; InputIndex < NumberOfFeatures; ++InputIndex) {
    Evaluator->initInput<int64_t>(InputIndex, Dim);
  }

  for (; InputIndex < InputNames.size(); ++InputIndex) {
    TrainingOnlyFeatures[InputIndex - NumberOfFeatures].Initializer(
        Evaluator.get(), InputIndex, Dim);
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; either that, or neither "
                  "are specified.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    Runner = std::move(MUTR);
  }
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
}