1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file implements a model runner using TensorFlow C APIs, allowing the
// loading of a model from a command line option.
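//
// Example invocation (illustrative only; requires an opt built with the
// TensorFlow C API, i.e. LLVM_HAVE_TF_API; the development-mode advisor
// itself is selected outside this file, typically via
// -enable-ml-inliner=development):
//
//   opt -passes=scc-oz-module-inliner -enable-ml-inliner=development \
//       -ml-inliner-model-under-training=<savedmodel dir> \
//       -training-log=<log path> -S input.ll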
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TF_API)
16 
17 #include "llvm/Analysis/CallGraph.h"
18 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
19 #include "llvm/Analysis/MLInlineAdvisor.h"
20 #include "llvm/Analysis/Utils/TFUtils.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/ManagedStatic.h"
24 
#include <limits>
#include <vector>
26 
27 using namespace llvm;
28 
29 static cl::opt<std::string> TrainingLog(
30     "training-log", cl::Hidden,
31     cl::desc("Path where the development - mode inlining log is saved."));
32 
33 static cl::opt<std::string> TFModelUnderTrainingPath(
34     "ml-inliner-model-under-training", cl::Hidden,
35     cl::desc("Path to SavedModel from the previous training iteration."));
36 
37 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
38                                          cl::Hidden, cl::init("action_"),
39                                          cl::desc("Prefix for feature names."));
40 
41 static cl::opt<std::string> TFDecisionName(
42     "ml-inliner-trained-model-decision-name", cl::Hidden,
43     cl::init("StatefulPartitionedCall"),
44     cl::desc("Name of the graph operation representing the decision."));
45 
46 namespace {
47 /// An InlineEvent, used by TrainingLogger.
48 struct InlineEvent {
49   /// What the default policy's decision would have been.
50   bool DefaultDecision = false;
51 
52   /// What we advised. When training off the default policy, this is the same as
53   /// DefaultDecision.
54   bool AdvisedDecision = false;
55 
  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise, it agrees with
  /// AdvisedDecision.
59   bool Effect = false;
60 
61   /// What the change in size was: size_after - size_before
62   int64_t Reward = 0;
63 };
64 
/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
67 /// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
68 /// protobuf (https://developers.google.com/protocol-buffers).
69 /// Because this is a protobuf, we cannot just stream the events as they come.
70 /// Internally, TrainingLogger stores data in column-major format, because that
71 /// lines up with how TF SequenceExample represents it.
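///
/// For illustration only (a sketch of the textual format produced by
/// printTensor below; the actual keys are the FeatureNameMap entries plus the
/// decision, default-decision and reward tensors), a log with one tensor and
/// a single event would look roughly like:
///
///   feature_lists: {
///     feature_list: {
///       key: "<feature name>" value: {
///         feature: { int64_list: { value: [42] } }
///       }
///     }
///   }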
72 class TrainingLogger final {
73 public:
74   TrainingLogger() {
75     for (size_t I = 0; I < NumberOfFeatures; ++I) {
76       Features.push_back(InlineFeatures());
77     }
78   }
79 
80   /// Log one inlining event.
81   void logInlineEvent(const InlineEvent &Event,
82                       const MLModelRunner &ModelRunner) {
83     for (size_t I = 0; I < NumberOfFeatures; ++I) {
84       Features[I].push_back(ModelRunner.getFeature(I));
85     }
86     Decisions.push_back(Event.AdvisedDecision);
87     Effects.push_back(Event.Effect);
88     Rewards.push_back(Event.Reward);
89     DefaultDecisions.push_back(Event.DefaultDecision);
90   }
91 
92   void printTensor(raw_fd_ostream &OutFile) {
93     if (DefaultDecisions.empty())
94       return;
95     OutFile << "feature_lists: {\n";
96 
97     for (size_t I = 0; I < Features.size(); I++) {
98       writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
99     }
100     writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
101     writeTensor(OutFile, DecisionName, Decisions);
102     writeTensor(OutFile, RewardName, Rewards);
103 
104     OutFile << "}\n";
105   }
106 
107 private:
108   template <typename T>
109   void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
110                    const std::vector<T> &Tensor) {
111     OutFile << "  feature_list: {\n";
112     OutFile << "    key: "
113             << "\"" << TensorName << "\" ";
114     OutFile << "value: {\n";
115     for (const auto &Feature : Tensor) {
116       OutFile << "      feature: { int64_list: { value: [" << Feature
117               << "] } }\n";
118     }
119     OutFile << "    }\n";
120     OutFile << "  }\n";
121   }
122 
123   std::vector<InlineFeatures> Features;
124   std::vector<bool> DefaultDecisions;
125   std::vector<bool> Decisions;
126   std::vector<bool> Effects;
127   std::vector<int64_t> Rewards;
129 };
130 
/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs").
134 /// This InlineAdvisor can operate in the following modes:
135 ///
136 /// 1) collect logs for the default policy. This is useful for bootstrapping
137 /// training, which will be considerably faster by starting from a reasonable
138 /// policy.
139 ///
140 /// 2) collect logs for the ML policy, using a model from a previous
141 /// training. Potentially, that model uses internally some small random
142 /// perturbation of its weights, to induce exploration (setting this up is the
143 /// responsibility of the training algorithm). The logs would then be used to
144 /// retrain and improve on this model.
145 ///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than taking
/// the release candidate model, compiling it in 'release' mode, validating it,
/// and then potentially discarding it, it's easier to pass the model to the
/// compiler as a one-off, albeit with slower compilation. Once the model
/// behaves satisfactorily, it can be compiled AOT, for efficiency, in release
/// mode. The expectation is that a well-trained model provides a good policy
/// over a sufficiently diverse codebase, over many changes (i.e. training
/// happens seldom).
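///
/// In terms of the flags defined in this file (a sketch; selecting the
/// development-mode advisor itself happens outside this file), the modes
/// correspond roughly to:
///   1) -training-log=<path>, with no model under training;
///   2) -training-log=<path>
///      -ml-inliner-model-under-training=<savedmodel dir>;
///   3) -ml-inliner-model-under-training=<savedmodel dir>, with no training
///      log.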
156 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
157 public:
158   DevelopmentModeMLInlineAdvisor(
159       Module &M, ModuleAnalysisManager &MAM,
160       std::unique_ptr<MLModelRunner> ModelRunner,
161       std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);
162 
163   size_t getTotalSizeEstimate();
164 
165   virtual ~DevelopmentModeMLInlineAdvisor();
166   void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
167   void resetNativeSize(Function *F) {
168     FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
169   }
170 
171   std::unique_ptr<MLInlineAdvice>
172   getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
173   std::unique_ptr<MLInlineAdvice>
174   getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
175 
176   size_t getNativeSizeEstimate(const Function &F) const;
177 
178 private:
179   bool isLogging() const { return !TrainingLog.empty(); }
180 
181   std::function<bool(CallBase &)> GetDefaultAdvice;
182   TrainingLogger Logger;
183   const bool IsDoingInference;
184 
185   const int32_t InitialNativeSize;
186   int32_t CurrentNativeSize = 0;
187 };
188 
189 /// A variant of MLInlineAdvice that tracks all non-trivial inlining
190 /// decisions, for training/logging.
191 class LoggingMLInlineAdvice : public MLInlineAdvice {
192 public:
193   LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
194                         OptimizationRemarkEmitter &ORE, bool Recommendation,
195                         TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
196                         size_t CalleeSizeEstimateBefore, bool DefaultDecision)
197       : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
198         CallerSizeEstimateBefore(CallerSizeEstimateBefore),
199         CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
200         DefaultDecision(DefaultDecision) {}
201 
202   virtual ~LoggingMLInlineAdvice() = default;
203 
204 private:
205   DevelopmentModeMLInlineAdvisor *getAdvisor() const {
206     return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
207   }
208   void recordInliningImpl() override {
209     MLInlineAdvice::recordInliningImpl();
210     getAdvisor()->resetNativeSize(Caller);
211     int Reward = std::numeric_limits<int>::max();
212     if (!getAdvisor()->isForcedToStop()) {
213       int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
214                             CalleeSizeEstimateBefore;
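      // Since the callee still exists after this inlining, adding its
      // pre-inlining size back means the reward reduces to the caller's size
      // growth (positive reward means the code got bigger).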
215       Reward = NativeSizeAfter -
216                (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
217       getAdvisor()->updateNativeSizeEstimate(Reward);
218     }
219     log(Reward, /*Success=*/true);
220   }
221 
222   void recordInliningWithCalleeDeletedImpl() override {
223     MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
224     getAdvisor()->resetNativeSize(Caller);
225     if (!getAdvisor()->isForcedToStop()) {
226       int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
227       int Reward = NativeSizeAfter -
228                    (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
229       getAdvisor()->updateNativeSizeEstimate(Reward);
230       log(Reward, /*Success=*/true);
231     }
232   }
233 
234   void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
235     MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
236     log(NoReward, /*Success=*/false);
237   }
238 
239   void recordUnattemptedInliningImpl() override {
240     MLInlineAdvice::recordUnattemptedInliningImpl();
241     log(NoReward, /*Success=*/false);
242   }
243 
244   void log(int64_t Reward, bool Success) {
245     InlineEvent Event;
246     Event.AdvisedDecision = isInliningRecommended();
247     Event.DefaultDecision = DefaultDecision;
248     Event.Effect = Success;
249     Event.Reward = Reward;
250     Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
251   }
252 
253   static const int64_t NoReward = 0;
254   TrainingLogger &Logger;
255   const size_t CallerSizeEstimateBefore;
256   const size_t CalleeSizeEstimateBefore;
257   const bool DefaultDecision;
258 };
259 
260 /// A pseudo model runner. We use it to store feature values when collecting
261 /// logs for the default policy, but never ask it to 'run'.
262 class NoInferenceModelRunner : public MLModelRunner {
263 public:
264   NoInferenceModelRunner(LLVMContext &Ctx)
265       : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
266   void setFeature(FeatureIndex Index, int64_t Value) override {
267     Features[static_cast<int>(Index)] = Value;
268   }
269 
270   int64_t getFeature(int Index) const override { return Features[Index]; }
271   bool run() override {
272     llvm_unreachable("We shouldn't call run on this model runner.");
273   }
274 
275 private:
276   InlineFeatures Features;
277 };
278 
279 /// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
280 /// to dynamically load and evaluate a TF SavedModel
281 /// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
282 /// sacrificed for ease of use while training.
283 class ModelUnderTrainingRunner final : public MLModelRunner {
284 public:
285   ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);
286 
287   bool run() override;
288 
289   // Disallows copy and assign.
290   ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
291   ModelUnderTrainingRunner &
292   operator=(const ModelUnderTrainingRunner &) = delete;
293 
294   void setFeature(FeatureIndex Index, int64_t Value) override;
295   int64_t getFeature(int Index) const override;
296   bool isValid() const { return !!Evaluator; }
297 
298 private:
299   std::unique_ptr<TFModelEvaluator> Evaluator;
300 
  // The training framework needs some additional features that just need to
  // be set to 0.
303   struct TensorSpec {
304     std::string Name;
305     std::function<void(TFModelEvaluator *, size_t Index,
306                        const std::vector<int64_t> &Dim)>
307         Initializer;
308   };
309 
310   const std::vector<TensorSpec> TrainingOnlyFeatures{
311       {"inlining_default",
312        [](TFModelEvaluator *Evaluator, size_t Index,
313           const std::vector<int64_t> &Dim) {
314          Evaluator->initInput<int64_t>(Index, Dim);
315        }},
316       {"discount",
317        [](TFModelEvaluator *Evaluator, size_t Index,
318           const std::vector<int64_t> &Dim) {
319          Evaluator->initInput<float>(Index, Dim);
320        }},
321       {"reward",
322        [](TFModelEvaluator *Evaluator, size_t Index,
323           const std::vector<int64_t> &Dim) {
324          Evaluator->initInput<float>(Index, Dim);
325        }},
326       {"step_type", [](TFModelEvaluator *Evaluator, size_t Index,
327                        const std::vector<int64_t> &Dim) {
328          Evaluator->initInput<int32_t>(Index, Dim);
329        }}};
330 };
331 } // namespace
332 
333 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
334     Module &M, ModuleAnalysisManager &MAM,
335     std::unique_ptr<MLModelRunner> ModelRunner,
336     std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
337     : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
338       GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
339       InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
340       CurrentNativeSize(InitialNativeSize) {
341   // We cannot have the case of neither inference nor logging.
342   assert(IsDoingInference || isLogging());
343 }
344 
DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  if (ErrorCode) {
    M.getContext().emitError("Cannot open training log file: " +
                             ErrorCode.message());
    return;
  }
  Logger.printTensor(OutFile);
}
352 
353 size_t
354 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
355   auto &R =
356       FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
357   if (!R) {
358     F.getParent()->getContext().emitError(
359         "Native size estimator is not present.");
360     return 0;
361   }
362   return *R;
363 }
364 
365 std::unique_ptr<MLInlineAdvice>
366 DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
367     CallBase &CB, OptimizationRemarkEmitter &ORE) {
368   if (!isLogging())
369     return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
370   return std::make_unique<LoggingMLInlineAdvice>(
371       /*Advisor=*/this,
372       /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
373       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
374       /*CalleeSizeEstimateBefore=*/
375       getNativeSizeEstimate(*CB.getCalledFunction()),
376       /*DefaultDecision=*/true);
377 }
378 
379 std::unique_ptr<MLInlineAdvice>
380 DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
381     CallBase &CB, OptimizationRemarkEmitter &ORE) {
382   if (IsDoingInference && !isLogging())
383     return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
384 
385   bool DefaultAdvice = GetDefaultAdvice(CB);
386   auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
387   return std::make_unique<LoggingMLInlineAdvice>(
388       /*Advisor=*/this,
389       /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
390       /*Logger=*/Logger,
391       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
392       /*CalleeSizeEstimateBefore=*/
393       getNativeSizeEstimate(*CB.getCalledFunction()),
394       /*DefaultDecision=*/DefaultAdvice);
395 }
396 
397 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
398   size_t Ret = 0;
399   for (auto &F : M) {
400     if (F.isDeclaration())
401       continue;
402     if (isFunctionDeleted(&F))
403       continue;
404     Ret += getNativeSizeEstimate(F);
405   }
406   return Ret;
407 }
408 
409 ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
410                                                    const std::string &ModelPath)
411     : MLModelRunner(Ctx) {
412   std::vector<std::string> InputNames;
413   std::vector<std::string> OutputNames;
414   for (size_t I = 0; I < NumberOfFeatures; ++I)
415     InputNames.push_back(TFFeedPrefix + FeatureNameMap[I]);
416   for (size_t I = 0; I < TrainingOnlyFeatures.size(); ++I)
417     InputNames.push_back(TFFeedPrefix + TrainingOnlyFeatures[I].Name);
418   OutputNames.push_back(TFDecisionName);
419 
420   Evaluator =
421       std::make_unique<TFModelEvaluator>(ModelPath, InputNames, OutputNames);
422   if (!Evaluator || !Evaluator->isValid()) {
423     Ctx.emitError("Failed to create inliner saved model evaluator");
424     Evaluator.reset();
425     return;
426   }
427 
428   static const std::vector<int64_t> Dim{1};
429 
430   size_t InputIndex = 0;
431   for (; InputIndex < NumberOfFeatures; ++InputIndex) {
432     Evaluator->initInput<int64_t>(InputIndex, Dim);
433   }
434 
435   for (; InputIndex < InputNames.size(); ++InputIndex) {
436     TrainingOnlyFeatures[InputIndex - NumberOfFeatures].Initializer(
437         Evaluator.get(), InputIndex, Dim);
438   }
439 }
440 
441 bool ModelUnderTrainingRunner::run() {
442   auto ER = Evaluator->evaluate();
443   if (!ER.hasValue()) {
444     Ctx.emitError("Error evaluating model.");
445     return false;
446   }
447   int64_t Decision = *ER->getTensorValue<int64_t>(0);
448   return static_cast<bool>(Decision);
449 }
450 
451 int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
452   return *Evaluator->getInput<int64_t>(Index);
453 }
454 
455 void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
456   size_t NumericIndex = static_cast<size_t>(Index);
457   *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
458 }
459 
460 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
461     Module &M, ModuleAnalysisManager &MAM,
462     std::function<bool(CallBase &)> GetDefaultAdvice) {
463   auto &Ctx = M.getContext();
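  // Training logs need the size estimator, which supplies the size-based
  // reward; conversely, the estimator is only useful here when logging.
  // Require that both are enabled, or neither.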
464   if (TrainingLog.empty() !=
465       !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
466     Ctx.emitError("For development mode, if training logs are requested, then "
467                   "a size estimator must be available; either that, or neither "
468                   "are specified.");
469     return nullptr;
470   }
471 
472   std::unique_ptr<MLModelRunner> Runner;
473 
474   bool IsDoingInference = false;
475   if (TFModelUnderTrainingPath.empty())
476     Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    Runner = std::move(MUTR);
  }
486   return std::make_unique<DevelopmentModeMLInlineAdvisor>(
487       M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
488 }
489 #endif // defined(LLVM_HAVE_TF_API)
490