//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using the TensorFlow C API, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"

#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));
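
// For illustration (not part of the flags' own documentation): with the
// defaults above, the model under training is fed input tensors named
// "action_<feature name>" (see how TFFeedPrefix is prepended to entries of
// FeatureNameMap in ModelUnderTrainingRunner's constructor below), and the
// inlining decision is fetched from the "StatefulPartitionedCall" op.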

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inlining
  /// error, even if AdvisedDecision were true; otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
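///
/// For illustration only, one feature list in the produced log looks roughly
/// like this (the feature name and values are made up; see print() and
/// writeTensor() below for the exact layout):
///
///   feature_lists: {
///     feature_list: {
///       key: "callee_basic_block_count" value: {
///         feature: { int64_list: { value: [10] } }
///         feature: { int64_list: { value: [4] } }
///       }
///     }
///     ...
///   }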
class TrainingLogger final {
public:
  TrainingLogger();

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print(raw_fd_ostream &OutFile);

private:
  template <typename T>
  void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                   const std::vector<T> &Tensor);

  std::vector<InlineFeatures> Features;
  std::vector<bool> DefaultDecisions;
  std::vector<bool> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training itself happens outside of
/// the compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster when starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous training
/// iteration. Potentially, that model internally uses some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation: the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than
/// compiling the release candidate model in 'release' mode, validating it, and
/// then potentially discarding it, it's easier to just pass the model to the
/// compiler as a one-off, even though compilation will be slower. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
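///
/// As a rough sketch (the advisor mode itself is selected elsewhere, e.g. via
/// -enable-ml-inliner=development), the modes above correspond to flag
/// combinations like:
///   1) -training-log=<path>
///   2) -training-log=<path> -ml-inliner-model-under-training=<saved model dir>
///   3) -ml-inliner-model-under-training=<saved model dir>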
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !TrainingLog.empty(); }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  TrainingLogger Logger;
  const bool IsDoingInference;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision,
                        bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }
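  // Worked example for the reward computation in recordInliningImpl() above,
  // with made-up numbers: if the caller was estimated at 200 bytes and the
  // callee at 50 before inlining, and the caller is estimated at 230 after
  // inlining (the callee was not deleted, so its 50 bytes still exist), then
  // NativeSizeAfter = 230 + 50 = 280 and Reward = 280 - (200 + 50) = 30, i.e.
  // the change in native size attributed to this decision.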

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
  const bool Mandatory;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
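/// The ModelPath argument is expected to name a SavedModel directory on disk
/// (typically one containing a saved_model.pb plus a variables/ subdirectory),
/// as produced by the training side.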
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
} // namespace

TrainingLogger::TrainingLogger() {
  for (size_t I = 0; I < NumberOfFeatures; ++I) {
    Features.push_back(InlineFeatures());
  }
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  for (size_t I = 0; I < NumberOfFeatures; ++I) {
    Features[I].push_back(ModelRunner.getFeature(I));
  }
  Decisions.push_back(Event.AdvisedDecision);
  Effects.push_back(Event.Effect);
  Rewards.push_back(Event.Reward);
  DefaultDecisions.push_back(Event.DefaultDecision);
}

void TrainingLogger::print(raw_fd_ostream &OutFile) {
  if (DefaultDecisions.empty())
    return;
  OutFile << "feature_lists: {\n";

  for (size_t I = 0; I < Features.size(); I++) {
    writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
  }
  writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
  writeTensor(OutFile, DecisionName, Decisions);
  writeTensor(OutFile, RewardName, Rewards);

  OutFile << "}\n";
}

template <typename T>
void TrainingLogger::writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                                 const std::vector<T> &Tensor) {
  OutFile << "  feature_list: {\n";
  OutFile << "    key: "
          << "\"" << TensorName << "\" ";
  OutFile << "value: {\n";
  for (const auto &Feature : Tensor) {
    OutFile << "      feature: { int64_list: { value: [" << Feature
            << "] } }\n";
  }
  OutFile << "    }\n";
  OutFile << "  }\n";
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  Logger.print(OutFile);
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  std::vector<TensorSpec> OutputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
                    TrainingOnlyFeatures.end());
  OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1}));

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  // Logging and the size estimator (used to compute rewards) must either both
  // be requested or both be absent.
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; either that, or neither "
                  "is specified.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    // std::make_unique never returns null, so check the runner's validity,
    // which reflects whether the saved model could actually be loaded.
    if (!MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    Runner = std::move(MUTR);
  }
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
}
#endif // defined(LLVM_HAVE_TF_API)