1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a model runner using Tensorflow C APIs, allowing the
11 // loading of a model from a command line option.
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TF_API)
16 
17 #include "llvm/Analysis/CallGraph.h"
18 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
19 #include "llvm/Analysis/MLInlineAdvisor.h"
20 #include "llvm/Analysis/Utils/TFUtils.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/Path.h"
25 
26 #include <vector>
27 
28 using namespace llvm;
29 
30 static cl::opt<std::string> TrainingLog(
31     "training-log", cl::Hidden,
32     cl::desc("Path where the development - mode inlining log is saved."));
33 
34 static cl::opt<std::string> TFModelUnderTrainingPath(
35     "ml-inliner-model-under-training", cl::Hidden,
36     cl::desc(R"(Path to SavedModel from the previous training iteration.
37 The directory is also expected to contain a JSON specification of the
38 outputs expected to be logged, where the first entry must be the
39 inlining decision. The file containing the specification should be
40 called output_spec.json. The expected JSON value is an array of
41 dictionaries. Each dictionary should have 2 keys:
42 
43 - "tensor_spec, followed by the TensorSpec description of the
44 output; and
45 - "logging_name", a string indicating the name to use when
46 logging the output values.
47 
48 Example:
49 [
50   {
51     "logging_name" : "some_name",
52     "tensor_spec" : {
53       "name" : "model_name",
54       "port" : 0,
55       "shape" : [2, 3],
56       "type" : "float"
57       }
58   }
59 ]
60 
61 The first value must always correspond to the decision.)"));
62 
63 static cl::opt<std::string> TFOutputSpecOverride(
64     "ml-inliner-output-spec-override", cl::Hidden,
65     cl::desc("Override the path to the output spec json file. See "
66              "-ml-inliner-model-under-training documentation for the "
67              "specification of that file."));
68 
69 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
70                                          cl::Hidden, cl::init("action_"),
71                                          cl::desc("Prefix for feature names."));
72 
73 namespace {
/// The record of a single inlining decision, as handed to TrainingLogger.
/// All fields start out zero / false.
struct InlineEvent {
  /// The decision the default policy would have made.
  int64_t DefaultDecision{0};

  /// The decision that was advised. Identical to DefaultDecision when
  /// training off the default policy.
  int64_t AdvisedDecision{0};

  /// The actual outcome. It is 'false' when inlining failed, even if
  /// AdvisedDecision was true; otherwise it matches AdvisedDecision.
  bool Effect{false};

  /// The size delta: size_after - size_before.
  int64_t Reward{0};
};
91 
92 /// Collect data we may use for training a model, and write it as a textual
93 /// Tensorflow SequenceExample
94 /// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
95 /// protobuf (https://developers.google.com/protocol-buffers).
96 /// Because this is a protobuf, we cannot just stream the events as they come.
97 /// Internally, TrainingLogger stores data in column-major format, because that
98 /// lines up with how TF SequenceExample represents it.
99 class ModelUnderTrainingRunner;
100 class TrainingLogger final {
101 public:
102   TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);
103 
104   /// Log one inlining event.
105   void logInlineEvent(const InlineEvent &Event,
106                       const MLModelRunner &ModelRunner);
107 
108   /// Print the stored tensors.
109   void print();
110 
111 private:
112   StringRef LogFileName;
113   const ModelUnderTrainingRunner *const MUTR;
114   std::unique_ptr<Logger> L;
115   std::vector<bool> Effects;
116   /// There's at least one output. We'll set this to a different value if MUTR
117   /// is avaliable.
118   size_t OutputCount = 1;
119   /// Set these 2 clearly OOB, to make sure we set them later.
120   size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
121   size_t DecisionPos = std::numeric_limits<size_t>::max();
122 };
123 
124 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting
125 /// the offline training scenario. Note that training happens outside of the
126 /// compiler, this facility is concerned with producing training data ("logs").
127 /// This InlineAdvisor can operate in the following modes:
128 ///
129 /// 1) collect logs for the default policy. This is useful for bootstrapping
130 /// training, which will be considerably faster by starting from a reasonable
131 /// policy.
132 ///
133 /// 2) collect logs for the ML policy, using a model from a previous
134 /// training. Potentially, that model uses internally some small random
135 /// perturbation of its weights, to induce exploration (setting this up is the
136 /// responsibility of the training algorithm). The logs would then be used to
137 /// retrain and improve on this model.
138 ///
139 /// 3) use the provided model, with no logging. This is useful for end to end
140 /// validation - the model, in this case, is a release candidate and shouldn't
141 /// have random perturbations. It is a convenience feature: rather than needing
142 /// to take the release candidate model and compile it in 'release' mode,
143 /// validate it, then potentially discard it, it's easier to just pass the model
144 /// to the compiler, albeit compilation would be slower, as a one-off. Once the
145 /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
146 /// release mode. The expectation is that a well-trained model provides a good
147 /// policy over a sufficiently diverse codebase, over many changes (i.e.
148 /// training happens seldom).
149 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
150 public:
151   DevelopmentModeMLInlineAdvisor(
152       Module &M, ModuleAnalysisManager &MAM,
153       std::unique_ptr<MLModelRunner> ModelRunner,
154       std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
155       std::unique_ptr<TrainingLogger> Logger);
156 
157   size_t getTotalSizeEstimate();
158 
159   virtual ~DevelopmentModeMLInlineAdvisor();
160   void updateNativeSizeEstimate(int64_t Change) {
161     *CurrentNativeSize += Change;
162   }
163   void resetNativeSize(Function *F) {
164     FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
165   }
166 
167   std::unique_ptr<MLInlineAdvice>
168   getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
169   std::unique_ptr<MLInlineAdvice>
170   getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
171 
172   Optional<size_t> getNativeSizeEstimate(const Function &F) const;
173 
174 private:
175   bool isLogging() const { return !!Logger; }
176 
177   std::function<bool(CallBase &)> GetDefaultAdvice;
178   const bool IsDoingInference;
179   std::unique_ptr<TrainingLogger> Logger;
180 
181   const Optional<int32_t> InitialNativeSize;
182   Optional<int32_t> CurrentNativeSize;
183 };
184 
185 /// A variant of MLInlineAdvice that tracks all non-trivial inlining
186 /// decisions, for training/logging.
187 class LoggingMLInlineAdvice : public MLInlineAdvice {
188 public:
189   LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
190                         OptimizationRemarkEmitter &ORE, bool Recommendation,
191                         TrainingLogger &Logger,
192                         Optional<size_t> CallerSizeEstimateBefore,
193                         Optional<size_t> CalleeSizeEstimateBefore,
194                         bool DefaultDecision, bool Mandatory = false)
195       : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
196         CallerSizeEstimateBefore(CallerSizeEstimateBefore),
197         CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
198         DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}
199 
200   virtual ~LoggingMLInlineAdvice() = default;
201 
202 private:
203   DevelopmentModeMLInlineAdvisor *getAdvisor() const {
204     return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
205   }
206   void recordInliningImpl() override {
207     MLInlineAdvice::recordInliningImpl();
208     getAdvisor()->resetNativeSize(Caller);
209     int Reward = std::numeric_limits<int>::max();
210     if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
211         !getAdvisor()->isForcedToStop()) {
212       int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
213                             *CalleeSizeEstimateBefore;
214       Reward = NativeSizeAfter -
215                (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
216       getAdvisor()->updateNativeSizeEstimate(Reward);
217     }
218     log(Reward, /*Success=*/true);
219   }
220 
221   void recordInliningWithCalleeDeletedImpl() override {
222     MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
223     getAdvisor()->resetNativeSize(Caller);
224     if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
225         !getAdvisor()->isForcedToStop()) {
226       int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
227       int Reward = NativeSizeAfter -
228                    (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
229       getAdvisor()->updateNativeSizeEstimate(Reward);
230       log(Reward, /*Success=*/true);
231     }
232   }
233 
234   void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
235     MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
236     log(NoReward, /*Success=*/false);
237   }
238 
239   void recordUnattemptedInliningImpl() override {
240     MLInlineAdvice::recordUnattemptedInliningImpl();
241     log(NoReward, /*Success=*/false);
242   }
243 
244   void log(int64_t Reward, bool Success) {
245     if (Mandatory)
246       return;
247     InlineEvent Event;
248     Event.AdvisedDecision = isInliningRecommended();
249     Event.DefaultDecision = DefaultDecision;
250     Event.Effect = Success;
251     Event.Reward = Reward;
252     Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
253   }
254 
255   static const int64_t NoReward = 0;
256   TrainingLogger &Logger;
257   const Optional<size_t> CallerSizeEstimateBefore;
258   const Optional<size_t> CalleeSizeEstimateBefore;
259   const int64_t DefaultDecision;
260   const int64_t Mandatory;
261 };
262 
263 /// A pseudo model runner. We use it to store feature values when collecting
264 /// logs for the default policy, but never ask it to 'run'.
265 class NoInferenceModelRunner : public MLModelRunner {
266 public:
267   NoInferenceModelRunner(LLVMContext &Ctx)
268       : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
269   void setFeature(FeatureIndex Index, int64_t Value) override {
270     Features[static_cast<int>(Index)] = Value;
271   }
272 
273   int64_t getFeature(int Index) const override { return Features[Index]; }
274   bool run() override {
275     llvm_unreachable("We shouldn't call run on this model runner.");
276   }
277 
278 private:
279   InlineFeatures Features;
280 };
281 
282 /// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
283 /// to dynamically load and evaluate a TF SavedModel
284 /// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
285 /// sacrificed for ease of use while training.
286 class ModelUnderTrainingRunner final : public MLModelRunner {
287 public:
288   ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);
289 
290   bool run() override;
291 
292   // Disallows copy and assign.
293   ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
294   ModelUnderTrainingRunner &
295   operator=(const ModelUnderTrainingRunner &) = delete;
296 
297   void setFeature(FeatureIndex Index, int64_t Value) override;
298   int64_t getFeature(int Index) const override;
299   bool isValid() const { return !!Evaluator; }
300 
301   const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
302     return OutputSpecs;
303   }
304 
305   const Optional<TFModelEvaluator::EvaluationResult> &
306   lastEvaluationResult() const {
307     return LastEvaluationResult;
308   }
309 
310 private:
311   std::unique_ptr<TFModelEvaluator> Evaluator;
312   std::vector<LoggedFeatureSpec> OutputSpecs;
313   Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;
314 
315   // The training framework needs some additional features.
316   const std::vector<TensorSpec> TrainingOnlyFeatures{
317       TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
318       TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
319       TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
320       TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
321 };
322 } // namespace
323 
324 TrainingLogger::TrainingLogger(StringRef LogFileName,
325                                const ModelUnderTrainingRunner *MUTR)
326     : LogFileName(LogFileName), MUTR(MUTR) {
327   // The first output is the inlining decision.
328   if (MUTR)
329     OutputCount = MUTR->outputLoggedFeatureSpecs().size();
330   std::vector<LoggedFeatureSpec> FT;
331 
332   for (size_t I = 0; I < NumberOfFeatures; ++I)
333     FT.push_back(
334         {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
335   if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
336     FT.insert(FT.end(), MUTR->outputLoggedFeatureSpecs().begin() + 1,
337               MUTR->outputLoggedFeatureSpecs().end());
338 
339   DefaultDecisionPos = FT.size();
340   FT.push_back(
341       {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});
342 
343   DecisionPos = FT.size();
344   FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});
345 
346   L = std::make_unique<Logger>(
347       FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
348       InlineSizeEstimatorAnalysis::isEvaluatorRequested());
349 }
350 
351 /// Log one inlining event.
352 void TrainingLogger::logInlineEvent(const InlineEvent &Event,
353                                     const MLModelRunner &ModelRunner) {
354   size_t CurrentFeature = 0;
355   for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
356     int64_t F = ModelRunner.getFeature(CurrentFeature);
357     L->logTensorValue(CurrentFeature, &F);
358   }
359 
360   for (size_t I = 1; I < OutputCount; ++I) {
361     const auto &Result = *MUTR->lastEvaluationResult();
362     auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec;
363     const char *RawData =
364         reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
365     L->logTensorValue(CurrentFeature, RawData,
366                       Spec.getElementCount() * Spec.getElementByteSize());
367     ++CurrentFeature;
368   }
369 
370   assert(CurrentFeature == DefaultDecisionPos);
371   L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
372   L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
373   if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
374     L->logReward(Event.Reward);
375 
376   // For debugging / later use
377   Effects.push_back(Event.Effect);
378 }
379 
380 void TrainingLogger::print() {
381   std::error_code EC;
382   raw_fd_ostream OutFile(LogFileName, EC);
383   L->print(OutFile);
384 }
385 
386 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
387     Module &M, ModuleAnalysisManager &MAM,
388     std::unique_ptr<MLModelRunner> ModelRunner,
389     std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
390     std::unique_ptr<TrainingLogger> Logger)
391     : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
392       GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
393       Logger(std::move(Logger)),
394       InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
395       CurrentNativeSize(InitialNativeSize) {
396   // We cannot have the case of neither inference nor logging.
397   assert(IsDoingInference || isLogging());
398 }
399 
400 DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
401   if (isLogging())
402     Logger->print();
403 }
404 
405 Optional<size_t>
406 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
407   if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
408     return None;
409   auto &R =
410       FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
411   if (!R) {
412     F.getParent()->getContext().emitError(
413         "Native size estimator is not present.");
414     return 0;
415   }
416   return *R;
417 }
418 
419 std::unique_ptr<MLInlineAdvice>
420 DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
421     CallBase &CB, OptimizationRemarkEmitter &ORE) {
422   if (!isLogging())
423     return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
424 
425   return std::make_unique<LoggingMLInlineAdvice>(
426       /*Advisor=*/this,
427       /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/*Logger,
428       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
429       /*CalleeSizeEstimateBefore=*/
430       getNativeSizeEstimate(*CB.getCalledFunction()),
431       /*DefaultDecision=*/true, /*Mandatory*/ true);
432 }
433 
434 std::unique_ptr<MLInlineAdvice>
435 DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
436     CallBase &CB, OptimizationRemarkEmitter &ORE) {
437   if (IsDoingInference && !isLogging())
438     return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
439 
440   bool DefaultAdvice = GetDefaultAdvice(CB);
441   auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
442   return std::make_unique<LoggingMLInlineAdvice>(
443       /*Advisor=*/this,
444       /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
445       /*Logger=*/*Logger,
446       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
447       /*CalleeSizeEstimateBefore=*/
448       getNativeSizeEstimate(*CB.getCalledFunction()),
449       /*DefaultDecision=*/DefaultAdvice);
450 }
451 
452 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
453   if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
454     return 0;
455   size_t Ret = 0;
456   for (auto &F : M) {
457     if (F.isDeclaration())
458       continue;
459     if (isFunctionDeleted(&F))
460       continue;
461     Ret += *getNativeSizeEstimate(F);
462   }
463   return Ret;
464 }
465 
466 ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
467                                                    const std::string &ModelPath)
468     : MLModelRunner(Ctx) {
469   std::vector<TensorSpec> InputSpecs;
470   for (size_t I = 0; I < NumberOfFeatures; ++I)
471     InputSpecs.push_back(
472         TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
473   InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
474                     TrainingOnlyFeatures.end());
475   SmallVector<char, 128> OutputSpecsPath;
476   StringRef OutputSpecPath = TFOutputSpecOverride;
477   if (OutputSpecPath.empty()) {
478     llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
479     OutputSpecPath = {OutputSpecsPath.data(), OutputSpecsPath.size()};
480   }
481 
482   if (!loadOutputSpecs(Ctx, OutputSpecPath, DecisionName, OutputSpecs))
483     return;
484 
485   Evaluator = std::make_unique<TFModelEvaluator>(
486       ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
487       OutputSpecs.size());
488   if (!Evaluator || !Evaluator->isValid()) {
489     Ctx.emitError("Failed to create inliner saved model evaluator");
490     Evaluator.reset();
491     return;
492   }
493 }
494 
495 bool ModelUnderTrainingRunner::run() {
496   LastEvaluationResult = Evaluator->evaluate();
497   if (!LastEvaluationResult.hasValue()) {
498     Ctx.emitError("Error evaluating model.");
499     return false;
500   }
501   int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0);
502   return static_cast<bool>(Decision);
503 }
504 
505 int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
506   return *Evaluator->getInput<int64_t>(Index);
507 }
508 
509 void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
510   size_t NumericIndex = static_cast<size_t>(Index);
511   *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
512 }
513 
514 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
515     Module &M, ModuleAnalysisManager &MAM,
516     std::function<bool(CallBase &)> GetDefaultAdvice) {
517   auto &Ctx = M.getContext();
518   std::unique_ptr<MLModelRunner> Runner;
519   ModelUnderTrainingRunner *MUTRPtr = nullptr;
520   bool IsDoingInference = false;
521   if (TFModelUnderTrainingPath.empty())
522     Runner.reset(new NoInferenceModelRunner(Ctx));
523   else {
524     auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
525         Ctx, TFModelUnderTrainingPath);
526     if (!MUTR || !MUTR->isValid()) {
527       Ctx.emitError("Could not load the policy model from the provided path");
528       return nullptr;
529     }
530     IsDoingInference = true;
531     MUTRPtr = MUTR.get();
532     Runner = std::move(MUTR);
533   }
534   std::unique_ptr<TrainingLogger> Logger;
535   if (!TrainingLog.empty())
536     Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr);
537 
538   return std::make_unique<DevelopmentModeMLInlineAdvisor>(
539       M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
540       std::move(Logger));
541 }
542 #endif // defined(LLVM_HAVE_TF_API)
543