//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
// Exceptions. See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
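///
/// For illustration, the emitted log looks roughly like the sketch below: one
/// feature_list per feature, plus the default-decision, decision and reward
/// tensors (named by FeatureNameMap and the corresponding *Name constants),
/// each holding one feature entry per logged event:
///
///   feature_lists: {
///     feature_list: {
///       key: "<tensor name>" value: {
///         feature: { int64_list: { value: [<value>] } }
///         ...
///       }
///     }
///     ...
///   }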
class TrainingLogger final {
public:
  TrainingLogger() {
    for (size_t I = 0; I < NumberOfFeatures; ++I) {
      Features.push_back(InlineFeatures());
    }
  }

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner) {
    for (size_t I = 0; I < NumberOfFeatures; ++I) {
      Features[I].push_back(ModelRunner.getFeature(I));
    }
    Decisions.push_back(Event.AdvisedDecision);
    Effects.push_back(Event.Effect);
    Rewards.push_back(Event.Reward);
    DefaultDecisions.push_back(Event.DefaultDecision);
  }

  void printTensor(raw_fd_ostream &OutFile) {
    if (DefaultDecisions.empty())
      return;
    OutFile << "feature_lists: {\n";

    for (size_t I = 0; I < Features.size(); I++) {
      writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
    }
    writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
    writeTensor(OutFile, DecisionName, Decisions);
    writeTensor(OutFile, RewardName, Rewards);

    OutFile << "}\n";
  }

private:
  template <typename T>
  void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                   const std::vector<T> &Tensor) {
    OutFile << "  feature_list: {\n";
    OutFile << "    key: "
            << "\"" << TensorName << "\" ";
    OutFile << "value: {\n";
    for (const auto &Feature : Tensor) {
      OutFile << "      feature: { int64_list: { value: [" << Feature
              << "] } }\n";
    }
    OutFile << "    }\n";
    OutFile << "  }\n";
  }

  std::vector<InlineFeatures> Features;
  std::vector<bool> DefaultDecisions;
  std::vector<bool> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
  std::vector<bool> Mandatory;
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
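///
/// In terms of the flags defined in this file, these modes correspond
/// (illustratively) to:
///   1) -training-log=<path>
///   2) -training-log=<path> -ml-inliner-model-under-training=<saved model>
///   3) -ml-inliner-model-under-training=<saved model>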
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !TrainingLog.empty(); }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  TrainingLogger Logger;
  const bool IsDoingInference;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
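    // Note: for successful inlinings, Reward is the estimated native size
    // delta (or INT_MAX if the advisor was forced to stop); otherwise it is
    // NoReward. The logger records it verbatim.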
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
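///
/// The loaded graph is assumed to expose one feed per feature, named
/// TFFeedPrefix + the feature's FeatureNameMap entry (plus the training-only
/// feeds listed below), and a single fetch named TFDecisionName.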
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallow copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features that just need to
  // be set to 0.
  struct TensorSpec {
    std::string Name;
    std::function<void(TFModelEvaluator *, size_t Index,
                       const std::vector<int64_t> &Dim)>
        Initializer;
  };

  const std::vector<TensorSpec> TrainingOnlyFeatures{
      {"inlining_default",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<int64_t>(Index, Dim);
       }},
      {"discount",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<float>(Index, Dim);
       }},
      {"reward",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<float>(Index, Dim);
       }},
      {"step_type", [](TFModelEvaluator *Evaluator, size_t Index,
                       const std::vector<int64_t> &Dim) {
         Evaluator->initInput<int32_t>(Index, Dim);
       }}};
};
} // namespace

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  Logger.printTensor(OutFile);
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<std::string> InputNames;
  std::vector<std::string> OutputNames;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputNames.push_back(TFFeedPrefix + FeatureNameMap[I]);
  for (size_t I = 0; I < TrainingOnlyFeatures.size(); ++I)
    InputNames.push_back(TFFeedPrefix + TrainingOnlyFeatures[I].Name);
  OutputNames.push_back(TFDecisionName);

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputNames, OutputNames);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }

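  // All inputs are scalar tensors of shape {1}: the first NumberOfFeatures
  // inputs are the int64 inlining features set via setFeature(); the rest are
  // the training-only feeds from TrainingOnlyFeatures.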
  static const std::vector<int64_t> Dim{1};

  size_t InputIndex = 0;
  for (; InputIndex < NumberOfFeatures; ++InputIndex) {
    Evaluator->initInput<int64_t>(InputIndex, Dim);
  }

  for (; InputIndex < InputNames.size(); ++InputIndex) {
    TrainingOnlyFeatures[InputIndex - NumberOfFeatures].Initializer(
        Evaluator.get(), InputIndex, Dim);
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; either that, or neither "
                  "are specified.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    Runner = std::move(MUTR);
  }
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
}