//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using TensorFlow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
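///
/// For illustration only: with a hypothetical feature named "some_feature"
/// and two logged events, printTensor below emits text of roughly this shape
/// (the actual keys come from FeatureNameMap, plus DefaultDecisionName,
/// DecisionName and RewardName):
///
///   feature_lists: {
///     feature_list: {
///       key: "some_feature" value: {
///         feature: { int64_list: { value: [3] } }
///         feature: { int64_list: { value: [5] } }
///       }
///     }
///     ...
///   }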
class TrainingLogger final {
public:
  TrainingLogger() {
    for (size_t I = 0; I < NumberOfFeatures; ++I) {
      Features.push_back(InlineFeatures());
    }
  }

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner) {
    for (size_t I = 0; I < NumberOfFeatures; ++I) {
      Features[I].push_back(ModelRunner.getFeature(I));
    }
    Decisions.push_back(Event.AdvisedDecision);
    Effects.push_back(Event.Effect);
    Rewards.push_back(Event.Reward);
    DefaultDecisions.push_back(Event.DefaultDecision);
  }

  void printTensor(raw_fd_ostream &OutFile) {
    if (DefaultDecisions.empty())
      return;
    OutFile << "feature_lists: {\n";

    for (size_t I = 0; I < Features.size(); I++) {
      writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
    }
    writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
    writeTensor(OutFile, DecisionName, Decisions);
    writeTensor(OutFile, RewardName, Rewards);

    OutFile << "}\n";
  }

private:
  template <typename T>
  void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                   const std::vector<T> &Tensor) {
    OutFile << "  feature_list: {\n";
    OutFile << "    key: "
            << "\"" << TensorName << "\" ";
    OutFile << "value: {\n";
    for (const auto &Feature : Tensor) {
      OutFile << "      feature: { int64_list: { value: [" << Feature
              << "] } }\n";
    }
    OutFile << "    }\n";
    OutFile << "  }\n";
  }

  std::vector<InlineFeatures> Features;
  std::vector<bool> DefaultDecisions;
  std::vector<bool> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
  std::vector<bool> Mandatory;
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit with slower compilation, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
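///
/// A rough sketch of the option combinations for each mode (the mode selector
/// itself, e.g. -enable-ml-inliner=development, lives outside this file and is
/// assumed here):
///
/// 1) -training-log=<path>
/// 2) -training-log=<path> -ml-inliner-model-under-training=<SavedModel dir>
/// 3) -ml-inliner-model-under-training=<SavedModel dir>
///
/// Whenever -training-log is used, the native size estimator must also be
/// available (see getDevelopmentModeAdvisor below).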
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !TrainingLog.empty(); }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  TrainingLogger Logger;
  const bool IsDoingInference;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
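  // recordInliningImpl and recordInliningWithCalleeDeletedImpl below compute
  // the size reward for a successful inlining. A small worked example with
  // hypothetical estimates: if CallerSizeEstimateBefore = 100 and
  // CalleeSizeEstimateBefore = 30, and the caller's estimate after inlining is
  // 120, the reward is (120 + 30) - (100 + 30) = 20 when the callee survives,
  // and 120 - (100 + 30) = -10 when the callee was deleted. Positive values
  // therefore mean the native size estimate grew.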
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
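///
/// As a rough sketch of what the constructor below expects: the SavedModel
/// should expose one int64 input of shape {1} per feature, named
/// TFFeedPrefix + FeatureNameMap[I] (with the default prefix that would be,
/// e.g., "action_<feature name>"), plus the four training-only inputs listed
/// in TrainingOnlyFeatures (also prefixed), and a single fetch named
/// TFDecisionName (by default "StatefulPartitionedCall") whose first int64
/// element is read as the inlining decision.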
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features that just need to
  // be set to 0.
  struct TensorSpec {
    std::string Name;
    std::function<void(TFModelEvaluator *, size_t Index,
                       const std::vector<int64_t> &Dim)>
        Initializer;
  };

  const std::vector<TensorSpec> TrainingOnlyFeatures{
      {"inlining_default",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<int64_t>(Index, Dim);
       }},
      {"discount",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<float>(Index, Dim);
       }},
      {"reward",
       [](TFModelEvaluator *Evaluator, size_t Index,
          const std::vector<int64_t> &Dim) {
         Evaluator->initInput<float>(Index, Dim);
       }},
      {"step_type", [](TFModelEvaluator *Evaluator, size_t Index,
                       const std::vector<int64_t> &Dim) {
         Evaluator->initInput<int32_t>(Index, Dim);
       }}};
};
} // namespace

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  Logger.printTensor(OutFile);
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<std::string> InputNames;
  std::vector<std::string> OutputNames;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputNames.push_back(TFFeedPrefix + FeatureNameMap[I]);
  for (size_t I = 0; I < TrainingOnlyFeatures.size(); ++I)
    InputNames.push_back(TFFeedPrefix + TrainingOnlyFeatures[I].Name);
  OutputNames.push_back(TFDecisionName);

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputNames, OutputNames);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }

  static const std::vector<int64_t> Dim{1};

  size_t InputIndex = 0;
  for (; InputIndex < NumberOfFeatures; ++InputIndex) {
    Evaluator->initInput<int64_t>(InputIndex, Dim);
  }

  for (; InputIndex < InputNames.size(); ++InputIndex) {
    TrainingOnlyFeatures[InputIndex - NumberOfFeatures].Initializer(
        Evaluator.get(), InputIndex, Dim);
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; either that, or neither "
                  "is specified.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    // make_unique never returns null, so check that the model actually loaded.
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    Runner = std::move(MUTR);
  }
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
}
#endif // defined(LLVM_HAVE_TF_API)