//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
// Exceptions. See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using TensorFlow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));
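
// Editorial note, for illustration only: these flags are meant to be combined
// with the flag that selects the development-mode ML advisor. A sketch of an
// invocation, assuming -enable-ml-inliner and the scc-oz-module-inliner
// pipeline alias are available in this build of opt:
//
//   opt -passes=scc-oz-module-inliner -enable-ml-inliner=development \
//       -training-log=/tmp/inlining.log \
//       -ml-inliner-model-under-training=/path/to/saved_model \
//       -S input.ll -o /dev/null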

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
class TrainingLogger final {
public:
  TrainingLogger();

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print(raw_fd_ostream &OutFile);

private:
  template <typename T>
  void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                   const std::vector<T> &Tensor);

  std::vector<InlineFeatures> Features;
  std::vector<bool> DefaultDecisions;
  std::vector<bool> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster when starting from a
/// reasonable policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation: the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model, compile it in 'release' mode,
/// validate it, and then potentially discard it, it's easier to just pass the
/// model to the compiler as a one-off, albeit compilation would be slower.
/// Once the model behaves satisfactorily, it can be compiled AOT, for
/// efficiency, in release mode. The expectation is that a well-trained model
/// provides a good policy over a sufficiently diverse codebase, over many
/// changes (i.e. training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !TrainingLog.empty(); }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  TrainingLogger Logger;
  const bool IsDoingInference;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};
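
// Editorial note (illustrative, not part of the original comments): in
// getDevelopmentModeAdvisor below, the three modes documented above map onto
// the flag combinations as follows:
//   1) -training-log=<path> only: follow the default policy and log it
//      (NoInferenceModelRunner, IsDoingInference == false).
//   2) -training-log=<path> plus -ml-inliner-model-under-training=<path>:
//      follow the model under training and log its decisions.
//   3) -ml-inliner-model-under-training=<path> only: run the model, no log.
// Whenever -training-log is set, the InlineSizeEstimatorAnalysis evaluator
// must also be available so that size-based rewards can be computed.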

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision,
                        bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
  const bool Mandatory;
};
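
// Worked example (editorial sketch, numbers are made up): suppose the size
// estimates before inlining are CallerSizeEstimateBefore == 100 and
// CalleeSizeEstimateBefore == 20. If inlining succeeds, the callee survives,
// and the caller is re-estimated at 110, recordInliningImpl computes
// Reward = (110 + 20) - (100 + 20) = 10, i.e. the code grew by 10 units. If
// instead the callee is deleted and the caller is re-estimated at 110,
// recordInliningWithCalleeDeletedImpl computes Reward = 110 - (100 + 20) =
// -10, a net shrink. Positive rewards therefore denote size growth, matching
// the "size_after - size_before" convention of InlineEvent::Reward.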

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
} // namespace

TrainingLogger::TrainingLogger() {
  for (size_t I = 0; I < NumberOfFeatures; ++I) {
    Features.push_back(InlineFeatures());
  }
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  for (size_t I = 0; I < NumberOfFeatures; ++I) {
    Features[I].push_back(ModelRunner.getFeature(I));
  }
  Decisions.push_back(Event.AdvisedDecision);
  Effects.push_back(Event.Effect);
  Rewards.push_back(Event.Reward);
  DefaultDecisions.push_back(Event.DefaultDecision);
}

void TrainingLogger::print(raw_fd_ostream &OutFile) {
  if (DefaultDecisions.empty())
    return;
  OutFile << "feature_lists: {\n";

  for (size_t I = 0; I < Features.size(); I++) {
    writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
  }
  writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
  writeTensor(OutFile, DecisionName, Decisions);
  writeTensor(OutFile, RewardName, Rewards);

  OutFile << "}\n";
}

template <typename T>
void TrainingLogger::writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                                 const std::vector<T> &Tensor) {
  OutFile << "  feature_list: {\n";
  OutFile << "    key: "
          << "\"" << TensorName << "\" ";
  OutFile << "value: {\n";
  for (const auto &Feature : Tensor) {
    OutFile << "      feature: { int64_list: { value: [" << Feature
            << "] } }\n";
  }
  OutFile << "    }\n";
  OutFile << "  }\n";
}
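
// For illustration (editorial sketch; feature names and values are made up):
// with two logged events, the textual SequenceExample written by print()
// above has roughly this shape, one feature_list per tensor, keyed by the
// names in FeatureNameMap plus the default-decision, decision, and reward
// columns:
//
//   feature_lists: {
//     feature_list: {
//       key: "<feature name>" value: {
//         feature: { int64_list: { value: [42] } }
//         feature: { int64_list: { value: [37] } }
//       }
//     }
//     ...
//   }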

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  Logger.print(OutFile);
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  std::vector<TensorSpec> OutputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
        TFFeedPrefix + FeatureNameMap[I], {1}));
  InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
                    TrainingOnlyFeatures.end());
  OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1}));

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; either that, or neither "
                  "is specified.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    // std::make_unique cannot return null, so check the runner's validity
    // rather than the pointer itself.
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    Runner = std::move(MUTR);
  }
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
}
#endif // defined(LLVM_HAVE_TF_API)