//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
class TrainingLogger final {
public:
  TrainingLogger();

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print(raw_fd_ostream &OutFile);

private:
  /// Write the values of one tensor as a list.
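  /// For example, a 3-element int64 tensor holding the values 1, 2 and 3 is
  /// written as "[1, 2, 3]".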
  template <typename T>
  void writeTensorValues(raw_fd_ostream &OutFile, const char *TensorData,
                         size_t ElemCount) const {
    OutFile << "[";
    const T *TypedData = reinterpret_cast<const T *>(TensorData);
    for (size_t I = 0; I < ElemCount; ++I) {
      if (I > 0)
        OutFile << ", ";
      OutFile << TypedData[I];
    }
    OutFile << "]";
  }

  /// Write a list of tensors as a sequence of TensorFlow FeatureList
  /// protobufs. The tensors are assumed to be stored contiguously, in
  /// row-major format, in the TensorData buffer. Each tensor has the shape
  /// given by Spec. The feature name in the output is either the provided
  /// LoggingName, if specified, otherwise it's the name of the tensor (as
  /// given by Spec).
  template <typename T>
  void
  writeTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
                             const T *TensorData, size_t TensorCount,
                             Optional<StringRef> LoggingName = None) const {
    writeRawTensorsAsFeatureLists(OutFile, Spec,
                                  reinterpret_cast<const char *>(TensorData),
                                  TensorCount, LoggingName);
  }

  /// Untyped implementation of the API above.
  void
  writeRawTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
                                const char *TensorData, size_t TensorCount,
                                Optional<StringRef> LoggingName = None) const {
    const char *FieldName = "<invalid>";
    std::function<void(const char *)> ValueWriter;
    // The 'Feature' protobuf only has 3 possible fields: float_list,
    // int64_list, or bytes_list, so we capture int32 values as int64. We don't
    // support any other types.
    if (Spec.isElementType<int64_t>()) {
      FieldName = "int64_list";
      ValueWriter = [&](const char *Data) {
        writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
      };
    } else if (Spec.isElementType<int32_t>()) {
      FieldName = "int64_list";
      ValueWriter = [&](const char *Data) {
        writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
      };
    } else if (Spec.isElementType<float>()) {
      FieldName = "float_list";
      ValueWriter = [&](const char *Data) {
        writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
      };
    } else
      llvm_unreachable("Unsupported tensor type.");

    OutFile << "  feature_list: {\n";
    OutFile << "    key: "
            << "\"" << (LoggingName ? *LoggingName : Spec.name()) << "\" ";
    OutFile << "value: {\n";
    size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
    for (const char *P = TensorData,
                    *E = TensorData + TensorByteSize * TensorCount;
         P < E; P += TensorByteSize) {
      OutFile << "      feature: { " << FieldName << ": { value: ";
      ValueWriter(P);
      OutFile << " } }\n";
    }
    OutFile << "    }\n";
    OutFile << "  }\n";
  }

  std::vector<InlineFeatures> Features;
  std::vector<int64_t> DefaultDecisions;
  std::vector<int64_t> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the
/// model to the compiler, albeit compilation would be slower, as a one-off.
/// Once the model behaves satisfactorily, it can be compiled AOT, for
/// efficiency, in release mode. The expectation is that a well-trained model
/// provides a good policy over a sufficiently diverse codebase, over many
/// changes (i.e. training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !TrainingLog.empty(); }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  TrainingLogger Logger;
  const bool IsDoingInference;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
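/// The logged reward is the change in the native size estimate attributed to
/// the inlining decision (see recordInliningImpl below), so negative values
/// correspond to size savings.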
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision,
                        bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
  const bool Mandatory;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
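/// Input feed names are built by prepending TFFeedPrefix (default "action_")
/// to each feature name; e.g. a hypothetical feature "callee_size" would be
/// fed as "action_callee_size".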
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
} // namespace

TrainingLogger::TrainingLogger() {
  for (size_t I = 0; I < NumberOfFeatures; ++I) {
    Features.push_back(InlineFeatures());
  }
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  for (size_t I = 0; I < NumberOfFeatures; ++I) {
    Features[I].push_back(ModelRunner.getFeature(I));
  }
  Decisions.push_back(Event.AdvisedDecision);
  Effects.push_back(Event.Effect);
  Rewards.push_back(Event.Reward);
  DefaultDecisions.push_back(Event.DefaultDecision);
}

void TrainingLogger::print(raw_fd_ostream &OutFile) {
  size_t NumberOfRecords = Decisions.size();
  if (NumberOfRecords == 0)
    return;

  OutFile << "feature_lists: {\n";
  for (size_t I = 0; I < Features.size(); ++I)
    writeTensorsAsFeatureLists(
        OutFile, TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}),
        Features[I].data(), NumberOfRecords);

  writeTensorsAsFeatureLists(
      OutFile, TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}),
      DefaultDecisions.data(), NumberOfRecords);

  writeTensorsAsFeatureLists(OutFile,
                             TensorSpec::createSpec<int64_t>(DecisionName, {1}),
                             Decisions.data(), NumberOfRecords);
  writeTensorsAsFeatureLists(OutFile,
                             TensorSpec::createSpec<int64_t>(RewardName, {1}),
                             Rewards.data(), NumberOfRecords);

  OutFile << "}\n";
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

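// The log is only materialized when the advisor is destroyed: the destructor
// below writes the accumulated tensors to the -training-log path as a textual
// SequenceExample, roughly of the form (illustrative):
//   feature_lists: {
//     feature_list: { key: "<feature or decision name>" value: {
//       feature: { int64_list: { value: [1] } } ... } }
//     ...
//   }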
DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  Logger.print(OutFile);
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  std::vector<TensorSpec> OutputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
                    TrainingOnlyFeatures.end());
  OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1}));

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

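// setFeature and getFeature operate directly on the evaluator's input tensor
// buffers, so the feature values are already in place when run() calls
// evaluate(); no separate copy is kept.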
void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; either that, or "
                  "neither is specified.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    // std::make_unique never returns null, so check whether the runner managed
    // to create its evaluator instead.
    if (!MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    Runner = std::move(MUTR);
    IsDoingInference = true;
  }
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
}
#endif // defined(LLVM_HAVE_TF_API)