//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the ML eviction advisor and reward injection pass
//
//===----------------------------------------------------------------------===//

#include "RegAllocEvictionAdvisor.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/config.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

#include <memory>

using namespace llvm;

#define DEBUG_TYPE "ml-regalloc"

#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
namespace {
// This is the maximum number of interfering ranges. That's the number of
// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
// For X86, that's 32.
// TODO: find a way to get this, statically, in a programmatic way.
static const int64_t MaxInterferences = 32;

// Logically, we can think of the feature set given to the evaluator as a 2D
// matrix. The rows are the features (see next). The columns correspond to the
// interferences. We treat the candidate virt reg as an 'interference', too, as
// its feature set is the same as that of the interfering ranges. So we'll have
// MaxInterferences + 1 columns and, by convention, we will use the last column
// for the virt reg seeking allocation.
static const int64_t CandidateVirtRegPos = MaxInterferences;
static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;

// Most features are as described above, so we'll reuse this vector in defining
// them.
static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};

// --------------
// Features table
// --------------
// For each interfering live range (incl. the candidate) we collect a number of
// features. However, because the features are of different types (and because
// of ML best practices), we organize the tensors per feature, not per
// candidate. Each such tensor has a scalar value corresponding to the
// interfering live range at that position, in the order in AllocationOrder.
// The last position corresponds to the virt reg seeking allocation.
// The exception to all that is the progression feature, which is just a scalar
// (see its documentation for details).
// Note on naming: the "_by_max" features are normalized using the largest
// value of that tensor, as observed in the current decision making stage (i.e.
// for the current call to the advisor's tryFindEvictionCandidate).
//
// The feature list format: type, name, shape, documentation.
// Note: we can really just use int64 and float, hence the modeling of some
// bools as int64 values.
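// As an illustration of the layout described above: with MaxInterferences ==
// 32, NumberOfInterferences is 33, so a per-live-range feature such as
// liverange_size is a {1, 33} tensor. Elements 0..31 describe the interfering
// live ranges in AllocationOrder order, and element 32 (CandidateVirtRegPos)
// describes the virt reg seeking allocation.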
#define RA_EVICT_FEATURES_LIST(M) \
  M(int64_t, mask, PerLiveRangeShape, \
    "boolean values, 0 for unavailable candidates (i.e. if a position is 0, " \
    "it can't be evicted)") \
  M(int64_t, is_free, PerLiveRangeShape, \
    "boolean values, 1 if this phys reg is actually free (no interferences)") \
  M(float, nr_urgent, PerLiveRangeShape, \
    "number of 'urgent' intervals, normalized. Urgent are those that are OK " \
    "to break cascades") \
  M(float, nr_broken_hints, PerLiveRangeShape, \
    "if this position were evicted, how many broken hints would there be") \
  M(int64_t, is_hint, PerLiveRangeShape, \
    "is this a preferred phys reg for the candidate") \
  M(int64_t, is_local, PerLiveRangeShape, \
    "is this live range local to a basic block") \
  M(float, nr_rematerializable, PerLiveRangeShape, \
    "nr rematerializable ranges") \
  M(float, nr_defs_and_uses, PerLiveRangeShape, \
    "bb freq - weighed nr defs and uses") \
  M(float, weighed_reads_by_max, PerLiveRangeShape, \
    "bb freq - weighed nr of reads, normalized") \
  M(float, weighed_writes_by_max, PerLiveRangeShape, \
    "bb freq - weighed nr of writes, normalized") \
  M(float, weighed_read_writes_by_max, PerLiveRangeShape, \
    "bb freq - weighed nr of uses that are both read and writes, normalized") \
  M(float, weighed_indvars_by_max, PerLiveRangeShape, \
    "bb freq - weighed nr of uses that are indvars, normalized") \
  M(float, hint_weights_by_max, PerLiveRangeShape, \
    "bb freq - weighed nr of uses that are hints, normalized") \
  M(float, start_bb_freq_by_max, PerLiveRangeShape, \
    "the freq in the start block, normalized") \
  M(float, end_bb_freq_by_max, PerLiveRangeShape, \
    "freq of end block, normalized") \
  M(float, hottest_bb_freq_by_max, PerLiveRangeShape, \
    "hottest BB freq, normalized") \
  M(float, liverange_size, PerLiveRangeShape, \
    "size (instr index diff) of the LR") \
  M(float, use_def_density, PerLiveRangeShape, \
    "the max weight, as computed by the manual heuristic") \
  M(int64_t, max_stage, PerLiveRangeShape, \
    "largest stage of an interval in this LR") \
  M(int64_t, min_stage, PerLiveRangeShape, \
    "lowest stage of an interval in this LR") \
  M(float, progress, {1}, "ratio of current queue size to initial size")

// The model learns to pick one of the mask == 1 interferences. This is the
// name of the output tensor. The contract with the model is that the output
// is guaranteed to be a mask == 1 position.
const char *const DecisionName = "index_to_evict";

// Named features index.
enum FeatureIDs {
#define _FEATURE_IDX(_, name, __, ___) name,
  RA_EVICT_FEATURES_LIST(_FEATURE_IDX)
#undef _FEATURE_IDX
  FeatureCount
};
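// For reference, the X-macro expansion above is roughly equivalent to:
//   enum FeatureIDs { mask, is_free, nr_urgent, /* ... */ progress, FeatureCount };
// i.e. each feature name doubles as its index into the evaluator's tensors.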
// The ML advisor will typically have a sparse input to the evaluator, because
// various phys regs won't be available. It's easier (maintenance-wise) to
// bulk-reset the state of the evaluator each time we are about to use it
// again.
template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) {
  size_t Ret = sizeof(T);
  for (const auto V : Shape)
    Ret *= V;
  return Ret;
}

void resetInputs(MLModelRunner &Runner) {
#define _RESET(TYPE, NAME, SHAPE, __) \
  std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \
              getTotalSize<TYPE>(SHAPE));
  RA_EVICT_FEATURES_LIST(_RESET)
#undef _RESET
}

// Development mode-specifics
#ifdef LLVM_HAVE_TF_API
#define _DECL_FEATURES(type, name, shape, _) \
  TensorSpec::createSpec<type>(#name, shape),

static const std::vector<TensorSpec> InputFeatures{
    {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}};
#undef _DECL_FEATURES
static const TensorSpec Output =
    TensorSpec::createSpec<int64_t>(DecisionName, {1});
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});

#endif // #ifdef LLVM_HAVE_TF_API
} // namespace
#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
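// A rough usage sketch of the helpers above (illustrative only; Runner and
// QueueRatio are hypothetical locals, not part of this file):
//   resetInputs(*Runner);                      // clear the stale, sparse state
//   // ... fill the per-live-range feature tensors for the current decision ...
//   Runner->getTensor<float>(FeatureIDs::progress)[0] = QueueRatio;
//   int64_t EvictPos = Runner->evaluate<int64_t>(); // a mask == 1 position
// Here Runner would be an MLModelRunner (e.g. NoInferenceModelRunner or
// ModelUnderTrainingRunner, depending on build configuration and flags).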