1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
2 |*
3 |* The LLVM Compiler Infrastructure
4 |*
5 |* This file is distributed under the University of Illinois Open Source
6 |* License. See LICENSE.TXT for details.
7 |*
8 \*===----------------------------------------------------------------------===*/
9
10 #include <limits.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14
15 #include "InstrProfiling.h"
16 #include "InstrProfilingInternal.h"
17 #include "InstrProfilingUtil.h"
18
19 #define INSTR_PROF_VALUE_PROF_DATA
20 #define INSTR_PROF_COMMON_API_IMPL
21 #include "InstrProfData.inc"
22
23 static int hasStaticCounters = 1;
24 static int OutOfNodesWarnings = 0;
25 static int hasNonDefaultValsPerSite = 0;
26 #define INSTR_PROF_MAX_VP_WARNS 10
27 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 16
28 #define INSTR_PROF_VNODE_POOL_SIZE 1024
29
30 #ifndef _MSC_VER
31 /* A shared static pool in addition to the vnodes statically
32 * allocated by the compiler. */
33 COMPILER_RT_VISIBILITY ValueProfNode
34 lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
35 COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME_STR);
36 #endif
37
38 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
39 INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
40
lprofSetupValueProfiler()41 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
42 const char *Str = 0;
43 Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
44 if (Str && Str[0]) {
45 VPMaxNumValsPerSite = atoi(Str);
46 hasNonDefaultValsPerSite = 1;
47 }
48 if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
49 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
50 }
51
lprofSetMaxValsPerSite(uint32_t MaxVals)52 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
53 VPMaxNumValsPerSite = MaxVals;
54 hasNonDefaultValsPerSite = 1;
55 }
56
57 /* This method is only used in value profiler mock testing. */
58 COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data * Data,uint32_t ValueKind,uint16_t NumValueSites)59 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
60 uint32_t ValueKind, uint16_t NumValueSites) {
61 *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
62 }
63
64 /* This method is only used in value profiler mock testing. */
65 COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_iterate_data(const __llvm_profile_data * Data)66 __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
67 return Data + 1;
68 }
69
70 /* This method is only used in value profiler mock testing. */
71 COMPILER_RT_VISIBILITY void *
__llvm_get_function_addr(const __llvm_profile_data * Data)72 __llvm_get_function_addr(const __llvm_profile_data *Data) {
73 return Data->FunctionPointer;
74 }
75
76 /* Allocate an array that holds the pointers to the linked lists of
77 * value profile counter nodes. The number of element of the array
78 * is the total number of value profile sites instrumented. Returns
79 * 0 if allocation fails.
80 */
81
allocateValueProfileCounters(__llvm_profile_data * Data)82 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
83 uint64_t NumVSites = 0;
84 uint32_t VKI;
85
86 /* This function will never be called when value site array is allocated
87 statically at compile time. */
88 hasStaticCounters = 0;
89 /* When dynamic allocation is enabled, allow tracking the max number of
90 * values allowd. */
91 if (!hasNonDefaultValsPerSite)
92 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
93
94 for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
95 NumVSites += Data->NumValueSites[VKI];
96
97 ValueProfNode **Mem =
98 (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
99 if (!Mem)
100 return 0;
101 if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
102 free(Mem);
103 return 0;
104 }
105 return 1;
106 }
107
allocateOneNode(void)108 static ValueProfNode *allocateOneNode(void) {
109 ValueProfNode *Node;
110
111 if (!hasStaticCounters)
112 return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
113
114 /* Early check to avoid value wrapping around. */
115 if (CurrentVNode + 1 > EndVNode) {
116 if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
117 PROF_WARN("Unable to track new values: %s. "
118 " Consider using option -mllvm -vp-counters-per-site=<n> to "
119 "allocate more"
120 " value profile counters at compile time. \n",
121 "Running out of static counters");
122 }
123 return 0;
124 }
125 Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
126 /* Due to section padding, EndVNode point to a byte which is one pass
127 * an incomplete VNode, so we need to skip the last incomplete node. */
128 if (Node + 1 > EndVNode)
129 return 0;
130
131 return Node;
132 }
133
134 static COMPILER_RT_ALWAYS_INLINE void
instrumentTargetValueImpl(uint64_t TargetValue,void * Data,uint32_t CounterIndex,uint64_t CountValue)135 instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
136 uint32_t CounterIndex, uint64_t CountValue) {
137 __llvm_profile_data *PData = (__llvm_profile_data *)Data;
138 if (!PData)
139 return;
140 if (!CountValue)
141 return;
142 if (!PData->Values) {
143 if (!allocateValueProfileCounters(PData))
144 return;
145 }
146
147 ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
148 ValueProfNode *PrevVNode = NULL;
149 ValueProfNode *MinCountVNode = NULL;
150 ValueProfNode *CurVNode = ValueCounters[CounterIndex];
151 uint64_t MinCount = UINT64_MAX;
152
153 uint8_t VDataCount = 0;
154 while (CurVNode) {
155 if (TargetValue == CurVNode->Value) {
156 CurVNode->Count += CountValue;
157 return;
158 }
159 if (CurVNode->Count < MinCount) {
160 MinCount = CurVNode->Count;
161 MinCountVNode = CurVNode;
162 }
163 PrevVNode = CurVNode;
164 CurVNode = CurVNode->Next;
165 ++VDataCount;
166 }
167
168 if (VDataCount >= VPMaxNumValsPerSite) {
169 /* Bump down the min count node's count. If it reaches 0,
170 * evict it. This eviction/replacement policy makes hot
171 * targets more sticky while cold targets less so. In other
172 * words, it makes it less likely for the hot targets to be
173 * prematurally evicted during warmup/establishment period,
174 * when their counts are still low. In a special case when
175 * the number of values tracked is reduced to only one, this
176 * policy will guarantee that the dominating target with >50%
177 * total count will survive in the end. Note that this scheme
178 * allows the runtime to track the min count node in an adaptive
179 * manner. It can correct previous mistakes and eventually
180 * lock on a cold target that is alread in stable state.
181 *
182 * In very rare cases, this replacement scheme may still lead
183 * to target loss. For instance, out of \c N value slots, \c N-1
184 * slots are occupied by luke warm targets during the warmup
185 * period and the remaining one slot is competed by two or more
186 * very hot targets. If those hot targets occur in an interleaved
187 * way, none of them will survive (gain enough weight to throw out
188 * other established entries) due to the ping-pong effect.
189 * To handle this situation, user can choose to increase the max
190 * number of tracked values per value site. Alternatively, a more
191 * expensive eviction mechanism can be implemented. It requires
192 * the runtime to track the total number of evictions per-site.
193 * When the total number of evictions reaches certain threshold,
194 * the runtime can wipe out more than one lowest count entries
195 * to give space for hot targets.
196 */
197 if (MinCountVNode->Count <= CountValue) {
198 CurVNode = MinCountVNode;
199 CurVNode->Value = TargetValue;
200 CurVNode->Count = CountValue;
201 } else
202 MinCountVNode->Count -= CountValue;
203
204 return;
205 }
206
207 CurVNode = allocateOneNode();
208 if (!CurVNode)
209 return;
210 CurVNode->Value = TargetValue;
211 CurVNode->Count += CountValue;
212
213 uint32_t Success = 0;
214 if (!ValueCounters[CounterIndex])
215 Success =
216 COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
217 else if (PrevVNode && !PrevVNode->Next)
218 Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
219
220 if (!Success && !hasStaticCounters) {
221 free(CurVNode);
222 return;
223 }
224 }
225
226 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue,void * Data,uint32_t CounterIndex)227 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
228 uint32_t CounterIndex) {
229 instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
230 }
231 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target_value(uint64_t TargetValue,void * Data,uint32_t CounterIndex,uint64_t CountValue)232 __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
233 uint32_t CounterIndex,
234 uint64_t CountValue) {
235 instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
236 }
237
238 /*
239 * The target values are partitioned into multiple regions/ranges. There is one
240 * contiguous region which is precise -- every value in the range is tracked
241 * individually. A value outside the precise region will be collapsed into one
242 * value depending on the region it falls in.
243 *
244 * There are three regions:
245 * 1. (-inf, PreciseRangeStart) and (PreciseRangeLast, LargeRangeValue) belong
246 * to one region -- all values here should be mapped to one value of
247 * "PreciseRangeLast + 1".
248 * 2. [PreciseRangeStart, PreciseRangeLast]
249 * 3. Large values: [LargeValue, +inf) maps to one value of LargeValue.
250 *
251 * The range for large values is optional. The default value of INT64_MIN
252 * indicates it is not specified.
253 */
__llvm_profile_instrument_range(uint64_t TargetValue,void * Data,uint32_t CounterIndex,int64_t PreciseRangeStart,int64_t PreciseRangeLast,int64_t LargeValue)254 COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range(
255 uint64_t TargetValue, void *Data, uint32_t CounterIndex,
256 int64_t PreciseRangeStart, int64_t PreciseRangeLast, int64_t LargeValue) {
257
258 if (LargeValue != INT64_MIN && (int64_t)TargetValue >= LargeValue)
259 TargetValue = LargeValue;
260 else if ((int64_t)TargetValue < PreciseRangeStart ||
261 (int64_t)TargetValue > PreciseRangeLast)
262 TargetValue = PreciseRangeLast + 1;
263
264 __llvm_profile_instrument_target(TargetValue, Data, CounterIndex);
265 }
266
267 /*
268 * A wrapper struct that represents value profile runtime data.
269 * Like InstrProfRecord class which is used by profiling host tools,
270 * ValueProfRuntimeRecord also implements the abstract intefaces defined in
271 * ValueProfRecordClosure so that the runtime data can be serialized using
272 * shared C implementation.
273 */
274 typedef struct ValueProfRuntimeRecord {
275 const __llvm_profile_data *Data;
276 ValueProfNode **NodesKind[IPVK_Last + 1];
277 uint8_t **SiteCountArray;
278 } ValueProfRuntimeRecord;
279
280 /* ValueProfRecordClosure Interface implementation. */
281
getNumValueSitesRT(const void * R,uint32_t VK)282 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
283 return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
284 }
285
getNumValueDataRT(const void * R,uint32_t VK)286 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
287 uint32_t S = 0, I;
288 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
289 if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
290 return 0;
291 for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
292 S += Record->SiteCountArray[VK][I];
293 return S;
294 }
295
getNumValueDataForSiteRT(const void * R,uint32_t VK,uint32_t S)296 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
297 uint32_t S) {
298 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
299 return Record->SiteCountArray[VK][S];
300 }
301
302 static ValueProfRuntimeRecord RTRecord;
303 static ValueProfRecordClosure RTRecordClosure = {
304 &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */
305 getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,
306 INSTR_PROF_NULLPTR, /* RemapValueData */
307 INSTR_PROF_NULLPTR, /* GetValueForSite, */
308 INSTR_PROF_NULLPTR /* AllocValueProfData */
309 };
310
311 static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data * Data,uint8_t * SiteCountArray[])312 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
313 uint8_t *SiteCountArray[]) {
314 unsigned I, J, S = 0, NumValueKinds = 0;
315 ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
316 RTRecord.Data = Data;
317 RTRecord.SiteCountArray = SiteCountArray;
318 for (I = 0; I <= IPVK_Last; I++) {
319 uint16_t N = Data->NumValueSites[I];
320 if (!N)
321 continue;
322
323 NumValueKinds++;
324
325 RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
326 for (J = 0; J < N; J++) {
327 /* Compute value count for each site. */
328 uint32_t C = 0;
329 ValueProfNode *Site =
330 Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
331 while (Site) {
332 C++;
333 Site = Site->Next;
334 }
335 if (C > UCHAR_MAX)
336 C = UCHAR_MAX;
337 RTRecord.SiteCountArray[I][J] = C;
338 }
339 S += N;
340 }
341 return NumValueKinds;
342 }
343
getNextNValueData(uint32_t VK,uint32_t Site,InstrProfValueData * Dst,ValueProfNode * StartNode,uint32_t N)344 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
345 InstrProfValueData *Dst,
346 ValueProfNode *StartNode, uint32_t N) {
347 unsigned I;
348 ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
349 for (I = 0; I < N; I++) {
350 Dst[I].Value = VNode->Value;
351 Dst[I].Count = VNode->Count;
352 VNode = VNode->Next;
353 }
354 return VNode;
355 }
356
getValueProfDataSizeWrapper(void)357 static uint32_t getValueProfDataSizeWrapper(void) {
358 return getValueProfDataSize(&RTRecordClosure);
359 }
360
getNumValueDataForSiteWrapper(uint32_t VK,uint32_t S)361 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
362 return getNumValueDataForSiteRT(&RTRecord, VK, S);
363 }
364
365 static VPDataReaderType TheVPDataReader = {
366 initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
367 getFirstValueProfRecord, getNumValueDataForSiteWrapper,
368 getValueProfDataSizeWrapper, getNextNValueData};
369
lprofGetVPDataReader()370 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
371 return &TheVPDataReader;
372 }
373