xref: /freebsd-14.2/sys/kern/subr_stats.c (revision 8971fe25)
1 /*-
2  * Copyright (c) 2014-2018 Netflix, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * Author: Lawrence Stewart <[email protected]>
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/arb.h>
34 #include <sys/ctype.h>
35 #include <sys/errno.h>
36 #include <sys/hash.h>
37 #include <sys/limits.h>
38 #include <sys/malloc.h>
39 #include <sys/qmath.h>
40 #include <sys/sbuf.h>
41 #if defined(DIAGNOSTIC)
42 #include <sys/tree.h>
43 #endif
44 #include <sys/stats.h> /* Must come after qmath.h and arb.h */
45 #include <sys/stddef.h>
46 #include <sys/stdint.h>
47 #include <sys/time.h>
48 
49 #ifdef _KERNEL
50 #include <sys/kernel.h>
51 #include <sys/lock.h>
52 #include <sys/rwlock.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #else /* ! _KERNEL */
56 #include <pthread.h>
57 #include <stdbool.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #endif /* _KERNEL */
62 
/*
 * Internal per-VOI state used for differential calculations; carried in the
 * blob alongside real stats as the pseudo-stat type VS_STYPE_VOISTATE.
 */
struct voistatdata_voistate {
	/* Previous VOI value for diff calculation. */
	struct voistatdata_numeric prev;
};

#define	VS_VSDVALID	0x0001	/* Stat's voistatdata updated at least once. */
/* Per-statistic bookkeeping embedded in a stats blob. */
struct voistat {
	int8_t		stype;		/* Type of stat e.g. VS_STYPE_SUM. */
	enum vsd_dtype	dtype : 8;	/* Data type of this stat's data. */
	uint16_t	data_off;	/* Blob offset for this stat's data. */
	uint16_t	dsz;		/* Size of stat's data. */
#define	VS_EBITS 8
	uint16_t	errs : VS_EBITS;/* Non-wrapping error count. */
	uint16_t	flags : 16 - VS_EBITS;
};
/* The voistat error count is capped to avoid wrapping. */
#define	VS_INCERRS(vs) do {						\
	if ((vs)->errs < (1U << VS_EBITS) - 1)				\
		(vs)->errs++;						\
} while (0)

/*
 * Ideas for flags:
 *   - Global or entity specific (global would imply use of counter(9)?)
 *   - Whether to reset stats on read or not
 *   - Signal an overflow?
 *   - Compressed voistat array
 */
#define	VOI_REQSTATE	0x0001	/* VOI requires VS_STYPE_VOISTATE. */
/* Per-VOI (Variable Of Interest) bookkeeping embedded in a stats blob. */
struct voi {
	int16_t		id;		/* VOI id. */
	enum vsd_dtype	dtype : 8;	/* Data type of the VOI itself. */
	int8_t		voistatmaxid;	/* Largest allocated voistat index. */
	uint16_t	stats_off;	/* Blob offset for this VOIs stats. */
	uint16_t	flags;
};
99 
100 /*
101  * Memory for the entire blob is allocated as a slab and then offsets are
102  * maintained to carve up the slab into sections holding different data types.
103  *
104  * Ideas for flags:
105  * - Compressed voi array (trade off memory usage vs search time)
106  * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib)
107  */
/* Version 1 layout of a stats blob; vois[] trails as a flexible array. */
struct statsblobv1 {
	uint8_t		abi;
	uint8_t		endian;
	uint16_t	flags;
	uint16_t	maxsz;		/* Slab capacity in bytes. */
	uint16_t	cursz;		/* Bytes of the slab currently in use. */
	/* Fields from here down are opaque to consumers. */
	uint32_t	tplhash;	/* Base template hash ID. */
	uint16_t	stats_off;	/* voistat array blob offset. */
	uint16_t	statsdata_off;	/* voistatdata array blob offset. */
	sbintime_t	created;	/* Blob creation time. */
	sbintime_t	lastrst;	/* Time of last reset. */
	struct voi	vois[];		/* Array indexed by [voi_id]. */
} __aligned(sizeof(void *));
/* The public struct statsblob's opaque region must line up with v1. */
_Static_assert(offsetof(struct statsblobv1, cursz) +
    SIZEOF_MEMBER(struct statsblobv1, cursz) ==
    offsetof(struct statsblob, opaque),
    "statsblobv1 ABI mismatch");

/* A v1 blob paired with its descriptive metadata. */
struct statsblobv1_tpl {
	struct metablob		*mb;
	struct statsblobv1	*sb;
};
131 
132 /* Context passed to iterator callbacks. */
struct sb_iter_ctx {
	void		*usrctx;	/* Caller supplied context. */
	uint32_t	flags;		/* Flags for current iteration. */
	int16_t		vslot;		/* struct voi slot index. */
	int8_t		vsslot;		/* struct voistat slot index. */
};

/* Context for rendering a blob into an sbuf in a given string format. */
struct sb_tostrcb_ctx {
	struct sbuf		*buf;
	struct statsblob_tpl	*tpl;
	enum sb_str_fmt	fmt;
	uint32_t		flags;
};

/* Bundles a user-supplied visit callback with its opaque argument. */
struct sb_visitcb_ctx {
	stats_blob_visitcb_t	cb;
	void			*usrctx;
};

/* Stats blob iterator callback. */
typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
    struct voistat *vs, struct sb_iter_ctx *ctx);
155 
156 #ifdef _KERNEL
157 static struct rwlock tpllistlock;
158 RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
159 #define	TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
160 #define	TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
161 #define	TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
162 #define	TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
163 #define	TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
164 #define	TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
165 #define	TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
166 MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
167 #define	stats_free(ptr) free((ptr), M_STATS)
168 #else /* ! _KERNEL */
169 static void stats_constructor(void);
170 static void stats_destructor(void);
171 static pthread_rwlock_t tpllistlock;
172 #define	TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
173 #define	TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
174 #define	TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
175 #define	TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
176 #define	TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
177 #define	TPL_LIST_LOCK_ASSERT() do { } while (0)
178 #define	TPL_LIST_RLOCK_ASSERT() do { } while (0)
179 #define	TPL_LIST_WLOCK_ASSERT() do { } while (0)
180 #ifdef NDEBUG
181 #define	KASSERT(cond, msg) do {} while (0)
182 #define	stats_abort() do {} while (0)
183 #else /* ! NDEBUG */
184 #define	KASSERT(cond, msg) do { \
185 	if (!(cond)) { \
186 		panic msg; \
187 	} \
188 } while (0)
189 #define	stats_abort() abort()
190 #endif /* NDEBUG */
191 #define	stats_free(ptr) free(ptr)
192 #define	panic(fmt, ...) do { \
193 	fprintf(stderr, (fmt), ##__VA_ARGS__); \
194 	stats_abort(); \
195 } while (0)
196 #endif /* _KERNEL */
197 
/* Maximum v1 blob size; cursz/maxsz are uint16_t so the cap is 64KiB - 1. */
#define	SB_V1_MAXSZ 65535

/* Obtain a blob offset pointer. */
#define	BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))

/*
 * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
 * power of 2 size, we can shift instead of divide. The shift amount must be
 * updated if sizeof(struct voi) ever changes, which the assert should catch.
 */
#define	NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
    sizeof(struct statsblobv1)) >> 3))
_Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
211 
212 /* Try restrict names to alphanumeric and underscore to simplify JSON compat. */
213 const char *vs_stype2name[VS_NUM_STYPES] = {
214 	[VS_STYPE_VOISTATE] = "VOISTATE",
215 	[VS_STYPE_SUM] = "SUM",
216 	[VS_STYPE_MAX] = "MAX",
217 	[VS_STYPE_MIN] = "MIN",
218 	[VS_STYPE_HIST] = "HIST",
219 	[VS_STYPE_TDGST] = "TDGST",
220 };
221 
222 const char *vs_stype2desc[VS_NUM_STYPES] = {
223 	[VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
224 	[VS_STYPE_SUM] = "Simple arithmetic accumulator",
225 	[VS_STYPE_MAX] = "Maximum observed VOI value",
226 	[VS_STYPE_MIN] = "Minimum observed VOI value",
227 	[VS_STYPE_HIST] = "Histogram of observed VOI values",
228 	[VS_STYPE_TDGST] = "t-digest of observed VOI values",
229 };
230 
231 const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
232 	[VSD_DTYPE_VOISTATE] = "VOISTATE",
233 	[VSD_DTYPE_INT_S32] = "INT_S32",
234 	[VSD_DTYPE_INT_U32] = "INT_U32",
235 	[VSD_DTYPE_INT_S64] = "INT_S64",
236 	[VSD_DTYPE_INT_U64] = "INT_U64",
237 	[VSD_DTYPE_INT_SLONG] = "INT_SLONG",
238 	[VSD_DTYPE_INT_ULONG] = "INT_ULONG",
239 	[VSD_DTYPE_Q_S32] = "Q_S32",
240 	[VSD_DTYPE_Q_U32] = "Q_U32",
241 	[VSD_DTYPE_Q_S64] = "Q_S64",
242 	[VSD_DTYPE_Q_U64] = "Q_U64",
243 	[VSD_DTYPE_CRHIST32] = "CRHIST32",
244 	[VSD_DTYPE_DRHIST32] = "DRHIST32",
245 	[VSD_DTYPE_DVHIST32] = "DVHIST32",
246 	[VSD_DTYPE_CRHIST64] = "CRHIST64",
247 	[VSD_DTYPE_DRHIST64] = "DRHIST64",
248 	[VSD_DTYPE_DVHIST64] = "DVHIST64",
249 	[VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
250 	[VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
251 };
252 
253 const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
254 	[VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
255 	[VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
256 	[VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
257 	[VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
258 	[VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
259 	[VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
260 	[VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
261 	[VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
262 	[VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
263 	[VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
264 	[VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
265 	[VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
266 	[VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
267 	[VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
268 	[VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
269 	[VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
270 	[VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
271 	[VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
272 	[VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
273 };
274 
275 static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
276 	[VSD_DTYPE_VOISTATE] = true,
277 	[VSD_DTYPE_INT_S32] = false,
278 	[VSD_DTYPE_INT_U32] = false,
279 	[VSD_DTYPE_INT_S64] = false,
280 	[VSD_DTYPE_INT_U64] = false,
281 	[VSD_DTYPE_INT_SLONG] = false,
282 	[VSD_DTYPE_INT_ULONG] = false,
283 	[VSD_DTYPE_Q_S32] = false,
284 	[VSD_DTYPE_Q_U32] = false,
285 	[VSD_DTYPE_Q_S64] = false,
286 	[VSD_DTYPE_Q_U64] = false,
287 	[VSD_DTYPE_CRHIST32] = true,
288 	[VSD_DTYPE_DRHIST32] = true,
289 	[VSD_DTYPE_DVHIST32] = true,
290 	[VSD_DTYPE_CRHIST64] = true,
291 	[VSD_DTYPE_DRHIST64] = true,
292 	[VSD_DTYPE_DVHIST64] = true,
293 	[VSD_DTYPE_TDGSTCLUST32] = true,
294 	[VSD_DTYPE_TDGSTCLUST64] = true,
295 };
296 
297 const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
298 	[LIM_MIN] = {
299 		[VSD_DTYPE_VOISTATE] = {},
300 		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
301 		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
302 		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
303 		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
304 		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
305 		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
306 		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
307 		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
308 		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
309 		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
310 	},
311 	[LIM_MAX] = {
312 		[VSD_DTYPE_VOISTATE] = {},
313 		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
314 		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
315 		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
316 		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
317 		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
318 		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
319 		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
320 		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
321 		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
322 		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
323 	}
324 };
325 
326 /* tpllistlock protects tpllist and ntpl */
327 static uint32_t ntpl;
328 static struct statsblob_tpl **tpllist;
329 
330 static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
331     int flags);
332 //static void stats_v1_blob_finalise(struct statsblobv1 *sb);
333 static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
334     uint32_t flags);
335 static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
336     int newvoistatbytes, int newvoistatdatabytes);
337 static void stats_v1_blob_iter(struct statsblobv1 *sb,
338     stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
339 static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
340     struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
341 
342 static inline int
ctd32cmp(const struct voistatdata_tdgstctd32 * c1,const struct voistatdata_tdgstctd32 * c2)343 ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
344 {
345 
346 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
347 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
348 	    Q_RELPREC(c1->mu, c2->mu)));
349 
350        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
351 }
352 ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
353 
354 static inline int
ctd64cmp(const struct voistatdata_tdgstctd64 * c1,const struct voistatdata_tdgstctd64 * c2)355 ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
356 {
357 
358 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
359 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
360 	    Q_RELPREC(c1->mu, c2->mu)));
361 
362        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
363 }
364 ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
365 
366 #ifdef DIAGNOSTIC
367 RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
368 RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
369 #endif
370 
/*
 * Return system uptime as an sbintime_t.  In-kernel this is a thin wrapper
 * around sbinuptime(); in userspace it is derived from the fast monotonic
 * clock so deltas stay meaningful across wall-clock adjustments.
 */
static inline sbintime_t
stats_sbinuptime(void)
{
	sbintime_t sbt;
#ifdef _KERNEL

	sbt = sbinuptime();
#else /* ! _KERNEL */
	struct timespec tp;

	clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
	sbt = tstosbt(tp);
#endif /* _KERNEL */

	return (sbt);
}
387 
/*
 * realloc() wrapper shared by kernel and userspace builds.  'oldsz' is only
 * consulted in userspace, where the kernel M_ZERO malloc flag is emulated by
 * zeroing the newly grown tail of the allocation; the kernel path passes
 * 'flags' straight through to realloc(9).  Returns NULL on failure, leaving
 * the original allocation (if any) intact per realloc semantics.
 */
static inline void *
stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
{

#ifdef _KERNEL
	/* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
	if (!(flags & (M_WAITOK | M_NOWAIT)))
		flags |= M_NOWAIT;
	ptr = realloc(ptr, newsz, M_STATS, flags);
#else /* ! _KERNEL */
	ptr = realloc(ptr, newsz);
	if ((flags & M_ZERO) && ptr != NULL) {
		/* Fresh allocation: zero it all; growth: zero only the tail. */
		if (oldsz == 0)
			memset(ptr, '\0', newsz);
		else if (newsz > oldsz)
			memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
	}
#endif /* _KERNEL */

	return (ptr);
}
409 
/*
 * strdup() wrapper.  In-kernel the copy is allocated from M_STATS with the
 * given malloc flags (defaulting to M_NOWAIT); in userspace libc strdup() is
 * used and 'flags' is ignored.  Returns NULL on allocation failure.  Note
 * the function body is split by the #ifdef, including the parameter list.
 */
static inline char *
stats_strdup(const char *s,
#ifdef _KERNEL
    int flags)
{
	char *copy;
	size_t len;

	if (!(flags & (M_WAITOK | M_NOWAIT)))
		flags |= M_NOWAIT;

	len = strlen(s) + 1;
	if ((copy = malloc(len, M_STATS, flags)) != NULL)
		bcopy(s, copy, len);

	return (copy);
#else
    int flags __unused)
{
	return (strdup(s));
#endif
}
432 
433 static inline void
434 stats_tpl_update_hash(struct statsblob_tpl *tpl)
435 {
436 
437 	TPL_LIST_WLOCK_ASSERT();
438 	tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
439 	for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
440 		if (tpl->mb->voi_meta[voi_id].name != NULL)
441 			tpl->mb->tplhash = hash32_str(
442 			    tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
443 	}
444 	tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
445 	    tpl->mb->tplhash);
446 }
447 
/*
 * Integer exponentiation by squaring: returns base raised to exp.  Runs in
 * O(log exp) multiplications; overflow silently wraps per uint64_t
 * arithmetic.  By convention stats_pow_u64(x, 0) == 1 for any x.
 */
static inline uint64_t
stats_pow_u64(uint64_t base, uint64_t exp)
{
	uint64_t res;

	for (res = 1; exp != 0; exp >>= 1) {
		/* Fold in the current power of base for each set bit. */
		if (exp & 1)
			res *= base;
		base *= base;
	}

	return (res);
}
462 
463 static inline int
464 stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
465     struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
466 {
467 	uint64_t step = 0;
468 	int error = 0;
469 
470 	switch (info->scheme) {
471 	case BKT_LIN:
472 		step = info->lin.stepinc;
473 		break;
474 	case BKT_EXP:
475 		step = stats_pow_u64(info->exp.stepbase,
476 		    info->exp.stepexp + curbkt);
477 		break;
478 	case BKT_LINEXP:
479 		{
480 		uint64_t curstepexp = 1;
481 
482 		switch (info->voi_dtype) {
483 		case VSD_DTYPE_INT_S32:
484 			while ((int32_t)stats_pow_u64(info->linexp.stepbase,
485 			    curstepexp) <= bkt_lb->int32.s32)
486 				curstepexp++;
487 			break;
488 		case VSD_DTYPE_INT_U32:
489 			while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
490 			    curstepexp) <= bkt_lb->int32.u32)
491 				curstepexp++;
492 			break;
493 		case VSD_DTYPE_INT_S64:
494 			while ((int64_t)stats_pow_u64(info->linexp.stepbase,
495 			    curstepexp) <= bkt_lb->int64.s64)
496 				curstepexp++;
497 			break;
498 		case VSD_DTYPE_INT_U64:
499 			while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
500 			    curstepexp) <= bkt_lb->int64.u64)
501 				curstepexp++;
502 			break;
503 		case VSD_DTYPE_INT_SLONG:
504 			while ((long)stats_pow_u64(info->linexp.stepbase,
505 			    curstepexp) <= bkt_lb->intlong.slong)
506 				curstepexp++;
507 			break;
508 		case VSD_DTYPE_INT_ULONG:
509 			while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
510 			    curstepexp) <= bkt_lb->intlong.ulong)
511 				curstepexp++;
512 			break;
513 		case VSD_DTYPE_Q_S32:
514 			while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
515 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
516 			break;
517 		case VSD_DTYPE_Q_U32:
518 			while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
519 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
520 			break;
521 		case VSD_DTYPE_Q_S64:
522 			while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
523 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
524 				curstepexp++;
525 			break;
526 		case VSD_DTYPE_Q_U64:
527 			while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
528 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
529 				curstepexp++;
530 			break;
531 		default:
532 			break;
533 		}
534 
535 		step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
536 		    info->linexp.linstepdiv;
537 		if (step == 0)
538 			step = 1;
539 		break;
540 		}
541 	default:
542 		break;
543 	}
544 
545 	if (info->scheme == BKT_USR) {
546 		*bkt_lb = info->usr.bkts[curbkt].lb;
547 		*bkt_ub = info->usr.bkts[curbkt].ub;
548 	} else if (step != 0) {
549 		switch (info->voi_dtype) {
550 		case VSD_DTYPE_INT_S32:
551 			bkt_ub->int32.s32 += (int32_t)step;
552 			break;
553 		case VSD_DTYPE_INT_U32:
554 			bkt_ub->int32.u32 += (uint32_t)step;
555 			break;
556 		case VSD_DTYPE_INT_S64:
557 			bkt_ub->int64.s64 += (int64_t)step;
558 			break;
559 		case VSD_DTYPE_INT_U64:
560 			bkt_ub->int64.u64 += (uint64_t)step;
561 			break;
562 		case VSD_DTYPE_INT_SLONG:
563 			bkt_ub->intlong.slong += (long)step;
564 			break;
565 		case VSD_DTYPE_INT_ULONG:
566 			bkt_ub->intlong.ulong += (unsigned long)step;
567 			break;
568 		case VSD_DTYPE_Q_S32:
569 			error = Q_QADDI(&bkt_ub->q32.sq32, step);
570 			break;
571 		case VSD_DTYPE_Q_U32:
572 			error = Q_QADDI(&bkt_ub->q32.uq32, step);
573 			break;
574 		case VSD_DTYPE_Q_S64:
575 			error = Q_QADDI(&bkt_ub->q64.sq64, step);
576 			break;
577 		case VSD_DTYPE_Q_U64:
578 			error = Q_QADDI(&bkt_ub->q64.uq64, step);
579 			break;
580 		default:
581 			break;
582 		}
583 	} else { /* info->scheme != BKT_USR && step == 0 */
584 		return (EINVAL);
585 	}
586 
587 	return (error);
588 }
589 
/*
 * Determine how many buckets the histogram described by 'info' needs to span
 * [info->lb, info->ub].  For BKT_USR the count comes from the user's bucket
 * spec; otherwise buckets are generated via stats_vss_hist_bkt_hlpr() until
 * one's upper bound exceeds info->ub.  The optional +/- infinity catch-all
 * buckets add one each.  Returns 0 on any error.
 */
static uint32_t
stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
{
	struct voistatdata_numeric bkt_lb, bkt_ub;
	uint32_t nbkts;
	int done;

	if (info->scheme == BKT_USR) {
		/* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
		info->lb = info->usr.bkts[0].lb;
		/*
		 * NOTE(review): the last bucket's '.lb' (not '.ub') is used
		 * as the overall upper bound; loop termination for BKT_USR
		 * relies on the bucket count below, so this only matters if
		 * info->ub is consumed elsewhere - confirm intent.
		 */
		info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
	}

	nbkts = 0;
	done = 0;
	bkt_ub = info->lb;

	/* Generate buckets until the running upper bound passes info->ub. */
	do {
		bkt_lb = bkt_ub;
		if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
			return (0);

		if (info->scheme == BKT_USR)
			done = (nbkts == info->usr.nbkts);
		else {
			switch (info->voi_dtype) {
			case VSD_DTYPE_INT_S32:
				done = (bkt_ub.int32.s32 > info->ub.int32.s32);
				break;
			case VSD_DTYPE_INT_U32:
				done = (bkt_ub.int32.u32 > info->ub.int32.u32);
				break;
			case VSD_DTYPE_INT_S64:
				done = (bkt_ub.int64.s64 > info->ub.int64.s64);
				break;
			case VSD_DTYPE_INT_U64:
				done = (bkt_ub.int64.u64 > info->ub.int64.u64);
				break;
			case VSD_DTYPE_INT_SLONG:
				done = (bkt_ub.intlong.slong >
				    info->ub.intlong.slong);
				break;
			case VSD_DTYPE_INT_ULONG:
				done = (bkt_ub.intlong.ulong >
				    info->ub.intlong.ulong);
				break;
			case VSD_DTYPE_Q_S32:
				done = Q_QGTQ(bkt_ub.q32.sq32,
				    info->ub.q32.sq32);
				break;
			case VSD_DTYPE_Q_U32:
				done = Q_QGTQ(bkt_ub.q32.uq32,
				    info->ub.q32.uq32);
				break;
			case VSD_DTYPE_Q_S64:
				done = Q_QGTQ(bkt_ub.q64.sq64,
				    info->ub.q64.sq64);
				break;
			case VSD_DTYPE_Q_U64:
				done = Q_QGTQ(bkt_ub.q64.uq64,
				    info->ub.q64.uq64);
				break;
			default:
				return (0);
			}
		}
	} while (!done);

	/* Account for the optional infinity catch-all buckets. */
	if (info->flags & VSD_HIST_LBOUND_INF)
		nbkts++;
	if (info->flags & VSD_HIST_UBOUND_INF)
		nbkts++;

	return (nbkts);
}
665 
/*
 * Initialise the voistatspec for a histogram stat: determine the bucket
 * count, allocate and zero the initial value, populate every bucket's
 * bounds, and finally fill in the optional +/- infinity catch-all buckets.
 * Returns 0 on success, EINVAL for inconsistent arguments or bucket
 * generation failure, or ENOMEM.
 *
 * NOTE(review): error returns after the stats_realloc() below leave vss->iv
 * allocated; presumably the caller releases it via stats_vss_hlpr_cleanup()
 * - confirm.
 */
int
stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_hist_hlpr_info *info)
{
	struct voistatdata_hist *hist;
	struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
	    *ubinfbktlb, *ubinfbktub;
	uint32_t bkt, nbkts, nloop;

	/* Infinite-bound buckets make no sense for discrete-value hists. */
	if (vss == NULL || info == NULL || (info->flags &
	(VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
	VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
		return (EINVAL);

	info->voi_dtype = voi_dtype;

	if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
		return (EINVAL);

	/* Size the voistat data blob for the chosen histogram type. */
	switch (info->hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
		break;
	case VSD_DTYPE_DRHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
		break;
	case VSD_DTYPE_DVHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
		break;
	case VSD_DTYPE_CRHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
		break;
	case VSD_DTYPE_DRHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
		break;
	case VSD_DTYPE_DVHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
	if (vss->iv == NULL)
		return (ENOMEM);

	hist = (struct voistatdata_hist *)vss->iv;
	bkt_ub = info->lb;

	/*
	 * Fill in regular bucket bounds.  If a lower-infinity bucket was
	 * requested, slot 0 is skipped here and initialised further below.
	 */
	for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
	    bkt < nbkts;
	    bkt++, nloop++) {
		bkt_lb = bkt_ub;
		if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
			return (EINVAL);

		switch (info->hist_dtype) {
		case VSD_DTYPE_CRHIST32:
			VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
			break;
		case VSD_DTYPE_DRHIST32:
			VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
			VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
			break;
		case VSD_DTYPE_DVHIST32:
			VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
			break;
		case VSD_DTYPE_CRHIST64:
			VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
			break;
		case VSD_DTYPE_DRHIST64:
			VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
			VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
			break;
		case VSD_DTYPE_DVHIST64:
			VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
			break;
		default:
			return (EINVAL);
		}
	}

	lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;

	/* Locate the bound fields of the infinity catch-all buckets. */
	switch (info->hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
		ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
		break;
	case VSD_DTYPE_DRHIST32:
		lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
		lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
		ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
		ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
		break;
	case VSD_DTYPE_CRHIST64:
		lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
		ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
		break;
	case VSD_DTYPE_DRHIST64:
		lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
		lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
		ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
		ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
		break;
	case VSD_DTYPE_DVHIST32:
	case VSD_DTYPE_DVHIST64:
		/* Discrete-value hists never have infinity buckets. */
		break;
	default:
		return (EINVAL);
	}

	if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
		*lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
		/*
		 * Assignment from numeric_limit array for Q types assigns max
		 * possible integral/fractional value for underlying data type,
		 * but we must set control bits for this specific histogram per
		 * the user's choice of fractional bits, which we extract from
		 * info->lb.
		 */
		if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
		    info->voi_dtype == VSD_DTYPE_Q_U32) {
			/* Signedness doesn't matter for setting control bits. */
			Q_SCVAL(lbinfbktlb->q32.sq32,
			    Q_GCVAL(info->lb.q32.sq32));
		} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
		    info->voi_dtype == VSD_DTYPE_Q_U64) {
			/* Signedness doesn't matter for setting control bits. */
			Q_SCVAL(lbinfbktlb->q64.sq64,
			    Q_GCVAL(info->lb.q64.sq64));
		}
		if (lbinfbktub)
			*lbinfbktub = info->lb;
	}
	if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
		/* The +inf bucket starts where the last regular one ended. */
		*ubinfbktlb = bkt_lb;
		if (ubinfbktub) {
			*ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
			if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
			    info->voi_dtype == VSD_DTYPE_Q_U32) {
				Q_SCVAL(ubinfbktub->q32.sq32,
				    Q_GCVAL(info->lb.q32.sq32));
			} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
			    info->voi_dtype == VSD_DTYPE_Q_U64) {
				Q_SCVAL(ubinfbktub->q64.sq64,
				    Q_GCVAL(info->lb.q64.sq64));
			}
		}
	}

	return (0);
}
819 
/*
 * Initialise the voistatspec for a t-digest stat: size the voistat data to
 * hold info->nctds centroids of the requested t-digest data type, allocate
 * the zeroed initial value, and seed each centroid's mean with a zero Q
 * number of the requested precision.  Returns 0 on success, EINVAL for an
 * unknown t-digest data type, or ENOMEM.
 */
int
stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_tdgst_hlpr_info *info)
{
	struct voistatdata_tdgst *tdgst;
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;

	info->voi_dtype = voi_dtype;

	/* Compute the blob space needed for the centroid tree. */
	switch (info->tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
	if (vss->iv == NULL)
		return (ENOMEM);

	tdgst = (struct voistatdata_tdgst *)vss->iv;

	/* ARB_INIT iterates all tree nodes; set each mu's Q precision. */
	switch (info->tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
			Q_INI(&ctd32->mu, 0, 0, info->prec);
		}
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
			Q_INI(&ctd64->mu, 0, 0, info->prec);
		}
		break;
	default:
		return (EINVAL);
	}

	return (0);
}
868 
/*
 * Initialise the voistatspec for a simple numeric stat.  The initial value
 * is the identity/sentinel for the stat type: 0 for SUM, the data type's
 * maximum for MIN and minimum for MAX (so the first update always wins).
 * Storage is allocated and the value stored in the VOI's native data type;
 * Q types additionally get their control bits set from info->prec.
 * Returns 0 on success, EINVAL for unsupported stat types, or ENOMEM.
 */
int
stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_numeric_hlpr_info *info)
{
	struct voistatdata_numeric iv;

	switch (vss->stype) {
	case VS_STYPE_SUM:
		iv = stats_ctor_vsd_numeric(0);
		break;
	case VS_STYPE_MIN:
		iv = numeric_limits[LIM_MAX][voi_dtype];
		break;
	case VS_STYPE_MAX:
		iv = numeric_limits[LIM_MIN][voi_dtype];
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
	if (vss->iv == NULL)
		return (ENOMEM);

	vss->vs_dtype = voi_dtype;
	vss->vsdsz = vsd_dtype2size[voi_dtype];
	/* Store the initial value through the type-appropriate lens. */
	switch (voi_dtype) {
	case VSD_DTYPE_INT_S32:
		*((int32_t *)vss->iv) = iv.int32.s32;
		break;
	case VSD_DTYPE_INT_U32:
		*((uint32_t *)vss->iv) = iv.int32.u32;
		break;
	case VSD_DTYPE_INT_S64:
		*((int64_t *)vss->iv) = iv.int64.s64;
		break;
	case VSD_DTYPE_INT_U64:
		*((uint64_t *)vss->iv) = iv.int64.u64;
		break;
	case VSD_DTYPE_INT_SLONG:
		*((long *)vss->iv) = iv.intlong.slong;
		break;
	case VSD_DTYPE_INT_ULONG:
		*((unsigned long *)vss->iv) = iv.intlong.ulong;
		break;
	case VSD_DTYPE_Q_S32:
		*((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_U32:
		*((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_S64:
		*((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_U64:
		*((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
		    Q_CTRLINI(info->prec));
		break;
	default:
		break;
	}

	return (0);
}
936 
937 int
938 stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
939     struct voistatspec *vss)
940 {
941 	int i, ret;
942 
943 	for (i = nvss - 1; i >= 0; i--) {
944 		if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
945 		    vss[i].hlprinfo)) != 0)
946 			return (ret);
947 	}
948 
949 	return (0);
950 }
951 
952 void
953 stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
954 {
955 	int i;
956 
957 	for (i = nvss - 1; i >= 0; i--) {
958 		if (vss[i].hlpr) {
959 			stats_free((void *)vss[i].iv);
960 			vss[i].iv = NULL;
961 		}
962 	}
963 }
964 
965 int
966 stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
967 {
968 	int error;
969 
970 	error = 0;
971 
972 	TPL_LIST_WLOCK();
973 	if (tpl_id < 0 || tpl_id >= (int)ntpl) {
974 		error = ENOENT;
975 	} else {
976 		*tpl = tpllist[tpl_id];
977 		/* XXXLAS: Acquire refcount on tpl. */
978 	}
979 	TPL_LIST_WUNLOCK();
980 
981 	return (error);
982 }
983 
984 int
985 stats_tpl_fetch_allocid(const char *name, uint32_t hash)
986 {
987 	int i, tpl_id;
988 
989 	tpl_id = -ESRCH;
990 
991 	TPL_LIST_RLOCK();
992 	for (i = ntpl - 1; i >= 0; i--) {
993 		if (name != NULL) {
994 			if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
995 			    strncmp(name, tpllist[i]->mb->tplname,
996 			    TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
997 			    tpllist[i]->mb->tplhash)) {
998 				tpl_id = i;
999 				break;
1000 			}
1001 		} else if (hash == tpllist[i]->mb->tplhash) {
1002 			tpl_id = i;
1003 			break;
1004 		}
1005 	}
1006 	TPL_LIST_RUNLOCK();
1007 
1008 	return (tpl_id);
1009 }
1010 
1011 int
1012 stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
1013 {
1014 	int error;
1015 
1016 	error = 0;
1017 
1018 	TPL_LIST_RLOCK();
1019 	if (tpl_id < ntpl) {
1020 		if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
1021 			strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
1022 		else
1023 			error = EOVERFLOW;
1024 	} else
1025 		error = ENOENT;
1026 	TPL_LIST_RUNLOCK();
1027 
1028 	return (error);
1029 }
1030 
int
stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
    void *seed_bytes, size_t seed_len)
{
	uint32_t cum_pct, rnd_pct;
	int i;

	cum_pct = 0;

	/*
	 * Choose a pseudorandom or seeded number in range [0,100] and use
	 * it to make a sampling decision and template selection where required.
	 * If no seed is supplied, a PRNG is used to generate a pseudorandom
	 * number so that every selection is independent. If a seed is supplied,
	 * the caller desires random selection across different seeds, but
	 * deterministic selection given the same seed. This is achieved by
	 * hashing the seed and using the hash as the random number source.
	 *
	 * XXXLAS: Characterise hash function output distribution.
	 */
	if (seed_bytes == NULL)
		rnd_pct = random() / (INT32_MAX / 100);
	else
		rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
		    (UINT32_MAX / 100U);

	/*
	 * We map the randomly selected percentage on to the interval [0,100]
	 * consisting of the cumulatively summed template sampling percentages.
	 * The difference between the cumulative sum of all template sampling
	 * percentages and 100 is treated as a NULL assignment i.e. no stats
	 * template will be assigned, and -1 returned instead.
	 */
	for (i = 0; i < nrates; i++) {
		cum_pct += rates[i].tpl_sample_pct;

		/* The rates array must not sum to more than 100%. */
		KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
		    cum_pct));
		/* Zero-rate entries can never be selected. */
		if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
			continue;

		/* rnd_pct fell in this entry's sub-interval: select it. */
		return (rates[i].tpl_slot_id);
	}

	return (-1);
}
1077 
int
stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
    struct statsblobv1 *src, uint32_t flags)
{
	int error;

	error = 0;

	/*
	 * Validate arguments: both blob pointers must be supplied, src must
	 * hold at least a full statsblob header, and SB_CLONE_ALLOCDST
	 * (kernel-allocated destination) is mutually exclusive with the
	 * user-space destination copyout flags.
	 */
	if (src == NULL || dst == NULL ||
	    src->cursz < sizeof(struct statsblob) ||
	    ((flags & SB_CLONE_ALLOCDST) &&
	    (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
		error = EINVAL;
	} else if (flags & SB_CLONE_ALLOCDST) {
		/* Allocate a destination just big enough for src's data. */
		*dst = stats_realloc(NULL, 0, src->cursz, 0);
		if (*dst)
			(*dst)->maxsz = dstmaxsz = src->cursz;
		else
			error = ENOMEM;
	} else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
		error = EINVAL;
	}

	if (!error) {
		size_t postcurszlen;

		/*
		 * Clone src into dst except for the maxsz field. If dst is too
		 * small to hold all of src, only copy src's header and return
		 * EOVERFLOW.
		 *
		 * The copy is done in two pieces so that dst's own maxsz is
		 * preserved: first everything up to (but excluding) maxsz,
		 * then everything from cursz onwards.
		 */
#ifdef _KERNEL
		if (flags & SB_CLONE_USRDSTNOFAULT)
			error = copyout_nofault(src, *dst,
			    offsetof(struct statsblob, maxsz));
		else if (flags & SB_CLONE_USRDST)
			error = copyout(src, *dst,
			    offsetof(struct statsblob, maxsz));
		else
#endif
			memcpy(*dst, src, offsetof(struct statsblob, maxsz));
#ifdef _KERNEL
		if (error != 0)
			goto out;
#endif

		if (dstmaxsz >= src->cursz) {
			postcurszlen = src->cursz -
			    offsetof(struct statsblob, cursz);
		} else {
			/* dst too small: copy the remaining header only. */
			error = EOVERFLOW;
			postcurszlen = sizeof(struct statsblob) -
			    offsetof(struct statsblob, cursz);
		}
#ifdef _KERNEL
		/*
		 * NOTE(review): in the copyout paths a successful copy
		 * overwrites a previously set EOVERFLOW with 0 here, unlike
		 * the memcpy path which preserves it — confirm whether this
		 * asymmetry is intended.
		 */
		if (flags & SB_CLONE_USRDSTNOFAULT)
			error = copyout_nofault(&(src->cursz), &((*dst)->cursz),
			    postcurszlen);
		else if (flags & SB_CLONE_USRDST)
			error = copyout(&(src->cursz), &((*dst)->cursz),
			    postcurszlen);
		else
#endif
			memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
	}
#ifdef _KERNEL
out:
#endif

	return (error);
}
1150 
int
stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
{
	struct statsblobv1_tpl *tpl, **newtpllist;
	struct statsblobv1 *tpl_sb;
	struct metablob *tpl_mb;
	int tpl_id;

	/* Template names are bounded; reject overlong ones up front. */
	if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
		return (-EINVAL);

	/* Refuse to create a second template with the same name. */
	if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
		return (-EEXIST);

	/*
	 * Allocate all three pieces (template wrapper, metadata blob, stats
	 * blob) before checking, so failure cleanup is centralised below.
	 */
	tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
	tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
	tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);

	if (tpl_mb != NULL && name != NULL)
		tpl_mb->tplname = stats_strdup(name, 0);

	if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
	    tpl_mb->tplname == NULL) {
		/* Partial allocation: unwind everything and fail. */
		stats_free(tpl);
		stats_free(tpl_sb);
		if (tpl_mb != NULL) {
			stats_free(tpl_mb->tplname);
			stats_free(tpl_mb);
		}
		return (-ENOMEM);
	}

	tpl->mb = tpl_mb;
	tpl->sb = tpl_sb;

	/* Initialise the empty template blob's header. */
	tpl_sb->abi = STATS_ABI_V1;
	tpl_sb->endian =
#if BYTE_ORDER == LITTLE_ENDIAN
	    SB_LE;
#elif BYTE_ORDER == BIG_ENDIAN
	    SB_BE;
#else
	    SB_UE;
#endif
	tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
	tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);

	/* Grow the global template list by one slot and publish. */
	TPL_LIST_WLOCK();
	newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
	    (ntpl + 1) * sizeof(void *), 0);
	if (newtpllist != NULL) {
		tpl_id = ntpl++;
		tpllist = (struct statsblob_tpl **)newtpllist;
		tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
		stats_tpl_update_hash(tpllist[tpl_id]);
	} else {
		/* List growth failed: free the new template pieces. */
		stats_free(tpl);
		stats_free(tpl_sb);
		if (tpl_mb != NULL) {
			stats_free(tpl_mb->tplname);
			stats_free(tpl_mb);
		}
		tpl_id = -ENOMEM;
	}
	TPL_LIST_WUNLOCK();

	return (tpl_id);
}
1219 
int
stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
    enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
    uint32_t flags)
{
	struct voi *voi;
	struct voistat *tmpstat;
	struct statsblobv1 *tpl_sb;
	struct metablob *tpl_mb;
	int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
	    newvoistatdatabytes, newvoistatmaxid;
	uint32_t nbytes;

	if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
	    nvss == 0 || vss == NULL)
		return (EINVAL);

	error = nbytes = newvoibytes = newvoistatbytes =
	    newvoistatdatabytes = 0;
	newvoistatmaxid = -1;

	/* Calculate the number of bytes required for the new voistats. */
	for (i = nvss - 1; i >= 0; i--) {
		/* Validate each spec before committing to any changes. */
		if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
		    vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
		    vss[i].iv == NULL || vss[i].vsdsz == 0)
			return (EINVAL);
		if ((int)vss[i].stype > newvoistatmaxid)
			newvoistatmaxid = vss[i].stype;
		newvoistatdatabytes += vss[i].vsdsz;
	}

	if (flags & SB_VOI_RELUPDATE) {
		/* XXXLAS: VOI state bytes may need to vary based on stat types. */
		newvoistatdatabytes += sizeof(struct voistatdata_voistate);
	}
	nbytes += newvoistatdatabytes;

	TPL_LIST_WLOCK();
	if (tpl_id < ntpl) {
		tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
		tpl_mb = tpllist[tpl_id]->mb;

		if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
			/* Adding a new VOI and associated stats. */
			if (voi_id >= NVOIS(tpl_sb)) {
				/* We need to grow the tpl_sb->vois array. */
				newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
				    sizeof(struct voi);
				nbytes += newvoibytes;
			}
			newvoistatbytes =
			    (newvoistatmaxid + 1) * sizeof(struct voistat);
		} else {
			/* Adding stats to an existing VOI. */
			if (newvoistatmaxid >
			    tpl_sb->vois[voi_id].voistatmaxid) {
				newvoistatbytes = (newvoistatmaxid -
				    tpl_sb->vois[voi_id].voistatmaxid) *
				    sizeof(struct voistat);
			}
			/* XXXLAS: KPI does not yet support expanding VOIs. */
			error = EOPNOTSUPP;
		}
		nbytes += newvoistatbytes;

		if (!error && newvoibytes > 0) {
			/* Grow the per-VOI metadata array in lock step. */
			struct voi_meta *voi_meta = tpl_mb->voi_meta;

			voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
			    0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
			    (1 + voi_id) * sizeof(struct voi_meta),
			    M_ZERO);

			if (voi_meta == NULL)
				error = ENOMEM;
			else
				tpl_mb->voi_meta = voi_meta;
		}

		if (!error) {
			/* NB: Resizing can change where tpl_sb points. */
			error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
			    newvoistatbytes, newvoistatdatabytes);
		}

		if (!error) {
			tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
			    0);
			if (tpl_mb->voi_meta[voi_id].name == NULL)
				error = ENOMEM;
		}

		if (!error) {
			/* Update the template list with the resized pointer. */
			tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;

			/* Update the template. */
			voi = &tpl_sb->vois[voi_id];

			if (voi->id < 0) {
				/* VOI is new and needs to be initialised. */
				voi->id = voi_id;
				voi->dtype = voi_dtype;
				voi->stats_off = tpl_sb->stats_off;
				if (flags & SB_VOI_RELUPDATE)
					voi->flags |= VOI_REQSTATE;
			} else {
				/*
				 * XXXLAS: When this else block is written, the
				 * "KPI does not yet support expanding VOIs"
				 * error earlier in this function can be
				 * removed. What is required here is to shuffle
				 * the voistat array such that the new stats for
				 * the voi are contiguous, which will displace
				 * stats for other vois that reside after the
				 * voi being updated. The other vois then need
				 * to have their stats_off adjusted post
				 * shuffle.
				 */
			}

			voi->voistatmaxid = newvoistatmaxid;
			newstatdataidx = 0;

			if (voi->flags & VOI_REQSTATE) {
				/* Initialise the voistate stat in slot 0. */
				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
				tmpstat->stype = VS_STYPE_VOISTATE;
				tmpstat->flags = 0;
				tmpstat->dtype = VSD_DTYPE_VOISTATE;
				newstatdataidx = tmpstat->dsz =
				    sizeof(struct voistatdata_numeric);
				tmpstat->data_off = tpl_sb->statsdata_off;
			}

			/*
			 * Populate one voistat slot per spec; stype doubles
			 * as the slot index, and the initial values are
			 * copied into the stats data region.
			 */
			for (i = 0; (uint32_t)i < nvss; i++) {
				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
				    (vss[i].stype * sizeof(struct voistat)));
				KASSERT(tmpstat->stype < 0, ("voistat %p "
				    "already initialised", tmpstat));
				tmpstat->stype = vss[i].stype;
				tmpstat->flags = vss[i].flags;
				tmpstat->dtype = vss[i].vs_dtype;
				tmpstat->dsz = vss[i].vsdsz;
				tmpstat->data_off = tpl_sb->statsdata_off +
				    newstatdataidx;
				memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
				    vss[i].iv, vss[i].vsdsz);
				newstatdataidx += vss[i].vsdsz;
			}

			/* Update the template version hash. */
			stats_tpl_update_hash(tpllist[tpl_id]);
			/* XXXLAS: Confirm tpl name/hash pair remains unique. */
		}
	} else
		error = EINVAL;
	TPL_LIST_WUNLOCK();

	return (error);
}
1382 
1383 struct statsblobv1 *
1384 stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
1385 {
1386 	struct statsblobv1 *sb;
1387 	int error;
1388 
1389 	sb = NULL;
1390 
1391 	TPL_LIST_RLOCK();
1392 	if (tpl_id < ntpl) {
1393 		sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
1394 		if (sb != NULL) {
1395 			sb->maxsz = tpllist[tpl_id]->sb->maxsz;
1396 			error = stats_v1_blob_init_locked(sb, tpl_id, 0);
1397 		} else
1398 			error = ENOMEM;
1399 
1400 		if (error) {
1401 			stats_free(sb);
1402 			sb = NULL;
1403 		}
1404 	}
1405 	TPL_LIST_RUNLOCK();
1406 
1407 	return (sb);
1408 }
1409 
void
stats_v1_blob_destroy(struct statsblobv1 *sb)
{

	/* A blob is a single allocation; release its backing memory. */
	stats_free(sb);
}
1416 
1417 int
1418 stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
1419     enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
1420     size_t *retvsdsz)
1421 {
1422 	struct voi *v;
1423 	struct voistat *vs;
1424 
1425 	if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
1426 	    voi_id >= NVOIS(sb))
1427 		return (EINVAL);
1428 
1429 	v = &sb->vois[voi_id];
1430 	if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
1431 		return (EINVAL);
1432 
1433 	vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
1434 	*retvsd = BLOB_OFFSET(sb, vs->data_off);
1435 	if (retdtype != NULL)
1436 		*retdtype = vs->dtype;
1437 	if (retvsdsz != NULL)
1438 		*retvsdsz = vs->dsz;
1439 
1440 	return (0);
1441 }
1442 
1443 int
1444 stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
1445 {
1446 	int error;
1447 
1448 	error = 0;
1449 
1450 	TPL_LIST_RLOCK();
1451 	if (sb == NULL || tpl_id >= ntpl) {
1452 		error = EINVAL;
1453 	} else {
1454 		error = stats_v1_blob_init_locked(sb, tpl_id, flags);
1455 	}
1456 	TPL_LIST_RUNLOCK();
1457 
1458 	return (error);
1459 }
1460 
1461 static inline int
1462 stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
1463     uint32_t flags __unused)
1464 {
1465 	int error;
1466 
1467 	TPL_LIST_RLOCK_ASSERT();
1468 	error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
1469 	KASSERT(!error,
1470 	    ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
1471 
1472 	if (!error) {
1473 		memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
1474 		sb->created = sb->lastrst = stats_sbinuptime();
1475 		sb->tplhash = tpllist[tpl_id]->mb->tplhash;
1476 	}
1477 
1478 	return (error);
1479 }
1480 
static int
stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
    int newvoistatbytes, int newvoistatdatabytes)
{
	struct statsblobv1 *sb;
	struct voi *tmpvoi;
	struct voistat *tmpvoistat, *voistat_array;
	int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;

	/* Byte counts must be whole multiples of their struct sizes. */
	KASSERT(newvoibytes % sizeof(struct voi) == 0,
	    ("Bad newvoibytes %d", newvoibytes));
	KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
	    ("Bad newvoistatbytes %d", newvoistatbytes));

	error = ((newvoibytes % sizeof(struct voi) == 0) &&
	    (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
	sb = *sbpp;
	nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;

	/*
	 * XXXLAS: Required until we gain support for flags which alter the
	 * units of size/offset fields in key structs.
	 */
	if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
		error = EFBIG;

	if (!error && (sb->cursz + nbytes > sb->maxsz)) {
		/* Need to expand our blob. */
		sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
		if (sb != NULL) {
			sb->maxsz = sb->cursz + nbytes;
			*sbpp = sb;
		} else
		    error = ENOMEM;
	}

	if (!error) {
		/*
		 * Shuffle memory within the expanded blob working from the end
		 * backwards, leaving gaps for the new voistat and voistatdata
		 * structs at the beginning of their respective blob regions,
		 * and for the new voi structs at the end of their blob region.
		 */
		memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
		    BLOB_OFFSET(sb, sb->statsdata_off),
		    sb->cursz - sb->statsdata_off);
		memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
		    newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
		    sb->statsdata_off - sb->stats_off);

		/* First index of new voi/voistat structs to be initialised. */
		idxnewvois = NVOIS(sb);
		idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;

		/* Update housekeeping variables and offsets. */
		sb->cursz += nbytes;
		sb->stats_off += newvoibytes;
		sb->statsdata_off += newvoibytes + newvoistatbytes;

		/* XXXLAS: Zeroing not strictly needed but aids debugging. */
		memset(&sb->vois[idxnewvois], '\0', newvoibytes);
		memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
		    newvoistatbytes);
		memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
		    newvoistatdatabytes);

		/* Initialise new voi array members and update offsets. */
		for (i = 0; i < NVOIS(sb); i++) {
			tmpvoi = &sb->vois[i];
			if (i >= idxnewvois) {
				/* New slot: mark as unused/NULL VOI. */
				tmpvoi->id = tmpvoi->voistatmaxid = -1;
			} else if (tmpvoi->id > -1) {
				/* Existing VOI: account for shifted regions. */
				tmpvoi->stats_off += newvoibytes +
				    newvoistatbytes;
			}
		}

		/* Initialise new voistat array members and update offsets. */
		nvoistats = (sb->statsdata_off - sb->stats_off) /
		    sizeof(struct voistat);
		voistat_array = BLOB_OFFSET(sb, sb->stats_off);
		for (i = 0; i < nvoistats; i++) {
			tmpvoistat = &voistat_array[i];
			if (i <= idxnewvoistats) {
				/* New slot: mark as unused/NULL voistat. */
				tmpvoistat->stype = -1;
			} else if (tmpvoistat->stype > -1) {
				/* Existing stat: data region shifted. */
				tmpvoistat->data_off += nbytes;
			}
		}
	}

	return (error);
}
1574 
/* Placeholder for final processing of a blob; currently a no-op. */
static void
stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
{

	/* XXXLAS: Fill this in. */
}
1581 
/*
 * Iterate over all VOIs and their voistats in a v1 blob, invoking the
 * caller's callback for each. The SB_IT_* bits in ctx.flags tell the
 * callback its position in the walk (first/last VOI, first/last voistat,
 * first/last callback overall). NULL (unused) VOIs and voistats are only
 * visited when SB_IT_NULLVOI / SB_IT_NULLVOISTAT are set in flags. A
 * non-zero return from the callback aborts the iteration.
 */
static void
stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
    void *usrctx, uint32_t flags)
{
	struct voi *v;
	struct voistat *vs;
	struct sb_iter_ctx ctx;
	int i, j, firstvoi;

	ctx.usrctx = usrctx;
	ctx.flags = SB_IT_FIRST_CB;
	firstvoi = 1;

	for (i = 0; i < NVOIS(sb); i++) {
		v = &sb->vois[i];
		ctx.vslot = i;
		ctx.vsslot = -1;
		ctx.flags |= SB_IT_FIRST_VOISTAT;

		if (firstvoi)
			ctx.flags |= SB_IT_FIRST_VOI;
		else if (i == (NVOIS(sb) - 1))
			ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;

		/* Optionally surface NULL VOIs to the callback. */
		if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
			if (icb(sb, v, NULL, &ctx))
				return;
			firstvoi = 0;
			ctx.flags &= ~SB_IT_FIRST_CB;
		}

		/* If NULL voi, v->voistatmaxid == -1 */
		for (j = 0; j <= v->voistatmaxid; j++) {
			vs = &((struct voistat *)BLOB_OFFSET(sb,
			    v->stats_off))[j];
			if (vs->stype < 0 &&
			    !(flags & SB_IT_NULLVOISTAT))
				continue;

			/*
			 * The last voistat of the last VOI is the last
			 * callback of the whole walk.
			 */
			if (j == v->voistatmaxid) {
				ctx.flags |= SB_IT_LAST_VOISTAT;
				if (i == (NVOIS(sb) - 1))
					ctx.flags |=
					    SB_IT_LAST_CB;
			} else
				ctx.flags &= ~SB_IT_LAST_CB;

			ctx.vsslot = j;
			if (icb(sb, v, vs, &ctx))
				return;

			ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
			    SB_IT_LAST_VOISTAT);
		}
		ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
	}
}
1639 
/*
 * Render a t-digest voistat payload into 'buf' as either freeform text or
 * JSON. With objdump set, centroids are walked in raw array-slot order
 * (including empty slots); otherwise they are walked in sorted (ARB
 * in-order) sequence starting from the minimum.
 */
static inline void
stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
    const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
    size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
{
	const struct ctdth32 *ctd32tree;
	const struct ctdth64 *ctd64tree;
	const struct voistatdata_tdgstctd32 *ctd32;
	const struct voistatdata_tdgstctd64 *ctd64;
	const char *fmtstr;
	uint64_t smplcnt, compcnt;
	int is32bit, qmaxstrlen;
	uint16_t maxctds, curctds;

	/*
	 * Pull out the digest's counters and first centroid; is32bit selects
	 * which of the 32/64-bit tree/centroid pointer pairs is live below.
	 */
	switch (tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
		compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
		ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
		ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
		    ARB_CMIN(ctdth32, ctd32tree));
		/* Sizes the per-centroid Q-number string buffer (VLA). */
		qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
		compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
		ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
		ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
		    ARB_CMIN(ctdth64, ctd64tree));
		qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
		break;
	default:
		/* Not a t-digest payload; nothing to render. */
		return;
	}

	switch (fmt) {
	case SB_STRFMT_FREEFORM:
		fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
		break;
	case SB_STRFMT_JSON:
	default:
		fmtstr =
		    "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
		    "\"nctds\":%hu,\"ctds\":[";
		break;
	}
	sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
	    maxctds, curctds);

	/* Render each centroid: optional slot index, then {mu, cnt}. */
	while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
		char qstr[qmaxstrlen];

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "\n\t\t\t\t";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "{";
			break;
		}
		sbuf_cat(buf, fmtstr);

		if (objdump) {
			switch (fmt) {
			case SB_STRFMT_FREEFORM:
				fmtstr = "ctd[%hu].";
				break;
			case SB_STRFMT_JSON:
			default:
				fmtstr = "\"ctd\":%hu,";
				break;
			}
			sbuf_printf(buf, fmtstr, is32bit ?
			    ARB_SELFIDX(ctd32tree, ctd32) :
			    ARB_SELFIDX(ctd64tree, ctd64));
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "{mu=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"mu\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
		    sizeof(qstr));
		sbuf_cat(buf, qstr);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
			break;
		}
		sbuf_printf(buf, fmtstr,
		    is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);

		/* Advance: next array slot (objdump) or in-order successor. */
		if (is32bit)
			ctd32 = (objdump ? ARB_CNODE(ctd32tree,
			    ARB_SELFIDX(ctd32tree, ctd32) + 1) :
			    ARB_CNEXT(ctdth32, ctd32tree, ctd32));
		else
			ctd64 = (objdump ? ARB_CNODE(ctd64tree,
			    ARB_SELFIDX(ctd64tree, ctd64) + 1) :
			    ARB_CNEXT(ctdth64, ctd64tree, ctd64));

		if (fmt == SB_STRFMT_JSON &&
		    (is32bit ? NULL != ctd32 : NULL != ctd64))
			sbuf_putc(buf, ',');
	}
	if (fmt == SB_STRFMT_JSON)
		sbuf_cat(buf, "]");
}
1770 
/*
 * Render a histogram voistat payload into 'buf' as either freeform text
 * or JSON: bucket count, out-of-bounds counter, then each bucket's bounds
 * and count. Bucket bounds are rendered via recursive calls to
 * stats_voistatdata_tostr() using the VOI's own data type.
 */
static inline void
stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
    const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
    size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
{
	const struct voistatdata_numeric *bkt_lb, *bkt_ub;
	const char *fmtstr;
	int is32bit;
	uint16_t i, nbkts;

	/* Derive the bucket count from the payload size per histogram type. */
	switch (hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_DRHIST32:
		nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_DVHIST32:
		nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_CRHIST64:
		nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
		is32bit = 0;
		break;
	case VSD_DTYPE_DRHIST64:
		nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
		is32bit = 0;
		break;
	case VSD_DTYPE_DVHIST64:
		nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
		is32bit = 0;
		break;
	default:
		/* Not a histogram payload; nothing to render. */
		return;
	}

	switch (fmt) {
	case SB_STRFMT_FREEFORM:
		fmtstr = "nbkts=%hu, ";
		break;
	case SB_STRFMT_JSON:
	default:
		fmtstr = "\"nbkts\":%hu,";
		break;
	}
	sbuf_printf(buf, fmtstr, nbkts);

	switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
			    "\"oob\":%ju,\"bkts\":[");
			break;
	}
	sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
	    hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
	    hist_dtype, oob));

	for (i = 0; i < nbkts; i++) {
		/*
		 * Locate this bucket's bounds: continuous-range buckets use
		 * the next bucket's lb as ub (last bucket is unbounded),
		 * discrete-range buckets carry both bounds, discrete-value
		 * buckets have a single value. NOTE(review): the default
		 * case leaves bkt_lb/bkt_ub unset; it is unreachable given
		 * the hist_dtype validation above — confirm.
		 */
		switch (hist_dtype) {
		case VSD_DTYPE_CRHIST32:
		case VSD_DTYPE_CRHIST64:
			bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].lb);
			if (i < nbkts - 1)
				bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
				    hist_dtype, bkts[i + 1].lb);
			else
				bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
			break;
		case VSD_DTYPE_DRHIST32:
		case VSD_DTYPE_DRHIST64:
			bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].lb);
			bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].ub);
			break;
		case VSD_DTYPE_DVHIST32:
		case VSD_DTYPE_DVHIST64:
			bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
			    hist_dtype, bkts[i].val);
			break;
		default:
			break;
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "\n\t\t\t\t";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "{";
			break;
		}
		sbuf_cat(buf, fmtstr);

		if (objdump) {
			switch (fmt) {
			case SB_STRFMT_FREEFORM:
				fmtstr = "bkt[%hu].";
				break;
			case SB_STRFMT_JSON:
			default:
				fmtstr = "\"bkt\":%hu,";
				break;
			}
			sbuf_printf(buf, fmtstr, i);
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "{lb=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"lb\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
		    fmt, buf, objdump);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = ",ub=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = ",\"ub\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
		    fmt, buf, objdump);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
			break;
		}
		sbuf_printf(buf, fmtstr, is32bit ?
		    VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
		    (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
		    bkts[i].cnt));

		if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
			sbuf_putc(buf, ',');
	}
	if (fmt == SB_STRFMT_JSON)
		sbuf_cat(buf, "]");
}
1935 
/*
 * Render a voistat data payload of any supported type into 'buf' in the
 * requested string format, dispatching on vsd_dtype: plain integers print
 * directly, Q-numbers are stringified via Q_TOSTR, and histogram/t-digest
 * payloads are delegated to their dedicated renderers. Returns EINVAL on
 * bad arguments, otherwise the sbuf's error state.
 */
int
stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
    enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
    struct sbuf *buf, int objdump)
{
	const char *fmtstr;

	if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
	    vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
		return (EINVAL);

	switch (vsd_dtype) {
	case VSD_DTYPE_VOISTATE:
		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "prev=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"prev\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		/*
		 * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
		 */
		stats_voistatdata_tostr(
		    (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
		    voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
		break;
	case VSD_DTYPE_INT_S32:
		sbuf_printf(buf, "%d", vsd->int32.s32);
		break;
	case VSD_DTYPE_INT_U32:
		sbuf_printf(buf, "%u", vsd->int32.u32);
		break;
	case VSD_DTYPE_INT_S64:
		sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
		break;
	case VSD_DTYPE_INT_U64:
		sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
		break;
	case VSD_DTYPE_INT_SLONG:
		sbuf_printf(buf, "%ld", vsd->intlong.slong);
		break;
	case VSD_DTYPE_INT_ULONG:
		sbuf_printf(buf, "%lu", vsd->intlong.ulong);
		break;
	/* Q-number cases: stack buffer sized by Q_MAXSTRLEN for base 10. */
	case VSD_DTYPE_Q_S32:
		{
		char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
		Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_U32:
		{
		char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
		Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_S64:
		{
		char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
		Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_U64:
		{
		char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
		Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_CRHIST32:
	case VSD_DTYPE_DRHIST32:
	case VSD_DTYPE_DVHIST32:
	case VSD_DTYPE_CRHIST64:
	case VSD_DTYPE_DRHIST64:
	case VSD_DTYPE_DVHIST64:
		stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
		    vsd_dtype, vsd_sz, fmt, buf, objdump);
		break;
	case VSD_DTYPE_TDGSTCLUST32:
	case VSD_DTYPE_TDGSTCLUST64:
		stats_voistatdata_tdgst_tostr(voi_dtype,
		    CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
		    objdump);
		break;
	default:
		break;
	}

	return (sbuf_error(buf));
}
2033 
2034 static void
2035 stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
2036     struct voistat *vs, struct sb_iter_ctx *ctx)
2037 {
2038 	struct sb_tostrcb_ctx *sctx;
2039 	struct metablob *tpl_mb;
2040 	struct sbuf *buf;
2041 	void *vsd;
2042 	uint8_t dump;
2043 
2044 	sctx = ctx->usrctx;
2045 	buf = sctx->buf;
2046 	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2047 	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2048 
2049 	if (ctx->flags & SB_IT_FIRST_CB) {
2050 		sbuf_printf(buf, "struct statsblobv1@%p", sb);
2051 		if (dump) {
2052 			sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
2053 			    "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
2054 			    "stats_off=%hu, statsdata_off=%hu",
2055 			    sb->abi, sb->endian, sb->maxsz, sb->cursz,
2056 			    sb->created, sb->lastrst, sb->flags, sb->stats_off,
2057 			    sb->statsdata_off);
2058 		}
2059 		sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
2060 	}
2061 
2062 	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2063 		sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
2064 		if (v->id < 0)
2065 			return;
2066 		sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
2067 		    tpl_mb->voi_meta[v->id].name);
2068 		if (dump)
2069 		    sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
2070 		    "voistatmaxid=%hhd, stats_off=%hu", v->flags,
2071 		    vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
2072 	}
2073 
2074 	if (!dump && vs->stype <= 0)
2075 		return;
2076 
2077 	sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
2078 	if (vs->stype < 0) {
2079 		sbuf_printf(buf, "%hhd", vs->stype);
2080 		return;
2081 	} else
2082 		sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
2083 		    vs->errs);
2084 	vsd = BLOB_OFFSET(sb, vs->data_off);
2085 	if (dump)
2086 		sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
2087 		    "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
2088 		    vs->dsz, vs->data_off);
2089 
2090 	sbuf_printf(buf, "\n\t\t\tvoistatdata: ");
2091 	stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2092 	    sctx->fmt, buf, dump);
2093 }
2094 
/*
 * Iterator callback which renders the blob header, VOI and voistat details
 * for the current iteration position to the caller-supplied sbuf as JSON.
 * Object/array delimiters are opened on the FIRST_* iteration flags and
 * closed on the LAST_* flags, so the output is only well-formed JSON once
 * the iteration has run to completion.
 */
static void
stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
    struct sb_iter_ctx *ctx)
{
	struct sb_tostrcb_ctx *sctx;
	struct metablob *tpl_mb;
	struct sbuf *buf;
	const char *fmtstr;
	void *vsd;
	uint8_t dump;

	sctx = ctx->usrctx;
	buf = sctx->buf;
	/* Template metadata is optional; NULL when SB_TOSTR_META not set. */
	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);

	/* Open the top-level object and render header fields once. */
	if (ctx->flags & SB_IT_FIRST_CB) {
		sbuf_putc(buf, '{');
		if (dump) {
			sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
			    "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
			    "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
			    "\"statsdata_off\":%hu,", sb->abi,
			    sb->endian, sb->maxsz, sb->cursz, sb->created,
			    sb->lastrst, sb->flags, sb->stats_off,
			    sb->statsdata_off);
		}

		/*
		 * Without template metadata the name is rendered as a bare
		 * JSON null (unquoted); with it, as a quoted string.
		 */
		if (tpl_mb == NULL)
			fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
		else
			fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";

		sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
		    sb->tplhash);
	}

	/* Open this VOI's object once, on its first voistat. */
	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
		if (dump) {
			sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
			    v->id);
			/* Empty VOI slot; close its object and move on. */
			if (v->id < 0) {
				sbuf_printf(buf, "},");
				return;
			}

			/* Same quoted-vs-null name handling as the header. */
			if (tpl_mb == NULL)
				fmtstr = ",\"name\":%s,\"flags\":%hu,"
				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
				    "\"stats_off\":%hu,";
			else
				fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
				    "\"stats_off\":%hu,";

			sbuf_printf(buf, fmtstr, tpl_mb ?
			    tpl_mb->voi_meta[v->id].name : "null", v->flags,
			    vsd_dtype2name[v->dtype], v->voistatmaxid,
			    v->stats_off);
		} else {
			/* Key by name when metadata is available, else slot. */
			if (tpl_mb == NULL) {
				sbuf_printf(buf, "\"[%hd]\":{", v->id);
			} else {
				sbuf_printf(buf, "\"%s\":{",
				    tpl_mb->voi_meta[v->id].name);
			}
		}
		sbuf_cat(buf, "\"stats\":{");
	}

	vsd = BLOB_OFFSET(sb, vs->data_off);
	if (dump) {
		sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
		/* Empty voistat slot; render a stub object and move on. */
		if (vs->stype < 0) {
			sbuf_printf(buf, "{\"stype\":-1},");
			return;
		}
		sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
		    "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
		    vs_stype2name[vs->stype], vs->errs, vs->flags,
		    vsd_dtype2name[vs->dtype], vs->data_off);
	} else if (vs->stype > 0) {
		if (tpl_mb == NULL)
			sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
		else
			sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
	} else
		return;

	/* Render the stat data itself, or null if it holds no valid value. */
	if ((vs->flags & VS_VSDVALID) || dump) {
		if (!dump)
			sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
		/* Simple non-compound VSD types need a key. */
		if (!vsd_compoundtype[vs->dtype])
			sbuf_cat(buf, "\"val\":");
		stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
		    sctx->fmt, buf, dump);
		sbuf_cat(buf, dump ? "}}" : "}");
	} else
		sbuf_cat(buf, dump ? "null}" : "null");

	/* Close the per-VOI "stats" and VOI objects. */
	if (ctx->flags & SB_IT_LAST_VOISTAT)
		sbuf_cat(buf, "}}");

	/* Close the "vois" and top-level objects, or separate from the next. */
	if (ctx->flags & SB_IT_LAST_CB)
		sbuf_cat(buf, "}}");
	else
		sbuf_putc(buf, ',');
}
2204 
2205 static int
2206 stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2207     struct sb_iter_ctx *ctx)
2208 {
2209 	struct sb_tostrcb_ctx *sctx;
2210 
2211 	sctx = ctx->usrctx;
2212 
2213 	switch (sctx->fmt) {
2214 	case SB_STRFMT_FREEFORM:
2215 		stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
2216 		break;
2217 	case SB_STRFMT_JSON:
2218 		stats_v1_itercb_tostr_json(sb, v, vs, ctx);
2219 		break;
2220 	default:
2221 		break;
2222 	}
2223 
2224 	return (sbuf_error(sctx->buf));
2225 }
2226 
2227 int
2228 stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
2229     enum sb_str_fmt fmt, uint32_t flags)
2230 {
2231 	struct sb_tostrcb_ctx sctx;
2232 	uint32_t iflags;
2233 
2234 	if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
2235 	    fmt >= SB_STRFMT_NUM_FMTS)
2236 		return (EINVAL);
2237 
2238 	sctx.buf = buf;
2239 	sctx.fmt = fmt;
2240 	sctx.flags = flags;
2241 
2242 	if (flags & SB_TOSTR_META) {
2243 		if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
2244 		    &sctx.tpl))
2245 			return (EINVAL);
2246 	} else
2247 		sctx.tpl = NULL;
2248 
2249 	iflags = 0;
2250 	if (flags & SB_TOSTR_OBJDUMP)
2251 		iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
2252 	stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
2253 
2254 	return (sbuf_error(buf));
2255 }
2256 
2257 static int
2258 stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
2259     struct voistat *vs, struct sb_iter_ctx *ctx)
2260 {
2261 	struct sb_visitcb_ctx *vctx;
2262 	struct sb_visit sbv;
2263 
2264 	vctx = ctx->usrctx;
2265 
2266 	sbv.tplhash = sb->tplhash;
2267 	sbv.voi_id = v->id;
2268 	sbv.voi_dtype = v->dtype;
2269 	sbv.vs_stype = vs->stype;
2270 	sbv.vs_dtype = vs->dtype;
2271 	sbv.vs_dsz = vs->dsz;
2272 	sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
2273 	sbv.vs_errs = vs->errs;
2274 	sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
2275 	    SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
2276 	    SB_IT_LAST_VOISTAT);
2277 
2278 	return (vctx->cb(&sbv, vctx->usrctx));
2279 }
2280 
2281 int
2282 stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
2283     void *usrctx)
2284 {
2285 	struct sb_visitcb_ctx vctx;
2286 
2287 	if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
2288 		return (EINVAL);
2289 
2290 	vctx.cb = func;
2291 	vctx.usrctx = usrctx;
2292 
2293 	stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
2294 
2295 	return (0);
2296 }
2297 
/*
 * Iterator callback which resets a voistat's data to its freshly-created
 * state.  The VOI-state pseudo-stat is left untouched.  A per-stat-type
 * default reset action is applied to the stat data, after which the error
 * count is zeroed and VS_VSDVALID cleared to mark the data as not yet
 * holding a valid sample-derived value.  Always returns 0 so iteration
 * continues across the whole blob.
 */
static int
stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
    struct voistat *vs, struct sb_iter_ctx *ctx __unused)
{
	void *vsd;

	if (vs->stype == VS_STYPE_VOISTATE)
		return (0);

	vsd = BLOB_OFFSET(sb, vs->data_off);

	/* Perform the stat type's default reset action. */
	switch (vs->stype) {
	/* A sum resets to zero. */
	case VS_STYPE_SUM:
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
			break;
		default:
			bzero(vsd, vs->dsz);
			break;
		}
		break;
	/* A max resets to the dtype's minimum representable value. */
	case VS_STYPE_MAX:
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32,
			    Q_IFMINVAL(VSD(q32, vsd)->sq32));
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32,
			    Q_IFMINVAL(VSD(q32, vsd)->uq32));
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64,
			    Q_IFMINVAL(VSD(q64, vsd)->sq64));
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64,
			    Q_IFMINVAL(VSD(q64, vsd)->uq64));
			break;
		default:
			memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
			    vs->dsz);
			break;
		}
		break;
	/* A min resets to the dtype's maximum representable value. */
	case VS_STYPE_MIN:
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32,
			    Q_IFMAXVAL(VSD(q32, vsd)->sq32));
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32,
			    Q_IFMAXVAL(VSD(q32, vsd)->uq32));
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64,
			    Q_IFMAXVAL(VSD(q64, vsd)->sq64));
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64,
			    Q_IFMAXVAL(VSD(q64, vsd)->uq64));
			break;
		default:
			memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
			    vs->dsz);
			break;
		}
		break;
	case VS_STYPE_HIST:
		{
		/* Reset bucket counts. */
		struct voistatdata_hist *hist;
		int i, is32bit;
		uint16_t nbkts;

		hist = VSD(hist, vsd);
		/* Derive bucket count and counter width from the dtype. */
		switch (vs->dtype) {
		case VSD_DTYPE_CRHIST32:
			nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_DRHIST32:
			nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_DVHIST32:
			nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_CRHIST64:
			nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
			is32bit = 0;
			break;
		case VSD_DTYPE_DRHIST64:
			nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
			is32bit = 0;
			break;
		case VSD_DTYPE_DVHIST64:
			nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
			is32bit = 0;
			break;
		default:
			return (0);
		}

		/* Zero the out-of-band count, then each bucket count. */
		bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
		    is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
		for (i = nbkts - 1; i >= 0; i--) {
			bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
			    bkts[i].cnt), is32bit ? sizeof(uint32_t) :
			    sizeof(uint64_t));
		}
		break;
		}
	case VS_STYPE_TDGST:
		{
		/* Reset sample count centroids array/tree. */
		struct voistatdata_tdgst *tdgst;
		struct ctdth32 *ctd32tree;
		struct ctdth64 *ctd64tree;
		struct voistatdata_tdgstctd32 *ctd32;
		struct voistatdata_tdgstctd64 *ctd64;

		tdgst = VSD(tdgst, vsd);
		switch (vs->dtype) {
		case VSD_DTYPE_TDGSTCLUST32:
			VSD(tdgstclust32, tdgst)->smplcnt = 0;
			VSD(tdgstclust32, tdgst)->compcnt = 0;
			ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
			/* Re-initialise the ARB, zeroing every centroid. */
			ARB_INIT(ctd32, ctdlnk, ctd32tree,
			    ARB_MAXNODES(ctd32tree)) {
				ctd32->cnt = 0;
				Q_SIFVAL(ctd32->mu, 0);
			}
#ifdef DIAGNOSTIC
			RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
		case VSD_DTYPE_TDGSTCLUST64:
			VSD(tdgstclust64, tdgst)->smplcnt = 0;
			VSD(tdgstclust64, tdgst)->compcnt = 0;
			ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
			ARB_INIT(ctd64, ctdlnk, ctd64tree,
			    ARB_MAXNODES(ctd64tree)) {
				ctd64->cnt = 0;
				Q_SIFVAL(ctd64->mu, 0);
			}
#ifdef DIAGNOSTIC
			RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
		default:
			return (0);
		}
		break;
		}
	default:
		KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
		break;
	}

	/* The stat data no longer holds a valid sample-derived value. */
	vs->errs = 0;
	vs->flags &= ~VS_VSDVALID;

	return (0);
}
2476 
2477 int
2478 stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
2479     struct statsblobv1 *src, uint32_t flags)
2480 {
2481 	int error;
2482 
2483 	if (src != NULL && src->abi == STATS_ABI_V1) {
2484 		error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
2485 		if (!error) {
2486 			if (flags & SB_CLONE_RSTSRC) {
2487 				stats_v1_blob_iter(src,
2488 				    stats_v1_icb_reset_voistat, NULL, 0);
2489 				src->lastrst = stats_sbinuptime();
2490 			}
2491 			stats_v1_blob_finalise(*dst);
2492 		}
2493 	} else
2494 		error = EINVAL;
2495 
2496 	return (error);
2497 }
2498 
/*
 * Update a "max" voistat with a new VOI sample value: record the value iff
 * it exceeds the currently recorded maximum, setting VS_VSDVALID on update
 * to flag that the stat data now holds a valid value.  Returns 0 on
 * success, EINVAL for an unsupported dtype, or a qmath error from
 * Q_QCPYVALQ() for the Q-type cases.
 */
static inline int
stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		if (VSD(int32, vsd)->s32 < voival->int32.s32) {
			VSD(int32, vsd)->s32 = voival->int32.s32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U32:
		if (VSD(int32, vsd)->u32 < voival->int32.u32) {
			VSD(int32, vsd)->u32 = voival->int32.u32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_S64:
		if (VSD(int64, vsd)->s64 < voival->int64.s64) {
			VSD(int64, vsd)->s64 = voival->int64.s64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U64:
		if (VSD(int64, vsd)->u64 < voival->int64.u64) {
			VSD(int64, vsd)->u64 = voival->int64.u64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_SLONG:
		if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
			VSD(intlong, vsd)->slong = voival->intlong.slong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_ULONG:
		if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	/* Q types: Q_QCPYVALQ() can fail, so only flag valid on success. */
	case VSD_DTYPE_Q_S32:
		if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
		    voival->q32.sq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U32:
		if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
		    voival->q32.uq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_S64:
		if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
		    voival->q64.sq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U64:
		if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
		    voival->q64.uq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
2582 
/*
 * Update a "min" voistat with a new VOI sample value: record the value iff
 * it is below the currently recorded minimum, setting VS_VSDVALID on update
 * to flag that the stat data now holds a valid value.  Returns 0 on
 * success, EINVAL for an unsupported dtype, or a qmath error from
 * Q_QCPYVALQ() for the Q-type cases.
 */
static inline int
stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		if (VSD(int32, vsd)->s32 > voival->int32.s32) {
			VSD(int32, vsd)->s32 = voival->int32.s32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U32:
		if (VSD(int32, vsd)->u32 > voival->int32.u32) {
			VSD(int32, vsd)->u32 = voival->int32.u32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_S64:
		if (VSD(int64, vsd)->s64 > voival->int64.s64) {
			VSD(int64, vsd)->s64 = voival->int64.s64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U64:
		if (VSD(int64, vsd)->u64 > voival->int64.u64) {
			VSD(int64, vsd)->u64 = voival->int64.u64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_SLONG:
		if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
			VSD(intlong, vsd)->slong = voival->intlong.slong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_ULONG:
		if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	/* Q types: Q_QCPYVALQ() can fail, so only flag valid on success. */
	case VSD_DTYPE_Q_S32:
		if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
		    voival->q32.sq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U32:
		if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
		    voival->q32.uq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_S64:
		if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
		    voival->q64.sq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U64:
		if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
		    voival->q64.uq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
2666 
/*
 * Update a "sum" voistat by adding a new VOI sample value to the running
 * total.  VS_VSDVALID is set on success.  Returns 0 on success, EINVAL for
 * an unsupported dtype, or a qmath error from Q_QADDQ() for the Q-type
 * cases.  Note the plain integer additions wrap per C semantics; only the
 * Q types report overflow via the error return.
 */
static inline int
stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		VSD(int32, vsd)->s32 += voival->int32.s32;
		break;
	case VSD_DTYPE_INT_U32:
		VSD(int32, vsd)->u32 += voival->int32.u32;
		break;
	case VSD_DTYPE_INT_S64:
		VSD(int64, vsd)->s64 += voival->int64.s64;
		break;
	case VSD_DTYPE_INT_U64:
		VSD(int64, vsd)->u64 += voival->int64.u64;
		break;
	case VSD_DTYPE_INT_SLONG:
		VSD(intlong, vsd)->slong += voival->intlong.slong;
		break;
	case VSD_DTYPE_INT_ULONG:
		VSD(intlong, vsd)->ulong += voival->intlong.ulong;
		break;
	case VSD_DTYPE_Q_S32:
		error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
		break;
	case VSD_DTYPE_Q_U32:
		error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
		break;
	case VSD_DTYPE_Q_S64:
		error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
		break;
	case VSD_DTYPE_Q_U64:
		error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
		break;
	default:
		error = EINVAL;
		break;
	}

	/* Only flag the stat data valid if the addition succeeded. */
	if (!error)
		vs->flags |= VS_VSDVALID;

	return (error);
}
2719 
/*
 * Update a histogram voistat with a new VOI sample value: linearly search
 * the bucket array from the top down for the bucket the value belongs to
 * and bump its count, or bump the out-of-band count if no bucket matches.
 * Bucket membership depends on the histogram flavour:
 *   - CRHIST (continuous range): value >= bucket lower bound; no upper
 *     bound per bucket (has_ub == 0).
 *   - DRHIST (discrete range): lower bound <= value < upper bound.
 *   - DVHIST (discrete value): value == bucket value (eq_only == 1).
 * Returns EINVAL for a non-histogram dtype, 0 otherwise, with VS_VSDVALID
 * set on the voistat.
 */
static inline int
stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
    struct voistat *vs, struct voistatdata_hist *hist)
{
	struct voistatdata_numeric *bkt_lb, *bkt_ub;
	uint64_t *oob64, *cnt64;
	uint32_t *oob32, *cnt32;
	int error, i, found, is32bit, has_ub, eq_only;

	error = 0;

	/*
	 * Derive bucket count, matching semantics (eq_only/has_ub), counter
	 * width and the out-of-band counter from the histogram dtype.  Note
	 * only one of oob32/oob64 (and later cnt32/cnt64, bkt_ub) is set;
	 * the is32bit/has_ub flags gate which one is subsequently read.
	 */
	switch (vs->dtype) {
	case VSD_DTYPE_CRHIST32:
		i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
		is32bit = 1;
		has_ub = eq_only = 0;
		oob32 = &VSD(crhist32, hist)->oob;
		break;
	case VSD_DTYPE_DRHIST32:
		i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
		is32bit = has_ub = 1;
		eq_only = 0;
		oob32 = &VSD(drhist32, hist)->oob;
		break;
	case VSD_DTYPE_DVHIST32:
		i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
		is32bit = eq_only = 1;
		has_ub = 0;
		oob32 = &VSD(dvhist32, hist)->oob;
		break;
	case VSD_DTYPE_CRHIST64:
		i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
		is32bit = has_ub = eq_only = 0;
		oob64 = &VSD(crhist64, hist)->oob;
		break;
	case VSD_DTYPE_DRHIST64:
		i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
		is32bit = eq_only = 0;
		has_ub = 1;
		oob64 = &VSD(drhist64, hist)->oob;
		break;
	case VSD_DTYPE_DVHIST64:
		i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
		is32bit = has_ub = 0;
		eq_only = 1;
		oob64 = &VSD(dvhist64, hist)->oob;
		break;
	default:
		return (EINVAL);
	}
	i--; /* Adjust for 0-based array index. */

	/* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
	for (found = 0; i >= 0 && !found; i--) {
		/* Locate bucket i's bounds and count for this dtype. */
		switch (vs->dtype) {
		case VSD_DTYPE_CRHIST32:
			bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
			cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DRHIST32:
			bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
			bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
			cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DVHIST32:
			bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
			cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_CRHIST64:
			bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
			cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DRHIST64:
			bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
			bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
			cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DVHIST64:
			bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
			cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
			break;
		default:
			return (EINVAL);
		}

		/*
		 * Test the sample value against the bucket's bounds per the
		 * VOI's dtype; bkt_ub is only read when has_ub is set.
		 */
		switch (voi_dtype) {
		case VSD_DTYPE_INT_S32:
			if (voival->int32.s32 >= bkt_lb->int32.s32) {
				if ((eq_only && voival->int32.s32 ==
				    bkt_lb->int32.s32) ||
				    (!eq_only && (!has_ub ||
				    voival->int32.s32 < bkt_ub->int32.s32)))
					found = 1;
			}
			break;
		case VSD_DTYPE_INT_U32:
			if (voival->int32.u32 >= bkt_lb->int32.u32) {
				if ((eq_only && voival->int32.u32 ==
				    bkt_lb->int32.u32) ||
				    (!eq_only && (!has_ub ||
				    voival->int32.u32 < bkt_ub->int32.u32)))
					found = 1;
			}
			break;
		case VSD_DTYPE_INT_S64:
			if (voival->int64.s64 >= bkt_lb->int64.s64)
				if ((eq_only && voival->int64.s64 ==
				    bkt_lb->int64.s64) ||
				    (!eq_only && (!has_ub ||
				    voival->int64.s64 < bkt_ub->int64.s64)))
					found = 1;
			break;
		case VSD_DTYPE_INT_U64:
			if (voival->int64.u64 >= bkt_lb->int64.u64)
				if ((eq_only && voival->int64.u64 ==
				    bkt_lb->int64.u64) ||
				    (!eq_only && (!has_ub ||
				    voival->int64.u64 < bkt_ub->int64.u64)))
					found = 1;
			break;
		case VSD_DTYPE_INT_SLONG:
			if (voival->intlong.slong >= bkt_lb->intlong.slong)
				if ((eq_only && voival->intlong.slong ==
				    bkt_lb->intlong.slong) ||
				    (!eq_only && (!has_ub ||
				    voival->intlong.slong <
				    bkt_ub->intlong.slong)))
					found = 1;
			break;
		case VSD_DTYPE_INT_ULONG:
			if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
				if ((eq_only && voival->intlong.ulong ==
				    bkt_lb->intlong.ulong) ||
				    (!eq_only && (!has_ub ||
				    voival->intlong.ulong <
				    bkt_ub->intlong.ulong)))
					found = 1;
			break;
		case VSD_DTYPE_Q_S32:
			if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
				if ((eq_only && Q_QEQ(voival->q32.sq32,
				    bkt_lb->q32.sq32)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q32.sq32,
				    bkt_ub->q32.sq32))))
					found = 1;
			break;
		case VSD_DTYPE_Q_U32:
			if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
				if ((eq_only && Q_QEQ(voival->q32.uq32,
				    bkt_lb->q32.uq32)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q32.uq32,
				    bkt_ub->q32.uq32))))
					found = 1;
			break;
		case VSD_DTYPE_Q_S64:
			if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
				if ((eq_only && Q_QEQ(voival->q64.sq64,
				    bkt_lb->q64.sq64)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q64.sq64,
				    bkt_ub->q64.sq64))))
					found = 1;
			break;
		case VSD_DTYPE_Q_U64:
			if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
				if ((eq_only && Q_QEQ(voival->q64.uq64,
				    bkt_lb->q64.uq64)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q64.uq64,
				    bkt_ub->q64.uq64))))
					found = 1;
			break;
		default:
			break;
		}
	}

	/* Tally the matched bucket, or the out-of-band counter on a miss. */
	if (found) {
		if (is32bit)
			*cnt32 += 1;
		else
			*cnt64 += 1;
	} else {
		if (is32bit)
			*oob32 += 1;
		else
			*oob64 += 1;
	}

	vs->flags |= VS_VSDVALID;
	return (error);
}
2914 
/*
 * Compress a cluster-variant t-digest whose centroid ARB tree is full, by
 * resetting the tree and re-inserting each centroid's mu/cnt (as a value
 * with corresponding weight) in pseudorandomised order via
 * stats_v1_vsd_tdgst_add().  No-op (returns 0) if the tree still has free
 * centroids.  Returns EINVAL for a non-t-digest dtype, otherwise the first
 * error (if any) latched during re-insertion.
 */
static inline int
stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, int attempt)
{
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;
	uint64_t ebits, idxmask;
	uint32_t bitsperidx, nebits;
	int error, idx, is32bit, maxctds, remctds, tmperr;

	error = 0;

	/*
	 * Per-width setup: bail if not full, bump the compression count,
	 * reset the ARB tree and sample count, and NULL the other width's
	 * pointers (only one pair is used, gated by is32bit below).
	 */
	switch (vs_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		if (!ARB_FULL(ctd32tree))
			return (0);
		VSD(tdgstclust32, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd32tree);
		ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
		VSD(tdgstclust32, tdgst)->smplcnt = 0;
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		if (!ARB_FULL(ctd64tree))
			return (0);
		VSD(tdgstclust64, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd64tree);
		ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
		VSD(tdgstclust64, tdgst)->smplcnt = 0;
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
	 * re-inserting the mu/cnt of each as a value and corresponding weight.
	 */

	/*
	 * XXXCEM: random(9) is currently rand(3), not random(3).  rand(3)
	 * RAND_MAX happens to be approximately 31 bits (range [0,
	 * 0x7ffffffd]), so the math kinda works out.  When/if this portion of
	 * the code is compiled in userspace, it gets the random(3) behavior,
	 * which has expected range [0, 0x7fffffff].
	 */
#define	bitsperrand 31
	ebits = 0;
	nebits = 0;
	bitsperidx = fls(maxctds);
	KASSERT(bitsperidx <= sizeof(ebits) << 3,
	    ("%s: bitsperidx=%d, ebits=%d",
	    __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
	idxmask = (UINT64_C(1) << bitsperidx) - 1;

	/* Initialise the free list with randomised centroid indices. */
	for (; remctds > 0; remctds--) {
		/* Accumulate random bits until we have enough for an index. */
		while (nebits < bitsperidx) {
			ebits |= ((uint64_t)random()) << nebits;
			nebits += bitsperrand;
			if (nebits > (sizeof(ebits) << 3))
				nebits = sizeof(ebits) << 3;
		}
		idx = ebits & idxmask;
		nebits -= bitsperidx;
		ebits >>= bitsperidx;

		/*
		 * Select the next centroid to put on the ARB free list. We
		 * start with the centroid at our randomly selected array index,
		 * and work our way forwards until finding one (the latter
		 * aspect reduces re-insertion randomness, but is good enough).
		 */
		do {
			if (idx >= maxctds)
				idx %= maxctds;

			if (is32bit)
				ctd32 = ARB_NODE(ctd32tree, idx);
			else
				ctd64 = ARB_NODE(ctd64tree, idx);
		} while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
		    ARB_ISFREE(ctd64, ctdlnk)) && ++idx);

		/* Put the centroid on the ARB free list. */
		if (is32bit)
			ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
		else
			ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
	}

	/*
	 * The free list now contains the randomised indices of every centroid.
	 * Walk the free list from start to end, re-inserting each centroid's
	 * mu/cnt. The tdgst_add() call may or may not consume the free centroid
	 * we re-insert values from during each loop iteration, so we must latch
	 * the index of the next free list centroid before the re-insertion
	 * call. The previous loop above should have left the centroid pointer
	 * pointing to the element at the head of the free list.
	 */
	KASSERT((is32bit ?
	    ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
	    ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
	    ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
	remctds = maxctds;
	while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
		tmperr = 0;
		if (is32bit) {
			s64q_t x;

			idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
			/* Cloning a s32q_t into a s64q_t should never fail. */
			tmperr = Q_QCLONEQ(&x, ctd32->mu);
			tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
			    vs_dtype, tdgst, x, ctd32->cnt, attempt);
			ctd32 = ARB_NODE(ctd32tree, idx);
			KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd32tree));
		} else {
			idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
			tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
			    ctd64->mu, ctd64->cnt, attempt);
			ctd64 = ARB_NODE(ctd64tree, idx);
			KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd64tree));
		}
		/*
		 * This process should not produce errors, bugs notwithstanding.
		 * Just in case, latch any errors and attempt all re-insertions.
		 */
		error = tmperr ? tmperr : error;
		remctds--;
	}

	KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));

	return (error);
}
3072 
/*
 * Add a weighted sample to a clustered t-digest. The sample is merged into
 * a randomly chosen centroid from the set of centroids closest to 'x' that
 * can absorb the weight without exceeding their k-derived size bound;
 * failing that, it consumes a free centroid, compressing the digest and
 * retrying when the centroid tree is full.
 *
 * vs_dtype must be VSD_DTYPE_TDGSTCLUST32 or VSD_DTYPE_TDGSTCLUST64. 'x'
 * carries the sample value and must have the same Q-type fractional
 * precision as the digest's centroids. 'weight' is the sample count the
 * value represents. 'attempt' counts (re)tries and scales 'k'; external
 * callers pass 1.
 *
 * Returns 0 on success, or an errno-style error (e.g. EOVERFLOW if the
 * digest's sample count would overflow, EAGAIN if retries are exhausted).
 */
static inline int
stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
    s64q_t x, uint64_t weight, int attempt)
{
#ifdef DIAGNOSTIC
	char qstr[Q_MAXSTRLEN(x, 10)];
#endif
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	void *closest, *cur, *lb, *ub;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;
	uint64_t cnt, smplcnt, sum, tmpsum;
	s64q_t k, minz, q, z;
	int error, is32bit, n;

	error = 0;
	/* Scratch Q-types 'z'/'minz' inherit x's fractional precision. */
	minz = Q_INI(&z, 0, 0, Q_NFBITS(x));

	switch (vs_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		/* Reject adds which would overflow the digest sample count. */
		if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
			error = EOVERFLOW;
		smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
			error = EOVERFLOW;
		smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	/*
	 * Inspired by Ted Dunning's AVLTreeDigest.java
	 */
	do {
#if defined(DIAGNOSTIC)
		KASSERT(attempt < 5,
		    ("%s: Too many attempts", __func__));
#endif
		if (attempt >= 5)
			return (EAGAIN);

		Q_SIFVAL(minz, Q_IFMAXVAL(minz));
		closest = ub = NULL;
		sum = tmpsum = 0;

		if (is32bit)
			lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
		else
			lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));

		if (lb == NULL) /* Empty tree. */
			lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
			    (void *)ARB_ROOT(ctd64tree));

		/*
		 * Find the set of centroids with minimum distance to x and
		 * compute the sum of counts for all centroids with mean less
		 * than the first centroid in the set.
		 */
		for (; cur != NULL;
		    cur = (is32bit ?
		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
			if (is32bit) {
				cnt = ctd32->cnt;
				KASSERT(Q_PRECEQ(ctd32->mu, x),
				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
				    Q_RELPREC(ctd32->mu, x)));
				/* Ok to assign as both have same precision. */
				z = ctd32->mu;
			} else {
				cnt = ctd64->cnt;
				KASSERT(Q_PRECEQ(ctd64->mu, x),
				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
				    Q_RELPREC(ctd64->mu, x)));
				/* Ok to assign as both have same precision. */
				z = ctd64->mu;
			}

			/* z = |mu - x|, this centroid's distance from x. */
			error = Q_QSUBQ(&z, x);
#if defined(DIAGNOSTIC)
			KASSERT(!error, ("%s: unexpected error %d", __func__,
			    error));
#endif
			if (error)
				return (error);

			z = Q_QABS(z);
			if (Q_QLTQ(z, minz)) {
				/* New closest centroid; restart the set. */
				minz = z;
				lb = cur;
				sum = tmpsum;
				tmpsum += cnt;
			} else if (Q_QGTQ(z, minz)) {
				/* In-order walk; distances only grow now. */
				ub = cur;
				break;
			}
		}

		cur = (is32bit ?
		    (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
		    (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));

		/*
		 * Walk the equidistant set [lb, ub), picking uniformly at
		 * random (probability 1/n via the reservoir-style test
		 * below) one centroid which can absorb the new weight
		 * without exceeding its k-derived size bound.
		 */
		for (n = 0; cur != ub; cur = (is32bit ?
		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
			if (is32bit)
				cnt = ctd32->cnt;
			else
				cnt = ctd64->cnt;

			q = Q_CTRLINI(16);
			if (smplcnt == 1)
				error = Q_QFRACI(&q, 1, 2);
			else
				/* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
				error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
				    (smplcnt - 1) << 1);
			k = q;
			/* k = q x 4 x smplcnt x attempt */
			error |= Q_QMULI(&k, 4 * smplcnt * attempt);
			/* k = k x (1 - q) */
			error |= Q_QSUBI(&q, 1);
			q = Q_QABS(q);
			error |= Q_QMULQ(&k, q);
#if defined(DIAGNOSTIC)
#if !defined(_KERNEL)
			double q_dbl, k_dbl, q2d, k2d;
			q2d = Q_Q2D(q);
			k2d = Q_Q2D(k);
			q_dbl = smplcnt == 1 ? 0.5 :
			    (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
			k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
			/*
			 * If the difference between q and q_dbl is greater than
			 * the fractional precision of q, something is off.
			 * NB: q is holding the value of 1 - q
			 */
			q_dbl = 1.0 - q_dbl;
			KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
			    (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
			    ("Q-type q bad precision"));
			KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
			    1.0 + (0.01 * smplcnt),
			    ("Q-type k bad precision"));
#endif /* !_KERNEL */
			KASSERT(!error, ("%s: unexpected error %d", __func__,
			    error));
#endif /* DIAGNOSTIC */
			if (error)
				return (error);
			if ((is32bit && ((ctd32->cnt + weight) <=
			    (uint64_t)Q_GIVAL(k))) ||
			    (!is32bit && ((ctd64->cnt + weight) <=
			    (uint64_t)Q_GIVAL(k)))) {
				n++;
				/* random() produces 31 bits. */
				if (random() < (INT32_MAX / n))
					closest = cur;
			}
			sum += cnt;
		}
	} while (closest == NULL &&
	    (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
	    (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
	    attempt++)) == 0);

	if (error)
		return (error);

	if (closest != NULL) {
		/* Merge with an existing centroid. */
		if (is32bit) {
			ctd32 = (struct voistatdata_tdgstctd32 *)closest;
			error = Q_QSUBQ(&x, ctd32->mu);
			/*
			 * The following calculation "x / (cnt + weight)"
			 * computes the amount by which to adjust the centroid's
			 * mu value in order to merge in the VOI sample.
			 *
			 * It can underflow (Q_QDIVI() returns ERANGE) when the
			 * user centroids' fractional precision (which is
			 * inherited by 'x') is too low to represent the result.
			 *
			 * A sophisticated approach to dealing with this issue
			 * would minimise accumulation of error by tracking
			 * underflow per centroid and making an adjustment when
			 * a LSB's worth of underflow has accumulated.
			 *
			 * A simpler approach is to let the result underflow
			 * i.e. merge the VOI sample into the centroid without
			 * adjusting the centroid's mu, and rely on the user to
			 * specify their t-digest with sufficient centroid
			 * fractional precision such that the accumulation of
			 * error from multiple underflows is of no material
			 * consequence to the centroid's final value of mu.
			 *
			 * For the moment, the latter approach is employed by
			 * simply ignoring ERANGE here.
			 *
			 * XXXLAS: Per-centroid underflow tracking is likely too
			 * onerous, but it probably makes sense to accumulate a
			 * single underflow error variable across all centroids
			 * and report it as part of the digest to provide
			 * additional visibility into the digest's fidelity.
			 */
			error = error ? error :
			    Q_QDIVI(&x, ctd32->cnt + weight);
			if ((error && error != ERANGE)
			    || (error = Q_QADDQ(&ctd32->mu, x))) {
#ifdef DIAGNOSTIC
				KASSERT(!error, ("%s: unexpected error %d",
				    __func__, error));
#endif
				return (error);
			}
			ctd32->cnt += weight;
			/* Changing mu may change the centroid's tree order. */
			error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
			    NULL ? 0 : EALREADY;
#ifdef DIAGNOSTIC
			RB_REINSERT(rbctdth32,
			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
#endif
		} else {
			ctd64 = (struct voistatdata_tdgstctd64 *)closest;
			error = Q_QSUBQ(&x, ctd64->mu);
			error = error ? error :
			    Q_QDIVI(&x, ctd64->cnt + weight);
			/* Refer to is32bit ERANGE discussion above. */
			if ((error && error != ERANGE)
			    || (error = Q_QADDQ(&ctd64->mu, x))) {
				KASSERT(!error, ("%s: unexpected error %d",
				    __func__, error));
				return (error);
			}
			ctd64->cnt += weight;
			error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
			    NULL ? 0 : EALREADY;
#ifdef DIAGNOSTIC
			RB_REINSERT(rbctdth64,
			    &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
#endif
		}
	} else {
		/*
		 * Add a new centroid. If digest compression is working
		 * correctly, there should always be at least one free.
		 */
		if (is32bit) {
			ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
#ifdef DIAGNOSTIC
			KASSERT(ctd32 != NULL,
			    ("%s: t-digest@%p has no free centroids",
			    __func__, tdgst));
#endif
			if (ctd32 == NULL)
				return (EAGAIN);
			if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
				return (error);
			ctd32->cnt = weight;
			error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
			    0 : EALREADY;
#ifdef DIAGNOSTIC
			RB_INSERT(rbctdth32,
			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
#endif
		} else {
			ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
#ifdef DIAGNOSTIC
			KASSERT(ctd64 != NULL,
			    ("%s: t-digest@%p has no free centroids",
			    __func__, tdgst));
#endif
			if (ctd64 == NULL) /* Should not happen. */
				return (EAGAIN);
			/* Direct assignment ok as both have same type/prec. */
			ctd64->mu = x;
			ctd64->cnt = weight;
			error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
			    0 : EALREADY;
#ifdef DIAGNOSTIC
			RB_INSERT(rbctdth64, &VSD(tdgstclust64,
			    tdgst)->rbctdtree, ctd64);
#endif
		}
	}

	if (is32bit)
		VSD(tdgstclust32, tdgst)->smplcnt += weight;
	else {
		VSD(tdgstclust64, tdgst)->smplcnt += weight;

#ifdef DIAGNOSTIC
		/*
		 * Cross-check the ARB tree node-by-node against the shadow
		 * RB tree maintained in DIAGNOSTIC builds, panicking with a
		 * dump of the first differing node pair if they diverge.
		 */
		struct rbctdth64 *rbctdtree =
		    &VSD(tdgstclust64, tdgst)->rbctdtree;
		struct voistatdata_tdgstctd64 *rbctd64;
		int i = 0;
		ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
			rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
			    RB_NEXT(rbctdth64, rbctdtree, rbctd64));

			if (i >= ARB_CURNODES(ctd64tree)
			    || ctd64 != rbctd64
			    || ARB_MIN(ctdth64, ctd64tree) !=
			       RB_MIN(rbctdth64, rbctdtree)
			    || ARB_MAX(ctdth64, ctd64tree) !=
			       RB_MAX(rbctdth64, rbctdtree)
			    || ARB_LEFTIDX(ctd64, ctdlnk) !=
			       ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
			    || ARB_RIGHTIDX(ctd64, ctdlnk) !=
			       ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
			    || ARB_PARENTIDX(ctd64, ctdlnk) !=
			       ARB_SELFIDX(ctd64tree,
			       RB_PARENT(rbctd64, rblnk))) {
				Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
				printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
				    "mu=%s\n",
				    (int)ARB_SELFIDX(ctd64tree, ctd64),
				    ARB_PARENTIDX(ctd64, ctdlnk),
				    ARB_LEFTIDX(ctd64, ctdlnk),
				    ARB_RIGHTIDX(ctd64, ctdlnk),
				    ARB_COLOR(ctd64, ctdlnk),
				    qstr);

				Q_TOSTR(rbctd64->mu, -1, 10, qstr,
				    sizeof(qstr));
				struct voistatdata_tdgstctd64 *parent;
				parent = RB_PARENT(rbctd64, rblnk);
				int rb_color =
					parent == NULL ? 0 :
					RB_LEFT(parent, rblnk) == rbctd64 ?
					(_RB_BITSUP(parent, rblnk) & _RB_L) != 0 :
					(_RB_BITSUP(parent, rblnk) & _RB_R) != 0;
				printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
				    "mu=%s\n",
				    (int)ARB_SELFIDX(ctd64tree, rbctd64),
				    (int)ARB_SELFIDX(ctd64tree,
				      RB_PARENT(rbctd64, rblnk)),
				    (int)ARB_SELFIDX(ctd64tree,
				      RB_LEFT(rbctd64, rblnk)),
				    (int)ARB_SELFIDX(ctd64tree,
				      RB_RIGHT(rbctd64, rblnk)),
				    rb_color,
				    qstr);

				panic("RB@%p and ARB@%p trees differ\n",
				    rbctdtree, ctd64tree);
			}
			i++;
		}
#endif /* DIAGNOSTIC */
	}

	return (error);
}
3444 
3445 static inline int
3446 stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
3447     struct voistat *vs, struct voistatdata_tdgst *tdgst)
3448 {
3449 	s64q_t x;
3450 	int error;
3451 
3452 	error = 0;
3453 
3454 	switch (vs->dtype) {
3455 	case VSD_DTYPE_TDGSTCLUST32:
3456 		/* Use same precision as the user's centroids. */
3457 		Q_INI(&x, 0, 0, Q_NFBITS(
3458 		    ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
3459 		break;
3460 	case VSD_DTYPE_TDGSTCLUST64:
3461 		/* Use same precision as the user's centroids. */
3462 		Q_INI(&x, 0, 0, Q_NFBITS(
3463 		    ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
3464 		break;
3465 	default:
3466 		KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
3467 		    vs->dtype == VSD_DTYPE_TDGSTCLUST64,
3468 		    ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
3469 		    __func__, vs->dtype));
3470 		return (EINVAL);
3471 	}
3472 
3473 	/*
3474 	 * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid
3475 	 * returning EOVERFLOW if the voival would have fit in a u64q_t.
3476 	 */
3477 	switch (voi_dtype) {
3478 	case VSD_DTYPE_INT_S32:
3479 		error = Q_QCPYVALI(&x, voival->int32.s32);
3480 		break;
3481 	case VSD_DTYPE_INT_U32:
3482 		error = Q_QCPYVALI(&x, voival->int32.u32);
3483 		break;
3484 	case VSD_DTYPE_INT_S64:
3485 		error = Q_QCPYVALI(&x, voival->int64.s64);
3486 		break;
3487 	case VSD_DTYPE_INT_U64:
3488 		error = Q_QCPYVALI(&x, voival->int64.u64);
3489 		break;
3490 	case VSD_DTYPE_INT_SLONG:
3491 		error = Q_QCPYVALI(&x, voival->intlong.slong);
3492 		break;
3493 	case VSD_DTYPE_INT_ULONG:
3494 		error = Q_QCPYVALI(&x, voival->intlong.ulong);
3495 		break;
3496 	case VSD_DTYPE_Q_S32:
3497 		error = Q_QCPYVALQ(&x, voival->q32.sq32);
3498 		break;
3499 	case VSD_DTYPE_Q_U32:
3500 		error = Q_QCPYVALQ(&x, voival->q32.uq32);
3501 		break;
3502 	case VSD_DTYPE_Q_S64:
3503 		error = Q_QCPYVALQ(&x, voival->q64.sq64);
3504 		break;
3505 	case VSD_DTYPE_Q_U64:
3506 		error = Q_QCPYVALQ(&x, voival->q64.uq64);
3507 		break;
3508 	default:
3509 		error = EINVAL;
3510 		break;
3511 	}
3512 
3513 	if (error ||
3514 	    (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1)))
3515 		return (error);
3516 
3517 	vs->flags |= VS_VSDVALID;
3518 	return (0);
3519 }
3520 
3521 int
3522 stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
3523     enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
3524 {
3525 	struct voi *v;
3526 	struct voistat *vs;
3527 	void *statevsd, *vsd;
3528 	int error, i, tmperr;
3529 
3530 	error = 0;
3531 
3532 	if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) ||
3533 	    voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
3534 		return (EINVAL);
3535 	v = &sb->vois[voi_id];
3536 	if (voi_dtype != v->dtype || v->id < 0 ||
3537 	    ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
3538 		return (EINVAL);
3539 
3540 	vs = BLOB_OFFSET(sb, v->stats_off);
3541 	if (v->flags & VOI_REQSTATE)
3542 		statevsd = BLOB_OFFSET(sb, vs->data_off);
3543 	else
3544 		statevsd = NULL;
3545 
3546 	if (flags & SB_VOI_RELUPDATE) {
3547 		switch (voi_dtype) {
3548 		case VSD_DTYPE_INT_S32:
3549 			voival->int32.s32 +=
3550 			    VSD(voistate, statevsd)->prev.int32.s32;
3551 			break;
3552 		case VSD_DTYPE_INT_U32:
3553 			voival->int32.u32 +=
3554 			    VSD(voistate, statevsd)->prev.int32.u32;
3555 			break;
3556 		case VSD_DTYPE_INT_S64:
3557 			voival->int64.s64 +=
3558 			    VSD(voistate, statevsd)->prev.int64.s64;
3559 			break;
3560 		case VSD_DTYPE_INT_U64:
3561 			voival->int64.u64 +=
3562 			    VSD(voistate, statevsd)->prev.int64.u64;
3563 			break;
3564 		case VSD_DTYPE_INT_SLONG:
3565 			voival->intlong.slong +=
3566 			    VSD(voistate, statevsd)->prev.intlong.slong;
3567 			break;
3568 		case VSD_DTYPE_INT_ULONG:
3569 			voival->intlong.ulong +=
3570 			    VSD(voistate, statevsd)->prev.intlong.ulong;
3571 			break;
3572 		case VSD_DTYPE_Q_S32:
3573 			error = Q_QADDQ(&voival->q32.sq32,
3574 			    VSD(voistate, statevsd)->prev.q32.sq32);
3575 			break;
3576 		case VSD_DTYPE_Q_U32:
3577 			error = Q_QADDQ(&voival->q32.uq32,
3578 			    VSD(voistate, statevsd)->prev.q32.uq32);
3579 			break;
3580 		case VSD_DTYPE_Q_S64:
3581 			error = Q_QADDQ(&voival->q64.sq64,
3582 			    VSD(voistate, statevsd)->prev.q64.sq64);
3583 			break;
3584 		case VSD_DTYPE_Q_U64:
3585 			error = Q_QADDQ(&voival->q64.uq64,
3586 			    VSD(voistate, statevsd)->prev.q64.uq64);
3587 			break;
3588 		default:
3589 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3590 			break;
3591 		}
3592 	}
3593 
3594 	if (error)
3595 		return (error);
3596 
3597 	for (i = v->voistatmaxid; i > 0; i--) {
3598 		vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
3599 		if (vs->stype < 0)
3600 			continue;
3601 
3602 		vsd = BLOB_OFFSET(sb, vs->data_off);
3603 
3604 		switch (vs->stype) {
3605 		case VS_STYPE_MAX:
3606 			tmperr = stats_v1_voi_update_max(voi_dtype, voival,
3607 			    vs, vsd);
3608 			break;
3609 		case VS_STYPE_MIN:
3610 			tmperr = stats_v1_voi_update_min(voi_dtype, voival,
3611 			    vs, vsd);
3612 			break;
3613 		case VS_STYPE_SUM:
3614 			tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
3615 			    vs, vsd);
3616 			break;
3617 		case VS_STYPE_HIST:
3618 			tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
3619 			    vs, vsd);
3620 			break;
3621 		case VS_STYPE_TDGST:
3622 			tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
3623 			    vs, vsd);
3624 			break;
3625 		default:
3626 			KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
3627 			break;
3628 		}
3629 
3630 		if (tmperr) {
3631 			error = tmperr;
3632 			VS_INCERRS(vs);
3633 		}
3634 	}
3635 
3636 	if (statevsd) {
3637 		switch (voi_dtype) {
3638 		case VSD_DTYPE_INT_S32:
3639 			VSD(voistate, statevsd)->prev.int32.s32 =
3640 			    voival->int32.s32;
3641 			break;
3642 		case VSD_DTYPE_INT_U32:
3643 			VSD(voistate, statevsd)->prev.int32.u32 =
3644 			    voival->int32.u32;
3645 			break;
3646 		case VSD_DTYPE_INT_S64:
3647 			VSD(voistate, statevsd)->prev.int64.s64 =
3648 			    voival->int64.s64;
3649 			break;
3650 		case VSD_DTYPE_INT_U64:
3651 			VSD(voistate, statevsd)->prev.int64.u64 =
3652 			    voival->int64.u64;
3653 			break;
3654 		case VSD_DTYPE_INT_SLONG:
3655 			VSD(voistate, statevsd)->prev.intlong.slong =
3656 			    voival->intlong.slong;
3657 			break;
3658 		case VSD_DTYPE_INT_ULONG:
3659 			VSD(voistate, statevsd)->prev.intlong.ulong =
3660 			    voival->intlong.ulong;
3661 			break;
3662 		case VSD_DTYPE_Q_S32:
3663 			error = Q_QCPYVALQ(
3664 			    &VSD(voistate, statevsd)->prev.q32.sq32,
3665 			    voival->q32.sq32);
3666 			break;
3667 		case VSD_DTYPE_Q_U32:
3668 			error = Q_QCPYVALQ(
3669 			    &VSD(voistate, statevsd)->prev.q32.uq32,
3670 			    voival->q32.uq32);
3671 			break;
3672 		case VSD_DTYPE_Q_S64:
3673 			error = Q_QCPYVALQ(
3674 			    &VSD(voistate, statevsd)->prev.q64.sq64,
3675 			    voival->q64.sq64);
3676 			break;
3677 		case VSD_DTYPE_Q_U64:
3678 			error = Q_QCPYVALQ(
3679 			    &VSD(voistate, statevsd)->prev.q64.uq64,
3680 			    voival->q64.uq64);
3681 			break;
3682 		default:
3683 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3684 			break;
3685 		}
3686 	}
3687 
3688 	return (error);
3689 }
3690 
3691 #ifdef _KERNEL
3692 
/*
 * Placeholder SYSINIT hook; performs no work at present but anchors the
 * stats(9) framework into kernel initialisation at SI_SUB_KDTRACE time.
 */
static void
stats_init(void *arg)
{

}
SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
3699 
3700 /*
3701  * Sysctl handler to display the list of available stats templates.
3702  */
3703 static int
3704 stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
3705 {
3706 	struct sbuf *s;
3707 	int err, i;
3708 
3709 	err = 0;
3710 
3711 	/* We can tolerate ntpl being stale, so do not take the lock. */
3712 	s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
3713 	    ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
3714 	if (s == NULL)
3715 		return (ENOMEM);
3716 
3717 	TPL_LIST_RLOCK();
3718 	for (i = 0; i < ntpl; i++) {
3719 		err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
3720 		    tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
3721 		if (err) {
3722 			/* Sbuf overflow condition. */
3723 			err = EOVERFLOW;
3724 			break;
3725 		}
3726 	}
3727 	TPL_LIST_RUNLOCK();
3728 
3729 	if (!err) {
3730 		sbuf_finish(s);
3731 		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
3732 	}
3733 
3734 	sbuf_delete(s);
3735 	return (err);
3736 }
3737 
3738 /*
3739  * Called by subsystem-specific sysctls to report and/or parse the list of
3740  * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
3741  * conformant function pointer must be passed in as arg1, which is used to
3742  * interact with the subsystem's stats template sample rates list. If arg2 > 0,
3743  * a zero-initialised allocation of arg2-sized contextual memory is
3744  * heap-allocated and passed in to all subsystem callbacks made during the
3745  * operation of stats_tpl_sample_rates().
3746  *
3747  * XXXLAS: Assumes templates are never removed, which is currently true but may
3748  * need to be reworked in future if dynamic template management becomes a
3749  * requirement e.g. to support kernel module based templates.
3750  */
int
stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
{
	char kvpair_fmt[16], tplspec_fmt[16];
	char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
	char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
	stats_tpl_sr_cb_t subsys_cb;
	void *subsys_ctx;
	char *buf, *new_rates_usr_str, *tpl_name_p;
	struct stats_tpl_sample_rate *rates;
	struct sbuf *s, _s;
	uint32_t cum_pct, pct, tpl_hash;
	int err, i, off, len, newlen, nrates;

	buf = NULL;
	rates = NULL;
	err = nrates = 0;
	subsys_cb = (stats_tpl_sr_cb_t)arg1;
	KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
	/* Optional per-call subsystem context, zeroed, sized by arg2. */
	if (arg2 > 0)
		subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
	else
		subsys_ctx = NULL;

	/* Grab current count of subsystem rates. */
	err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
	if (err)
		goto done;

	/* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
	len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));

	/* One buffer serves both the read render and the write copyin. */
	if (req->oldptr != NULL || req->newptr != NULL)
		buf = malloc(len, M_TEMP, M_WAITOK);

	if (req->oldptr != NULL) {
		if (nrates == 0) {
			/* No rates, so return an empty string via oldptr. */
			err = SYSCTL_OUT(req, "", 1);
			if (err)
				goto done;
			goto process_new;
		}

		s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);

		/* Grab locked count of, and ptr to, subsystem rates. */
		err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
		    subsys_ctx);
		if (err)
			goto done;
		TPL_LIST_RLOCK();
		for (i = 0; i < nrates && !err; i++) {
			err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
			    tpllist[rates[i].tpl_slot_id]->mb->tplname,
			    tpllist[rates[i].tpl_slot_id]->mb->tplhash,
			    rates[i].tpl_sample_pct);
		}
		TPL_LIST_RUNLOCK();
		/* Tell subsystem that we're done with its rates list. */
		err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
		if (err)
			goto done;

		err = sbuf_finish(s);
		if (err)
			goto done; /* We lost a race for buf to be too small. */

		/* Return the rendered string data via oldptr. */
		err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
	} else {
		/* Return the upper bound size for buffer sizing requests. */
		err = SYSCTL_OUT(req, NULL, len);
	}

process_new:
	if (err || req->newptr == NULL)
		goto done;

	/* Copy in the caller's (possibly partial) new rates string. */
	newlen = req->newlen - req->newidx;
	err = SYSCTL_IN(req, buf, newlen);
	if (err)
		goto done;

	/*
	 * Initialise format strings at run time.
	 *
	 * Write the max template spec string length into the
	 * template_spec=percent key-value pair parsing format string as:
	 *     " %<width>[^=]=%u %n"
	 *
	 * Write the max template name string length into the tplname:tplhash
	 * parsing format string as:
	 *     "%<width>[^:]:%u"
	 *
	 * Subtract 1 for \0 appended by sscanf().
	 */
	sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
	sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);

	/*
	 * Parse each CSV key-value pair specifying a template and its sample
	 * percentage. Whitespace either side of a key-value pair is ignored.
	 * Templates can be specified by name, hash, or name and hash per the
	 * following formats (chars in [] are optional):
	 *    ["]<tplname>["]=<percent>
	 *    :hash=pct
	 *    ["]<tplname>["]:hash=<percent>
	 */
	cum_pct = nrates = 0;
	rates = NULL;
	buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
	new_rates_usr_str = buf;
	while (isspace(*new_rates_usr_str))
		new_rates_usr_str++; /* Skip leading whitespace. */
	while (*new_rates_usr_str != '\0') {
		tpl_name_p = tpl_name;
		tpl_name[0] = '\0';
		tpl_hash = 0;
		off = 0;

		/*
		 * Parse key-value pair which must perform 2 conversions, then
		 * parse the template spec to extract either name, hash, or name
		 * and hash depending on the three possible spec formats. The
		 * tplspec_fmt format specifier parses name or name and hash
		 * template specs, while the ":%u" format specifier parses
		 * hash-only template specs. If parsing is successful, ensure
		 * the cumulative sampling percentage does not exceed 100.
		 */
		err = EINVAL;
		if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
		    &off))
			break;
		if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
		    (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
			break;
		if ((cum_pct += pct) > 100)
			break;
		err = 0;

		/* Strip surrounding "" from template name if present. */
		len = strlen(tpl_name);
		if (len > 0) {
			if (tpl_name[len - 1] == '"')
				tpl_name[--len] = '\0';
			if (tpl_name[0] == '"') {
				tpl_name_p++;
				len--;
			}
		}

		/* Grow the rates array by one entry and resolve the slot. */
		rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
		    (nrates + 1) * sizeof(*rates), M_WAITOK);
		rates[nrates].tpl_slot_id =
		    stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
		if (rates[nrates].tpl_slot_id < 0) {
			err = -rates[nrates].tpl_slot_id;
			break;
		}
		rates[nrates].tpl_sample_pct = pct;
		nrates++;
		new_rates_usr_str += off;
		if (*new_rates_usr_str != ',')
			break; /* End-of-input or malformed. */
		new_rates_usr_str++; /* Move past comma to next pair. */
	}

	if (!err) {
		if ((new_rates_usr_str - buf) < newlen) {
			/* Entire input has not been consumed. */
			err = EINVAL;
		} else {
			/*
			 * Give subsystem the new rates. They'll return the
			 * appropriate rates pointer for us to garbage collect.
			 */
			err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
			    subsys_ctx);
		}
	}
	stats_free(rates);

done:
	free(buf, M_TEMP);
	free(subsys_ctx, M_TEMP);
	return (err);
}
3939 
/* Root of the kern.stats sysctl MIB. */
SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "stats(9) MIB");

SYSCTL_PROC(_kern_stats, OID_AUTO, templates,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    stats_tpl_list_available, "A",
    "list the name/hash of all available stats(9) templates");
3947 
3948 #else /* ! _KERNEL */
3949 
/*
 * Userland load-time constructor: initialise the rwlock protecting the
 * template list before any stats(9) API calls can run.
 * NOTE(review): the pthread_rwlock_init() return value is ignored here;
 * with default attributes failure is effectively limited to resource
 * exhaustion — confirm this is an acceptable policy.
 */
static void __attribute__ ((constructor))
stats_constructor(void)
{

	pthread_rwlock_init(&tpllistlock, NULL);
}
3956 
/*
 * Userland unload-time destructor: tear down the template list rwlock
 * created by stats_constructor().
 */
static void __attribute__ ((destructor))
stats_destructor(void)
{

	pthread_rwlock_destroy(&tpllistlock);
}
3963 
3964 #endif /* _KERNEL */
3965