/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators. Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2. Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */
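
/*
 * Illustrative sketch of the typical producer flow through this file (not
 * part of the code below; the class string, payload name and guid variables
 * are hypothetical): build the detector fmri and ereport nvlists, fill them
 * in with the fm_* helpers, then hand both to zfs_zevent_post() along with a
 * cleanup callback.
 *
 *	nvlist_t *ereport = fm_nvlist_create(NULL);
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *
 *	fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0, pool_guid, vdev_guid);
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "fs.zfs.example",
 *	    fm_ena_generate(0, FM_ENA_FMT1), detector,
 *	    "pool", DATA_TYPE_STRING, "tank", NULL);
 */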

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/console.h>
#include <sys/time.h>
#include <sys/zfs_ioctl.h>

int zfs_zevent_len_max = 0;
int zfs_zevent_cols = 80;
int zfs_zevent_console = 0;

static int zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted. The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */


/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
	kstat_named_t	payload_set_failed;	/* num payload set failures */
	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
};

static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 },
	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

/*
 * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of
 * output so they aren't split across console lines, and return the end column.
 */
/*PRINTFLIKE4*/
static int
fm_printf(int depth, int c, int cols, const char *format, ...)
{
	va_list ap;
	int width;
	char c1;

	va_start(ap, format);
	width = vsnprintf(&c1, sizeof (c1), format, ap);
	va_end(ap);

	if (c + width >= cols) {
		console_printf("\n");
		c = 0;
		if (format[0] != ' ' && depth > 0) {
			console_printf(" ");
			c++;
		}
	}

	va_start(ap, format);
	console_vprintf(format, ap);
	va_end(ap);

	return ((c + width) % cols);
}

/*
 * Recursively print an nvlist in the specified column width and return the
 * column we end up in. This function is called recursively by fm_nvprint(),
 * below. We generically format the entire nvpair using hexadecimal
 * integers and strings, and elide string, boolean and byte arrays. Arrays
 * are basically used for cache dumps right now, so we suppress them so as
 * not to overwhelm the amount of console output we produce at panic time.
 * This can be further enhanced as FMA technology grows based upon the needs
 * of consumers. All FMA telemetry is logged using the dump device transport,
 * so the console output serves only as a fallback in case this procedure is
 * unsuccessful.
 */
static int
fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
{
	nvpair_t *nvp;

	for (nvp = nvlist_next_nvpair(nvl, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {

		data_type_t type = nvpair_type(nvp);
		const char *name = nvpair_name(nvp);

		boolean_t b;
		uint8_t i8;
		uint16_t i16;
		uint32_t i32;
		uint64_t i64;
		char *str;
		nvlist_t *cnv;

		if (strcmp(name, FM_CLASS) == 0)
			continue;	/* already printed by caller */

		c = fm_printf(d, c, cols, " %s=", name);

		switch (type) {
		case DATA_TYPE_BOOLEAN:
			c = fm_printf(d + 1, c, cols, " 1");
			break;

		case DATA_TYPE_BOOLEAN_VALUE:
			(void) nvpair_value_boolean_value(nvp, &b);
			c = fm_printf(d + 1, c, cols, b ? "1" : "0");
			break;

		case DATA_TYPE_BYTE:
			(void) nvpair_value_byte(nvp, &i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_INT8:
			(void) nvpair_value_int8(nvp, (void *)&i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_UINT8:
			(void) nvpair_value_uint8(nvp, &i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_INT16:
			(void) nvpair_value_int16(nvp, (void *)&i16);
			c = fm_printf(d + 1, c, cols, "0x%x", i16);
			break;

		case DATA_TYPE_UINT16:
			(void) nvpair_value_uint16(nvp, &i16);
			c = fm_printf(d + 1, c, cols, "0x%x", i16);
			break;

		case DATA_TYPE_INT32:
			(void) nvpair_value_int32(nvp, (void *)&i32);
			c = fm_printf(d + 1, c, cols, "0x%x", i32);
			break;

		case DATA_TYPE_UINT32:
			(void) nvpair_value_uint32(nvp, &i32);
			c = fm_printf(d + 1, c, cols, "0x%x", i32);
			break;

		case DATA_TYPE_INT64:
			(void) nvpair_value_int64(nvp, (void *)&i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_UINT64:
			(void) nvpair_value_uint64(nvp, &i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_HRTIME:
			(void) nvpair_value_hrtime(nvp, (void *)&i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_STRING:
			(void) nvpair_value_string(nvp, &str);
			c = fm_printf(d + 1, c, cols, "\"%s\"",
			    str ? str : "<NULL>");
			break;

		case DATA_TYPE_NVLIST:
			c = fm_printf(d + 1, c, cols, "[");
			(void) nvpair_value_nvlist(nvp, &cnv);
			c = fm_nvprintr(cnv, d + 1, c, cols);
			c = fm_printf(d + 1, c, cols, " ]");
			break;

		case DATA_TYPE_NVLIST_ARRAY: {
			nvlist_t **val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[");
			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++) {
				c = fm_nvprintr(val[i], d + 1, c, cols);
			}
			c = fm_printf(d + 1, c, cols, " ]");
			}
			break;

		case DATA_TYPE_INT8_ARRAY: {
			int8_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_UINT8_ARRAY: {
			uint8_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_INT16_ARRAY: {
			int16_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_UINT16_ARRAY: {
			uint16_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_INT32_ARRAY: {
			int32_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_UINT32_ARRAY: {
			uint32_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_INT64_ARRAY: {
			int64_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_UINT64_ARRAY: {
			uint64_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_STRING_ARRAY:
		case DATA_TYPE_BOOLEAN_ARRAY:
		case DATA_TYPE_BYTE_ARRAY:
			c = fm_printf(d + 1, c, cols, "[...]");
			break;

		case DATA_TYPE_UNKNOWN:
		case DATA_TYPE_DONTCARE:
			c = fm_printf(d + 1, c, cols, "<unknown>");
			break;
		}
	}

	return (c);
}

void
fm_nvprint(nvlist_t *nvl)
{
	char *class;
	int c = 0;

	console_printf("\n");

	if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
		c = fm_printf(0, c, zfs_zevent_cols, "%s", class);

	if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
		console_printf("\n");

	console_printf("\n");
}
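
/*
 * Hypothetical example of the rendering above: an nvlist whose class is
 * "ereport.fs.zfs.example" with uint64 members pool_guid and prev_state
 * would appear on the console roughly as
 *
 *	ereport.fs.zfs.example pool_guid=0x1851807b08490922 prev_state=0x0
 *
 * wrapped at zfs_zevent_cols columns.
 */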

static zevent_t *
zfs_zevent_alloc(void)
{
	zevent_t *ev;

	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
	    offsetof(zfs_zevent_t, ze_node));
	list_link_init(&ev->ev_node);

	return (ev);
}

static void
zfs_zevent_free(zevent_t *ev)
{
	/* Run provided cleanup callback */
	ev->ev_cb(ev->ev_nvl, ev->ev_detector);

	list_destroy(&ev->ev_ze_list);
	kmem_free(ev, sizeof (zevent_t));
}

static void
zfs_zevent_drain(zevent_t *ev)
{
	zfs_zevent_t *ze;

	ASSERT(MUTEX_HELD(&zevent_lock));
	list_remove(&zevent_list, ev);

	/* Remove references to this event in all private file data */
	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
		list_remove(&ev->ev_ze_list, ze);
		ze->ze_zevent = NULL;
		ze->ze_dropped++;
	}

	zfs_zevent_free(ev);
}

void
zfs_zevent_drain_all(int *count)
{
	zevent_t *ev;

	mutex_enter(&zevent_lock);
	while ((ev = list_head(&zevent_list)) != NULL)
		zfs_zevent_drain(ev);

	*count = zevent_len_cur;
	zevent_len_cur = 0;
	mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head. If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this any user space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
	ASSERT(MUTEX_HELD(&zevent_lock));
	list_insert_head(&zevent_list, ev);

	if (zevent_len_cur >= zfs_zevent_len_max)
		zfs_zevent_drain(list_tail(&zevent_list));
	else
		zevent_len_cur++;
}

/*
 * Post a zevent. The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted. In this case, cb is
 *   called before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
	inode_timespec_t tv;
	int64_t tv_array[2];
	uint64_t eid;
	size_t nvl_size = 0;
	zevent_t *ev;
	int error;

	ASSERT(cb != NULL);

	gethrestime(&tv);
	tv_array[0] = tv.tv_sec;
	tv_array[1] = tv.tv_nsec;

	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	eid = atomic_inc_64_nv(&zevent_eid);
	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		goto out;
	}

	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = EOVERFLOW;
		goto out;
	}

	if (zfs_zevent_console)
		fm_nvprint(nvl);

	ev = zfs_zevent_alloc();
	if (ev == NULL) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = ENOMEM;
		goto out;
	}

	ev->ev_nvl = nvl;
	ev->ev_detector = detector;
	ev->ev_cb = cb;
	ev->ev_eid = eid;

	mutex_enter(&zevent_lock);
	zfs_zevent_insert(ev);
	cv_broadcast(&zevent_cv);
	mutex_exit(&zevent_lock);

out:
	if (error)
		cb(nvl, detector);

	return (error);
}
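
/*
 * Illustrative sketch of the callback contract above (the callback name is
 * hypothetical): the cb owns the final free of both nvlists, whether the
 * post succeeded or failed, so the caller never frees them directly.
 *
 *	static void
 *	example_zevent_cb(nvlist_t *nvl, nvlist_t *detector)
 *	{
 *		if (nvl != NULL)
 *			fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *		if (detector != NULL)
 *			fm_nvlist_destroy(detector, FM_NVA_FREE);
 *	}
 *
 *	(void) zfs_zevent_post(nvl, detector, example_zevent_cb);
 */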

void
zfs_zevent_track_duplicate(void)
{
	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
}

static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
	if (*ze == NULL)
		return (SET_ERROR(EBADF));

	return (0);
}

int
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
	int error;

	error = zfsdev_getminor(fd, minorp);
	if (error == 0)
		error = zfs_zevent_minor_to_state(*minorp, ze);

	if (error)
		zfs_zevent_fd_rele(fd);

	return (error);
}

void
zfs_zevent_fd_rele(int fd)
{
	zfs_file_put(fd);
}

/*
 * Get the next zevent in the stream and place a copy in 'event'. This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'. In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
	zevent_t *ev;
	size_t size;
	int error = 0;

	mutex_enter(&zevent_lock);
	if (ze->ze_zevent == NULL) {
		/* New stream, start at the beginning/tail */
		ev = list_tail(&zevent_list);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	} else {
		/*
		 * Existing stream, continue with the next element and remove
		 * ourselves from the wait queue for the previous element
		 */
		ev = list_prev(&zevent_list, ze->ze_zevent);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	}

	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
	if (size > *event_size) {
		*event_size = size;
		error = ENOMEM;
		goto out;
	}

	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);

	ze->ze_zevent = ev;
	list_insert_head(&ev->ev_ze_list, ze);
	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
	*dropped = ze->ze_dropped;

#ifdef _KERNEL
	/* Include events dropped due to rate limiting */
	*dropped += ratelimit_dropped;
	ratelimit_dropped = 0;
#endif
	ze->ze_dropped = 0;
out:
	mutex_exit(&zevent_lock);

	return (error);
}
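
/*
 * Illustrative consumer loop (hypothetical caller, error handling elided):
 * on ENOMEM the required size has been written back through 'event_size',
 * so the caller simply retries; on ENOENT the stream is drained and the
 * caller can block in zfs_zevent_wait() for the next post.
 *
 *	uint64_t size = 0, dropped = 0;
 *	nvlist_t *event;
 *	int error;
 *
 *	while ((error = zfs_zevent_next(ze, &event, &size, &dropped)) != 0) {
 *		if (error == ENOMEM)
 *			continue;	(size was updated, just retry)
 *		if (error == ENOENT && zfs_zevent_wait(ze) == 0)
 *			continue;
 *		break;
 *	}
 */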

/*
 * Wait in an interruptible state for any new events.
 */
int
zfs_zevent_wait(zfs_zevent_t *ze)
{
	int error = EAGAIN;

	mutex_enter(&zevent_lock);
	zevent_waiters++;

	while (error == EAGAIN) {
		if (zevent_flags & ZEVENT_SHUTDOWN) {
			error = SET_ERROR(ESHUTDOWN);
			break;
		}

		error = cv_wait_sig(&zevent_cv, &zevent_lock);
		if (signal_pending(current)) {
			error = SET_ERROR(EINTR);
			break;
		} else if (!list_is_empty(&zevent_list)) {
			error = 0;
			continue;
		} else {
			error = EAGAIN;
		}
	}

	zevent_waiters--;
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * The caller may seek to a specific EID by passing that EID. If the EID
 * is still available in the posted list of events the cursor is positioned
 * there. Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
	zevent_t *ev;
	int error = 0;

	mutex_enter(&zevent_lock);

	if (eid == ZEVENT_SEEK_START) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ze->ze_zevent = NULL;
		goto out;
	}

	if (eid == ZEVENT_SEEK_END) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ev = list_head(&zevent_list);
		if (ev) {
			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
		} else {
			ze->ze_zevent = NULL;
		}

		goto out;
	}

	for (ev = list_tail(&zevent_list); ev != NULL;
	    ev = list_prev(&zevent_list, ev)) {
		if (ev->ev_eid == eid) {
			if (ze->ze_zevent)
				list_remove(&ze->ze_zevent->ev_ze_list, ze);

			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
			break;
		}
	}

	if (ev == NULL)
		error = ENOENT;

out:
	mutex_exit(&zevent_lock);

	return (error);
}
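
/*
 * For example, a hypothetical consumer resuming from a previously recorded
 * EID would fall back to the start of the list if that event has since
 * been drained from the queue:
 *
 *	if (zfs_zevent_seek(ze, last_eid) == ENOENT)
 *		(void) zfs_zevent_seek(ze, ZEVENT_SEEK_START);
 */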

void
zfs_zevent_init(zfs_zevent_t **zep)
{
	zfs_zevent_t *ze;

	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
	list_link_init(&ze->ze_node);
}

void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
	mutex_enter(&zevent_lock);
	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);
	mutex_exit(&zevent_lock);

	kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
/* ARGSUSED */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
	return (kmem_zalloc(size, KM_SLEEP));
}

/* ARGSUSED */
static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
	kmem_free(buf, size);
}

const nv_alloc_ops_t fm_mem_alloc_ops = {
	.nv_ao_init = NULL,
	.nv_ao_fini = NULL,
	.nv_ao_alloc = i_fm_alloc,
	.nv_ao_free = i_fm_free,
	.nv_ao_reset = NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
		kmem_free(nvhdl, sizeof (nv_alloc_t));
		return (NULL);
	}

	return (nvhdl);
}
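
/*
 * Illustrative pairing of the fixed-buffer allocator with the nvlist
 * helpers below (the buffer size is an arbitrary example value): the nvlist
 * is carved out of the caller's preallocated buffer, so no sleeping
 * allocation is needed while it is in use.
 *
 *	char buf[ERPT_DATA_SZ];
 *	nv_alloc_t *nva = fm_nva_xcreate(buf, sizeof (buf));
 *	nvlist_t *nvl = fm_nvlist_create(nva);
 *
 *	(fill in nvl, e.g. from a constrained context)
 *
 *	fm_nvlist_destroy(nvl, FM_NVA_RETAIN);
 *	fm_nva_xdestroy(nva);
 */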

/*
 * Destroy a previously allocated nv_alloc structure. The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
	nv_alloc_fini(nva);
	kmem_free(nva, sizeof (nv_alloc_t));
}

/*
 * Create a new nv list. A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created. The newly created nv list is created and managed by the
 * operations installed in nva. If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring. Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	int hdl_alloced = 0;
	nvlist_t *nvl;
	nv_alloc_t *nvhdl;

	if (nva == NULL) {
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
		hdl_alloced = 1;
	} else {
		nvhdl = nva;
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
		if (hdl_alloced) {
			nv_alloc_fini(nvhdl);
			kmem_free(nvhdl, sizeof (nv_alloc_t));
		}
		return (NULL);
	}

	return (nvl);
}

/*
 * Destroy a previously allocated nvlist structure. flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

	nvlist_free(nvl);

	if (nva != NULL) {
		if (flag == FM_NVA_FREE)
			fm_nva_xdestroy(nva);
	}
}

int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
	int nelem, ret = 0;
	data_type_t type;

	while (ret == 0 && name != NULL) {
		type = va_arg(ap, data_type_t);
		switch (type) {
		case DATA_TYPE_BYTE:
			ret = nvlist_add_byte(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_BYTE_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_byte_array(payload, name,
			    va_arg(ap, uchar_t *), nelem);
			break;
		case DATA_TYPE_BOOLEAN_VALUE:
			ret = nvlist_add_boolean_value(payload, name,
			    va_arg(ap, boolean_t));
			break;
		case DATA_TYPE_BOOLEAN_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_boolean_array(payload, name,
			    va_arg(ap, boolean_t *), nelem);
			break;
		case DATA_TYPE_INT8:
			ret = nvlist_add_int8(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int8_array(payload, name,
			    va_arg(ap, int8_t *), nelem);
			break;
		case DATA_TYPE_UINT8:
			ret = nvlist_add_uint8(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint8_array(payload, name,
			    va_arg(ap, uint8_t *), nelem);
			break;
		case DATA_TYPE_INT16:
			ret = nvlist_add_int16(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int16_array(payload, name,
			    va_arg(ap, int16_t *), nelem);
			break;
		case DATA_TYPE_UINT16:
			ret = nvlist_add_uint16(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint16_array(payload, name,
			    va_arg(ap, uint16_t *), nelem);
			break;
		case DATA_TYPE_INT32:
			ret = nvlist_add_int32(payload, name,
			    va_arg(ap, int32_t));
			break;
		case DATA_TYPE_INT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int32_array(payload, name,
			    va_arg(ap, int32_t *), nelem);
			break;
		case DATA_TYPE_UINT32:
			ret = nvlist_add_uint32(payload, name,
			    va_arg(ap, uint32_t));
			break;
		case DATA_TYPE_UINT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint32_array(payload, name,
			    va_arg(ap, uint32_t *), nelem);
			break;
		case DATA_TYPE_INT64:
			ret = nvlist_add_int64(payload, name,
			    va_arg(ap, int64_t));
			break;
		case DATA_TYPE_INT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int64_array(payload, name,
			    va_arg(ap, int64_t *), nelem);
			break;
		case DATA_TYPE_UINT64:
			ret = nvlist_add_uint64(payload, name,
			    va_arg(ap, uint64_t));
			break;
		case DATA_TYPE_UINT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint64_array(payload, name,
			    va_arg(ap, uint64_t *), nelem);
			break;
		case DATA_TYPE_STRING:
			ret = nvlist_add_string(payload, name,
			    va_arg(ap, char *));
			break;
		case DATA_TYPE_STRING_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_string_array(payload, name,
			    va_arg(ap, char **), nelem);
			break;
		case DATA_TYPE_NVLIST:
			ret = nvlist_add_nvlist(payload, name,
			    va_arg(ap, nvlist_t *));
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_nvlist_array(payload, name,
			    va_arg(ap, nvlist_t **), nelem);
			break;
		default:
			ret = EINVAL;
		}

		name = va_arg(ap, char *);
	}
	return (ret);
}

void
fm_payload_set(nvlist_t *payload, ...)
{
	int ret;
	const char *name;
	va_list ap;

	va_start(ap, payload);
	name = va_arg(ap, char *);
	ret = i_fm_payload_set(payload, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
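
/*
 * Example (hypothetical payload names): each member is given as a
 * (name, DATA_TYPE_*, value...) tuple and the list is NULL-terminated.
 * Array types take an element count before the array pointer.
 *
 *	uint64_t guids[2] = { 0x1234, 0x5678 };
 *
 *	fm_payload_set(payload,
 *	    "retries", DATA_TYPE_UINT32, 3,
 *	    "device", DATA_TYPE_STRING, "sda",
 *	    "guids", DATA_TYPE_UINT64_ARRAY, 2, guids,
 *	    NULL);
 */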

/*
 * Set-up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	version			uint8_t		0
 *	ena			uint64_t	<ena>
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload. Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else. Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
	char ereport_class[FM_MAX_CLASS];
	const char *name;
	va_list ap;
	int ret;

	if (version != FM_EREPORT_VERS0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
	    FM_EREPORT_CLASS, erpt_class);
	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
	    (nvlist_t *)detector) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	va_start(ap, detector);
	name = va_arg(ap, const char *);
	ret = i_fm_payload_set(ereport, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}
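
/*
 * Sketch of a typical call (subclass and payload names hypothetical): the
 * "ereport." prefix is prepended here, so callers pass only the subclass,
 * and the trailing payload varargs follow the fm_payload_set() convention
 * terminated by NULL.
 *
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "fs.zfs.example",
 *	    fm_ena_generate(0, FM_ENA_FMT1), detector,
 *	    "retries", DATA_TYPE_UINT32, 3,
 *	    NULL);
 */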

/*
 * Set-up and validate the members of an hc fmri according to:
 *
 *	Member name		Type		Value
 *	===================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *	hc-id			string		<id>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50

static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
	if (version != FM_HC_SCHEME_VERSION) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	return (1);
}

void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	va_list ap;
	int i;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = 0; i < npairs; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];

		(void) snprintf(idstr, sizeof (idstr), "%u", id);

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
	va_end(ap);

	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

	for (i = 0; i < npairs; i++)
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}
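
/*
 * Example (hypothetical hardware hierarchy): each of the npairs varargs is
 * a (name, uint32_t id) pair, composed in order into the FM_FMRI_HC_LIST
 * array as hc-name/hc-id string pairs.
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, auth, NULL, 3,
 *	    "motherboard", 0,
 *	    "chip", 1,
 *	    "cpu", 4);
 */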

void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	nvlist_t **hcl;
	uint_t n;
	int i, j;
	va_list ap;
	char *hcname, *hcid;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	/*
	 * copy the bboard nvpairs to the pairs array
	 */
	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
	    != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < n; i++) {
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
		    &hcname) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}

	/*
	 * create the pairs from passed in pairs
	 */
	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = n; i < npairs + n; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];
		(void) snprintf(idstr, sizeof (idstr), "%u", id);
		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
	va_end(ap);

	/*
	 * Create the fmri hc list
	 */
	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
	    npairs + n) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < npairs + n; i++) {
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
	}

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
}

/*
 * Set-up and validate the members of a dev fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int err = 0;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

	if (err)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}
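
/*
 * Example (hypothetical device path and devid strings):
 *
 *	fm_fmri_dev_set(fmri, DEV_SCHEME_VERSION0, NULL,
 *	    "/pci@0,0/scsi@1/disk@0,0", "id1,sd@SATA_EXAMPLE", NULL);
 */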

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			uint64_t	<serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

	if (version < CPU_SCHEME_VERSION1) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_CPU) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0)
		atomic_inc_64(failedp);

	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
		atomic_inc_64(failedp);

	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
	    *cpu_maskp) != 0)
		atomic_inc_64(failedp);

	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
		atomic_inc_64(failedp);
}

/*
 * Set-up and validate the members of a mem fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
	if (version != MEM_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (!serial && (offset != (uint64_t)-1)) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (auth != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}

	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (serial != NULL) {
		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
		    (char **)&serial, 1) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
		    FM_FMRI_MEM_OFFSET, offset) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	if (version != ZFS_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (vdev_guid != 0) {
		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

uint64_t
fm_ena_increment(uint64_t ena)
{
	uint64_t new_ena;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
		break;
	case FM_ENA_FMT2:
		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
		break;
	default:
		new_ena = 0;
	}

	return (new_ena);
}

uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
	uint64_t ena = 0;

	switch (format) {
	case FM_ENA_FMT1:
		if (timestamp) {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((timestamp << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		} else {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		}
		break;
	case FM_ENA_FMT2:
		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
		break;
	default:
		break;
	}

	return (ena);
}
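
/*
 * Illustrative round-trip through the ENA helpers (exact field widths come
 * from the ENA_FMT1_* shift/mask definitions in sys/fm/util.h): format 1
 * packs the format tag, generation, CPU id and a truncated high-resolution
 * timestamp into one 64-bit value, and the accessors below mask the fields
 * back out.
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *	ASSERT3U(fm_ena_format_get(ena), ==, FM_ENA_FMT1);
 *	ena = fm_ena_increment(ena);	(bumps the generation field)
 *	ASSERT3U(fm_ena_generation_get(ena), >=, 1);
 */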

uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
	uint64_t ena;

	kpreempt_disable();
	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
	kpreempt_enable();

	return (ena);
}

uint64_t
fm_ena_generation_get(uint64_t ena)
{
	uint64_t gen;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
		break;
	case FM_ENA_FMT2:
		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
		break;
	default:
		gen = 0;
		break;
	}

	return (gen);
}

uchar_t
fm_ena_format_get(uint64_t ena)
{
	return (ENA_FORMAT(ena));
}

uint64_t
fm_ena_id_get(uint64_t ena)
{
	uint64_t id;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
		break;
	case FM_ENA_FMT2:
		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
		break;
	default:
		id = 0;
	}

	return (id);
}

uint64_t
fm_ena_time_get(uint64_t ena)
{
	uint64_t time;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
		break;
	case FM_ENA_FMT2:
		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
		break;
	default:
		time = 0;
	}

	return (time);
}

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count. Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}

void
fm_init(void)
{
	zevent_len_cur = 0;
	zevent_flags = 0;

	if (zfs_zevent_len_max == 0)
		zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);

	/* Initialize zevent allocation and generation kstats */
	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (fm_ksp != NULL) {
		fm_ksp->ks_data = &erpt_kstat_data;
		kstat_install(fm_ksp);
	} else {
		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
	}

	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zevent_list, sizeof (zevent_t),
	    offsetof(zevent_t, ev_node));
	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);

	zfs_ereport_init();
}

void
fm_fini(void)
{
	int count;

	zfs_ereport_fini();

	zfs_zevent_drain_all(&count);

	mutex_enter(&zevent_lock);
	cv_broadcast(&zevent_cv);

	zevent_flags |= ZEVENT_SHUTDOWN;
	while (zevent_waiters > 0) {
		mutex_exit(&zevent_lock);
		schedule();
		mutex_enter(&zevent_lock);
	}
	mutex_exit(&zevent_lock);

	cv_destroy(&zevent_cv);
	list_destroy(&zevent_list);
	mutex_destroy(&zevent_lock);

	if (fm_ksp != NULL) {
		kstat_delete(fm_ksp);
		fm_ksp = NULL;
	}
}
#endif /* _KERNEL */

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW,
	"Max event queue length");

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, cols, INT, ZMOD_RW,
	"Max event column width");

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, console, INT, ZMOD_RW,
	"Log events to the console");