1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2012-2016 Intel Corporation
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include "opt_nvme.h"
31
32 #include <sys/param.h>
33 #include <sys/bus.h>
34 #include <sys/sysctl.h>
35
36 #include "nvme_private.h"
37
38 #ifndef NVME_USE_NVD
39 #define NVME_USE_NVD 0
40 #endif
41
42 int nvme_use_nvd = NVME_USE_NVD;
43 bool nvme_verbose_cmd_dump = false;
44
45 SYSCTL_NODE(_hw, OID_AUTO, nvme, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
46 "NVMe sysctl tunables");
47 SYSCTL_INT(_hw_nvme, OID_AUTO, use_nvd, CTLFLAG_RDTUN,
48 &nvme_use_nvd, 1, "1 = Create NVD devices, 0 = Create NDA devices");
49 SYSCTL_BOOL(_hw_nvme, OID_AUTO, verbose_cmd_dump, CTLFLAG_RWTUN,
50 &nvme_verbose_cmd_dump, 0,
51 "enable verbose command printing when a command fails");
52
53 static void
nvme_dump_queue(struct nvme_qpair * qpair)54 nvme_dump_queue(struct nvme_qpair *qpair)
55 {
56 struct nvme_completion *cpl;
57 struct nvme_command *cmd;
58 int i;
59
60 printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase);
61
62 printf("Completion queue:\n");
63 for (i = 0; i < qpair->num_entries; i++) {
64 cpl = &qpair->cpl[i];
65 printf("%05d: ", i);
66 nvme_qpair_print_completion(qpair, cpl);
67 }
68
69 printf("Submission queue:\n");
70 for (i = 0; i < qpair->num_entries; i++) {
71 cmd = &qpair->cmd[i];
72 printf("%05d: ", i);
73 nvme_qpair_print_command(qpair, cmd);
74 }
75 }
76
77 static int
nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)78 nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)
79 {
80 struct nvme_qpair *qpair = arg1;
81 uint32_t val = 0;
82
83 int error = sysctl_handle_int(oidp, &val, 0, req);
84
85 if (error)
86 return (error);
87
88 if (val != 0)
89 nvme_dump_queue(qpair);
90
91 return (0);
92 }
93
94 static int
nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)95 nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)
96 {
97 struct nvme_controller *ctrlr = arg1;
98 uint32_t oldval = ctrlr->int_coal_time;
99 int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0,
100 req);
101
102 if (error)
103 return (error);
104
105 if (oldval != ctrlr->int_coal_time)
106 nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
107 ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
108 NULL);
109
110 return (0);
111 }
112
113 static int
nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)114 nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)
115 {
116 struct nvme_controller *ctrlr = arg1;
117 uint32_t oldval = ctrlr->int_coal_threshold;
118 int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0,
119 req);
120
121 if (error)
122 return (error);
123
124 if (oldval != ctrlr->int_coal_threshold)
125 nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
126 ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
127 NULL);
128
129 return (0);
130 }
131
132 static int
nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)133 nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
134 {
135 uint32_t *ptr = arg1;
136 uint32_t newval = *ptr;
137 int error = sysctl_handle_int(oidp, &newval, 0, req);
138
139 if (error || (req->newptr == NULL))
140 return (error);
141
142 if (newval > NVME_MAX_TIMEOUT_PERIOD ||
143 newval < NVME_MIN_TIMEOUT_PERIOD) {
144 return (EINVAL);
145 } else {
146 *ptr = newval;
147 }
148
149 return (0);
150 }
151
152 static void
nvme_qpair_reset_stats(struct nvme_qpair * qpair)153 nvme_qpair_reset_stats(struct nvme_qpair *qpair)
154 {
155
156 /*
157 * Reset the values. Due to sanity checks in
158 * nvme_qpair_process_completions, we reset the number of interrupt
159 * calls to 1.
160 */
161 qpair->num_cmds = 0;
162 qpair->num_intr_handler_calls = 1;
163 qpair->num_retries = 0;
164 qpair->num_failures = 0;
165 qpair->num_ignored = 0;
166 qpair->num_recovery_nolock = 0;
167 }
168
169 static int
nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)170 nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)
171 {
172 struct nvme_controller *ctrlr = arg1;
173 int64_t num_cmds = 0;
174 int i;
175
176 num_cmds = ctrlr->adminq.num_cmds;
177
178 for (i = 0; i < ctrlr->num_io_queues; i++)
179 num_cmds += ctrlr->ioq[i].num_cmds;
180
181 return (sysctl_handle_64(oidp, &num_cmds, 0, req));
182 }
183
184 static int
nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)185 nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)
186 {
187 struct nvme_controller *ctrlr = arg1;
188 int64_t num_intr_handler_calls = 0;
189 int i;
190
191 num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls;
192
193 for (i = 0; i < ctrlr->num_io_queues; i++)
194 num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls;
195
196 return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req));
197 }
198
199 static int
nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)200 nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)
201 {
202 struct nvme_controller *ctrlr = arg1;
203 int64_t num_retries = 0;
204 int i;
205
206 num_retries = ctrlr->adminq.num_retries;
207
208 for (i = 0; i < ctrlr->num_io_queues; i++)
209 num_retries += ctrlr->ioq[i].num_retries;
210
211 return (sysctl_handle_64(oidp, &num_retries, 0, req));
212 }
213
214 static int
nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)215 nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)
216 {
217 struct nvme_controller *ctrlr = arg1;
218 int64_t num_failures = 0;
219 int i;
220
221 num_failures = ctrlr->adminq.num_failures;
222
223 for (i = 0; i < ctrlr->num_io_queues; i++)
224 num_failures += ctrlr->ioq[i].num_failures;
225
226 return (sysctl_handle_64(oidp, &num_failures, 0, req));
227 }
228
229 static int
nvme_sysctl_num_ignored(SYSCTL_HANDLER_ARGS)230 nvme_sysctl_num_ignored(SYSCTL_HANDLER_ARGS)
231 {
232 struct nvme_controller *ctrlr = arg1;
233 int64_t num_ignored = 0;
234 int i;
235
236 num_ignored = ctrlr->adminq.num_ignored;
237
238 for (i = 0; i < ctrlr->num_io_queues; i++)
239 num_ignored += ctrlr->ioq[i].num_ignored;
240
241 return (sysctl_handle_64(oidp, &num_ignored, 0, req));
242 }
243
244 static int
nvme_sysctl_num_recovery_nolock(SYSCTL_HANDLER_ARGS)245 nvme_sysctl_num_recovery_nolock(SYSCTL_HANDLER_ARGS)
246 {
247 struct nvme_controller *ctrlr = arg1;
248 int64_t num;
249 int i;
250
251 num = ctrlr->adminq.num_recovery_nolock;
252
253 for (i = 0; i < ctrlr->num_io_queues; i++)
254 num += ctrlr->ioq[i].num_recovery_nolock;
255
256 return (sysctl_handle_64(oidp, &num, 0, req));
257 }
258
259 static int
nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)260 nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
261 {
262 struct nvme_controller *ctrlr = arg1;
263 uint32_t i, val = 0;
264
265 int error = sysctl_handle_int(oidp, &val, 0, req);
266
267 if (error)
268 return (error);
269
270 if (val != 0) {
271 nvme_qpair_reset_stats(&ctrlr->adminq);
272
273 for (i = 0; i < ctrlr->num_io_queues; i++)
274 nvme_qpair_reset_stats(&ctrlr->ioq[i]);
275 }
276
277 return (0);
278 }
279
280 static void
nvme_sysctl_initialize_queue(struct nvme_qpair * qpair,struct sysctl_ctx_list * ctrlr_ctx,struct sysctl_oid * que_tree)281 nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
282 struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree)
283 {
284 struct sysctl_oid_list *que_list = SYSCTL_CHILDREN(que_tree);
285
286 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
287 CTLFLAG_RD, &qpair->num_entries, 0,
288 "Number of entries in hardware queue");
289 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
290 CTLFLAG_RD, &qpair->num_trackers, 0,
291 "Number of trackers pre-allocated for this queue pair");
292 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
293 CTLFLAG_RD, &qpair->sq_head, 0,
294 "Current head of submission queue (as observed by driver)");
295 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail",
296 CTLFLAG_RD, &qpair->sq_tail, 0,
297 "Current tail of submission queue (as observed by driver)");
298 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head",
299 CTLFLAG_RD, &qpair->cq_head, 0,
300 "Current head of completion queue (as observed by driver)");
301
302 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds",
303 CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted");
304 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls",
305 CTLFLAG_RD, &qpair->num_intr_handler_calls,
306 "Number of times interrupt handler was invoked (will typically be "
307 "less than number of actual interrupts generated due to "
308 "coalescing)");
309 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries",
310 CTLFLAG_RD, &qpair->num_retries, "Number of commands retried");
311 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_failures",
312 CTLFLAG_RD, &qpair->num_failures,
313 "Number of commands ending in failure after all retries");
314 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_ignored",
315 CTLFLAG_RD, &qpair->num_ignored,
316 "Number of interrupts posted, but were administratively ignored");
317 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_recovery_nolock",
318 CTLFLAG_RD, &qpair->num_recovery_nolock,
319 "Number of times that we failed to lock recovery in the ISR");
320
321 SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
322 "dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
323 qpair, 0, nvme_sysctl_dump_debug, "IU", "Dump debug data");
324 }
325
326 void
nvme_sysctl_initialize_ctrlr(struct nvme_controller * ctrlr)327 nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
328 {
329 struct sysctl_ctx_list *ctrlr_ctx;
330 struct sysctl_oid *ctrlr_tree, *que_tree;
331 struct sysctl_oid_list *ctrlr_list;
332 #define QUEUE_NAME_LENGTH 16
333 char queue_name[QUEUE_NAME_LENGTH];
334 int i;
335
336 ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev);
337 ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
338 ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);
339
340 SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_io_queues",
341 CTLFLAG_RD, &ctrlr->num_io_queues, 0,
342 "Number of I/O queue pairs");
343
344 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
345 "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
346 ctrlr, 0, nvme_sysctl_int_coal_time, "IU",
347 "Interrupt coalescing timeout (in microseconds)");
348
349 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
350 "int_coal_threshold",
351 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0,
352 nvme_sysctl_int_coal_threshold, "IU",
353 "Interrupt coalescing threshold");
354
355 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
356 "admin_timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
357 &ctrlr->admin_timeout_period, 0, nvme_sysctl_timeout_period, "IU",
358 "Timeout period for Admin queue (in seconds)");
359
360 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
361 "timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
362 &ctrlr->timeout_period, 0, nvme_sysctl_timeout_period, "IU",
363 "Timeout period for I/O queues (in seconds)");
364
365 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
366 "num_cmds", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
367 ctrlr, 0, nvme_sysctl_num_cmds, "IU",
368 "Number of commands submitted");
369
370 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
371 "num_intr_handler_calls",
372 CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0,
373 nvme_sysctl_num_intr_handler_calls, "IU",
374 "Number of times interrupt handler was invoked (will "
375 "typically be less than number of actual interrupts "
376 "generated due to coalescing)");
377
378 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
379 "num_retries", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
380 ctrlr, 0, nvme_sysctl_num_retries, "IU",
381 "Number of commands retried");
382
383 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
384 "num_failures", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
385 ctrlr, 0, nvme_sysctl_num_failures, "IU",
386 "Number of commands ending in failure after all retries");
387
388 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
389 "num_ignored", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
390 ctrlr, 0, nvme_sysctl_num_ignored, "IU",
391 "Number of interrupts ignored administratively");
392
393 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
394 "num_recovery_nolock", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
395 ctrlr, 0, nvme_sysctl_num_recovery_nolock, "IU",
396 "Number of times that we failed to lock recovery in the ISR");
397
398 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
399 "reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr,
400 0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");
401
402 SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_lo",
403 CTLFLAG_RD, &ctrlr->cap_lo, 0,
404 "Low 32-bits of capacities for the drive");
405
406 SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_hi",
407 CTLFLAG_RD, &ctrlr->cap_hi, 0,
408 "Hi 32-bits of capacities for the drive");
409
410 que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq",
411 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
412
413 nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree);
414
415 for (i = 0; i < ctrlr->num_io_queues; i++) {
416 snprintf(queue_name, QUEUE_NAME_LENGTH, "ioq%d", i);
417 que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO,
418 queue_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IO Queue");
419 nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx,
420 que_tree);
421 }
422 }
423