1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
32 __FBSDID("$FreeBSD$");
33
34 #include "opt_rss.h"
35
36 #include "ena_sysctl.h"
37 #include "ena_rss.h"
38
/*
 * Forward declarations of the file-local sysctl helpers.
 */

/* Per-device sysctl tree builders. */
static void	ena_sysctl_add_wd(struct ena_adapter *adapter);
static void	ena_sysctl_add_stats(struct ena_adapter *adapter);
static void	ena_sysctl_add_eni_metrics(struct ena_adapter *adapter);
static void	ena_sysctl_add_tuneables(struct ena_adapter *adapter);

/* Handlers backing the SYSCTL_ADD_PROC() entries. */
static int	ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);

/*
 * Kernel option RSS prevents manipulation of key hash and indirection table,
 * so the RSS knobs only exist when the driver is built without it.
 */
#ifndef RSS
static void	ena_sysctl_add_rss(struct ena_adapter *adapter);
static int	ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif
55
56 /* Limit max ENI sample rate to be an hour. */
57 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600
58 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1)
59
60 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
61 "ENA driver parameters");
62
63 /*
64 * Logging level for changing verbosity of the output
65 */
66 int ena_log_level = ENA_INFO;
67 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
68 &ena_log_level, 0, "Logging level indicating verbosity of the logs");
69
70 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
71 DRV_MODULE_VERSION, "ENA driver version");
72
73 /*
74 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
75 * Using 9k mbufs in low memory conditions might cause allocation to take a lot
76 * of time and lead to the OS instability as it needs to look for the contiguous
77 * pages.
78 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if
79 * the network performance is the priority, the 9k mbufs can be used.
80 */
81 int ena_enable_9k_mbufs = 0;
82 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
83 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
84
85 /*
86 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to
87 * false. This option may be important for platforms, which often handle packet
88 * headers on Tx with total header size greater than 96B, as it may
89 * reduce the latency.
90 * It also reduces the maximum Tx queue size by half, so it may cause more Tx
91 * packet drops.
92 */
93 bool ena_force_large_llq_header = false;
94 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
95 &ena_force_large_llq_header, 0,
96 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
97
98 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
99
/*
 * Populate the device sysctl tree of `adapter` with all groups provided by
 * this file: watchdog knobs, statistics, ENI metrics, tuneables and - only
 * when the kernel is built without option RSS - the RSS controls.
 */
void
ena_sysctl_add_nodes(struct ena_adapter *adapter)
{

	ena_sysctl_add_wd(adapter);
	ena_sysctl_add_stats(adapter);
	ena_sysctl_add_eni_metrics(adapter);
	ena_sysctl_add_tuneables(adapter);
#ifndef RSS
	ena_sysctl_add_rss(adapter);
#endif
}
111
112 static void
ena_sysctl_add_wd(struct ena_adapter * adapter)113 ena_sysctl_add_wd(struct ena_adapter *adapter)
114 {
115 device_t dev;
116
117 struct sysctl_ctx_list *ctx;
118 struct sysctl_oid *tree;
119 struct sysctl_oid_list *child;
120
121 dev = adapter->pdev;
122
123 ctx = device_get_sysctl_ctx(dev);
124 tree = device_get_sysctl_tree(dev);
125 child = SYSCTL_CHILDREN(tree);
126
127 /* Sysctl calls for Watchdog service */
128 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active",
129 CTLFLAG_RWTUN, &adapter->wd_active, 0,
130 "Watchdog is active");
131
132 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
133 CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
134 "Timeout for Keep Alive messages");
135
136 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
137 CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
138 "Timeout for TX completion");
139
140 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
141 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
142 "Number of TX queues to check per run");
143
144 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
145 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
146 "Max number of timeouted packets");
147 }
148
149 static void
ena_sysctl_add_stats(struct ena_adapter * adapter)150 ena_sysctl_add_stats(struct ena_adapter *adapter)
151 {
152 device_t dev;
153
154 struct ena_ring *tx_ring;
155 struct ena_ring *rx_ring;
156
157 struct ena_hw_stats *hw_stats;
158 struct ena_stats_dev *dev_stats;
159 struct ena_stats_tx *tx_stats;
160 struct ena_stats_rx *rx_stats;
161 struct ena_com_stats_admin *admin_stats;
162
163 struct sysctl_ctx_list *ctx;
164 struct sysctl_oid *tree;
165 struct sysctl_oid_list *child;
166
167 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
168 struct sysctl_oid *admin_node;
169 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
170 struct sysctl_oid_list *admin_list;
171
172 #define QUEUE_NAME_LEN 32
173 char namebuf[QUEUE_NAME_LEN];
174 int i;
175
176 dev = adapter->pdev;
177
178 ctx = device_get_sysctl_ctx(dev);
179 tree = device_get_sysctl_tree(dev);
180 child = SYSCTL_CHILDREN(tree);
181
182 tx_ring = adapter->tx_ring;
183 rx_ring = adapter->rx_ring;
184
185 hw_stats = &adapter->hw_stats;
186 dev_stats = &adapter->dev_stats;
187 admin_stats = &adapter->ena_dev->admin_queue.stats;
188
189 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired",
190 CTLFLAG_RD, &dev_stats->wd_expired,
191 "Watchdog expiry count");
192 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up",
193 CTLFLAG_RD, &dev_stats->interface_up,
194 "Network interface up count");
195 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
196 CTLFLAG_RD, &dev_stats->interface_down,
197 "Network interface down count");
198 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
199 CTLFLAG_RD, &dev_stats->admin_q_pause,
200 "Admin queue pauses");
201
202 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
203 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
204
205 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
206 namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
207 queue_list = SYSCTL_CHILDREN(queue_node);
208
209 adapter->que[i].oid = queue_node;
210
211 #ifdef RSS
212 /* Common stats */
213 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu",
214 CTLFLAG_RD, &adapter->que[i].cpu, 0, "CPU affinity");
215 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain",
216 CTLFLAG_RD, &adapter->que[i].domain, 0, "NUMA domain");
217 #endif
218
219 /* TX specific stats */
220 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
221 "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
222 tx_list = SYSCTL_CHILDREN(tx_node);
223
224 tx_stats = &tx_ring->tx_stats;
225
226 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
227 "count", CTLFLAG_RD,
228 &tx_stats->cnt, "Packets sent");
229 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
230 "bytes", CTLFLAG_RD,
231 &tx_stats->bytes, "Bytes sent");
232 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
233 "prepare_ctx_err", CTLFLAG_RD,
234 &tx_stats->prepare_ctx_err,
235 "TX buffer preparation failures");
236 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
237 "dma_mapping_err", CTLFLAG_RD,
238 &tx_stats->dma_mapping_err, "DMA mapping failures");
239 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
240 "doorbells", CTLFLAG_RD,
241 &tx_stats->doorbells, "Queue doorbells");
242 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
243 "missing_tx_comp", CTLFLAG_RD,
244 &tx_stats->missing_tx_comp, "TX completions missed");
245 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
246 "bad_req_id", CTLFLAG_RD,
247 &tx_stats->bad_req_id, "Bad request id count");
248 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
249 "mbuf_collapses", CTLFLAG_RD,
250 &tx_stats->collapse,
251 "Mbuf collapse count");
252 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
253 "mbuf_collapse_err", CTLFLAG_RD,
254 &tx_stats->collapse_err,
255 "Mbuf collapse failures");
256 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
257 "queue_wakeups", CTLFLAG_RD,
258 &tx_stats->queue_wakeup, "Queue wakeups");
259 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
260 "queue_stops", CTLFLAG_RD,
261 &tx_stats->queue_stop, "Queue stops");
262 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
263 "llq_buffer_copy", CTLFLAG_RD,
264 &tx_stats->llq_buffer_copy,
265 "Header copies for llq transaction");
266 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
267 "unmask_interrupt_num", CTLFLAG_RD,
268 &tx_stats->unmask_interrupt_num,
269 "Unmasked interrupt count");
270
271 /* RX specific stats */
272 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
273 "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
274 rx_list = SYSCTL_CHILDREN(rx_node);
275
276 rx_stats = &rx_ring->rx_stats;
277
278 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
279 "count", CTLFLAG_RD,
280 &rx_stats->cnt, "Packets received");
281 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
282 "bytes", CTLFLAG_RD,
283 &rx_stats->bytes, "Bytes received");
284 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
285 "refil_partial", CTLFLAG_RD,
286 &rx_stats->refil_partial, "Partial refilled mbufs");
287 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
288 "csum_bad", CTLFLAG_RD,
289 &rx_stats->csum_bad, "Bad RX checksum");
290 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
291 "mbuf_alloc_fail", CTLFLAG_RD,
292 &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs");
293 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
294 "mjum_alloc_fail", CTLFLAG_RD,
295 &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs");
296 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
297 "dma_mapping_err", CTLFLAG_RD,
298 &rx_stats->dma_mapping_err, "DMA mapping errors");
299 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
300 "bad_desc_num", CTLFLAG_RD,
301 &rx_stats->bad_desc_num, "Bad descriptor count");
302 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
303 "bad_req_id", CTLFLAG_RD,
304 &rx_stats->bad_req_id, "Bad request id count");
305 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
306 "empty_rx_ring", CTLFLAG_RD,
307 &rx_stats->empty_rx_ring, "RX descriptors depletion count");
308 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
309 "csum_good", CTLFLAG_RD,
310 &rx_stats->csum_good, "Valid RX checksum calculations");
311 }
312
313 /* Stats read from device */
314 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
315 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
316 hw_list = SYSCTL_CHILDREN(hw_node);
317
318 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
319 &hw_stats->rx_packets, "Packets received");
320 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
321 &hw_stats->tx_packets, "Packets transmitted");
322 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
323 &hw_stats->rx_bytes, "Bytes received");
324 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
325 &hw_stats->tx_bytes, "Bytes transmitted");
326 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
327 &hw_stats->rx_drops, "Receive packet drops");
328 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
329 &hw_stats->tx_drops, "Transmit packet drops");
330
331 /* ENA Admin queue stats */
332 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
333 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
334 admin_list = SYSCTL_CHILDREN(admin_node);
335
336 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
337 &admin_stats->aborted_cmd, 0, "Aborted commands");
338 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
339 &admin_stats->submitted_cmd, 0, "Submitted commands");
340 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
341 &admin_stats->completed_cmd, 0, "Completed commands");
342 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
343 &admin_stats->out_of_space, 0, "Queue out of space");
344 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
345 &admin_stats->no_completion, 0, "Commands not completed");
346 }
347
348 static void
ena_sysctl_add_eni_metrics(struct ena_adapter * adapter)349 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
350 {
351 device_t dev;
352 struct ena_admin_eni_stats *eni_metrics;
353
354 struct sysctl_ctx_list *ctx;
355 struct sysctl_oid *tree;
356 struct sysctl_oid_list *child;
357
358 struct sysctl_oid *eni_node;
359 struct sysctl_oid_list *eni_list;
360
361 dev = adapter->pdev;
362
363 ctx = device_get_sysctl_ctx(dev);
364 tree = device_get_sysctl_tree(dev);
365 child = SYSCTL_CHILDREN(tree);
366
367 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
368 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
369 eni_list = SYSCTL_CHILDREN(eni_node);
370
371 eni_metrics = &adapter->eni_metrics;
372
373 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
374 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
375 "Inbound BW allowance exceeded");
376 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
377 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
378 "Outbound BW allowance exceeded");
379 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
380 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
381 "PPS allowance exceeded");
382 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
383 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
384 "Connection tracking allowance exceeded");
385 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
386 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
387 "Linklocal packet rate allowance exceeded");
388
389 /*
390 * Tuneable, which determines how often ENI metrics will be read.
391 * 0 means it's turned off. Maximum allowed value is limited by:
392 * ENI_METRICS_MAX_SAMPLE_INTERVAL.
393 */
394 SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval",
395 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
396 ena_sysctl_eni_metrics_interval, "SU",
397 "Interval in seconds for updating ENI emetrics. 0 turns off the update.");
398 }
399
400 static void
ena_sysctl_add_tuneables(struct ena_adapter * adapter)401 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
402 {
403 device_t dev;
404
405 struct sysctl_ctx_list *ctx;
406 struct sysctl_oid *tree;
407 struct sysctl_oid_list *child;
408
409 dev = adapter->pdev;
410
411 ctx = device_get_sysctl_ctx(dev);
412 tree = device_get_sysctl_tree(dev);
413 child = SYSCTL_CHILDREN(tree);
414
415 /* Tuneable number of buffers in the buf-ring (drbr) */
416 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
417 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
418 ena_sysctl_buf_ring_size, "I",
419 "Size of the Tx buffer ring (drbr).");
420
421 /* Tuneable number of the Rx ring size */
422 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
423 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
424 ena_sysctl_rx_queue_size, "I",
425 "Size of the Rx ring. The size should be a power of 2.");
426
427 /* Tuneable number of IO queues */
428 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
429 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
430 ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
431 }
432
433 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
434 #ifndef RSS
435 static void
ena_sysctl_add_rss(struct ena_adapter * adapter)436 ena_sysctl_add_rss(struct ena_adapter *adapter)
437 {
438 device_t dev;
439
440 struct sysctl_ctx_list *ctx;
441 struct sysctl_oid *tree;
442 struct sysctl_oid_list *child;
443
444 dev = adapter->pdev;
445
446 ctx = device_get_sysctl_ctx(dev);
447 tree = device_get_sysctl_tree(dev);
448 child = SYSCTL_CHILDREN(tree);
449
450 /* RSS options */
451 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
452 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
453 child = SYSCTL_CHILDREN(tree);
454
455 /* RSS hash key */
456 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
457 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
458 ena_sysctl_rss_key, "A", "RSS key.");
459
460 /* Tuneable RSS indirection table */
461 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
462 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
463 ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
464
465 /* RSS indirection table size */
466 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
467 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
468 "RSS indirection table size.");
469 }
470 #endif /* RSS */
471
472
473 /*
474 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
475 *
476 * Whether the nodes are registered or unregistered depends on a delta between
477 * the `old` and `new` parameters, representing the number of queues.
478 *
479 * This function is used to hide sysctl attributes for queue nodes which aren't
480 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
481 *
482 * NOTE:
483 * All unregistered nodes must be registered again at detach, i.e. by a call to
484 * this function.
485 */
486 void
ena_sysctl_update_queue_node_nb(struct ena_adapter * adapter,int old,int new)487 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
488 {
489 device_t dev;
490 struct sysctl_oid *oid;
491 int min, max, i;
492
493 dev = adapter->pdev;
494 min = MIN(old, new);
495 max = MIN(MAX(old, new), adapter->max_num_io_queues);
496
497 for (i = min; i < max; ++i) {
498 oid = adapter->que[i].oid;
499
500 sysctl_wlock();
501 if (old > new)
502 sysctl_unregister_oid(oid);
503 else
504 sysctl_register_oid(oid);
505 sysctl_wunlock();
506 }
507 }
508
509 static int
ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)510 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
511 {
512 struct ena_adapter *adapter = arg1;
513 uint32_t val;
514 int error;
515
516 ENA_LOCK_LOCK();
517 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
518 error = EINVAL;
519 goto unlock;
520 }
521
522 val = 0;
523 error = sysctl_wire_old_buffer(req, sizeof(val));
524 if (error == 0) {
525 val = adapter->buf_ring_size;
526 error = sysctl_handle_32(oidp, &val, 0, req);
527 }
528 if (error != 0 || req->newptr == NULL)
529 goto unlock;
530
531 if (!powerof2(val) || val == 0) {
532 ena_log(adapter->pdev, ERR,
533 "Requested new Tx buffer ring size (%u) is not a power of 2\n",
534 val);
535 error = EINVAL;
536 goto unlock;
537 }
538
539 if (val != adapter->buf_ring_size) {
540 ena_log(adapter->pdev, INFO,
541 "Requested new Tx buffer ring size: %d. Old size: %d\n",
542 val, adapter->buf_ring_size);
543
544 error = ena_update_buf_ring_size(adapter, val);
545 } else {
546 ena_log(adapter->pdev, ERR,
547 "New Tx buffer ring size is the same as already used: %u\n",
548 adapter->buf_ring_size);
549 }
550
551 unlock:
552 ENA_LOCK_UNLOCK();
553
554 return (error);
555 }
556
557 static int
ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)558 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
559 {
560 struct ena_adapter *adapter = arg1;
561 uint32_t val;
562 int error;
563
564 ENA_LOCK_LOCK();
565 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
566 error = EINVAL;
567 goto unlock;
568 }
569
570 val = 0;
571 error = sysctl_wire_old_buffer(req, sizeof(val));
572 if (error == 0) {
573 val = adapter->requested_rx_ring_size;
574 error = sysctl_handle_32(oidp, &val, 0, req);
575 }
576 if (error != 0 || req->newptr == NULL)
577 goto unlock;
578
579 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
580 ena_log(adapter->pdev, ERR,
581 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
582 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
583 error = EINVAL;
584 goto unlock;
585 }
586
587 /* Check if the parameter is power of 2 */
588 if (!powerof2(val)) {
589 ena_log(adapter->pdev, ERR,
590 "Requested new Rx queue size (%u) is not a power of 2\n",
591 val);
592 error = EINVAL;
593 goto unlock;
594 }
595
596 if (val != adapter->requested_rx_ring_size) {
597 ena_log(adapter->pdev, INFO,
598 "Requested new Rx queue size: %u. Old size: %u\n",
599 val, adapter->requested_rx_ring_size);
600
601 error = ena_update_queue_size(adapter,
602 adapter->requested_tx_ring_size, val);
603 } else {
604 ena_log(adapter->pdev, ERR,
605 "New Rx queue size is the same as already used: %u\n",
606 adapter->requested_rx_ring_size);
607 }
608
609 unlock:
610 ENA_LOCK_UNLOCK();
611
612 return (error);
613 }
614
615 /*
616 * Change number of effectively used IO queues adapter->num_io_queues
617 */
618 static int
ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)619 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
620 {
621 struct ena_adapter *adapter = arg1;
622 uint32_t old_num_queues, tmp = 0;
623 int error;
624
625 ENA_LOCK_LOCK();
626 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
627 error = EINVAL;
628 goto unlock;
629 }
630
631 error = sysctl_wire_old_buffer(req, sizeof(tmp));
632 if (error == 0) {
633 tmp = adapter->num_io_queues;
634 error = sysctl_handle_int(oidp, &tmp, 0, req);
635 }
636 if (error != 0 || req->newptr == NULL)
637 goto unlock;
638
639 if (tmp == 0) {
640 ena_log(adapter->pdev, ERR,
641 "Requested number of IO queues is zero\n");
642 error = EINVAL;
643 goto unlock;
644 }
645
646 /*
647 * The adapter::max_num_io_queues is the HW capability. The system
648 * resources availability may potentially be a tighter limit. Therefore
649 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
650 * always holds true, while the `adapter::msix_vecs` is variable across
651 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
652 */
653 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
654 ena_log(adapter->pdev, ERR,
655 "Requested number of IO queues is higher than maximum "
656 "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
657 error = EINVAL;
658 goto unlock;
659 }
660 if (tmp == adapter->num_io_queues) {
661 ena_log(adapter->pdev, ERR,
662 "Requested number of IO queues is equal to current value "
663 "(%u)\n", adapter->num_io_queues);
664 } else {
665 ena_log(adapter->pdev, INFO,
666 "Requested new number of IO queues: %u, current value: "
667 "%u\n", tmp, adapter->num_io_queues);
668
669 old_num_queues = adapter->num_io_queues;
670 error = ena_update_io_queue_nb(adapter, tmp);
671 if (error != 0)
672 return (error);
673
674 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
675 }
676
677 unlock:
678 ENA_LOCK_UNLOCK();
679
680 return (error);
681 }
682
683 static int
ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS)684 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS)
685 {
686 struct ena_adapter *adapter = arg1;
687 uint16_t interval;
688 int error;
689
690 ENA_LOCK_LOCK();
691 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
692 error = EINVAL;
693 goto unlock;
694 }
695
696 error = sysctl_wire_old_buffer(req, sizeof(interval));
697 if (error == 0) {
698 interval = adapter->eni_metrics_sample_interval;
699 error = sysctl_handle_16(oidp, &interval, 0, req);
700 }
701 if (error != 0 || req->newptr == NULL)
702 goto unlock;
703
704 if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) {
705 ena_log(adapter->pdev, ERR,
706 "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n",
707 ENI_METRICS_MAX_SAMPLE_INTERVAL);
708 error = EINVAL;
709 goto unlock;
710 }
711
712 if (interval == 0) {
713 ena_log(adapter->pdev, INFO,
714 "ENI metrics update is now turned off\n");
715 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
716 } else {
717 ena_log(adapter->pdev, INFO,
718 "ENI metrics update interval is set to: %"PRIu16" seconds\n",
719 interval);
720 }
721
722 adapter->eni_metrics_sample_interval = interval;
723
724 unlock:
725 ENA_LOCK_UNLOCK();
726
727 return (0);
728 }
729
730 #ifndef RSS
731 /*
732 * Change the Receive Side Scaling hash key.
733 */
734 static int
ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)735 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
736 {
737 struct ena_adapter *adapter = arg1;
738 struct ena_com_dev *ena_dev = adapter->ena_dev;
739 enum ena_admin_hash_functions ena_func;
740 char msg[ENA_HASH_KEY_MSG_SIZE];
741 char elem[3] = { 0 };
742 char *endp;
743 u8 rss_key[ENA_HASH_KEY_SIZE];
744 int error, i;
745
746 ENA_LOCK_LOCK();
747 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
748 error = EINVAL;
749 goto unlock;
750 }
751
752 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
753 error = ENOTSUP;
754 goto unlock;
755 }
756
757 error = sysctl_wire_old_buffer(req, sizeof(msg));
758 if (error != 0)
759 goto unlock;
760
761 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
762 if (error != 0) {
763 device_printf(adapter->pdev, "Cannot get hash function\n");
764 goto unlock;
765 }
766
767 if (ena_func != ENA_ADMIN_TOEPLITZ) {
768 error = EINVAL;
769 device_printf(adapter->pdev, "Unsupported hash algorithm\n");
770 goto unlock;
771 }
772
773 error = ena_rss_get_hash_key(ena_dev, rss_key);
774 if (error != 0) {
775 device_printf(adapter->pdev, "Cannot get hash key\n");
776 goto unlock;
777 }
778
779 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
780 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
781
782 error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
783 if (error != 0 || req->newptr == NULL)
784 goto unlock;
785
786 if (strlen(msg) != sizeof(msg) - 1) {
787 error = EINVAL;
788 device_printf(adapter->pdev, "Invalid key size\n");
789 goto unlock;
790 }
791
792 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
793 strncpy(elem, &msg[i * 2], 2);
794 rss_key[i] = strtol(elem, &endp, 16);
795
796 /* Both hex nibbles in the string must be valid to continue. */
797 if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
798 error = EINVAL;
799 device_printf(adapter->pdev,
800 "Invalid key hex value: '%c'\n", *endp);
801 goto unlock;
802 }
803 }
804
805 error = ena_rss_set_hash(ena_dev, rss_key);
806 if (error != 0)
807 device_printf(adapter->pdev, "Cannot fill hash key\n");
808
809 unlock:
810 ENA_LOCK_UNLOCK();
811
812 return (error);
813 }
814
815 /*
816 * Change the Receive Side Scaling indirection table.
817 *
818 * The sysctl entry string consists of one or more `x:y` keypairs, where
819 * x stands for the table index and y for its new value.
820 * Table indices that don't need to be updated can be omitted from the string
821 * and will retain their existing values. If an index is entered more than once,
822 * the last value is used.
823 *
824 * Example:
825 * To update two selected indices in the RSS indirection table, e.g. setting
826 * index 0 to queue 5 and then index 5 to queue 0, the below command should be
827 * used:
828 * sysctl dev.ena.0.rss.indir_table="0:5 5:0"
829 */
830 static int
ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)831 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
832 {
833 int num_queues, error;
834 struct ena_adapter *adapter = arg1;
835 struct ena_com_dev *ena_dev;
836 struct ena_indir *indir;
837 char *msg, *buf, *endp;
838 uint32_t idx, value;
839
840 ENA_LOCK_LOCK();
841 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
842 error = EINVAL;
843 goto unlock;
844 }
845
846 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
847 error = ENOTSUP;
848 goto unlock;
849 }
850
851 ena_dev = adapter->ena_dev;
852 indir = adapter->rss_indir;
853 msg = indir->sysctl_buf;
854
855 if (unlikely(indir == NULL)) {
856 error = ENOTSUP;
857 goto unlock;
858 }
859
860 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
861 if (error != 0 || req->newptr == NULL)
862 goto unlock;
863
864 num_queues = adapter->num_io_queues;
865
866 /*
867 * This sysctl expects msg to be a list of `x:y` record pairs,
868 * where x is the indirection table index and y is its value.
869 */
870 for (buf = msg; *buf != '\0'; buf = endp) {
871 idx = strtol(buf, &endp, 10);
872
873 if (endp == buf || idx < 0) {
874 device_printf(adapter->pdev, "Invalid index: %s\n",
875 buf);
876 error = EINVAL;
877 break;
878 }
879
880 if (idx >= ENA_RX_RSS_TABLE_SIZE) {
881 device_printf(adapter->pdev, "Index %d out of range\n",
882 idx);
883 error = ERANGE;
884 break;
885 }
886
887 buf = endp;
888
889 if (*buf++ != ':') {
890 device_printf(adapter->pdev, "Missing ':' separator\n");
891 error = EINVAL;
892 break;
893 }
894
895 value = strtol(buf, &endp, 10);
896
897 if (endp == buf || value < 0) {
898 device_printf(adapter->pdev, "Invalid value: %s\n",
899 buf);
900 error = EINVAL;
901 break;
902 }
903
904 if (value >= num_queues) {
905 device_printf(adapter->pdev, "Value %d out of range\n",
906 value);
907 error = ERANGE;
908 break;
909 }
910
911 indir->table[idx] = value;
912 }
913
914 if (error != 0) /* Reload indirection table with last good data. */
915 ena_rss_indir_get(adapter, indir->table);
916
917 /* At this point msg has been clobbered by sysctl_handle_string. */
918 ena_rss_copy_indir_buf(msg, indir->table);
919
920 if (error == 0)
921 error = ena_rss_indir_set(adapter, indir->table);
922
923 unlock:
924 ENA_LOCK_UNLOCK();
925
926 return (error);
927 }
928 #endif /* RSS */
929