1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2
3name: netdev
4
# Free-form description of the spec named above.
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
    # XDP feature flags. This set is the `enum` of the `xdp-features`
    # attribute in the `dev` attribute set.
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
    # XDP receive-metadata capability flags; each entry's doc names the
    # bpf_xdp_metadata_rx_*() function it relates to. This set is the `enum`
    # of the `xdp-rx-metadata-features` attribute in the `dev` set.
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc:
52          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
53      -
54        name: hash
55        doc:
56          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
57      -
58        name: vlan-tag
59        doc:
60          Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
61  -
    # AF_XDP feature flags; this set is the `enum` of the `xsk-features`
    # attribute in the `dev` set.
62    type: flags
63    name: xsk-flags
64    entries:
65      -
66        name: tx-timestamp
67        doc:
68          HW timestamping egress packets is supported by the driver.
69      -
70        name: tx-checksum
71        doc:
72          L3 checksum HW offload is supported by the driver.
73  -
    # Queue direction enum; referenced by `type` in the `queue` set and by
    # `queue-type` in the `qstats` set.
74    name: queue-type
75    type: enum
76    entries: [ rx, tx ]
77  -
    # Flag set with a single entry (`queue`); referenced by the `scope`
    # attribute in the `qstats` set.
78    name: qstats-scope
79    type: flags
80    entries: [ queue ]
81
82attribute-sets:
83  -
    # Attributes describing a netdev. Every attribute except `pad` is listed
    # in the `dev-get` reply; `ifindex` is also the `dev-get` do-request key.
84    name: dev
85    attributes:
86      -
87        name: ifindex
88        doc: netdev ifindex
89        type: u32
90        checks:
91          min: 1
92      -
93        name: pad
94        type: pad
95      -
96        name: xdp-features
97        doc: Bitmask of enabled xdp-features.
98        type: u64
99        enum: xdp-act
100      -
101        name: xdp-zc-max-segs
102        doc: max fragment count supported by ZC driver
103        type: u32
104        checks:
105          min: 1
106      -
107        name: xdp-rx-metadata-features
108        doc: Bitmask of supported XDP receive metadata features.
109             See Documentation/networking/xdp-rx-metadata.rst for more details.
110        type: u64
111        enum: xdp-rx-metadata
112      -
113        name: xsk-features
114        doc: Bitmask of enabled AF_XDP features.
115        type: u64
116        enum: xsk-flags
117  -
    # Empty attribute set; used as the nested type of the `io-uring`
    # attribute in both the `page-pool` and `queue` sets.
118    name: io-uring-provider-info
119    attributes: []
120  -
    # Attributes of a Page Pool instance. All of them are listed in the
    # `page-pool-get` reply; `id` is the `page-pool-get` do-request key.
121    name: page-pool
122    attributes:
123      -
124        name: id
125        doc: Unique ID of a Page Pool instance.
126        type: uint
127        checks:
128          min: 1
129          max: u32-max
130      -
        # NOTE(review): the doc below says 0 may be reported, while `checks`
        # sets min: 1 - presumably the check only constrains input; confirm.
131        name: ifindex
132        doc: |
133          ifindex of the netdev to which the pool belongs.
134          May be reported as 0 if the page pool was allocated for a netdev
135          which got destroyed already (page pools may outlast their netdevs
136          because they wait for all memory to be returned).
137        type: u32
138        checks:
139          min: 1
140          max: s32-max
141      -
142        name: napi-id
143        doc: Id of NAPI using this Page Pool instance.
144        type: uint
145        checks:
146          min: 1
147          max: u32-max
148      -
149        name: inflight
150        type: uint
151        doc: |
152          Number of outstanding references to this page pool (allocated
153          but yet to be freed pages). Allocated pages may be held in
154          socket receive queues, driver receive ring, page pool recycling
155          ring, the page pool cache, etc.
156      -
157        name: inflight-mem
158        type: uint
159        doc: |
160          Amount of memory held by inflight pages.
161      -
162        name: detach-time
163        type: uint
164        doc: |
165          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
166          the driver. Once detached Page Pool can no longer be used to
167          allocate memory.
168          Page Pools wait for all the memory allocated from them to be freed
169          before truly disappearing. "Detached" Page Pools cannot be
170          "re-attached", they are just waiting to disappear.
171          Attribute is absent if Page Pool has not been detached, and
172          can still be used to allocate new memory.
173      -
174        name: dmabuf
175        doc: ID of the dmabuf this page-pool is attached to.
176        type: u32
177      -
178        name: io-uring
179        doc: io-uring memory provider information.
180        type: nest
181        nested-attributes: io-uring-provider-info
182  -
    # Subset of `page-pool` holding only `id` and `ifindex`; used as the
    # nested type of the `info` attribute in `page-pool-stats`.
183    name: page-pool-info
184    subset-of: page-pool
185    attributes:
186      -
187        name: id
188      -
189        name: ifindex
190  -
    # Page pool counters plus a nested `info` attribute identifying the pool.
    # `alloc-fast` carries an explicit `value: 8`; per its inline comment the
    # ids before it are kept in reserve for future metadata.
191    name: page-pool-stats
192    doc: |
193      Page pool statistics, see docs for struct page_pool_stats
194      for information about individual statistics.
195    attributes:
196      -
197        name: info
198        doc: Page pool identifying information.
199        type: nest
200        nested-attributes: page-pool-info
201      -
202        name: alloc-fast
203        type: uint
204        value: 8 # reserve some attr ids in case we need more metadata later
205      -
206        name: alloc-slow
207        type: uint
208      -
209        name: alloc-slow-high-order
210        type: uint
211      -
212        name: alloc-empty
213        type: uint
214      -
215        name: alloc-refill
216        type: uint
217      -
218        name: alloc-waive
219        type: uint
220      -
221        name: recycle-cached
222        type: uint
223      -
224        name: recycle-cache-full
225        type: uint
226      -
227        name: recycle-ring
228        type: uint
229      -
230        name: recycle-ring-full
231        type: uint
232      -
233        name: recycle-released-refcnt
234        type: uint
235
236  -
    # Attributes of a NAPI instance. All of them are listed in the `napi-get`
    # reply; `napi-set` accepts `id`, `defer-hard-irqs`, `gro-flush-timeout`
    # and `irq-suspend-timeout`.
237    name: napi
238    attributes:
239      -
240        name: ifindex
241        doc: ifindex of the netdevice to which NAPI instance belongs.
242        type: u32
243        checks:
244          min: 1
245      -
246        name: id
247        doc: ID of the NAPI instance.
248        type: u32
249      -
250        name: irq
251        doc: The associated interrupt vector number for the napi
252        type: u32
253      -
254        name: pid
255        doc: PID of the napi thread, if NAPI is configured to operate in
256             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
257             softirq context), the attribute will be absent.
258        type: u32
259      -
260        name: defer-hard-irqs
261        doc: The number of consecutive empty polls before IRQ deferral ends
262             and hardware IRQs are re-enabled.
263        type: u32
264        checks:
265          max: s32-max
266      -
267        name: gro-flush-timeout
268        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
269             timer which schedules NAPI processing. Additionally, a non-zero
270             value will also prevent GRO from flushing recent super-frames at
271             the end of a NAPI cycle. This may add receive latency in exchange
272             for reducing the number of frames processed by the network stack.
273        type: uint
274      -
275        name: irq-suspend-timeout
276        doc: The timeout, in nanoseconds, of how long to suspend irq
277             processing, if event polling finds events
278        type: uint
279  -
    # Empty attribute set; used as the nested type of the `xsk` attribute
    # in the `queue` set.
280    name: xsk-info
281    attributes: []
282  -
    # Attributes of a device queue. All of them are listed in the `queue-get`
    # reply; the `queue-get` do request is keyed by `ifindex`, `type` and `id`.
    # `id` and `type` are also exposed through the `queue-id` subset.
283    name: queue
284    attributes:
285      -
286        name: id
287        doc: Queue index; most queue types are indexed like a C array, with
288             indexes starting at 0 and ending at queue count - 1. Queue indexes
289             are scoped to an interface and queue type.
290        type: u32
291      -
292        name: ifindex
293        doc: ifindex of the netdevice to which the queue belongs.
294        type: u32
295        checks:
296          min: 1
297      -
298        name: type
299        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
300             XDP TX queues allocated in the kernel are not linked to NAPIs and
301             thus not listed. AF_XDP queues will have more information set in
302             the xsk attribute.
303        type: u32
304        enum: queue-type
305      -
306        name: napi-id
307        doc: ID of the NAPI instance which services this queue.
308        type: u32
309      -
310        name: dmabuf
311        doc: ID of the dmabuf attached to this queue, if any.
312        type: u32
313      -
314        name: io-uring
315        doc: io_uring memory provider information.
316        type: nest
317        nested-attributes: io-uring-provider-info
318      -
319        name: xsk
320        doc: XSK information for this queue, if any.
321        type: nest
322        nested-attributes: xsk-info
323  -
    # Statistics attribute set used by the `qstats-get` operation. The first
    # four attributes identify what the counters refer to (`ifindex`,
    # `queue-type`, `queue-id`) and how they are scoped (`scope`); the
    # counters themselves follow.
324    name: qstats
325    doc: |
326      Get device statistics, scoped to a device or a queue.
327      These statistics extend (and partially duplicate) statistics available
328      in struct rtnl_link_stats64.
329      Value of the `scope` attribute determines how statistics are
330      aggregated. When aggregated for the entire device the statistics
331      represent the total number of events since last explicit reset of
332      the device (i.e. not a reconfiguration like changing queue count).
333      When reported per-queue, however, the statistics may not add
334      up to the total number of events, will only be reported for currently
335      active objects, and will likely report the number of events since last
336      reconfiguration.
337    attributes:
338      -
339        name: ifindex
340        doc: ifindex of the netdevice to which stats belong.
341        type: u32
342        checks:
343          min: 1
344      -
345        name: queue-type
346        doc: Queue type as rx, tx, for queue-id.
347        type: u32
348        enum: queue-type
349      -
350        name: queue-id
351        doc: Queue ID, if stats are scoped to a single queue instance.
352        type: u32
353      -
354        name: scope
355        doc: |
356          What object type should be used to iterate over the stats.
357        type: uint
358        enum: qstats-scope
359      -
        # Base packet/byte counters. These four are the only counters listed
        # in the `qstats-get` reply. `rx-packets` carries an explicit
        # `value: 8`; per its inline comment the ids before it are reserved.
360        name: rx-packets
361        doc: |
362          Number of wire packets successfully received and passed to the stack.
363          For drivers supporting XDP, XDP is considered the first layer
364          of the stack, so packets consumed by XDP are still counted here.
365        type: uint
366        value: 8 # reserve some attr ids in case we need more metadata later
367      -
368        name: rx-bytes
369        doc: Successfully received bytes, see `rx-packets`.
370        type: uint
371      -
372        name: tx-packets
373        doc: |
374          Number of wire packets successfully sent. Packet is considered to be
375          successfully sent once it is in device memory (usually this means
376          the device has issued a DMA completion for the packet).
377        type: uint
378      -
379        name: tx-bytes
380        doc: Successfully sent bytes, see `tx-packets`.
381        type: uint
382      -
        # Receive-side counters: allocation failures, device drops, checksum
        # outcomes, HW-GRO coalescing and rate-limit drops.
383        name: rx-alloc-fail
384        doc: |
385          Number of times skb or buffer allocation failed on the Rx datapath.
386          Allocation failure may, or may not result in a packet drop, depending
387          on driver implementation and whether system recovers quickly.
388        type: uint
389      -
390        name: rx-hw-drops
391        doc: |
392          Number of all packets which entered the device, but never left it,
393          including but not limited to: packets dropped due to lack of buffer
394          space, processing errors, explicit or implicit policies and packet
395          filters.
396        type: uint
397      -
398        name: rx-hw-drop-overruns
399        doc: |
400          Number of packets dropped due to transient lack of resources, such as
401          buffer space, host descriptors etc.
402        type: uint
403      -
404        name: rx-csum-complete
405        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
406        type: uint
407      -
408        name: rx-csum-unnecessary
409        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
410        type: uint
411      -
412        name: rx-csum-none
413        doc: Number of packets that were not checksummed by device.
414        type: uint
415      -
416        name: rx-csum-bad
417        doc: |
418          Number of packets with bad checksum. The packets are not discarded,
419          but still delivered to the stack.
420        type: uint
421      -
422        name: rx-hw-gro-packets
423        doc: |
424          Number of packets that were coalesced from smaller packets by the device.
425          Counts only packets coalesced with the HW-GRO netdevice feature,
426          LRO-coalesced packets are not counted.
427        type: uint
428      -
429        name: rx-hw-gro-bytes
430        doc: See `rx-hw-gro-packets`.
431        type: uint
432      -
433        name: rx-hw-gro-wire-packets
434        doc: |
435          Number of packets that were coalesced to bigger packets with the HW-GRO
436          netdevice feature. LRO-coalesced packets are not counted.
437        type: uint
438      -
439        name: rx-hw-gro-wire-bytes
440        doc: See `rx-hw-gro-wire-packets`.
441        type: uint
442      -
443        name: rx-hw-drop-ratelimits
444        doc: |
445          Number of the packets dropped by the device due to the received
446          packets bitrate exceeding the device rate limit.
447        type: uint
448      -
        # Transmit-side counters: device drops, checksum handling, HW-GSO
        # segmentation, rate-limit drops and queue stop/wake events.
449        name: tx-hw-drops
450        doc: |
451          Number of packets that arrived at the device but never left it,
452          encompassing packets dropped for reasons such as processing errors, as
453          well as those affected by explicitly defined policies and packet
454          filtering criteria.
455        type: uint
456      -
457        name: tx-hw-drop-errors
458        doc: Number of packets dropped because they were invalid or malformed.
459        type: uint
460      -
461        name: tx-csum-none
462        doc: |
463          Number of packets that did not require the device to calculate the
464          checksum.
465        type: uint
466      -
467        name: tx-needs-csum
468        doc: |
469          Number of packets that required the device to calculate the checksum.
470          This counter includes the number of GSO wire packets for which device
471          calculated the L4 checksum.
472        type: uint
473      -
474        name: tx-hw-gso-packets
475        doc: |
476          Number of packets that necessitated segmentation into smaller packets
477          by the device.
478        type: uint
479      -
480        name: tx-hw-gso-bytes
481        doc: See `tx-hw-gso-packets`.
482        type: uint
483      -
484        name: tx-hw-gso-wire-packets
485        doc: |
486          Number of wire-sized packets generated by processing
487          `tx-hw-gso-packets`
488        type: uint
489      -
490        name: tx-hw-gso-wire-bytes
491        doc: See `tx-hw-gso-wire-packets`.
492        type: uint
493      -
494        name: tx-hw-drop-ratelimits
495        doc: |
496          Number of the packets dropped by the device due to the transmit
497          packets bitrate exceeding the device rate limit.
498        type: uint
499      -
500        name: tx-stop
501        doc: |
502          Number of times driver paused accepting new tx packets
503          from the stack to this queue, because the queue was full.
504          Note that if BQL is supported and enabled on the device
505          the networking stack will avoid queuing a lot of data at once.
506        type: uint
507      -
508        name: tx-wake
509        doc: |
510          Number of times driver re-started accepting send
511          requests to this queue from the stack.
512        type: uint
513  -
    # Subset of `queue` holding only `id` and `type`; used as the nested type
    # of the `queues` attribute in the `dmabuf` set.
514    name: queue-id
515    subset-of: queue
516    attributes:
517      -
518        name: id
519      -
520        name: type
521  -
    # Attributes used by the `bind-rx` operation: the request carries
    # `ifindex`, `fd` and `queues`; the reply carries `id`.
522    name: dmabuf
523    attributes:
524      -
525        name: ifindex
526        doc: netdev ifindex to bind the dmabuf to.
527        type: u32
528        checks:
529          min: 1
530      -
        # `multi-attr: true` is set, so this nest is declared as repeatable.
531        name: queues
532        doc: receive queues to bind the dmabuf to.
533        type: nest
534        nested-attributes: queue-id
535        multi-attr: true
536      -
537        name: fd
538        doc: dmabuf file descriptor to bind.
539        type: u32
540      -
541        name: id
542        doc: id of the dmabuf binding
543        type: u32
544        checks:
545          min: 1
546
547operations:
548  list:
549    -
      # `do` is keyed by `ifindex`. The `do` reply is anchored as `dev-all`
      # and reused verbatim as the `dump` reply via the `*dev-all` alias.
550      name: dev-get
551      doc: Get / dump information about a netdev.
552      attribute-set: dev
553      do:
554        request:
555          attributes:
556            - ifindex
557        reply: &dev-all
558          attributes:
559            - ifindex
560            - xdp-features
561            - xdp-zc-max-segs
562            - xdp-rx-metadata-features
563            - xsk-features
564      dump:
565        reply: *dev-all
566    -
      # Notification defined by reference to `dev-get` (`notify:`) and tied
      # to the `mgmt` multicast group declared under `mcast-groups`.
567      name: dev-add-ntf
568      doc: Notification about device appearing.
569      notify: dev-get
570      mcgrp: mgmt
571    -
      # Notification defined by reference to `dev-get` (`notify:`) and tied
      # to the `mgmt` multicast group declared under `mcast-groups`.
572      name: dev-del-ntf
573      doc: Notification about device disappearing.
574      notify: dev-get
575      mcgrp: mgmt
576    -
      # Notification defined by reference to `dev-get` (`notify:`) and tied
      # to the `mgmt` multicast group declared under `mcast-groups`.
577      name: dev-change-ntf
578      doc: Notification about device configuration being changed.
579      notify: dev-get
580      mcgrp: mgmt
581    -
      # `do` is keyed by `id`. The `do` reply is anchored as `pp-reply` and
      # reused as the `dump` reply. `config-cond: page-pool` is set -
      # presumably gating the operation on a build option; confirm.
582      name: page-pool-get
583      doc: |
584        Get / dump information about Page Pools.
585        (Only Page Pools associated with a net_device can be listed.)
586      attribute-set: page-pool
587      do:
588        request:
589          attributes:
590            - id
591        reply: &pp-reply
592          attributes:
593            - id
594            - ifindex
595            - napi-id
596            - inflight
597            - inflight-mem
598            - detach-time
599            - dmabuf
600            - io-uring
601      dump:
602        reply: *pp-reply
603      config-cond: page-pool
604    -
      # Notification defined by reference to `page-pool-get` (`notify:`) and
      # tied to the `page-pool` multicast group; carries the same
      # `config-cond` as `page-pool-get`.
605      name: page-pool-add-ntf
606      doc: Notification about page pool appearing.
607      notify: page-pool-get
608      mcgrp: page-pool
609      config-cond: page-pool
610    -
      # Notification defined by reference to `page-pool-get` (`notify:`) and
      # tied to the `page-pool` multicast group; carries the same
      # `config-cond` as `page-pool-get`.
611      name: page-pool-del-ntf
612      doc: Notification about page pool disappearing.
613      notify: page-pool-get
614      mcgrp: page-pool
615      config-cond: page-pool
616    -
      # Notification defined by reference to `page-pool-get` (`notify:`) and
      # tied to the `page-pool` multicast group; carries the same
      # `config-cond` as `page-pool-get`.
617      name: page-pool-change-ntf
618      doc: Notification about page pool configuration being changed.
619      notify: page-pool-get
620      mcgrp: page-pool
621      config-cond: page-pool
622    -
      # `do` is keyed by the nested `info` attribute (a `page-pool-info`
      # nest). The `do` reply is anchored as `pp-stats-reply` and reused as
      # the `dump` reply. Uses its own `config-cond` value, `page-pool-stats`.
623      name: page-pool-stats-get
624      doc: Get page pool statistics.
625      attribute-set: page-pool-stats
626      do:
627        request:
628          attributes:
629            - info
630        reply: &pp-stats-reply
631          attributes:
632            - info
633            - alloc-fast
634            - alloc-slow
635            - alloc-slow-high-order
636            - alloc-empty
637            - alloc-refill
638            - alloc-waive
639            - recycle-cached
640            - recycle-cache-full
641            - recycle-ring
642            - recycle-ring-full
643            - recycle-released-refcnt
644      dump:
645        reply: *pp-stats-reply
646      config-cond: page-pool-stats
647    -
      # `do` is keyed by (`ifindex`, `type`, `id`); `dump` takes only
      # `ifindex` in its request. Both share the reply anchored as
      # `queue-get-op`.
648      name: queue-get
649      doc: Get queue information from the kernel.
650           Only configured queues will be reported (as opposed to all available
651           hardware queues).
652      attribute-set: queue
653      do:
654        request:
655          attributes:
656            - ifindex
657            - type
658            - id
659        reply: &queue-get-op
660          attributes:
661            - id
662            - type
663            - napi-id
664            - ifindex
665            - dmabuf
666            - io-uring
667            - xsk
668      dump:
669        request:
670          attributes:
671            - ifindex
672        reply: *queue-get-op
673    -
      # `do` is keyed by the NAPI `id`; `dump` takes `ifindex` in its
      # request. Both share the reply anchored as `napi-get-op`.
674      name: napi-get
675      doc: Get information about NAPI instances configured on the system.
676      attribute-set: napi
677      do:
678        request:
679          attributes:
680            - id
681        reply: &napi-get-op
682          attributes:
683            - id
684            - ifindex
685            - irq
686            - pid
687            - defer-hard-irqs
688            - gro-flush-timeout
689            - irq-suspend-timeout
690      dump:
691        request:
692          attributes:
693            - ifindex
694        reply: *napi-get-op
695    -
      # Dump-only operation (no `do` section); the request takes `ifindex`
      # and `scope`.
      # NOTE(review): the reply lists only the four base counters, although
      # the `qstats` set defines many more - confirm whether this list is
      # meant to be exhaustive.
696      name: qstats-get
697      doc: |
698        Get / dump fine grained statistics. Which statistics are reported
699        depends on the device and the driver, and whether the driver stores
700        software counters per-queue.
701      attribute-set: qstats
702      dump:
703        request:
704          attributes:
705            - ifindex
706            - scope
707        reply:
708          attributes:
709            - ifindex
710            - queue-type
711            - queue-id
712            - rx-packets
713            - rx-bytes
714            - tx-packets
715            - tx-bytes
716    -
      # Do-only operation flagged `admin-perm`. The request carries
      # `ifindex`, `fd` and `queues`; the reply returns the binding `id`.
717      name: bind-rx
718      doc: Bind dmabuf to netdev
719      attribute-set: dmabuf
720      flags: [ admin-perm ]
721      do:
722        request:
723          attributes:
724            - ifindex
725            - fd
726            - queues
727        reply:
728          attributes:
729            - id
730    -
      # Do-only operation flagged `admin-perm`. The request carries the NAPI
      # `id` plus the three settable values; no reply section is declared.
731      name: napi-set
732      doc: Set configurable NAPI instance settings.
733      attribute-set: napi
734      flags: [ admin-perm ]
735      do:
736        request:
737          attributes:
738            - id
739            - defer-hard-irqs
740            - gro-flush-timeout
741            - irq-suspend-timeout
742
743kernel-family:
  # Kernel-side settings: one header to include and the type named by
  # `sock-priv` (a `struct list_head`, which linux/list.h presumably
  # provides - confirm).
744  headers: [ "linux/list.h" ]
745  sock-priv: struct list_head
746
747mcast-groups:
  # Multicast groups referenced by the notification operations: `mgmt` by
  # the dev-*-ntf entries and `page-pool` by the page-pool-*-ntf entries.
748  list:
749    -
750      name: mgmt
751    -
752      name: page-pool
753