1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2021 Ng Peng Nam Sean
5 * Copyright (c) 2022 Alexander V. Chernikov <[email protected]>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include "opt_route.h"
33 #include <sys/types.h>
34 #include <sys/malloc.h>
35 #include <sys/rmlock.h>
36 #include <sys/socket.h>
37
38 #include <net/if.h>
39 #include <net/route.h>
40 #include <net/route/nhop.h>
41 #include <net/route/route_ctl.h>
42 #include <net/route/route_var.h>
43 #include <netinet6/scope6_var.h>
44 #include <netlink/netlink.h>
45 #include <netlink/netlink_ctl.h>
46 #include <netlink/netlink_route.h>
47 #include <netlink/route/route_var.h>
48
49 #define DEBUG_MOD_NAME nl_route
50 #define DEBUG_MAX_LEVEL LOG_DEBUG3
51 #include <netlink/netlink_debug.h>
52 _DECLARE_DEBUG(LOG_INFO);
53
54 static unsigned char
get_rtm_type(const struct nhop_object * nh)55 get_rtm_type(const struct nhop_object *nh)
56 {
57 int nh_flags = nh->nh_flags;
58
59 /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */
60 if (nh_flags & NHF_BLACKHOLE)
61 return (RTN_BLACKHOLE);
62 else if (nh_flags & NHF_REJECT)
63 return (RTN_PROHIBIT);
64 return (RTN_UNICAST);
65 }
66
67 static uint8_t
nl_get_rtm_protocol(const struct nhop_object * nh)68 nl_get_rtm_protocol(const struct nhop_object *nh)
69 {
70 #ifdef ROUTE_MPATH
71 if (NH_IS_NHGRP(nh)) {
72 const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh;
73 uint8_t origin = nhgrp_get_origin(nhg);
74 if (origin != RTPROT_UNSPEC)
75 return (origin);
76 nh = nhg->nhops[0];
77 }
78 #endif
79 uint8_t origin = nhop_get_origin(nh);
80 if (origin != RTPROT_UNSPEC)
81 return (origin);
82 /* TODO: remove guesswork once all kernel users fill in origin */
83 int rt_flags = nhop_get_rtflags(nh);
84 if (rt_flags & RTF_PROTO1)
85 return (RTPROT_ZEBRA);
86 if (rt_flags & RTF_STATIC)
87 return (RTPROT_STATIC);
88 return (RTPROT_KERNEL);
89 }
90
91 static int
get_rtmsg_type_from_rtsock(int cmd)92 get_rtmsg_type_from_rtsock(int cmd)
93 {
94 switch (cmd) {
95 case RTM_ADD:
96 case RTM_CHANGE:
97 case RTM_GET:
98 return NL_RTM_NEWROUTE;
99 case RTM_DELETE:
100 return NL_RTM_DELROUTE;
101 }
102
103 return (0);
104 }
105
106 /*
107 * fibnum heuristics
108 *
109 * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS
110 * msg rtm_table RTA_TABLE result
111 * RTM_GETROUTE/dump 0 - RT_ALL_FIBS
112 * RTM_GETROUTE/dump 1 - 1
113 * RTM_GETROUTE/get 0 - 0
114 *
115 */
116
117 static struct nhop_object *
rc_get_nhop(const struct rib_cmd_info * rc)118 rc_get_nhop(const struct rib_cmd_info *rc)
119 {
120 return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new);
121 }
122
123 static void
dump_rc_nhop_gw(struct nl_writer * nw,const struct nhop_object * nh)124 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh)
125 {
126 #ifdef INET6
127 int upper_family;
128 #endif
129
130 switch (nhop_get_neigh_family(nh)) {
131 case AF_LINK:
132 /* onlink prefix, skip */
133 break;
134 case AF_INET:
135 nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr);
136 break;
137 #ifdef INET6
138 case AF_INET6:
139 upper_family = nhop_get_upper_family(nh);
140 if (upper_family == AF_INET6) {
141 struct in6_addr gw6 = nh->gw6_sa.sin6_addr;
142 in6_clearscope(&gw6);
143
144 nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6);
145 } else if (upper_family == AF_INET) {
146 /* IPv4 over IPv6 */
147 struct in6_addr gw6 = nh->gw6_sa.sin6_addr;
148 in6_clearscope(&gw6);
149
150 char buf[20];
151 struct rtvia *via = (struct rtvia *)&buf[0];
152 via->rtvia_family = AF_INET6;
153 memcpy(via->rtvia_addr, &gw6, 16);
154 nlattr_add(nw, NL_RTA_VIA, 17, via);
155 }
156 break;
157 #endif
158 }
159 }
160
161 static void
dump_rc_nhop_mtu(struct nl_writer * nw,const struct nhop_object * nh)162 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh)
163 {
164 int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t);
165 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
166
167 if (nla == NULL)
168 return;
169 nla->nla_type = NL_RTA_METRICS;
170 nla->nla_len = nla_len;
171 nla++;
172 nla->nla_type = NL_RTAX_MTU;
173 nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t);
174 *((uint32_t *)(nla + 1)) = nh->nh_mtu;
175 }
176
#ifdef ROUTE_MPATH
/*
 * Serializes the nexthop group @nhg into the message held by @nw.
 *
 * Emits the user-assigned group id (when non-zero), the kernel group
 * index and the route flags of the first member, followed by a nested
 * NL_RTA_MULTIPATH attribute with one struct rtnexthop record per
 * member.  A member whose route flags differ from the first member's
 * gets its own NL_RTA_RTFLAGS inside its record, and members with
 * RTF_FIXEDMTU also get their MTU.  @rtm is currently unused.
 *
 * The function returns early, leaving the nested attribute unterminated,
 * when the writer cannot provide space.
 */
static void
dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm)
{
	uint32_t user_idx = nhgrp_get_uidx(nhg);
	uint32_t count;
	const struct weightened_nhop *members = nhgrp_get_nhops(nhg, &count);
	uint32_t first_rtflags = nhop_get_rtflags(members[0].nh);
	int mpath_off;

	if (user_idx != 0)
		nlattr_add_u32(nw, NL_RTA_NH_ID, user_idx);
	nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg));
	nlattr_add_u32(nw, NL_RTA_RTFLAGS, first_rtflags);

	mpath_off = nlattr_add_nested(nw, NL_RTA_MULTIPATH);
	if (mpath_off == 0)
		return;

	for (uint32_t i = 0; i < count; i++) {
		const struct nhop_object *nh = members[i].nh;
		int rec_off = nlattr_save_offset(nw);
		struct rtnexthop *rec = nlmsg_reserve_object(nw, struct rtnexthop);
		uint32_t rtflags;

		if (rec == NULL)
			return;
		rec->rtnh_flags = 0;
		rec->rtnh_ifindex = if_getindex(nh->nh_ifp);
		rec->rtnh_hops = members[i].weight;
		dump_rc_nhop_gw(nw, nh);

		rtflags = nhop_get_rtflags(nh);
		if (rtflags != first_rtflags)
			nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags);
		if ((rtflags & RTF_FIXEDMTU) != 0)
			dump_rc_nhop_mtu(nw, nh);

		/*
		 * Look the record up again by offset before patching its
		 * length (NOTE(review): presumably because appending
		 * attributes may move the underlying buffer -- confirm).
		 *
		 * nlattr_add() allocates 4-byte aligned storage, so there is
		 * no need to align the length here.
		 */
		rec = nlattr_restore_offset(nw, rec_off, struct rtnexthop);
		rec->rtnh_len = nlattr_save_offset(nw) - rec_off;
	}
	nlattr_set_len(nw, mpath_off);
}
#endif
219
220 static void
dump_rc_nhop(struct nl_writer * nw,const struct route_nhop_data * rnd,struct rtmsg * rtm)221 dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm)
222 {
223 #ifdef ROUTE_MPATH
224 if (NH_IS_NHGRP(rnd->rnd_nhop)) {
225 dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm);
226 return;
227 }
228 #endif
229 const struct nhop_object *nh = rnd->rnd_nhop;
230 uint32_t rtflags = nhop_get_rtflags(nh);
231
232 /*
233 * IPv4 over IPv6
234 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2),
235 * IPv4 w/ gw
236 * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)],
237 * Direct route:
238 * ('RTA_OIF', 2)
239 */
240 if (nh->nh_flags & NHF_GATEWAY)
241 dump_rc_nhop_gw(nw, nh);
242
243 uint32_t uidx = nhop_get_uidx(nh);
244 if (uidx != 0)
245 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx);
246 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh));
247 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags);
248
249 if (rtflags & RTF_FIXEDMTU)
250 dump_rc_nhop_mtu(nw, nh);
251 uint32_t nh_expire = nhop_get_expire(nh);
252 if (nh_expire > 0)
253 nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime);
254
255 /* In any case, fill outgoing interface */
256 nlattr_add_u32(nw, NL_RTA_OIF, if_getindex(nh->nh_ifp));
257
258 if (rnd->rnd_weight != RT_DEFAULT_WEIGHT)
259 nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight);
260 }
261
262 /*
263 * Dumps output from a rib command into an rtmsg
264 */
265
266 static int
dump_px(uint32_t fibnum,const struct nlmsghdr * hdr,const struct rtentry * rt,struct route_nhop_data * rnd,struct nl_writer * nw)267 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr,
268 const struct rtentry *rt, struct route_nhop_data *rnd,
269 struct nl_writer *nw)
270 {
271 struct rtmsg *rtm;
272 int error = 0;
273
274 NET_EPOCH_ASSERT();
275
276 if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg)))
277 goto enomem;
278
279 int family = rt_get_family(rt);
280 int rtm_off = nlattr_save_offset(nw);
281 rtm = nlmsg_reserve_object(nw, struct rtmsg);
282 rtm->rtm_family = family;
283 rtm->rtm_dst_len = 0;
284 rtm->rtm_src_len = 0;
285 rtm->rtm_tos = 0;
286 if (fibnum < 255)
287 rtm->rtm_table = (unsigned char)fibnum;
288 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
289 rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop);
290 rtm->rtm_type = get_rtm_type(rnd->rnd_nhop);
291
292 nlattr_add_u32(nw, NL_RTA_TABLE, fibnum);
293
294 int plen = 0;
295 #if defined(INET) || defined(INET6)
296 uint32_t scopeid;
297 #endif
298 switch (family) {
299 #ifdef INET
300 case AF_INET:
301 {
302 struct in_addr addr;
303 rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
304 nlattr_add(nw, NL_RTA_DST, 4, &addr);
305 break;
306 }
307 #endif
308 #ifdef INET6
309 case AF_INET6:
310 {
311 struct in6_addr addr;
312 rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
313 nlattr_add(nw, NL_RTA_DST, 16, &addr);
314 break;
315 }
316 #endif
317 default:
318 FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family);
319 error = EAFNOSUPPORT;
320 goto flush;
321 }
322
323 rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg);
324 if (plen > 0)
325 rtm->rtm_dst_len = plen;
326 dump_rc_nhop(nw, rnd, rtm);
327
328 if (nlmsg_end(nw))
329 return (0);
330 enomem:
331 error = ENOMEM;
332 flush:
333 nlmsg_abort(nw);
334 return (error);
335 }
336
337 static int
family_to_group(int family)338 family_to_group(int family)
339 {
340 switch (family) {
341 case AF_INET:
342 return (RTNLGRP_IPV4_ROUTE);
343 case AF_INET6:
344 return (RTNLGRP_IPV6_ROUTE);
345 }
346 return (0);
347 }
348
349 static void
report_operation(uint32_t fibnum,struct rib_cmd_info * rc,struct nlpcb * nlp,struct nlmsghdr * hdr)350 report_operation(uint32_t fibnum, struct rib_cmd_info *rc,
351 struct nlpcb *nlp, struct nlmsghdr *hdr)
352 {
353 struct nl_writer nw = {};
354 uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt));
355
356 if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
357 struct route_nhop_data rnd = {
358 .rnd_nhop = rc_get_nhop(rc),
359 .rnd_weight = rc->rc_nh_weight,
360 };
361 hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE);
362 hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND);
363 switch (rc->rc_cmd) {
364 case RTM_ADD:
365 hdr->nlmsg_type = NL_RTM_NEWROUTE;
366 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
367 break;
368 case RTM_CHANGE:
369 hdr->nlmsg_type = NL_RTM_NEWROUTE;
370 hdr->nlmsg_flags |= NLM_F_REPLACE;
371 break;
372 case RTM_DELETE:
373 hdr->nlmsg_type = NL_RTM_DELROUTE;
374 break;
375 }
376 dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw);
377 nlmsg_flush(&nw);
378 }
379
380 rtsock_callback_p->route_f(fibnum, rc);
381 }
382
/*
 * Embeds the index of @ifp as the scope id of @sa when @sa is an IPv6
 * link-local address.  Does nothing when either argument is NULL, when
 * @sa is not AF_INET6, or when the kernel is built without INET6.
 */
static void
set_scope6(struct sockaddr *sa, struct ifnet *ifp)
{
#ifdef INET6
	struct sockaddr_in6 *sa6;

	if (sa == NULL || ifp == NULL || sa->sa_family != AF_INET6)
		return;

	sa6 = (struct sockaddr_in6 *)sa;
	if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr))
		in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp));
#endif
}
395
/*
 * Parsed form of one struct rtnexthop record found inside an
 * RTA_MULTIPATH attribute.
 */
struct rta_mpath_nh {
	struct sockaddr	*gw;		/* gateway (RTA_GATEWAY or RTA_VIA) */
	struct ifnet	*ifp;		/* interface resolved from rtnh_ifindex */
	uint8_t		rtnh_flags;	/* copy of rtnexthop.rtnh_flags */
	uint8_t		rtnh_weight;	/* copy of rtnexthop.rtnh_hops */
};
402
403 #define _IN(_field) offsetof(struct rtnexthop, _field)
404 #define _OUT(_field) offsetof(struct rta_mpath_nh, _field)
405 const static struct nlattr_parser nla_p_rtnh[] = {
406 { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip },
407 { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia },
408 };
409 const static struct nlfield_parser nlf_p_rtnh[] = {
410 { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 },
411 { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 },
412 { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz },
413 };
414 #undef _IN
415 #undef _OUT
416
417 static bool
post_p_rtnh(void * _attrs,struct nl_pstate * npt __unused)418 post_p_rtnh(void *_attrs, struct nl_pstate *npt __unused)
419 {
420 struct rta_mpath_nh *attrs = (struct rta_mpath_nh *)_attrs;
421
422 set_scope6(attrs->gw, attrs->ifp);
423 return (true);
424 }
425 NL_DECLARE_PARSER_EXT(mpath_parser, struct rtnexthop, NULL, nlf_p_rtnh, nla_p_rtnh, post_p_rtnh);
426
427 struct rta_mpath {
428 int num_nhops;
429 struct rta_mpath_nh nhops[0];
430 };
431
432 static int
nlattr_get_multipath(struct nlattr * nla,struct nl_pstate * npt,const void * arg,void * target)433 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
434 {
435 int data_len = nla->nla_len - sizeof(struct nlattr);
436 struct rtnexthop *rtnh;
437
438 int max_nhops = data_len / sizeof(struct rtnexthop);
439
440 struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh));
441 mp->num_nhops = 0;
442
443 for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) {
444 struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++];
445
446 int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser,
447 npt, mpnh);
448 if (error != 0) {
449 NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed",
450 mp->num_nhops - 1);
451 return (error);
452 }
453
454 int len = NL_ITEM_ALIGN(rtnh->rtnh_len);
455 data_len -= len;
456 rtnh = (struct rtnexthop *)((char *)rtnh + len);
457 }
458 if (data_len != 0 || mp->num_nhops == 0) {
459 NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr");
460 return (EINVAL);
461 }
462
463 *((struct rta_mpath **)target) = mp;
464 return (0);
465 }
466
467
/*
 * Result of parsing an RTM_{NEW,DEL,GET}ROUTE request.  The rta_*
 * and rtax_* members are filled from attributes, the rtm_* members
 * from the fixed struct rtmsg header.
 */
struct nl_parsed_route {
	/* Attributes */
	struct sockaddr		*rta_dst;	/* NL_RTA_DST */
	struct sockaddr		*rta_gw;	/* NL_RTA_GATEWAY or NL_RTA_VIA */
	struct ifnet		*rta_oif;	/* NL_RTA_OIF */
	struct rta_mpath	*rta_multipath;	/* NL_RTA_MULTIPATH */
	uint32_t		rta_table;	/* NL_RTA_TABLE */
	uint32_t		rta_rtflags;	/* NL_RTA_RTFLAGS */
	uint32_t		rta_nh_id;	/* NL_RTA_NH_ID */
	uint32_t		rta_weight;	/* NL_RTA_WEIGHT */
	uint32_t		rtax_mtu;	/* NL_RTAX_MTU nested in NL_RTA_METRICS */
	/* Header fields */
	uint8_t			rtm_table;
	uint8_t			rtm_family;
	uint8_t			rtm_dst_len;
	uint8_t			rtm_protocol;
	uint8_t			rtm_type;
	uint32_t		rtm_flags;
};
485
486 #define _IN(_field) offsetof(struct rtmsg, _field)
487 #define _OUT(_field) offsetof(struct nl_parsed_route, _field)
488 static struct nlattr_parser nla_p_rtmetrics[] = {
489 { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 },
490 };
491 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics);
492
493 static const struct nlattr_parser nla_p_rtmsg[] = {
494 { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip },
495 { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp },
496 { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip },
497 { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested },
498 { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath },
499 { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 },
500 { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 },
501 { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 },
502 { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia },
503 { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 },
504 };
505
506 static const struct nlfield_parser nlf_p_rtmsg[] = {
507 { .off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 },
508 { .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 },
509 { .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 },
510 { .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 },
511 { .off_in = _IN(rtm_table), .off_out = _OUT(rtm_table), .cb = nlf_get_u8 },
512 { .off_in = _IN(rtm_flags), .off_out = _OUT(rtm_flags), .cb = nlf_get_u32 },
513 };
514 #undef _IN
515 #undef _OUT
516
517 static bool
post_p_rtmsg(void * _attrs,struct nl_pstate * npt __unused)518 post_p_rtmsg(void *_attrs, struct nl_pstate *npt __unused)
519 {
520 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_attrs;
521
522 set_scope6(attrs->rta_dst, attrs->rta_oif);
523 set_scope6(attrs->rta_gw, attrs->rta_oif);
524 return (true);
525 }
526 NL_DECLARE_PARSER_EXT(rtm_parser, struct rtmsg, NULL, nlf_p_rtmsg, nla_p_rtmsg, post_p_rtmsg);
527
528 struct netlink_walkargs {
529 struct nl_writer *nw;
530 struct route_nhop_data rnd;
531 struct nlmsghdr hdr;
532 struct nlpcb *nlp;
533 uint32_t fibnum;
534 int family;
535 int error;
536 int count;
537 int dumped;
538 int dumped_tables;
539 };
540
541 static int
dump_rtentry(struct rtentry * rt,void * _arg)542 dump_rtentry(struct rtentry *rt, void *_arg)
543 {
544 struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
545 int error;
546
547 wa->count++;
548 if (wa->error != 0)
549 return (0);
550 if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp)))
551 return (0);
552 wa->dumped++;
553
554 rt_get_rnd(rt, &wa->rnd);
555
556 error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw);
557
558 IF_DEBUG_LEVEL(LOG_DEBUG3) {
559 char rtbuf[INET6_ADDRSTRLEN + 5];
560 FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family,
561 "Dump %s, offset %u, error %d",
562 rt_print_buf(rt, rtbuf, sizeof(rtbuf)),
563 wa->nw->offset, error);
564 }
565 wa->error = error;
566
567 return (0);
568 }
569
570 static void
dump_rtable_one(struct netlink_walkargs * wa,uint32_t fibnum,int family)571 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family)
572 {
573 FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump");
574 wa->count = 0;
575 wa->dumped = 0;
576
577 rib_walk(fibnum, family, false, dump_rtentry, wa);
578
579 wa->dumped_tables++;
580
581 FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d",
582 wa->count, wa->dumped);
583 NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset);
584 }
585
586 static int
dump_rtable_fib(struct netlink_walkargs * wa,uint32_t fibnum,int family)587 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family)
588 {
589 wa->fibnum = fibnum;
590
591 if (family == AF_UNSPEC) {
592 for (int i = 0; i < AF_MAX; i++) {
593 if (rt_tables_get_rnh(fibnum, i) != 0) {
594 wa->family = i;
595 dump_rtable_one(wa, fibnum, i);
596 if (wa->error != 0)
597 break;
598 }
599 }
600 } else {
601 if (rt_tables_get_rnh(fibnum, family) != 0) {
602 wa->family = family;
603 dump_rtable_one(wa, fibnum, family);
604 }
605 }
606
607 return (wa->error);
608 }
609
610 static int
handle_rtm_getroute(struct nlpcb * nlp,struct nl_parsed_route * attrs,struct nlmsghdr * hdr,struct nl_pstate * npt)611 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs,
612 struct nlmsghdr *hdr, struct nl_pstate *npt)
613 {
614 RIB_RLOCK_TRACKER;
615 struct rib_head *rnh;
616 const struct rtentry *rt;
617 struct route_nhop_data rnd;
618 uint32_t fibnum = attrs->rta_table;
619 sa_family_t family = attrs->rtm_family;
620
621 if (attrs->rta_dst == NULL) {
622 NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied");
623 return (EINVAL);
624 }
625
626 rnh = rt_tables_get_rnh(fibnum, family);
627 if (rnh == NULL)
628 return (EAFNOSUPPORT);
629
630 RIB_RLOCK(rnh);
631
632 struct sockaddr *dst = attrs->rta_dst;
633
634 if (attrs->rtm_flags & RTM_F_PREFIX)
635 rt = rib_lookup_prefix_plen(rnh, dst, attrs->rtm_dst_len, &rnd);
636 else
637 rt = (const struct rtentry *)rnh->rnh_matchaddr(dst, &rnh->head);
638 if (rt == NULL) {
639 RIB_RUNLOCK(rnh);
640 return (ESRCH);
641 }
642
643 rt_get_rnd(rt, &rnd);
644 rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0);
645
646 RIB_RUNLOCK(rnh);
647
648 if (!rt_is_exportable(rt, nlp_get_cred(nlp)))
649 return (ESRCH);
650
651 IF_DEBUG_LEVEL(LOG_DEBUG2) {
652 char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused;
653 FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s",
654 nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)),
655 rt_print_buf(rt, rtbuf, sizeof(rtbuf)));
656 }
657
658 hdr->nlmsg_type = NL_RTM_NEWROUTE;
659 dump_px(fibnum, hdr, rt, &rnd, npt->nw);
660
661 return (0);
662 }
663
664 static int
handle_rtm_dump(struct nlpcb * nlp,uint32_t fibnum,int family,struct nlmsghdr * hdr,struct nl_writer * nw)665 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family,
666 struct nlmsghdr *hdr, struct nl_writer *nw)
667 {
668 struct netlink_walkargs wa = {
669 .nlp = nlp,
670 .nw = nw,
671 .hdr.nlmsg_pid = hdr->nlmsg_pid,
672 .hdr.nlmsg_seq = hdr->nlmsg_seq,
673 .hdr.nlmsg_type = NL_RTM_NEWROUTE,
674 .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
675 };
676
677 if (fibnum == RT_TABLE_UNSPEC) {
678 for (int i = 0; i < V_rt_numfibs; i++) {
679 dump_rtable_fib(&wa, fibnum, family);
680 if (wa.error != 0)
681 break;
682 }
683 } else
684 dump_rtable_fib(&wa, fibnum, family);
685
686 if (wa.error == 0 && wa.dumped_tables == 0) {
687 FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family");
688 wa.error = ESRCH;
689 // How do we propagate it?
690 }
691
692 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) {
693 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
694 return (ENOMEM);
695 }
696
697 return (wa.error);
698 }
699
700 static struct nhop_object *
finalize_nhop(struct nhop_object * nh,const struct sockaddr * dst,int * perror)701 finalize_nhop(struct nhop_object *nh, const struct sockaddr *dst, int *perror)
702 {
703 /*
704 * The following MUST be filled:
705 * nh_ifp, nh_ifa, nh_gw
706 */
707 if (nh->gw_sa.sa_family == 0) {
708 /*
709 * Empty gateway. Can be direct route with RTA_OIF set.
710 */
711 if (nh->nh_ifp != NULL)
712 nhop_set_direct_gw(nh, nh->nh_ifp);
713 else {
714 NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping");
715 *perror = EINVAL;
716 return (NULL);
717 }
718 /* Both nh_ifp and gateway are set */
719 } else {
720 /* Gateway is set up, we can derive ifp if not set */
721 if (nh->nh_ifp == NULL) {
722 uint32_t fibnum = nhop_get_fibnum(nh);
723 uint32_t flags = 0;
724
725 if (nh->nh_flags & NHF_GATEWAY)
726 flags = RTF_GATEWAY;
727 else if (nh->nh_flags & NHF_HOST)
728 flags = RTF_HOST;
729
730 struct ifaddr *ifa = ifa_ifwithroute(flags, dst, &nh->gw_sa, fibnum);
731 if (ifa == NULL) {
732 NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping");
733 *perror = EINVAL;
734 return (NULL);
735 }
736 nhop_set_transmit_ifp(nh, ifa->ifa_ifp);
737 }
738 }
739 /* Both nh_ifp and gateway are set */
740 if (nh->nh_ifa == NULL) {
741 const struct sockaddr *gw_sa = &nh->gw_sa;
742
743 if (gw_sa->sa_family != dst->sa_family) {
744 /*
745 * Use dst as the target for determining the default
746 * preferred ifa IF
747 * 1) the gateway is link-level (e.g. direct route)
748 * 2) the gateway family is different (e.g. IPv4 over IPv6).
749 */
750 gw_sa = dst;
751 }
752
753 struct ifaddr *ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp);
754 if (ifa == NULL) {
755 /* Try link-level ifa. */
756 gw_sa = &nh->gw_sa;
757 ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp);
758 if (ifa == NULL) {
759 NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping");
760 *perror = EINVAL;
761 return (NULL);
762 }
763 }
764 nhop_set_src(nh, ifa);
765 }
766
767 return (nhop_get_nhop(nh, perror));
768 }
769
770 static int
get_pxflag(const struct nl_parsed_route * attrs)771 get_pxflag(const struct nl_parsed_route *attrs)
772 {
773 int pxflag = 0;
774 switch (attrs->rtm_family) {
775 case AF_INET:
776 if (attrs->rtm_dst_len == 32)
777 pxflag = NHF_HOST;
778 else if (attrs->rtm_dst_len == 0)
779 pxflag = NHF_DEFAULT;
780 break;
781 case AF_INET6:
782 if (attrs->rtm_dst_len == 128)
783 pxflag = NHF_HOST;
784 else if (attrs->rtm_dst_len == 0)
785 pxflag = NHF_DEFAULT;
786 break;
787 }
788
789 return (pxflag);
790 }
791
792 static int
get_op_flags(int nlm_flags)793 get_op_flags(int nlm_flags)
794 {
795 int op_flags = 0;
796
797 op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0;
798 op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0;
799 op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0;
800 op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0;
801
802 return (op_flags);
803 }
804
#ifdef ROUTE_MPATH
/*
 * Builds one member nexthop of a multipath route from the parsed
 * record @mpnh, using the route-wide settings in @attrs (table, family,
 * prefix type, route flags, protocol).
 *
 * A gateway is mandatory.  The origin is recorded only for protocols
 * above RTPROT_STATIC.  The result of finalize_nhop() is stored in
 * *@pnh.
 *
 * Returns 0 on success, EINVAL when the record has no gateway, ENOMEM
 * when no nexthop could be allocated, or the error from setting the
 * gateway or finalizing the nexthop.
 */
static int
create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh,
    struct nl_pstate *npt, struct nhop_object **pnh)
{
	struct nhop_object *nh;
	int error;

	if (mpnh->gw == NULL)
		return (EINVAL);

	nh = nhop_alloc(attrs->rta_table, attrs->rtm_family);
	if (nh == NULL)
		return (ENOMEM);

	error = nl_set_nexthop_gw(nh, mpnh->gw, mpnh->ifp, npt);
	if (error != 0) {
		nhop_free(nh);
		return (error);
	}

	if (mpnh->ifp != NULL)
		nhop_set_transmit_ifp(nh, mpnh->ifp);
	nhop_set_pxtype_flag(nh, get_pxflag(attrs));
	nhop_set_rtflags(nh, attrs->rta_rtflags);
	if (attrs->rtm_protocol > RTPROT_STATIC)
		nhop_set_origin(nh, attrs->rtm_protocol);

	*pnh = finalize_nhop(nh, attrs->rta_dst, &error);

	return (error);
}
#endif
836
837 static struct nhop_object *
create_nexthop_from_attrs(struct nl_parsed_route * attrs,struct nl_pstate * npt,int * perror)838 create_nexthop_from_attrs(struct nl_parsed_route *attrs,
839 struct nl_pstate *npt, int *perror)
840 {
841 struct nhop_object *nh = NULL;
842 int error = 0;
843
844 if (attrs->rta_multipath != NULL) {
845 #ifdef ROUTE_MPATH
846 /* Multipath w/o explicit nexthops */
847 int num_nhops = attrs->rta_multipath->num_nhops;
848 struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops);
849
850 for (int i = 0; i < num_nhops; i++) {
851 struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i];
852
853 error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh);
854 if (error != 0) {
855 for (int j = 0; j < i; j++)
856 nhop_free(wn[j].nh);
857 break;
858 }
859 wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1;
860 }
861 if (error == 0) {
862 struct rib_head *rh = nhop_get_rh(wn[0].nh);
863 struct nhgrp_object *nhg;
864
865 nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family,
866 wn, num_nhops, perror);
867 if (nhg != NULL) {
868 if (attrs->rtm_protocol > RTPROT_STATIC)
869 nhgrp_set_origin(nhg, attrs->rtm_protocol);
870 nhg = nhgrp_get_nhgrp(nhg, perror);
871 }
872 for (int i = 0; i < num_nhops; i++)
873 nhop_free(wn[i].nh);
874 if (nhg != NULL)
875 return ((struct nhop_object *)nhg);
876 error = *perror;
877 }
878 #else
879 error = ENOTSUP;
880 #endif
881 *perror = error;
882 } else {
883 nh = nhop_alloc(attrs->rta_table, attrs->rtm_family);
884 if (nh == NULL) {
885 *perror = ENOMEM;
886 return (NULL);
887 }
888 if (attrs->rta_gw != NULL) {
889 *perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt);
890 if (*perror != 0) {
891 nhop_free(nh);
892 return (NULL);
893 }
894 }
895 if (attrs->rta_oif != NULL)
896 nhop_set_transmit_ifp(nh, attrs->rta_oif);
897 if (attrs->rtax_mtu != 0)
898 nhop_set_mtu(nh, attrs->rtax_mtu, true);
899 if (attrs->rta_rtflags & RTF_BROADCAST)
900 nhop_set_broadcast(nh, true);
901 if (attrs->rtm_protocol > RTPROT_STATIC)
902 nhop_set_origin(nh, attrs->rtm_protocol);
903 nhop_set_pxtype_flag(nh, get_pxflag(attrs));
904 nhop_set_rtflags(nh, attrs->rta_rtflags);
905
906 switch (attrs->rtm_type) {
907 case RTN_UNICAST:
908 break;
909 case RTN_BLACKHOLE:
910 nhop_set_blackhole(nh, RTF_BLACKHOLE);
911 break;
912 case RTN_PROHIBIT:
913 case RTN_UNREACHABLE:
914 nhop_set_blackhole(nh, RTF_REJECT);
915 break;
916 /* TODO: return ENOTSUP for other types if strict option is set */
917 }
918
919 nh = finalize_nhop(nh, attrs->rta_dst, perror);
920 }
921
922 return (nh);
923 }
924
925 static int
rtnl_handle_newroute(struct nlmsghdr * hdr,struct nlpcb * nlp,struct nl_pstate * npt)926 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
927 struct nl_pstate *npt)
928 {
929 struct rib_cmd_info rc = {};
930 struct nhop_object *nh = NULL;
931 int error;
932
933 struct nl_parsed_route attrs = {};
934 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
935 if (error != 0)
936 return (error);
937
938 /* Check if we have enough data */
939 if (attrs.rta_dst == NULL) {
940 NL_LOG(LOG_DEBUG, "missing RTA_DST");
941 return (EINVAL);
942 }
943
944 if (attrs.rtm_table > 0 && attrs.rta_table == 0) {
945 /* pre-2.6.19 Linux API compatibility */
946 attrs.rta_table = attrs.rtm_table;
947 } else if (attrs.rta_table >= V_rt_numfibs) {
948 NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
949 return (EINVAL);
950 }
951
952 if (attrs.rta_nh_id != 0) {
953 /* Referenced uindex */
954 int pxflag = get_pxflag(&attrs);
955 nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id,
956 pxflag, &error);
957 if (error != 0)
958 return (error);
959 } else {
960 nh = create_nexthop_from_attrs(&attrs, npt, &error);
961 if (error != 0) {
962 NL_LOG(LOG_DEBUG, "Error creating nexthop");
963 return (error);
964 }
965 }
966
967 if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0)
968 attrs.rta_weight = RT_DEFAULT_WEIGHT;
969 struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight };
970 int op_flags = get_op_flags(hdr->nlmsg_flags);
971
972 error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len,
973 &rnd, op_flags, &rc);
974 if (error == 0)
975 report_operation(attrs.rta_table, &rc, nlp, hdr);
976 return (error);
977 }
978
979 static int
path_match_func(const struct rtentry * rt,const struct nhop_object * nh,void * _data)980 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
981 {
982 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data;
983
984 if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw))
985 return (0);
986
987 if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp))
988 return (0);
989
990 return (1);
991 }
992
993 static int
rtnl_handle_delroute(struct nlmsghdr * hdr,struct nlpcb * nlp,struct nl_pstate * npt)994 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
995 struct nl_pstate *npt)
996 {
997 struct rib_cmd_info rc;
998 int error;
999
1000 struct nl_parsed_route attrs = {};
1001 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
1002 if (error != 0)
1003 return (error);
1004
1005 if (attrs.rta_dst == NULL) {
1006 NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set");
1007 return (ESRCH);
1008 }
1009
1010 if (attrs.rta_table >= V_rt_numfibs) {
1011 NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
1012 return (EINVAL);
1013 }
1014
1015 error = rib_del_route_px(attrs.rta_table, attrs.rta_dst,
1016 attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc);
1017 if (error == 0)
1018 report_operation(attrs.rta_table, &rc, nlp, hdr);
1019 return (error);
1020 }
1021
1022 static int
rtnl_handle_getroute(struct nlmsghdr * hdr,struct nlpcb * nlp,struct nl_pstate * npt)1023 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1024 {
1025 int error;
1026
1027 struct nl_parsed_route attrs = {};
1028 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
1029 if (error != 0)
1030 return (error);
1031
1032 if (attrs.rta_table >= V_rt_numfibs) {
1033 NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
1034 return (EINVAL);
1035 }
1036
1037 if (hdr->nlmsg_flags & NLM_F_DUMP)
1038 error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw);
1039 else
1040 error = handle_rtm_getroute(nlp, &attrs, hdr, npt);
1041
1042 return (error);
1043 }
1044
1045 void
rtnl_handle_route_event(uint32_t fibnum,const struct rib_cmd_info * rc)1046 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
1047 {
1048 struct nl_writer nw = {};
1049 int family, nlm_flags = 0;
1050
1051 family = rt_get_family(rc->rc_rt);
1052
1053 /* XXX: check if there are active listeners first */
1054
1055 /* TODO: consider passing PID/type/seq */
1056 switch (rc->rc_cmd) {
1057 case RTM_ADD:
1058 nlm_flags = NLM_F_EXCL | NLM_F_CREATE;
1059 break;
1060 case RTM_CHANGE:
1061 nlm_flags = NLM_F_REPLACE;
1062 break;
1063 case RTM_DELETE:
1064 nlm_flags = 0;
1065 break;
1066 }
1067 IF_DEBUG_LEVEL(LOG_DEBUG2) {
1068 char rtbuf[NHOP_PRINT_BUFSIZE] __unused;
1069 FIB_LOG(LOG_DEBUG2, fibnum, family,
1070 "received event %s for %s / nlm_flags=%X",
1071 rib_print_cmd(rc->rc_cmd),
1072 rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)),
1073 nlm_flags);
1074 }
1075
1076 struct nlmsghdr hdr = {
1077 .nlmsg_flags = nlm_flags,
1078 .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd),
1079 };
1080
1081 struct route_nhop_data rnd = {
1082 .rnd_nhop = rc_get_nhop(rc),
1083 .rnd_weight = rc->rc_nh_weight,
1084 };
1085
1086 uint32_t group_id = family_to_group(family);
1087 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
1088 NL_LOG(LOG_DEBUG, "error allocating event buffer");
1089 return;
1090 }
1091
1092 dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw);
1093 nlmsg_flush(&nw);
1094 }
1095
1096 static const struct rtnl_cmd_handler cmd_handlers[] = {
1097 {
1098 .cmd = NL_RTM_GETROUTE,
1099 .name = "RTM_GETROUTE",
1100 .cb = &rtnl_handle_getroute,
1101 .flags = RTNL_F_ALLOW_NONVNET_JAIL,
1102 },
1103 {
1104 .cmd = NL_RTM_DELROUTE,
1105 .name = "RTM_DELROUTE",
1106 .cb = &rtnl_handle_delroute,
1107 .priv = PRIV_NET_ROUTE,
1108 },
1109 {
1110 .cmd = NL_RTM_NEWROUTE,
1111 .name = "RTM_NEWROUTE",
1112 .cb = &rtnl_handle_newroute,
1113 .priv = PRIV_NET_ROUTE,
1114 }
1115 };
1116
1117 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser};
1118
1119 void
rtnl_routes_init(void)1120 rtnl_routes_init(void)
1121 {
1122 NL_VERIFY_PARSERS(all_parsers);
1123 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
1124 }
1125