1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include "core_priv.h"
39
40 #include <linux/in.h>
41 #include <linux/in6.h>
42 #include <linux/rcupdate.h>
43
44 #include <rdma/ib_cache.h>
45 #include <rdma/ib_addr.h>
46
47 #include <netinet6/scope6_var.h>
48
/*
 * Workqueue on which every GID maintenance job in this file is queued.
 * It is created as an ordered workqueue in roce_gid_mgmt_init() and
 * destroyed in roce_gid_mgmt_cleanup().
 */
static struct workqueue_struct *roce_gid_mgmt_wq;
50
/* Operation that update_gid() applies to the GID cache. */
enum gid_op_type {
	GID_DEL = 0,	/* remove the GID via ib_cache_gid_del() */
	GID_ADD = 1,	/* insert the GID via ib_cache_gid_add() */
};
55
56 struct roce_netdev_event_work {
57 struct work_struct work;
58 struct net_device *ndev;
59 };
60
61 struct roce_rescan_work {
62 struct work_struct work;
63 struct ib_device *ib_dev;
64 };
65
66 static const struct {
67 bool (*is_supported)(const struct ib_device *device, u8 port_num);
68 enum ib_gid_type gid_type;
69 } PORT_CAP_TO_GID_TYPE[] = {
70 {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
71 {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
72 };
73
74 #define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
75
/*
 * Compute the set of GID types supported by a port.
 *
 * ib_dev: device owning the port
 * port:   port number to query
 *
 * Returns a bitmask in which bit N is set when GID type N is supported.
 * A port that does not run RoCE reports only IB_GID_TYPE_IB; otherwise
 * the mask is built from the rows of PORT_CAP_TO_GID_TYPE whose
 * predicate accepts the port.
 */
unsigned long
roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	/*
	 * Same width as the return type and as the 1UL operands below, so
	 * the mask is never narrowed on its way out.
	 */
	unsigned long ret_flags = 0;
	size_t i;

	if (!rdma_protocol_roce(ib_dev, port))
		return (1UL << IB_GID_TYPE_IB);

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) {
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
	}

	return (ret_flags);
}
EXPORT_SYMBOL(roce_gid_type_mask_support);
91
/*
 * Add or delete one GID for every GID type the port supports.
 *
 * gid_op: GID_ADD or GID_DEL
 * ib_dev: device owning the GID table
 * port:   port number of the table
 * gid:    GID value to add or delete
 * ndev:   network device stored in the GID attributes
 *
 * Results of the cache calls are not inspected.
 */
static void
update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, struct net_device *ndev)
{
	unsigned long supported = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr attr;
	int type;

	memset(&attr, 0, sizeof(attr));
	attr.ndev = ndev;

	for (type = 0; type != IB_GID_TYPE_SIZE; type++) {
		/* skip GID types this port does not offer */
		if ((supported & (1UL << type)) == 0)
			continue;

		attr.gid_type = type;
		if (gid_op == GID_ADD)
			ib_cache_gid_add(ib_dev, port, gid, &attr);
		else if (gid_op == GID_DEL)
			ib_cache_gid_del(ib_dev, port, gid, &attr);
	}
}
118
119 static int
roce_gid_match_netdev(struct ib_device * ib_dev,u8 port,struct net_device * idev,void * cookie)120 roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
121 struct net_device *idev, void *cookie)
122 {
123 struct net_device *ndev = (struct net_device *)cookie;
124 if (idev == NULL)
125 return (0);
126 return (ndev == idev);
127 }
128
129 static int
roce_gid_match_all(struct ib_device * ib_dev,u8 port,struct net_device * idev,void * cookie)130 roce_gid_match_all(struct ib_device *ib_dev, u8 port,
131 struct net_device *idev, void *cookie)
132 {
133 if (idev == NULL)
134 return (0);
135 return (1);
136 }
137
138 static int
roce_gid_enum_netdev_default(struct ib_device * ib_dev,u8 port,struct net_device * idev)139 roce_gid_enum_netdev_default(struct ib_device *ib_dev,
140 u8 port, struct net_device *idev)
141 {
142 unsigned long gid_type_mask;
143
144 gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
145
146 ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
147 IB_CACHE_GID_DEFAULT_MODE_SET);
148
149 return (hweight_long(gid_type_mask));
150 }
151
152 static void
roce_gid_update_addr_callback(struct ib_device * device,u8 port,struct net_device * ndev,void * cookie)153 roce_gid_update_addr_callback(struct ib_device *device, u8 port,
154 struct net_device *ndev, void *cookie)
155 {
156 struct ipx_entry {
157 STAILQ_ENTRY(ipx_entry) entry;
158 union ipx_addr {
159 struct sockaddr sa[0];
160 struct sockaddr_in v4;
161 struct sockaddr_in6 v6;
162 } ipx_addr;
163 struct net_device *ndev;
164 };
165 struct ipx_entry *entry;
166 struct net_device *idev;
167 #if defined(INET) || defined(INET6)
168 struct ifaddr *ifa;
169 #endif
170 VNET_ITERATOR_DECL(vnet_iter);
171 struct ib_gid_attr gid_attr;
172 union ib_gid gid;
173 int default_gids;
174 u16 index_num;
175 int i;
176
177 STAILQ_HEAD(, ipx_entry) ipx_head;
178
179 STAILQ_INIT(&ipx_head);
180
181 /* make sure default GIDs are in */
182 default_gids = roce_gid_enum_netdev_default(device, port, ndev);
183
184 VNET_LIST_RLOCK();
185 VNET_FOREACH(vnet_iter) {
186 CURVNET_SET(vnet_iter);
187 IFNET_RLOCK();
188 CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) {
189 if (idev != ndev) {
190 if (idev->if_type != IFT_L2VLAN)
191 continue;
192 if (ndev != rdma_vlan_dev_real_dev(idev))
193 continue;
194 }
195
196 /* clone address information for IPv4 and IPv6 */
197 IF_ADDR_RLOCK(idev);
198 #if defined(INET)
199 CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
200 if (ifa->ifa_addr == NULL ||
201 ifa->ifa_addr->sa_family != AF_INET)
202 continue;
203 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
204 if (entry == NULL) {
205 pr_warn("roce_gid_update_addr_callback: "
206 "couldn't allocate entry for IPv4 update\n");
207 continue;
208 }
209 entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
210 entry->ndev = idev;
211 STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
212 }
213 #endif
214 #if defined(INET6)
215 CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
216 if (ifa->ifa_addr == NULL ||
217 ifa->ifa_addr->sa_family != AF_INET6)
218 continue;
219 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
220 if (entry == NULL) {
221 pr_warn("roce_gid_update_addr_callback: "
222 "couldn't allocate entry for IPv6 update\n");
223 continue;
224 }
225 entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
226 entry->ndev = idev;
227
228 /* trash IPv6 scope ID */
229 sa6_recoverscope(&entry->ipx_addr.v6);
230 entry->ipx_addr.v6.sin6_scope_id = 0;
231
232 STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
233 }
234 #endif
235 IF_ADDR_RUNLOCK(idev);
236 }
237 IFNET_RUNLOCK();
238 CURVNET_RESTORE();
239 }
240 VNET_LIST_RUNLOCK();
241
242 /* add missing GIDs, if any */
243 STAILQ_FOREACH(entry, &ipx_head, entry) {
244 unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);
245
246 if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
247 continue;
248
249 for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
250 if (!((1UL << i) & gid_type_mask))
251 continue;
252 /* check if entry found */
253 if (ib_find_cached_gid_by_port(device, &gid, i,
254 port, entry->ndev, &index_num) == 0)
255 break;
256 }
257 if (i != IB_GID_TYPE_SIZE)
258 continue;
259 /* add new GID */
260 update_gid(GID_ADD, device, port, &gid, entry->ndev);
261 }
262
263 /* remove stale GIDs, if any */
264 for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
265 union ipx_addr ipx;
266
267 /* check for valid network device pointer */
268 ndev = gid_attr.ndev;
269 if (ndev == NULL)
270 continue;
271 dev_put(ndev);
272
273 /* don't delete empty entries */
274 if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
275 continue;
276
277 /* zero default */
278 memset(&ipx, 0, sizeof(ipx));
279
280 rdma_gid2ip(&ipx.sa[0], &gid);
281
282 STAILQ_FOREACH(entry, &ipx_head, entry) {
283 if (entry->ndev == ndev &&
284 memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
285 break;
286 }
287 /* check if entry found */
288 if (entry != NULL)
289 continue;
290
291 /* remove GID */
292 update_gid(GID_DEL, device, port, &gid, ndev);
293 }
294
295 while ((entry = STAILQ_FIRST(&ipx_head))) {
296 STAILQ_REMOVE_HEAD(&ipx_head, entry);
297 kfree(entry);
298 }
299 }
300
301 static void
roce_gid_queue_scan_event_handler(struct work_struct * _work)302 roce_gid_queue_scan_event_handler(struct work_struct *_work)
303 {
304 struct roce_netdev_event_work *work =
305 container_of(_work, struct roce_netdev_event_work, work);
306
307 ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
308 roce_gid_update_addr_callback, NULL);
309
310 dev_put(work->ndev);
311 kfree(work);
312 }
313
314 static void
roce_gid_queue_scan_event(struct net_device * ndev)315 roce_gid_queue_scan_event(struct net_device *ndev)
316 {
317 struct roce_netdev_event_work *work;
318
319 retry:
320 switch (ndev->if_type) {
321 case IFT_ETHER:
322 break;
323 case IFT_L2VLAN:
324 ndev = rdma_vlan_dev_real_dev(ndev);
325 if (ndev != NULL)
326 goto retry;
327 /* FALLTHROUGH */
328 default:
329 return;
330 }
331
332 work = kmalloc(sizeof(*work), GFP_ATOMIC);
333 if (!work) {
334 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
335 return;
336 }
337
338 INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
339 dev_hold(ndev);
340
341 work->ndev = ndev;
342
343 queue_work(roce_gid_mgmt_wq, &work->work);
344 }
345
346 static void
roce_gid_delete_all_event_handler(struct work_struct * _work)347 roce_gid_delete_all_event_handler(struct work_struct *_work)
348 {
349 struct roce_netdev_event_work *work =
350 container_of(_work, struct roce_netdev_event_work, work);
351
352 ib_cache_gid_del_all_by_netdev(work->ndev);
353 dev_put(work->ndev);
354 kfree(work);
355 }
356
357 static void
roce_gid_delete_all_event(struct net_device * ndev)358 roce_gid_delete_all_event(struct net_device *ndev)
359 {
360 struct roce_netdev_event_work *work;
361
362 work = kmalloc(sizeof(*work), GFP_ATOMIC);
363 if (!work) {
364 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
365 return;
366 }
367
368 INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
369 dev_hold(ndev);
370 work->ndev = ndev;
371 queue_work(roce_gid_mgmt_wq, &work->work);
372
373 /* make sure job is complete before returning */
374 flush_workqueue(roce_gid_mgmt_wq);
375 }
376
377 static int
inetaddr_event(struct notifier_block * this,unsigned long event,void * ptr)378 inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
379 {
380 struct net_device *ndev = ptr;
381
382 switch (event) {
383 case NETDEV_UNREGISTER:
384 roce_gid_delete_all_event(ndev);
385 break;
386 case NETDEV_REGISTER:
387 case NETDEV_CHANGEADDR:
388 case NETDEV_CHANGEIFADDR:
389 roce_gid_queue_scan_event(ndev);
390 break;
391 default:
392 break;
393 }
394 return NOTIFY_DONE;
395 }
396
397 static struct notifier_block nb_inetaddr = {
398 .notifier_call = inetaddr_event
399 };
400
401 static eventhandler_tag eh_ifnet_event;
402
403 static void
roce_ifnet_event(void * arg,struct ifnet * ifp,int event)404 roce_ifnet_event(void *arg, struct ifnet *ifp, int event)
405 {
406 if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
407 return;
408
409 /* make sure GID table is reloaded */
410 roce_gid_delete_all_event(ifp);
411 roce_gid_queue_scan_event(ifp);
412 }
413
414 static void
roce_rescan_device_handler(struct work_struct * _work)415 roce_rescan_device_handler(struct work_struct *_work)
416 {
417 struct roce_rescan_work *work =
418 container_of(_work, struct roce_rescan_work, work);
419
420 ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
421 roce_gid_update_addr_callback, NULL);
422 kfree(work);
423 }
424
425 /* Caller must flush system workqueue before removing the ib_device */
roce_rescan_device(struct ib_device * ib_dev)426 int roce_rescan_device(struct ib_device *ib_dev)
427 {
428 struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);
429
430 if (!work)
431 return -ENOMEM;
432
433 work->ib_dev = ib_dev;
434 INIT_WORK(&work->work, roce_rescan_device_handler);
435 queue_work(roce_gid_mgmt_wq, &work->work);
436
437 return 0;
438 }
439
roce_gid_mgmt_init(void)440 int __init roce_gid_mgmt_init(void)
441 {
442 roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
443 if (!roce_gid_mgmt_wq) {
444 pr_warn("roce_gid_mgmt: can't allocate work queue\n");
445 return -ENOMEM;
446 }
447
448 register_inetaddr_notifier(&nb_inetaddr);
449
450 /*
451 * We rely on the netdevice notifier to enumerate all existing
452 * devices in the system. Register to this notifier last to
453 * make sure we will not miss any IP add/del callbacks.
454 */
455 register_netdevice_notifier(&nb_inetaddr);
456
457 eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
458 roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
459
460 return 0;
461 }
462
roce_gid_mgmt_cleanup(void)463 void __exit roce_gid_mgmt_cleanup(void)
464 {
465
466 if (eh_ifnet_event != NULL)
467 EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);
468
469 unregister_inetaddr_notifier(&nb_inetaddr);
470 unregister_netdevice_notifier(&nb_inetaddr);
471
472 /*
473 * Ensure all gid deletion tasks complete before we go down,
474 * to avoid any reference to free'd memory. By the time
475 * ib-core is removed, all physical devices have been removed,
476 * so no issue with remaining hardware contexts.
477 */
478 synchronize_rcu();
479 drain_workqueue(roce_gid_mgmt_wq);
480 destroy_workqueue(roce_gid_mgmt_wq);
481 }
482