1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "core_priv.h"
39 
40 #include <linux/in.h>
41 #include <linux/in6.h>
42 #include <linux/rcupdate.h>
43 
44 #include <rdma/ib_cache.h>
45 #include <rdma/ib_addr.h>
46 
47 #include <netinet6/scope6_var.h>
48 
/*
 * Workqueue on which all GID scan, delete-all and rescan work items of
 * this file are queued.  It is allocated as an ordered workqueue in
 * roce_gid_mgmt_init() and destroyed in roce_gid_mgmt_cleanup().
 */
static struct workqueue_struct *roce_gid_mgmt_wq;
50 
/* Operation selector passed to update_gid(). */
enum gid_op_type {
	GID_DEL = 0,	/* delete the GID from the cache */
	GID_ADD		/* add the GID to the cache */
};
55 
/*
 * Work item for a per-network-device event (address scan or delete-all).
 * The queueing functions take a reference on ndev with dev_hold() and the
 * work handlers drop it with dev_put() before freeing the item.
 */
struct roce_netdev_event_work {
	struct work_struct work;	/* embedded work; handlers use container_of() */
	struct net_device *ndev;	/* device the event applies to */
};
60 
/*
 * Work item for a full rescan of one IB device, see roce_rescan_device().
 * The ib_dev pointer is stored as-is; no reference is taken on it in this
 * file.
 */
struct roce_rescan_work {
	struct work_struct	work;	/* embedded work; handler uses container_of() */
	struct ib_device	*ib_dev;	/* device to rescan */
};
65 
/*
 * Table pairing a port capability predicate with the GID type that is
 * reported as supported when the predicate returns true for a port.
 * roce_gid_type_mask_support() walks this table to build its bitmask.
 */
static const struct {
	/* returns true when the capability is present on (device, port_num) */
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	/* GID type whose bit is set in the mask for that capability */
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

/* number of entries in PORT_CAP_TO_GID_TYPE */
#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
75 
/*
 * Build the bitmask of GID types supported by a port.
 *
 * Bit N of the result is set when GID type N (enum ib_gid_type) is
 * supported.  For a port that is not a RoCE port only the
 * IB_GID_TYPE_IB bit is set.  For a RoCE port every entry of
 * PORT_CAP_TO_GID_TYPE whose predicate accepts the port contributes
 * its GID type bit.
 *
 * ib_dev - device owning the port.
 * port   - port number.
 *
 * Returns the bitmask as an unsigned long.
 */
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	/*
	 * Accumulate in the same type as the return value and as the 1UL
	 * shifts below, so no bit can be lost in an intermediate narrower
	 * type.
	 */
	unsigned long ret_flags = 0;
	unsigned int i;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) {
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
	}

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);
91 
/*
 * Add or delete one GID in the GID cache of a port, once for every GID
 * type the port supports.
 *
 * gid_op - GID_ADD or GID_DEL; any other value leaves the cache alone.
 * ib_dev - device owning the port.
 * port   - port number.
 * gid    - GID to add or delete.
 * ndev   - network device recorded in the GID attributes.
 *
 * The results of the cache calls are not checked.
 */
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, struct net_device *ndev)
{
	struct ib_gid_attr attr;
	unsigned long supported;
	int type;

	memset(&attr, 0, sizeof(attr));
	attr.ndev = ndev;

	supported = roce_gid_type_mask_support(ib_dev, port);

	for (type = 0; type != IB_GID_TYPE_SIZE; type++) {
		/* skip GID types the port does not support */
		if ((supported & (1UL << type)) == 0)
			continue;

		attr.gid_type = type;

		if (gid_op == GID_ADD)
			ib_cache_gid_add(ib_dev, port, gid, &attr);
		else if (gid_op == GID_DEL)
			ib_cache_gid_del(ib_dev, port, gid, &attr);
	}
}
118 
119 static int
roce_gid_match_netdev(struct ib_device * ib_dev,u8 port,struct net_device * idev,void * cookie)120 roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
121     struct net_device *idev, void *cookie)
122 {
123 	struct net_device *ndev = (struct net_device *)cookie;
124 	if (idev == NULL)
125 		return (0);
126 	return (ndev == idev);
127 }
128 
129 static int
roce_gid_match_all(struct ib_device * ib_dev,u8 port,struct net_device * idev,void * cookie)130 roce_gid_match_all(struct ib_device *ib_dev, u8 port,
131     struct net_device *idev, void *cookie)
132 {
133 	if (idev == NULL)
134 		return (0);
135 	return (1);
136 }
137 
138 static int
roce_gid_enum_netdev_default(struct ib_device * ib_dev,u8 port,struct net_device * idev)139 roce_gid_enum_netdev_default(struct ib_device *ib_dev,
140     u8 port, struct net_device *idev)
141 {
142 	unsigned long gid_type_mask;
143 
144 	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
145 
146 	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
147 				     IB_CACHE_GID_DEFAULT_MODE_SET);
148 
149 	return (hweight_long(gid_type_mask));
150 }
151 
/*
 * Bring the cached GID table of one port in line with the IPv4 and IPv6
 * addresses configured on "ndev" and on the VLAN interfaces whose real
 * device is "ndev".
 *
 * The function works in five steps:
 *  1. set the default GIDs of the port;
 *  2. copy all matching interface addresses into a private list while
 *     the VNET list, ifnet and interface address read locks are held;
 *  3. add a GID for every copied address that is not found in the cache;
 *  4. walk the cached GIDs past the default ones and delete every entry
 *     whose (network device, address) pair is not in the private list;
 *  5. free the private list.
 *
 * device - IB device owning the port.
 * port   - port number.
 * ndev   - network device of the port; reused as a scratch variable in
 *          step 4.
 * cookie - not used.
 */
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct net_device *ndev, void *cookie)
{
	/* one copied interface address together with the interface it is on */
	struct ipx_entry {
		STAILQ_ENTRY(ipx_entry)	entry;
		union ipx_addr {
			struct sockaddr sa[0];
			struct sockaddr_in v4;
			struct sockaddr_in6 v6;
		} ipx_addr;
		struct net_device *ndev;
	};
	struct ipx_entry *entry;
	struct net_device *idev;
#if defined(INET) || defined(INET6)
	struct ifaddr *ifa;
#endif
	VNET_ITERATOR_DECL(vnet_iter);
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	int default_gids;
	u16 index_num;
	int i;

	STAILQ_HEAD(, ipx_entry) ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
	    CURVNET_SET(vnet_iter);
	    IFNET_RLOCK();
	    CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) {
		/*
		 * Consider ndev itself and any VLAN interface whose real
		 * device is ndev; skip every other interface.
		 */
		if (idev != ndev) {
			if (idev->if_type != IFT_L2VLAN)
				continue;
			if (ndev != rdma_vlan_dev_real_dev(idev))
				continue;
		}

		/* clone address information for IPv4 and IPv6 */
		IF_ADDR_RLOCK(idev);
#if defined(INET)
		CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				/*
				 * The address is left out of the list, so a
				 * cached GID for it is treated as stale by
				 * the removal loop below.
				 */
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv4 update\n");
				continue;
			}
			entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
			entry->ndev = idev;
			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
#if defined(INET6)
		CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				/* same consequence as in the IPv4 case above */
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv6 update\n");
				continue;
			}
			entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
			entry->ndev = idev;

			/* trash IPv6 scope ID */
			sa6_recoverscope(&entry->ipx_addr.v6);
			entry->ipx_addr.v6.sin6_scope_id = 0;

			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
		IF_ADDR_RUNLOCK(idev);
	    }
	    IFNET_RUNLOCK();
	    CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		/* skip addresses that cannot be converted to a GID */
		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		/* look for the GID under any of the supported GID types */
		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		/* the loop ended early, so the GID is already cached */
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/*
	 * remove stale GIDs, if any
	 *
	 * The walk starts at table index default_gids and ends at the first
	 * index for which ib_get_cached_gid() fails.
	 * NOTE(review): starting at default_gids presumably relies on the
	 * default GIDs occupying the leading table slots - confirm against
	 * the GID cache implementation.
	 */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/* check for valid network device pointer */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		/*
		 * NOTE(review): presumably ib_get_cached_gid() returned the
		 * device with a reference held, which is dropped here -
		 * confirm.  The pointer is still compared and passed to
		 * update_gid() below.
		 */
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		/* whole-union comparison; list entries were zero-allocated */
		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	/* release the private address list */
	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}
300 
301 static void
roce_gid_queue_scan_event_handler(struct work_struct * _work)302 roce_gid_queue_scan_event_handler(struct work_struct *_work)
303 {
304 	struct roce_netdev_event_work *work =
305 		container_of(_work, struct roce_netdev_event_work, work);
306 
307 	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
308 	    roce_gid_update_addr_callback, NULL);
309 
310 	dev_put(work->ndev);
311 	kfree(work);
312 }
313 
314 static void
roce_gid_queue_scan_event(struct net_device * ndev)315 roce_gid_queue_scan_event(struct net_device *ndev)
316 {
317 	struct roce_netdev_event_work *work;
318 
319 retry:
320 	switch (ndev->if_type) {
321 	case IFT_ETHER:
322 		break;
323 	case IFT_L2VLAN:
324 		ndev = rdma_vlan_dev_real_dev(ndev);
325 		if (ndev != NULL)
326 			goto retry;
327 		/* FALLTHROUGH */
328 	default:
329 		return;
330 	}
331 
332 	work = kmalloc(sizeof(*work), GFP_ATOMIC);
333 	if (!work) {
334 		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
335 		return;
336 	}
337 
338 	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
339 	dev_hold(ndev);
340 
341 	work->ndev = ndev;
342 
343 	queue_work(roce_gid_mgmt_wq, &work->work);
344 }
345 
346 static void
roce_gid_delete_all_event_handler(struct work_struct * _work)347 roce_gid_delete_all_event_handler(struct work_struct *_work)
348 {
349 	struct roce_netdev_event_work *work =
350 		container_of(_work, struct roce_netdev_event_work, work);
351 
352 	ib_cache_gid_del_all_by_netdev(work->ndev);
353 	dev_put(work->ndev);
354 	kfree(work);
355 }
356 
357 static void
roce_gid_delete_all_event(struct net_device * ndev)358 roce_gid_delete_all_event(struct net_device *ndev)
359 {
360 	struct roce_netdev_event_work *work;
361 
362 	work = kmalloc(sizeof(*work), GFP_ATOMIC);
363 	if (!work) {
364 		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
365 		return;
366 	}
367 
368 	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
369 	dev_hold(ndev);
370 	work->ndev = ndev;
371 	queue_work(roce_gid_mgmt_wq, &work->work);
372 
373 	/* make sure job is complete before returning */
374 	flush_workqueue(roce_gid_mgmt_wq);
375 }
376 
377 static int
inetaddr_event(struct notifier_block * this,unsigned long event,void * ptr)378 inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
379 {
380 	struct net_device *ndev = ptr;
381 
382 	switch (event) {
383 	case NETDEV_UNREGISTER:
384 		roce_gid_delete_all_event(ndev);
385 		break;
386 	case NETDEV_REGISTER:
387 	case NETDEV_CHANGEADDR:
388 	case NETDEV_CHANGEIFADDR:
389 		roce_gid_queue_scan_event(ndev);
390 		break;
391 	default:
392 		break;
393 	}
394 	return NOTIFY_DONE;
395 }
396 
/*
 * Notifier block delivering events to inetaddr_event().  The same block
 * is registered with both the inetaddr and the netdevice notifier in
 * roce_gid_mgmt_init().
 */
static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};
400 
/*
 * Tag of the ifnet_event handler registered in roce_gid_mgmt_init();
 * used to deregister the handler in roce_gid_mgmt_cleanup().
 */
static eventhandler_tag eh_ifnet_event;
402 
403 static void
roce_ifnet_event(void * arg,struct ifnet * ifp,int event)404 roce_ifnet_event(void *arg, struct ifnet *ifp, int event)
405 {
406 	if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
407 		return;
408 
409 	/* make sure GID table is reloaded */
410 	roce_gid_delete_all_event(ifp);
411 	roce_gid_queue_scan_event(ifp);
412 }
413 
414 static void
roce_rescan_device_handler(struct work_struct * _work)415 roce_rescan_device_handler(struct work_struct *_work)
416 {
417 	struct roce_rescan_work *work =
418 	    container_of(_work, struct roce_rescan_work, work);
419 
420 	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
421 	    roce_gid_update_addr_callback, NULL);
422 	kfree(work);
423 }
424 
425 /* Caller must flush system workqueue before removing the ib_device */
roce_rescan_device(struct ib_device * ib_dev)426 int roce_rescan_device(struct ib_device *ib_dev)
427 {
428 	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);
429 
430 	if (!work)
431 		return -ENOMEM;
432 
433 	work->ib_dev = ib_dev;
434 	INIT_WORK(&work->work, roce_rescan_device_handler);
435 	queue_work(roce_gid_mgmt_wq, &work->work);
436 
437 	return 0;
438 }
439 
roce_gid_mgmt_init(void)440 int __init roce_gid_mgmt_init(void)
441 {
442 	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
443 	if (!roce_gid_mgmt_wq) {
444 		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
445 		return -ENOMEM;
446 	}
447 
448 	register_inetaddr_notifier(&nb_inetaddr);
449 
450 	/*
451 	 * We rely on the netdevice notifier to enumerate all existing
452 	 * devices in the system. Register to this notifier last to
453 	 * make sure we will not miss any IP add/del callbacks.
454 	 */
455 	register_netdevice_notifier(&nb_inetaddr);
456 
457 	eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
458 	    roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
459 
460 	return 0;
461 }
462 
/*
 * Tear down RoCE GID management.
 *
 * Event sources are removed first (the ifnet_event handler, then both
 * notifier registrations), so that no new work is queued from them.
 * Afterwards the workqueue is drained and destroyed.
 */
void __exit roce_gid_mgmt_cleanup(void)
{

	/* the handler tag is NULL if no handler was registered */
	if (eh_ifnet_event != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);

	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all gid deletion tasks complete before we go down,
	 * to avoid any reference to free'd memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}
482