xref: /linux-6.15/net/openvswitch/datapath.c (revision 26ee65e6)
1 /*
2  * Copyright (c) 2007-2012 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18 
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 
21 #include <linux/init.h>
22 #include <linux/module.h>
23 #include <linux/if_arp.h>
24 #include <linux/if_vlan.h>
25 #include <linux/in.h>
26 #include <linux/ip.h>
27 #include <linux/jhash.h>
28 #include <linux/delay.h>
29 #include <linux/time.h>
30 #include <linux/etherdevice.h>
31 #include <linux/genetlink.h>
32 #include <linux/kernel.h>
33 #include <linux/kthread.h>
34 #include <linux/mutex.h>
35 #include <linux/percpu.h>
36 #include <linux/rcupdate.h>
37 #include <linux/tcp.h>
38 #include <linux/udp.h>
39 #include <linux/ethtool.h>
40 #include <linux/wait.h>
41 #include <asm/div64.h>
42 #include <linux/highmem.h>
43 #include <linux/netfilter_bridge.h>
44 #include <linux/netfilter_ipv4.h>
45 #include <linux/inetdevice.h>
46 #include <linux/list.h>
47 #include <linux/lockdep.h>
48 #include <linux/openvswitch.h>
49 #include <linux/rculist.h>
50 #include <linux/dmi.h>
51 #include <linux/workqueue.h>
52 #include <net/genetlink.h>
53 #include <net/net_namespace.h>
54 #include <net/netns/generic.h>
55 
56 #include "datapath.h"
57 #include "flow.h"
58 #include "vport-internal_dev.h"
59 
60 
61 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
62 static void rehash_flow_table(struct work_struct *work);
63 static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
64 
65 int ovs_net_id __read_mostly;
66 
67 static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
68 		       struct genl_multicast_group *grp)
69 {
70 	genl_notify(skb, genl_info_net(info), info->snd_portid,
71 		    grp->id, info->nlhdr, GFP_KERNEL);
72 }
73 
/**
 * DOC: Locking:
 *
 * All writes, whether to device state (adding/removing datapaths and ports,
 * set operations on vports, etc.) or to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_mutex, which is taken with ovs_lock().
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
90 
91 static DEFINE_MUTEX(ovs_mutex);
92 
93 void ovs_lock(void)
94 {
95 	mutex_lock(&ovs_mutex);
96 }
97 
98 void ovs_unlock(void)
99 {
100 	mutex_unlock(&ovs_mutex);
101 }
102 
#ifdef CONFIG_LOCKDEP
/*
 * Report whether ovs_mutex is held, as seen by lockdep.  When lock debugging
 * is switched off (debug_locks is zero) the answer is unconditionally 1.
 */
int lockdep_ovsl_is_held(void)
{
	if (!debug_locks)
		return 1;

	return lockdep_is_held(&ovs_mutex);
}
#endif
112 
/* Forward declarations for helpers defined further down in this file. */
static struct vport *new_vport(const struct vport_parms *parms);
static int queue_gso_packets(struct net *net, int dp_ifindex,
			     struct sk_buff *skb,
			     const struct dp_upcall_info *upcall_info);
static int queue_userspace_packet(struct net *net, int dp_ifindex,
				  struct sk_buff *skb,
				  const struct dp_upcall_info *upcall_info);
119 
120 /* Must be called with rcu_read_lock or ovs_mutex. */
121 static struct datapath *get_dp(struct net *net, int dp_ifindex)
122 {
123 	struct datapath *dp = NULL;
124 	struct net_device *dev;
125 
126 	rcu_read_lock();
127 	dev = dev_get_by_index_rcu(net, dp_ifindex);
128 	if (dev) {
129 		struct vport *vport = ovs_internal_dev_get_vport(dev);
130 		if (vport)
131 			dp = vport->dp;
132 	}
133 	rcu_read_unlock();
134 
135 	return dp;
136 }
137 
138 /* Must be called with rcu_read_lock or ovs_mutex. */
139 const char *ovs_dp_name(const struct datapath *dp)
140 {
141 	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
142 	return vport->ops->get_name(vport);
143 }
144 
145 static int get_dpifindex(struct datapath *dp)
146 {
147 	struct vport *local;
148 	int ifindex;
149 
150 	rcu_read_lock();
151 
152 	local = ovs_vport_rcu(dp, OVSP_LOCAL);
153 	if (local)
154 		ifindex = local->ops->get_ifindex(local);
155 	else
156 		ifindex = 0;
157 
158 	rcu_read_unlock();
159 
160 	return ifindex;
161 }
162 
163 static void destroy_dp_rcu(struct rcu_head *rcu)
164 {
165 	struct datapath *dp = container_of(rcu, struct datapath, rcu);
166 
167 	ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
168 	free_percpu(dp->stats_percpu);
169 	release_net(ovs_dp_get_net(dp));
170 	kfree(dp->ports);
171 	kfree(dp);
172 }
173 
174 static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
175 					    u16 port_no)
176 {
177 	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
178 }
179 
180 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
181 {
182 	struct vport *vport;
183 	struct hlist_head *head;
184 
185 	head = vport_hash_bucket(dp, port_no);
186 	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
187 		if (vport->port_no == port_no)
188 			return vport;
189 	}
190 	return NULL;
191 }
192 
193 /* Called with ovs_mutex. */
194 static struct vport *new_vport(const struct vport_parms *parms)
195 {
196 	struct vport *vport;
197 
198 	vport = ovs_vport_add(parms);
199 	if (!IS_ERR(vport)) {
200 		struct datapath *dp = parms->dp;
201 		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
202 
203 		hlist_add_head_rcu(&vport->dp_hash_node, head);
204 	}
205 	return vport;
206 }
207 
208 void ovs_dp_detach_port(struct vport *p)
209 {
210 	ASSERT_OVSL();
211 
212 	/* First drop references to device. */
213 	hlist_del_rcu(&p->dp_hash_node);
214 
215 	/* Then destroy it. */
216 	ovs_vport_del(p);
217 }
218 
219 /* Must be called with rcu_read_lock. */
220 void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
221 {
222 	struct datapath *dp = p->dp;
223 	struct sw_flow *flow;
224 	struct dp_stats_percpu *stats;
225 	struct sw_flow_key key;
226 	u64 *stats_counter;
227 	int error;
228 	int key_len;
229 
230 	stats = this_cpu_ptr(dp->stats_percpu);
231 
232 	/* Extract flow from 'skb' into 'key'. */
233 	error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
234 	if (unlikely(error)) {
235 		kfree_skb(skb);
236 		return;
237 	}
238 
239 	/* Look up flow. */
240 	flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
241 	if (unlikely(!flow)) {
242 		struct dp_upcall_info upcall;
243 
244 		upcall.cmd = OVS_PACKET_CMD_MISS;
245 		upcall.key = &key;
246 		upcall.userdata = NULL;
247 		upcall.portid = p->upcall_portid;
248 		ovs_dp_upcall(dp, skb, &upcall);
249 		consume_skb(skb);
250 		stats_counter = &stats->n_missed;
251 		goto out;
252 	}
253 
254 	OVS_CB(skb)->flow = flow;
255 
256 	stats_counter = &stats->n_hit;
257 	ovs_flow_used(OVS_CB(skb)->flow, skb);
258 	ovs_execute_actions(dp, skb);
259 
260 out:
261 	/* Update datapath statistics. */
262 	u64_stats_update_begin(&stats->sync);
263 	(*stats_counter)++;
264 	u64_stats_update_end(&stats->sync);
265 }
266 
267 static struct genl_family dp_packet_genl_family = {
268 	.id = GENL_ID_GENERATE,
269 	.hdrsize = sizeof(struct ovs_header),
270 	.name = OVS_PACKET_FAMILY,
271 	.version = OVS_PACKET_VERSION,
272 	.maxattr = OVS_PACKET_ATTR_MAX,
273 	.netnsok = true
274 };
275 
276 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
277 		  const struct dp_upcall_info *upcall_info)
278 {
279 	struct dp_stats_percpu *stats;
280 	int dp_ifindex;
281 	int err;
282 
283 	if (upcall_info->portid == 0) {
284 		err = -ENOTCONN;
285 		goto err;
286 	}
287 
288 	dp_ifindex = get_dpifindex(dp);
289 	if (!dp_ifindex) {
290 		err = -ENODEV;
291 		goto err;
292 	}
293 
294 	if (!skb_is_gso(skb))
295 		err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
296 	else
297 		err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
298 	if (err)
299 		goto err;
300 
301 	return 0;
302 
303 err:
304 	stats = this_cpu_ptr(dp->stats_percpu);
305 
306 	u64_stats_update_begin(&stats->sync);
307 	stats->n_lost++;
308 	u64_stats_update_end(&stats->sync);
309 
310 	return err;
311 }
312 
313 static int queue_gso_packets(struct net *net, int dp_ifindex,
314 			     struct sk_buff *skb,
315 			     const struct dp_upcall_info *upcall_info)
316 {
317 	unsigned short gso_type = skb_shinfo(skb)->gso_type;
318 	struct dp_upcall_info later_info;
319 	struct sw_flow_key later_key;
320 	struct sk_buff *segs, *nskb;
321 	int err;
322 
323 	segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
324 	if (IS_ERR(segs))
325 		return PTR_ERR(segs);
326 
327 	/* Queue all of the segments. */
328 	skb = segs;
329 	do {
330 		err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
331 		if (err)
332 			break;
333 
334 		if (skb == segs && gso_type & SKB_GSO_UDP) {
335 			/* The initial flow key extracted by ovs_flow_extract()
336 			 * in this case is for a first fragment, so we need to
337 			 * properly mark later fragments.
338 			 */
339 			later_key = *upcall_info->key;
340 			later_key.ip.frag = OVS_FRAG_TYPE_LATER;
341 
342 			later_info = *upcall_info;
343 			later_info.key = &later_key;
344 			upcall_info = &later_info;
345 		}
346 	} while ((skb = skb->next));
347 
348 	/* Free all of the segments. */
349 	skb = segs;
350 	do {
351 		nskb = skb->next;
352 		if (err)
353 			kfree_skb(skb);
354 		else
355 			consume_skb(skb);
356 	} while ((skb = nskb));
357 	return err;
358 }
359 
360 static size_t key_attr_size(void)
361 {
362 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
363 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
364 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
365 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
366 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
367 		+ nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
368 		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
369 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
370 		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
371 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
372 		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
373 }
374 
375 static size_t upcall_msg_size(const struct sk_buff *skb,
376 			      const struct nlattr *userdata)
377 {
378 	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
379 		+ nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
380 		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
381 
382 	/* OVS_PACKET_ATTR_USERDATA */
383 	if (userdata)
384 		size += NLA_ALIGN(userdata->nla_len);
385 
386 	return size;
387 }
388 
389 static int queue_userspace_packet(struct net *net, int dp_ifindex,
390 				  struct sk_buff *skb,
391 				  const struct dp_upcall_info *upcall_info)
392 {
393 	struct ovs_header *upcall;
394 	struct sk_buff *nskb = NULL;
395 	struct sk_buff *user_skb; /* to be queued to userspace */
396 	struct nlattr *nla;
397 	int err;
398 
399 	if (vlan_tx_tag_present(skb)) {
400 		nskb = skb_clone(skb, GFP_ATOMIC);
401 		if (!nskb)
402 			return -ENOMEM;
403 
404 		nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
405 		if (!nskb)
406 			return -ENOMEM;
407 
408 		nskb->vlan_tci = 0;
409 		skb = nskb;
410 	}
411 
412 	if (nla_attr_size(skb->len) > USHRT_MAX) {
413 		err = -EFBIG;
414 		goto out;
415 	}
416 
417 	user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
418 	if (!user_skb) {
419 		err = -ENOMEM;
420 		goto out;
421 	}
422 
423 	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
424 			     0, upcall_info->cmd);
425 	upcall->dp_ifindex = dp_ifindex;
426 
427 	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
428 	ovs_flow_to_nlattrs(upcall_info->key, user_skb);
429 	nla_nest_end(user_skb, nla);
430 
431 	if (upcall_info->userdata)
432 		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
433 			  nla_len(upcall_info->userdata),
434 			  nla_data(upcall_info->userdata));
435 
436 	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
437 
438 	skb_copy_and_csum_dev(skb, nla_data(nla));
439 
440 	genlmsg_end(user_skb, upcall);
441 	err = genlmsg_unicast(net, user_skb, upcall_info->portid);
442 
443 out:
444 	kfree_skb(nskb);
445 	return err;
446 }
447 
448 /* Called with ovs_mutex. */
449 static int flush_flows(struct datapath *dp)
450 {
451 	struct flow_table *old_table;
452 	struct flow_table *new_table;
453 
454 	old_table = ovsl_dereference(dp->table);
455 	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
456 	if (!new_table)
457 		return -ENOMEM;
458 
459 	rcu_assign_pointer(dp->table, new_table);
460 
461 	ovs_flow_tbl_deferred_destroy(old_table);
462 	return 0;
463 }
464 
465 static int validate_actions(const struct nlattr *attr,
466 				const struct sw_flow_key *key, int depth);
467 
468 static int validate_sample(const struct nlattr *attr,
469 				const struct sw_flow_key *key, int depth)
470 {
471 	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
472 	const struct nlattr *probability, *actions;
473 	const struct nlattr *a;
474 	int rem;
475 
476 	memset(attrs, 0, sizeof(attrs));
477 	nla_for_each_nested(a, attr, rem) {
478 		int type = nla_type(a);
479 		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
480 			return -EINVAL;
481 		attrs[type] = a;
482 	}
483 	if (rem)
484 		return -EINVAL;
485 
486 	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
487 	if (!probability || nla_len(probability) != sizeof(u32))
488 		return -EINVAL;
489 
490 	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
491 	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
492 		return -EINVAL;
493 	return validate_actions(actions, key, depth + 1);
494 }
495 
496 static int validate_tp_port(const struct sw_flow_key *flow_key)
497 {
498 	if (flow_key->eth.type == htons(ETH_P_IP)) {
499 		if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
500 			return 0;
501 	} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
502 		if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
503 			return 0;
504 	}
505 
506 	return -EINVAL;
507 }
508 
509 static int validate_set(const struct nlattr *a,
510 			const struct sw_flow_key *flow_key)
511 {
512 	const struct nlattr *ovs_key = nla_data(a);
513 	int key_type = nla_type(ovs_key);
514 
515 	/* There can be only one key in a action */
516 	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
517 		return -EINVAL;
518 
519 	if (key_type > OVS_KEY_ATTR_MAX ||
520 	    nla_len(ovs_key) != ovs_key_lens[key_type])
521 		return -EINVAL;
522 
523 	switch (key_type) {
524 	const struct ovs_key_ipv4 *ipv4_key;
525 	const struct ovs_key_ipv6 *ipv6_key;
526 
527 	case OVS_KEY_ATTR_PRIORITY:
528 	case OVS_KEY_ATTR_SKB_MARK:
529 	case OVS_KEY_ATTR_ETHERNET:
530 		break;
531 
532 	case OVS_KEY_ATTR_IPV4:
533 		if (flow_key->eth.type != htons(ETH_P_IP))
534 			return -EINVAL;
535 
536 		if (!flow_key->ip.proto)
537 			return -EINVAL;
538 
539 		ipv4_key = nla_data(ovs_key);
540 		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
541 			return -EINVAL;
542 
543 		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
544 			return -EINVAL;
545 
546 		break;
547 
548 	case OVS_KEY_ATTR_IPV6:
549 		if (flow_key->eth.type != htons(ETH_P_IPV6))
550 			return -EINVAL;
551 
552 		if (!flow_key->ip.proto)
553 			return -EINVAL;
554 
555 		ipv6_key = nla_data(ovs_key);
556 		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
557 			return -EINVAL;
558 
559 		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
560 			return -EINVAL;
561 
562 		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
563 			return -EINVAL;
564 
565 		break;
566 
567 	case OVS_KEY_ATTR_TCP:
568 		if (flow_key->ip.proto != IPPROTO_TCP)
569 			return -EINVAL;
570 
571 		return validate_tp_port(flow_key);
572 
573 	case OVS_KEY_ATTR_UDP:
574 		if (flow_key->ip.proto != IPPROTO_UDP)
575 			return -EINVAL;
576 
577 		return validate_tp_port(flow_key);
578 
579 	default:
580 		return -EINVAL;
581 	}
582 
583 	return 0;
584 }
585 
586 static int validate_userspace(const struct nlattr *attr)
587 {
588 	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =	{
589 		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
590 		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
591 	};
592 	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
593 	int error;
594 
595 	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
596 				 attr, userspace_policy);
597 	if (error)
598 		return error;
599 
600 	if (!a[OVS_USERSPACE_ATTR_PID] ||
601 	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
602 		return -EINVAL;
603 
604 	return 0;
605 }
606 
607 static int validate_actions(const struct nlattr *attr,
608 				const struct sw_flow_key *key,  int depth)
609 {
610 	const struct nlattr *a;
611 	int rem, err;
612 
613 	if (depth >= SAMPLE_ACTION_DEPTH)
614 		return -EOVERFLOW;
615 
616 	nla_for_each_nested(a, attr, rem) {
617 		/* Expected argument lengths, (u32)-1 for variable length. */
618 		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
619 			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
620 			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
621 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
622 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
623 			[OVS_ACTION_ATTR_SET] = (u32)-1,
624 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
625 		};
626 		const struct ovs_action_push_vlan *vlan;
627 		int type = nla_type(a);
628 
629 		if (type > OVS_ACTION_ATTR_MAX ||
630 		    (action_lens[type] != nla_len(a) &&
631 		     action_lens[type] != (u32)-1))
632 			return -EINVAL;
633 
634 		switch (type) {
635 		case OVS_ACTION_ATTR_UNSPEC:
636 			return -EINVAL;
637 
638 		case OVS_ACTION_ATTR_USERSPACE:
639 			err = validate_userspace(a);
640 			if (err)
641 				return err;
642 			break;
643 
644 		case OVS_ACTION_ATTR_OUTPUT:
645 			if (nla_get_u32(a) >= DP_MAX_PORTS)
646 				return -EINVAL;
647 			break;
648 
649 
650 		case OVS_ACTION_ATTR_POP_VLAN:
651 			break;
652 
653 		case OVS_ACTION_ATTR_PUSH_VLAN:
654 			vlan = nla_data(a);
655 			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
656 				return -EINVAL;
657 			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
658 				return -EINVAL;
659 			break;
660 
661 		case OVS_ACTION_ATTR_SET:
662 			err = validate_set(a, key);
663 			if (err)
664 				return err;
665 			break;
666 
667 		case OVS_ACTION_ATTR_SAMPLE:
668 			err = validate_sample(a, key, depth);
669 			if (err)
670 				return err;
671 			break;
672 
673 		default:
674 			return -EINVAL;
675 		}
676 	}
677 
678 	if (rem > 0)
679 		return -EINVAL;
680 
681 	return 0;
682 }
683 
684 static void clear_stats(struct sw_flow *flow)
685 {
686 	flow->used = 0;
687 	flow->tcp_flags = 0;
688 	flow->packet_count = 0;
689 	flow->byte_count = 0;
690 }
691 
692 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
693 {
694 	struct ovs_header *ovs_header = info->userhdr;
695 	struct nlattr **a = info->attrs;
696 	struct sw_flow_actions *acts;
697 	struct sk_buff *packet;
698 	struct sw_flow *flow;
699 	struct datapath *dp;
700 	struct ethhdr *eth;
701 	int len;
702 	int err;
703 	int key_len;
704 
705 	err = -EINVAL;
706 	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
707 	    !a[OVS_PACKET_ATTR_ACTIONS])
708 		goto err;
709 
710 	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
711 	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
712 	err = -ENOMEM;
713 	if (!packet)
714 		goto err;
715 	skb_reserve(packet, NET_IP_ALIGN);
716 
717 	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
718 
719 	skb_reset_mac_header(packet);
720 	eth = eth_hdr(packet);
721 
722 	/* Normally, setting the skb 'protocol' field would be handled by a
723 	 * call to eth_type_trans(), but it assumes there's a sending
724 	 * device, which we may not have. */
725 	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
726 		packet->protocol = eth->h_proto;
727 	else
728 		packet->protocol = htons(ETH_P_802_2);
729 
730 	/* Build an sw_flow for sending this packet. */
731 	flow = ovs_flow_alloc();
732 	err = PTR_ERR(flow);
733 	if (IS_ERR(flow))
734 		goto err_kfree_skb;
735 
736 	err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
737 	if (err)
738 		goto err_flow_free;
739 
740 	err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
741 					     &flow->key.phy.skb_mark,
742 					     &flow->key.phy.in_port,
743 					     a[OVS_PACKET_ATTR_KEY]);
744 	if (err)
745 		goto err_flow_free;
746 
747 	err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
748 	if (err)
749 		goto err_flow_free;
750 
751 	flow->hash = ovs_flow_hash(&flow->key, key_len);
752 
753 	acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
754 	err = PTR_ERR(acts);
755 	if (IS_ERR(acts))
756 		goto err_flow_free;
757 	rcu_assign_pointer(flow->sf_acts, acts);
758 
759 	OVS_CB(packet)->flow = flow;
760 	packet->priority = flow->key.phy.priority;
761 	packet->mark = flow->key.phy.skb_mark;
762 
763 	rcu_read_lock();
764 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
765 	err = -ENODEV;
766 	if (!dp)
767 		goto err_unlock;
768 
769 	local_bh_disable();
770 	err = ovs_execute_actions(dp, packet);
771 	local_bh_enable();
772 	rcu_read_unlock();
773 
774 	ovs_flow_free(flow);
775 	return err;
776 
777 err_unlock:
778 	rcu_read_unlock();
779 err_flow_free:
780 	ovs_flow_free(flow);
781 err_kfree_skb:
782 	kfree_skb(packet);
783 err:
784 	return err;
785 }
786 
787 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
788 	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
789 	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
790 	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
791 };
792 
793 static struct genl_ops dp_packet_genl_ops[] = {
794 	{ .cmd = OVS_PACKET_CMD_EXECUTE,
795 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
796 	  .policy = packet_policy,
797 	  .doit = ovs_packet_cmd_execute
798 	}
799 };
800 
801 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
802 {
803 	int i;
804 	struct flow_table *table = ovsl_dereference(dp->table);
805 
806 	stats->n_flows = ovs_flow_tbl_count(table);
807 
808 	stats->n_hit = stats->n_missed = stats->n_lost = 0;
809 	for_each_possible_cpu(i) {
810 		const struct dp_stats_percpu *percpu_stats;
811 		struct dp_stats_percpu local_stats;
812 		unsigned int start;
813 
814 		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
815 
816 		do {
817 			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
818 			local_stats = *percpu_stats;
819 		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
820 
821 		stats->n_hit += local_stats.n_hit;
822 		stats->n_missed += local_stats.n_missed;
823 		stats->n_lost += local_stats.n_lost;
824 	}
825 }
826 
827 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
828 	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
829 	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
830 	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
831 };
832 
833 static struct genl_family dp_flow_genl_family = {
834 	.id = GENL_ID_GENERATE,
835 	.hdrsize = sizeof(struct ovs_header),
836 	.name = OVS_FLOW_FAMILY,
837 	.version = OVS_FLOW_VERSION,
838 	.maxattr = OVS_FLOW_ATTR_MAX,
839 	.netnsok = true
840 };
841 
842 static struct genl_multicast_group ovs_dp_flow_multicast_group = {
843 	.name = OVS_FLOW_MCGROUP
844 };
845 
846 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
847 {
848 	return NLMSG_ALIGN(sizeof(struct ovs_header))
849 		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
850 		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
851 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
852 		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
853 		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
854 }
855 
856 /* Called with ovs_mutex. */
857 static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
858 				  struct sk_buff *skb, u32 portid,
859 				  u32 seq, u32 flags, u8 cmd)
860 {
861 	const int skb_orig_len = skb->len;
862 	const struct sw_flow_actions *sf_acts;
863 	struct ovs_flow_stats stats;
864 	struct ovs_header *ovs_header;
865 	struct nlattr *nla;
866 	unsigned long used;
867 	u8 tcp_flags;
868 	int err;
869 
870 	sf_acts = ovsl_dereference(flow->sf_acts);
871 
872 	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
873 	if (!ovs_header)
874 		return -EMSGSIZE;
875 
876 	ovs_header->dp_ifindex = get_dpifindex(dp);
877 
878 	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
879 	if (!nla)
880 		goto nla_put_failure;
881 	err = ovs_flow_to_nlattrs(&flow->key, skb);
882 	if (err)
883 		goto error;
884 	nla_nest_end(skb, nla);
885 
886 	spin_lock_bh(&flow->lock);
887 	used = flow->used;
888 	stats.n_packets = flow->packet_count;
889 	stats.n_bytes = flow->byte_count;
890 	tcp_flags = flow->tcp_flags;
891 	spin_unlock_bh(&flow->lock);
892 
893 	if (used &&
894 	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
895 		goto nla_put_failure;
896 
897 	if (stats.n_packets &&
898 	    nla_put(skb, OVS_FLOW_ATTR_STATS,
899 		    sizeof(struct ovs_flow_stats), &stats))
900 		goto nla_put_failure;
901 
902 	if (tcp_flags &&
903 	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
904 		goto nla_put_failure;
905 
906 	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
907 	 * this is the first flow to be dumped into 'skb'.  This is unusual for
908 	 * Netlink but individual action lists can be longer than
909 	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
910 	 * The userspace caller can always fetch the actions separately if it
911 	 * really wants them.  (Most userspace callers in fact don't care.)
912 	 *
913 	 * This can only fail for dump operations because the skb is always
914 	 * properly sized for single flows.
915 	 */
916 	err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
917 		      sf_acts->actions);
918 	if (err < 0 && skb_orig_len)
919 		goto error;
920 
921 	return genlmsg_end(skb, ovs_header);
922 
923 nla_put_failure:
924 	err = -EMSGSIZE;
925 error:
926 	genlmsg_cancel(skb, ovs_header);
927 	return err;
928 }
929 
930 static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
931 {
932 	const struct sw_flow_actions *sf_acts;
933 
934 	sf_acts = ovsl_dereference(flow->sf_acts);
935 
936 	return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
937 }
938 
939 static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
940 					       struct datapath *dp,
941 					       u32 portid, u32 seq, u8 cmd)
942 {
943 	struct sk_buff *skb;
944 	int retval;
945 
946 	skb = ovs_flow_cmd_alloc_info(flow);
947 	if (!skb)
948 		return ERR_PTR(-ENOMEM);
949 
950 	retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
951 	BUG_ON(retval < 0);
952 	return skb;
953 }
954 
955 static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
956 {
957 	struct nlattr **a = info->attrs;
958 	struct ovs_header *ovs_header = info->userhdr;
959 	struct sw_flow_key key;
960 	struct sw_flow *flow;
961 	struct sk_buff *reply;
962 	struct datapath *dp;
963 	struct flow_table *table;
964 	int error;
965 	int key_len;
966 
967 	/* Extract key. */
968 	error = -EINVAL;
969 	if (!a[OVS_FLOW_ATTR_KEY])
970 		goto error;
971 	error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
972 	if (error)
973 		goto error;
974 
975 	/* Validate actions. */
976 	if (a[OVS_FLOW_ATTR_ACTIONS]) {
977 		error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0);
978 		if (error)
979 			goto error;
980 	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
981 		error = -EINVAL;
982 		goto error;
983 	}
984 
985 	ovs_lock();
986 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
987 	error = -ENODEV;
988 	if (!dp)
989 		goto err_unlock_ovs;
990 
991 	table = ovsl_dereference(dp->table);
992 	flow = ovs_flow_tbl_lookup(table, &key, key_len);
993 	if (!flow) {
994 		struct sw_flow_actions *acts;
995 
996 		/* Bail out if we're not allowed to create a new flow. */
997 		error = -ENOENT;
998 		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
999 			goto err_unlock_ovs;
1000 
1001 		/* Expand table, if necessary, to make room. */
1002 		if (ovs_flow_tbl_need_to_expand(table)) {
1003 			struct flow_table *new_table;
1004 
1005 			new_table = ovs_flow_tbl_expand(table);
1006 			if (!IS_ERR(new_table)) {
1007 				rcu_assign_pointer(dp->table, new_table);
1008 				ovs_flow_tbl_deferred_destroy(table);
1009 				table = ovsl_dereference(dp->table);
1010 			}
1011 		}
1012 
1013 		/* Allocate flow. */
1014 		flow = ovs_flow_alloc();
1015 		if (IS_ERR(flow)) {
1016 			error = PTR_ERR(flow);
1017 			goto err_unlock_ovs;
1018 		}
1019 		flow->key = key;
1020 		clear_stats(flow);
1021 
1022 		/* Obtain actions. */
1023 		acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
1024 		error = PTR_ERR(acts);
1025 		if (IS_ERR(acts))
1026 			goto error_free_flow;
1027 		rcu_assign_pointer(flow->sf_acts, acts);
1028 
1029 		/* Put flow in bucket. */
1030 		flow->hash = ovs_flow_hash(&key, key_len);
1031 		ovs_flow_tbl_insert(table, flow);
1032 
1033 		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1034 						info->snd_seq,
1035 						OVS_FLOW_CMD_NEW);
1036 	} else {
1037 		/* We found a matching flow. */
1038 		struct sw_flow_actions *old_acts;
1039 		struct nlattr *acts_attrs;
1040 
1041 		/* Bail out if we're not allowed to modify an existing flow.
1042 		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1043 		 * because Generic Netlink treats the latter as a dump
1044 		 * request.  We also accept NLM_F_EXCL in case that bug ever
1045 		 * gets fixed.
1046 		 */
1047 		error = -EEXIST;
1048 		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1049 		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1050 			goto err_unlock_ovs;
1051 
1052 		/* Update actions. */
1053 		old_acts = ovsl_dereference(flow->sf_acts);
1054 		acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
1055 		if (acts_attrs &&
1056 		   (old_acts->actions_len != nla_len(acts_attrs) ||
1057 		   memcmp(old_acts->actions, nla_data(acts_attrs),
1058 			  old_acts->actions_len))) {
1059 			struct sw_flow_actions *new_acts;
1060 
1061 			new_acts = ovs_flow_actions_alloc(acts_attrs);
1062 			error = PTR_ERR(new_acts);
1063 			if (IS_ERR(new_acts))
1064 				goto err_unlock_ovs;
1065 
1066 			rcu_assign_pointer(flow->sf_acts, new_acts);
1067 			ovs_flow_deferred_free_acts(old_acts);
1068 		}
1069 
1070 		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1071 					       info->snd_seq, OVS_FLOW_CMD_NEW);
1072 
1073 		/* Clear stats. */
1074 		if (a[OVS_FLOW_ATTR_CLEAR]) {
1075 			spin_lock_bh(&flow->lock);
1076 			clear_stats(flow);
1077 			spin_unlock_bh(&flow->lock);
1078 		}
1079 	}
1080 	ovs_unlock();
1081 
1082 	if (!IS_ERR(reply))
1083 		ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1084 	else
1085 		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1086 				ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
1087 	return 0;
1088 
1089 error_free_flow:
1090 	ovs_flow_free(flow);
1091 err_unlock_ovs:
1092 	ovs_unlock();
1093 error:
1094 	return error;
1095 }
1096 
1097 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1098 {
1099 	struct nlattr **a = info->attrs;
1100 	struct ovs_header *ovs_header = info->userhdr;
1101 	struct sw_flow_key key;
1102 	struct sk_buff *reply;
1103 	struct sw_flow *flow;
1104 	struct datapath *dp;
1105 	struct flow_table *table;
1106 	int err;
1107 	int key_len;
1108 
1109 	if (!a[OVS_FLOW_ATTR_KEY])
1110 		return -EINVAL;
1111 	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1112 	if (err)
1113 		return err;
1114 
1115 	ovs_lock();
1116 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1117 	if (!dp) {
1118 		err = -ENODEV;
1119 		goto unlock;
1120 	}
1121 
1122 	table = ovsl_dereference(dp->table);
1123 	flow = ovs_flow_tbl_lookup(table, &key, key_len);
1124 	if (!flow) {
1125 		err = -ENOENT;
1126 		goto unlock;
1127 	}
1128 
1129 	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1130 					info->snd_seq, OVS_FLOW_CMD_NEW);
1131 	if (IS_ERR(reply)) {
1132 		err = PTR_ERR(reply);
1133 		goto unlock;
1134 	}
1135 
1136 	ovs_unlock();
1137 	return genlmsg_reply(reply, info);
1138 unlock:
1139 	ovs_unlock();
1140 	return err;
1141 }
1142 
1143 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1144 {
1145 	struct nlattr **a = info->attrs;
1146 	struct ovs_header *ovs_header = info->userhdr;
1147 	struct sw_flow_key key;
1148 	struct sk_buff *reply;
1149 	struct sw_flow *flow;
1150 	struct datapath *dp;
1151 	struct flow_table *table;
1152 	int err;
1153 	int key_len;
1154 
1155 	ovs_lock();
1156 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1157 	if (!dp) {
1158 		err = -ENODEV;
1159 		goto unlock;
1160 	}
1161 
1162 	if (!a[OVS_FLOW_ATTR_KEY]) {
1163 		err = flush_flows(dp);
1164 		goto unlock;
1165 	}
1166 	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1167 	if (err)
1168 		goto unlock;
1169 
1170 	table = ovsl_dereference(dp->table);
1171 	flow = ovs_flow_tbl_lookup(table, &key, key_len);
1172 	if (!flow) {
1173 		err = -ENOENT;
1174 		goto unlock;
1175 	}
1176 
1177 	reply = ovs_flow_cmd_alloc_info(flow);
1178 	if (!reply) {
1179 		err = -ENOMEM;
1180 		goto unlock;
1181 	}
1182 
1183 	ovs_flow_tbl_remove(table, flow);
1184 
1185 	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
1186 				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1187 	BUG_ON(err < 0);
1188 
1189 	ovs_flow_deferred_free(flow);
1190 	ovs_unlock();
1191 
1192 	ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1193 	return 0;
1194 unlock:
1195 	ovs_unlock();
1196 	return err;
1197 }
1198 
1199 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1200 {
1201 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1202 	struct datapath *dp;
1203 	struct flow_table *table;
1204 
1205 	ovs_lock();
1206 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1207 	if (!dp) {
1208 		ovs_unlock();
1209 		return -ENODEV;
1210 	}
1211 
1212 	table = ovsl_dereference(dp->table);
1213 
1214 	for (;;) {
1215 		struct sw_flow *flow;
1216 		u32 bucket, obj;
1217 
1218 		bucket = cb->args[0];
1219 		obj = cb->args[1];
1220 		flow = ovs_flow_tbl_next(table, &bucket, &obj);
1221 		if (!flow)
1222 			break;
1223 
1224 		if (ovs_flow_cmd_fill_info(flow, dp, skb,
1225 					   NETLINK_CB(cb->skb).portid,
1226 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1227 					   OVS_FLOW_CMD_NEW) < 0)
1228 			break;
1229 
1230 		cb->args[0] = bucket;
1231 		cb->args[1] = obj;
1232 	}
1233 	ovs_unlock();
1234 	return skb->len;
1235 }
1236 
1237 static struct genl_ops dp_flow_genl_ops[] = {
1238 	{ .cmd = OVS_FLOW_CMD_NEW,
1239 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1240 	  .policy = flow_policy,
1241 	  .doit = ovs_flow_cmd_new_or_set
1242 	},
1243 	{ .cmd = OVS_FLOW_CMD_DEL,
1244 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1245 	  .policy = flow_policy,
1246 	  .doit = ovs_flow_cmd_del
1247 	},
1248 	{ .cmd = OVS_FLOW_CMD_GET,
1249 	  .flags = 0,		    /* OK for unprivileged users. */
1250 	  .policy = flow_policy,
1251 	  .doit = ovs_flow_cmd_get,
1252 	  .dumpit = ovs_flow_cmd_dump
1253 	},
1254 	{ .cmd = OVS_FLOW_CMD_SET,
1255 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1256 	  .policy = flow_policy,
1257 	  .doit = ovs_flow_cmd_new_or_set,
1258 	},
1259 };
1260 
1261 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1262 	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1263 	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1264 };
1265 
1266 static struct genl_family dp_datapath_genl_family = {
1267 	.id = GENL_ID_GENERATE,
1268 	.hdrsize = sizeof(struct ovs_header),
1269 	.name = OVS_DATAPATH_FAMILY,
1270 	.version = OVS_DATAPATH_VERSION,
1271 	.maxattr = OVS_DP_ATTR_MAX,
1272 	.netnsok = true
1273 };
1274 
1275 static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1276 	.name = OVS_DATAPATH_MCGROUP
1277 };
1278 
1279 static size_t ovs_dp_cmd_msg_size(void)
1280 {
1281 	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1282 
1283 	msgsize += nla_total_size(IFNAMSIZ);
1284 	msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1285 
1286 	return msgsize;
1287 }
1288 
1289 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1290 				u32 portid, u32 seq, u32 flags, u8 cmd)
1291 {
1292 	struct ovs_header *ovs_header;
1293 	struct ovs_dp_stats dp_stats;
1294 	int err;
1295 
1296 	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1297 				   flags, cmd);
1298 	if (!ovs_header)
1299 		goto error;
1300 
1301 	ovs_header->dp_ifindex = get_dpifindex(dp);
1302 
1303 	rcu_read_lock();
1304 	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1305 	rcu_read_unlock();
1306 	if (err)
1307 		goto nla_put_failure;
1308 
1309 	get_dp_stats(dp, &dp_stats);
1310 	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
1311 		goto nla_put_failure;
1312 
1313 	return genlmsg_end(skb, ovs_header);
1314 
1315 nla_put_failure:
1316 	genlmsg_cancel(skb, ovs_header);
1317 error:
1318 	return -EMSGSIZE;
1319 }
1320 
1321 static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1322 					     u32 seq, u8 cmd)
1323 {
1324 	struct sk_buff *skb;
1325 	int retval;
1326 
1327 	skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1328 	if (!skb)
1329 		return ERR_PTR(-ENOMEM);
1330 
1331 	retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
1332 	if (retval < 0) {
1333 		kfree_skb(skb);
1334 		return ERR_PTR(retval);
1335 	}
1336 	return skb;
1337 }
1338 
1339 /* Called with ovs_mutex. */
1340 static struct datapath *lookup_datapath(struct net *net,
1341 					struct ovs_header *ovs_header,
1342 					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1343 {
1344 	struct datapath *dp;
1345 
1346 	if (!a[OVS_DP_ATTR_NAME])
1347 		dp = get_dp(net, ovs_header->dp_ifindex);
1348 	else {
1349 		struct vport *vport;
1350 
1351 		rcu_read_lock();
1352 		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1353 		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1354 		rcu_read_unlock();
1355 	}
1356 	return dp ? dp : ERR_PTR(-ENODEV);
1357 }
1358 
1359 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1360 {
1361 	struct nlattr **a = info->attrs;
1362 	struct vport_parms parms;
1363 	struct sk_buff *reply;
1364 	struct datapath *dp;
1365 	struct vport *vport;
1366 	struct ovs_net *ovs_net;
1367 	int err, i;
1368 
1369 	err = -EINVAL;
1370 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1371 		goto err;
1372 
1373 	ovs_lock();
1374 
1375 	err = -ENOMEM;
1376 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1377 	if (dp == NULL)
1378 		goto err_unlock_ovs;
1379 
1380 	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1381 
1382 	/* Allocate table. */
1383 	err = -ENOMEM;
1384 	rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
1385 	if (!dp->table)
1386 		goto err_free_dp;
1387 
1388 	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1389 	if (!dp->stats_percpu) {
1390 		err = -ENOMEM;
1391 		goto err_destroy_table;
1392 	}
1393 
1394 	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1395 			GFP_KERNEL);
1396 	if (!dp->ports) {
1397 		err = -ENOMEM;
1398 		goto err_destroy_percpu;
1399 	}
1400 
1401 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1402 		INIT_HLIST_HEAD(&dp->ports[i]);
1403 
1404 	/* Set up our datapath device. */
1405 	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1406 	parms.type = OVS_VPORT_TYPE_INTERNAL;
1407 	parms.options = NULL;
1408 	parms.dp = dp;
1409 	parms.port_no = OVSP_LOCAL;
1410 	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
1411 
1412 	vport = new_vport(&parms);
1413 	if (IS_ERR(vport)) {
1414 		err = PTR_ERR(vport);
1415 		if (err == -EBUSY)
1416 			err = -EEXIST;
1417 
1418 		goto err_destroy_ports_array;
1419 	}
1420 
1421 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1422 				      info->snd_seq, OVS_DP_CMD_NEW);
1423 	err = PTR_ERR(reply);
1424 	if (IS_ERR(reply))
1425 		goto err_destroy_local_port;
1426 
1427 	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1428 	list_add_tail(&dp->list_node, &ovs_net->dps);
1429 
1430 	ovs_unlock();
1431 
1432 	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1433 	return 0;
1434 
1435 err_destroy_local_port:
1436 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1437 err_destroy_ports_array:
1438 	kfree(dp->ports);
1439 err_destroy_percpu:
1440 	free_percpu(dp->stats_percpu);
1441 err_destroy_table:
1442 	ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
1443 err_free_dp:
1444 	release_net(ovs_dp_get_net(dp));
1445 	kfree(dp);
1446 err_unlock_ovs:
1447 	ovs_unlock();
1448 err:
1449 	return err;
1450 }
1451 
1452 /* Called with ovs_mutex. */
1453 static void __dp_destroy(struct datapath *dp)
1454 {
1455 	int i;
1456 
1457 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1458 		struct vport *vport;
1459 		struct hlist_node *n;
1460 
1461 		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1462 			if (vport->port_no != OVSP_LOCAL)
1463 				ovs_dp_detach_port(vport);
1464 	}
1465 
1466 	list_del(&dp->list_node);
1467 
1468 	/* OVSP_LOCAL is datapath internal port. We need to make sure that
1469 	 * all port in datapath are destroyed first before freeing datapath.
1470 	 */
1471 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1472 
1473 	call_rcu(&dp->rcu, destroy_dp_rcu);
1474 }
1475 
1476 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1477 {
1478 	struct sk_buff *reply;
1479 	struct datapath *dp;
1480 	int err;
1481 
1482 	ovs_lock();
1483 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1484 	err = PTR_ERR(dp);
1485 	if (IS_ERR(dp))
1486 		goto unlock;
1487 
1488 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1489 				      info->snd_seq, OVS_DP_CMD_DEL);
1490 	err = PTR_ERR(reply);
1491 	if (IS_ERR(reply))
1492 		goto unlock;
1493 
1494 	__dp_destroy(dp);
1495 	ovs_unlock();
1496 
1497 	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1498 
1499 	return 0;
1500 unlock:
1501 	ovs_unlock();
1502 	return err;
1503 }
1504 
1505 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1506 {
1507 	struct sk_buff *reply;
1508 	struct datapath *dp;
1509 	int err;
1510 
1511 	ovs_lock();
1512 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1513 	err = PTR_ERR(dp);
1514 	if (IS_ERR(dp))
1515 		goto unlock;
1516 
1517 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1518 				      info->snd_seq, OVS_DP_CMD_NEW);
1519 	if (IS_ERR(reply)) {
1520 		err = PTR_ERR(reply);
1521 		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1522 				ovs_dp_datapath_multicast_group.id, err);
1523 		err = 0;
1524 		goto unlock;
1525 	}
1526 
1527 	ovs_unlock();
1528 	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1529 
1530 	return 0;
1531 unlock:
1532 	ovs_unlock();
1533 	return err;
1534 }
1535 
1536 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1537 {
1538 	struct sk_buff *reply;
1539 	struct datapath *dp;
1540 	int err;
1541 
1542 	ovs_lock();
1543 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1544 	if (IS_ERR(dp)) {
1545 		err = PTR_ERR(dp);
1546 		goto unlock;
1547 	}
1548 
1549 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1550 				      info->snd_seq, OVS_DP_CMD_NEW);
1551 	if (IS_ERR(reply)) {
1552 		err = PTR_ERR(reply);
1553 		goto unlock;
1554 	}
1555 
1556 	ovs_unlock();
1557 	return genlmsg_reply(reply, info);
1558 
1559 unlock:
1560 	ovs_unlock();
1561 	return err;
1562 }
1563 
1564 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1565 {
1566 	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1567 	struct datapath *dp;
1568 	int skip = cb->args[0];
1569 	int i = 0;
1570 
1571 	ovs_lock();
1572 	list_for_each_entry(dp, &ovs_net->dps, list_node) {
1573 		if (i >= skip &&
1574 		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1575 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1576 					 OVS_DP_CMD_NEW) < 0)
1577 			break;
1578 		i++;
1579 	}
1580 	ovs_unlock();
1581 
1582 	cb->args[0] = i;
1583 
1584 	return skb->len;
1585 }
1586 
1587 static struct genl_ops dp_datapath_genl_ops[] = {
1588 	{ .cmd = OVS_DP_CMD_NEW,
1589 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1590 	  .policy = datapath_policy,
1591 	  .doit = ovs_dp_cmd_new
1592 	},
1593 	{ .cmd = OVS_DP_CMD_DEL,
1594 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1595 	  .policy = datapath_policy,
1596 	  .doit = ovs_dp_cmd_del
1597 	},
1598 	{ .cmd = OVS_DP_CMD_GET,
1599 	  .flags = 0,		    /* OK for unprivileged users. */
1600 	  .policy = datapath_policy,
1601 	  .doit = ovs_dp_cmd_get,
1602 	  .dumpit = ovs_dp_cmd_dump
1603 	},
1604 	{ .cmd = OVS_DP_CMD_SET,
1605 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1606 	  .policy = datapath_policy,
1607 	  .doit = ovs_dp_cmd_set,
1608 	},
1609 };
1610 
1611 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1612 	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1613 	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
1614 	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1615 	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1616 	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1617 	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1618 };
1619 
1620 static struct genl_family dp_vport_genl_family = {
1621 	.id = GENL_ID_GENERATE,
1622 	.hdrsize = sizeof(struct ovs_header),
1623 	.name = OVS_VPORT_FAMILY,
1624 	.version = OVS_VPORT_VERSION,
1625 	.maxattr = OVS_VPORT_ATTR_MAX,
1626 	.netnsok = true
1627 };
1628 
1629 struct genl_multicast_group ovs_dp_vport_multicast_group = {
1630 	.name = OVS_VPORT_MCGROUP
1631 };
1632 
1633 /* Called with ovs_mutex or RCU read lock. */
1634 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1635 				   u32 portid, u32 seq, u32 flags, u8 cmd)
1636 {
1637 	struct ovs_header *ovs_header;
1638 	struct ovs_vport_stats vport_stats;
1639 	int err;
1640 
1641 	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1642 				 flags, cmd);
1643 	if (!ovs_header)
1644 		return -EMSGSIZE;
1645 
1646 	ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1647 
1648 	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1649 	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1650 	    nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
1651 	    nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
1652 		goto nla_put_failure;
1653 
1654 	ovs_vport_get_stats(vport, &vport_stats);
1655 	if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1656 		    &vport_stats))
1657 		goto nla_put_failure;
1658 
1659 	err = ovs_vport_get_options(vport, skb);
1660 	if (err == -EMSGSIZE)
1661 		goto error;
1662 
1663 	return genlmsg_end(skb, ovs_header);
1664 
1665 nla_put_failure:
1666 	err = -EMSGSIZE;
1667 error:
1668 	genlmsg_cancel(skb, ovs_header);
1669 	return err;
1670 }
1671 
1672 /* Called with ovs_mutex or RCU read lock. */
1673 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1674 					 u32 seq, u8 cmd)
1675 {
1676 	struct sk_buff *skb;
1677 	int retval;
1678 
1679 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1680 	if (!skb)
1681 		return ERR_PTR(-ENOMEM);
1682 
1683 	retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1684 	BUG_ON(retval < 0);
1685 
1686 	return skb;
1687 }
1688 
1689 /* Called with ovs_mutex or RCU read lock. */
1690 static struct vport *lookup_vport(struct net *net,
1691 				  struct ovs_header *ovs_header,
1692 				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1693 {
1694 	struct datapath *dp;
1695 	struct vport *vport;
1696 
1697 	if (a[OVS_VPORT_ATTR_NAME]) {
1698 		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1699 		if (!vport)
1700 			return ERR_PTR(-ENODEV);
1701 		if (ovs_header->dp_ifindex &&
1702 		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1703 			return ERR_PTR(-ENODEV);
1704 		return vport;
1705 	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1706 		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1707 
1708 		if (port_no >= DP_MAX_PORTS)
1709 			return ERR_PTR(-EFBIG);
1710 
1711 		dp = get_dp(net, ovs_header->dp_ifindex);
1712 		if (!dp)
1713 			return ERR_PTR(-ENODEV);
1714 
1715 		vport = ovs_vport_ovsl_rcu(dp, port_no);
1716 		if (!vport)
1717 			return ERR_PTR(-ENODEV);
1718 		return vport;
1719 	} else
1720 		return ERR_PTR(-EINVAL);
1721 }
1722 
1723 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1724 {
1725 	struct nlattr **a = info->attrs;
1726 	struct ovs_header *ovs_header = info->userhdr;
1727 	struct vport_parms parms;
1728 	struct sk_buff *reply;
1729 	struct vport *vport;
1730 	struct datapath *dp;
1731 	u32 port_no;
1732 	int err;
1733 
1734 	err = -EINVAL;
1735 	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1736 	    !a[OVS_VPORT_ATTR_UPCALL_PID])
1737 		goto exit;
1738 
1739 	ovs_lock();
1740 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1741 	err = -ENODEV;
1742 	if (!dp)
1743 		goto exit_unlock;
1744 
1745 	if (a[OVS_VPORT_ATTR_PORT_NO]) {
1746 		port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1747 
1748 		err = -EFBIG;
1749 		if (port_no >= DP_MAX_PORTS)
1750 			goto exit_unlock;
1751 
1752 		vport = ovs_vport_ovsl(dp, port_no);
1753 		err = -EBUSY;
1754 		if (vport)
1755 			goto exit_unlock;
1756 	} else {
1757 		for (port_no = 1; ; port_no++) {
1758 			if (port_no >= DP_MAX_PORTS) {
1759 				err = -EFBIG;
1760 				goto exit_unlock;
1761 			}
1762 			vport = ovs_vport_ovsl(dp, port_no);
1763 			if (!vport)
1764 				break;
1765 		}
1766 	}
1767 
1768 	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1769 	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1770 	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1771 	parms.dp = dp;
1772 	parms.port_no = port_no;
1773 	parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1774 
1775 	vport = new_vport(&parms);
1776 	err = PTR_ERR(vport);
1777 	if (IS_ERR(vport))
1778 		goto exit_unlock;
1779 
1780 	err = 0;
1781 	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1782 					 OVS_VPORT_CMD_NEW);
1783 	if (IS_ERR(reply)) {
1784 		err = PTR_ERR(reply);
1785 		ovs_dp_detach_port(vport);
1786 		goto exit_unlock;
1787 	}
1788 
1789 	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1790 
1791 exit_unlock:
1792 	ovs_unlock();
1793 exit:
1794 	return err;
1795 }
1796 
1797 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1798 {
1799 	struct nlattr **a = info->attrs;
1800 	struct sk_buff *reply;
1801 	struct vport *vport;
1802 	int err;
1803 
1804 	ovs_lock();
1805 	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1806 	err = PTR_ERR(vport);
1807 	if (IS_ERR(vport))
1808 		goto exit_unlock;
1809 
1810 	err = 0;
1811 	if (a[OVS_VPORT_ATTR_TYPE] &&
1812 	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
1813 		err = -EINVAL;
1814 
1815 	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1816 	if (!reply) {
1817 		err = -ENOMEM;
1818 		goto exit_unlock;
1819 	}
1820 
1821 	if (!err && a[OVS_VPORT_ATTR_OPTIONS])
1822 		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1823 	if (err)
1824 		goto exit_free;
1825 
1826 	if (a[OVS_VPORT_ATTR_UPCALL_PID])
1827 		vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1828 
1829 	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1830 				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1831 	BUG_ON(err < 0);
1832 
1833 	ovs_unlock();
1834 	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1835 	return 0;
1836 
1837 	rtnl_unlock();
1838 	return 0;
1839 
1840 exit_free:
1841 	kfree_skb(reply);
1842 exit_unlock:
1843 	ovs_unlock();
1844 	return err;
1845 }
1846 
1847 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1848 {
1849 	struct nlattr **a = info->attrs;
1850 	struct sk_buff *reply;
1851 	struct vport *vport;
1852 	int err;
1853 
1854 	ovs_lock();
1855 	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1856 	err = PTR_ERR(vport);
1857 	if (IS_ERR(vport))
1858 		goto exit_unlock;
1859 
1860 	if (vport->port_no == OVSP_LOCAL) {
1861 		err = -EINVAL;
1862 		goto exit_unlock;
1863 	}
1864 
1865 	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1866 					 OVS_VPORT_CMD_DEL);
1867 	err = PTR_ERR(reply);
1868 	if (IS_ERR(reply))
1869 		goto exit_unlock;
1870 
1871 	err = 0;
1872 	ovs_dp_detach_port(vport);
1873 
1874 	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1875 
1876 exit_unlock:
1877 	ovs_unlock();
1878 	return err;
1879 }
1880 
1881 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1882 {
1883 	struct nlattr **a = info->attrs;
1884 	struct ovs_header *ovs_header = info->userhdr;
1885 	struct sk_buff *reply;
1886 	struct vport *vport;
1887 	int err;
1888 
1889 	rcu_read_lock();
1890 	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
1891 	err = PTR_ERR(vport);
1892 	if (IS_ERR(vport))
1893 		goto exit_unlock;
1894 
1895 	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1896 					 OVS_VPORT_CMD_NEW);
1897 	err = PTR_ERR(reply);
1898 	if (IS_ERR(reply))
1899 		goto exit_unlock;
1900 
1901 	rcu_read_unlock();
1902 
1903 	return genlmsg_reply(reply, info);
1904 
1905 exit_unlock:
1906 	rcu_read_unlock();
1907 	return err;
1908 }
1909 
1910 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1911 {
1912 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1913 	struct datapath *dp;
1914 	int bucket = cb->args[0], skip = cb->args[1];
1915 	int i, j = 0;
1916 
1917 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1918 	if (!dp)
1919 		return -ENODEV;
1920 
1921 	rcu_read_lock();
1922 	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
1923 		struct vport *vport;
1924 
1925 		j = 0;
1926 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1927 			if (j >= skip &&
1928 			    ovs_vport_cmd_fill_info(vport, skb,
1929 						    NETLINK_CB(cb->skb).portid,
1930 						    cb->nlh->nlmsg_seq,
1931 						    NLM_F_MULTI,
1932 						    OVS_VPORT_CMD_NEW) < 0)
1933 				goto out;
1934 
1935 			j++;
1936 		}
1937 		skip = 0;
1938 	}
1939 out:
1940 	rcu_read_unlock();
1941 
1942 	cb->args[0] = i;
1943 	cb->args[1] = j;
1944 
1945 	return skb->len;
1946 }
1947 
1948 static struct genl_ops dp_vport_genl_ops[] = {
1949 	{ .cmd = OVS_VPORT_CMD_NEW,
1950 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1951 	  .policy = vport_policy,
1952 	  .doit = ovs_vport_cmd_new
1953 	},
1954 	{ .cmd = OVS_VPORT_CMD_DEL,
1955 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1956 	  .policy = vport_policy,
1957 	  .doit = ovs_vport_cmd_del
1958 	},
1959 	{ .cmd = OVS_VPORT_CMD_GET,
1960 	  .flags = 0,		    /* OK for unprivileged users. */
1961 	  .policy = vport_policy,
1962 	  .doit = ovs_vport_cmd_get,
1963 	  .dumpit = ovs_vport_cmd_dump
1964 	},
1965 	{ .cmd = OVS_VPORT_CMD_SET,
1966 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1967 	  .policy = vport_policy,
1968 	  .doit = ovs_vport_cmd_set,
1969 	},
1970 };
1971 
1972 struct genl_family_and_ops {
1973 	struct genl_family *family;
1974 	struct genl_ops *ops;
1975 	int n_ops;
1976 	struct genl_multicast_group *group;
1977 };
1978 
1979 static const struct genl_family_and_ops dp_genl_families[] = {
1980 	{ &dp_datapath_genl_family,
1981 	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1982 	  &ovs_dp_datapath_multicast_group },
1983 	{ &dp_vport_genl_family,
1984 	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1985 	  &ovs_dp_vport_multicast_group },
1986 	{ &dp_flow_genl_family,
1987 	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1988 	  &ovs_dp_flow_multicast_group },
1989 	{ &dp_packet_genl_family,
1990 	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1991 	  NULL },
1992 };
1993 
1994 static void dp_unregister_genl(int n_families)
1995 {
1996 	int i;
1997 
1998 	for (i = 0; i < n_families; i++)
1999 		genl_unregister_family(dp_genl_families[i].family);
2000 }
2001 
2002 static int dp_register_genl(void)
2003 {
2004 	int n_registered;
2005 	int err;
2006 	int i;
2007 
2008 	n_registered = 0;
2009 	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2010 		const struct genl_family_and_ops *f = &dp_genl_families[i];
2011 
2012 		err = genl_register_family_with_ops(f->family, f->ops,
2013 						    f->n_ops);
2014 		if (err)
2015 			goto error;
2016 		n_registered++;
2017 
2018 		if (f->group) {
2019 			err = genl_register_mc_group(f->family, f->group);
2020 			if (err)
2021 				goto error;
2022 		}
2023 	}
2024 
2025 	return 0;
2026 
2027 error:
2028 	dp_unregister_genl(n_registered);
2029 	return err;
2030 }
2031 
2032 static void rehash_flow_table(struct work_struct *work)
2033 {
2034 	struct datapath *dp;
2035 	struct net *net;
2036 
2037 	ovs_lock();
2038 	rtnl_lock();
2039 	for_each_net(net) {
2040 		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2041 
2042 		list_for_each_entry(dp, &ovs_net->dps, list_node) {
2043 			struct flow_table *old_table = ovsl_dereference(dp->table);
2044 			struct flow_table *new_table;
2045 
2046 			new_table = ovs_flow_tbl_rehash(old_table);
2047 			if (!IS_ERR(new_table)) {
2048 				rcu_assign_pointer(dp->table, new_table);
2049 				ovs_flow_tbl_deferred_destroy(old_table);
2050 			}
2051 		}
2052 	}
2053 	rtnl_unlock();
2054 	ovs_unlock();
2055 	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2056 }
2057 
2058 static int __net_init ovs_init_net(struct net *net)
2059 {
2060 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2061 
2062 	INIT_LIST_HEAD(&ovs_net->dps);
2063 	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2064 	return 0;
2065 }
2066 
2067 static void __net_exit ovs_exit_net(struct net *net)
2068 {
2069 	struct datapath *dp, *dp_next;
2070 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2071 
2072 	ovs_lock();
2073 	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2074 		__dp_destroy(dp);
2075 	ovs_unlock();
2076 
2077 	cancel_work_sync(&ovs_net->dp_notify_work);
2078 }
2079 
2080 static struct pernet_operations ovs_net_ops = {
2081 	.init = ovs_init_net,
2082 	.exit = ovs_exit_net,
2083 	.id   = &ovs_net_id,
2084 	.size = sizeof(struct ovs_net),
2085 };
2086 
2087 static int __init dp_init(void)
2088 {
2089 	int err;
2090 
2091 	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2092 
2093 	pr_info("Open vSwitch switching datapath\n");
2094 
2095 	err = ovs_flow_init();
2096 	if (err)
2097 		goto error;
2098 
2099 	err = ovs_vport_init();
2100 	if (err)
2101 		goto error_flow_exit;
2102 
2103 	err = register_pernet_device(&ovs_net_ops);
2104 	if (err)
2105 		goto error_vport_exit;
2106 
2107 	err = register_netdevice_notifier(&ovs_dp_device_notifier);
2108 	if (err)
2109 		goto error_netns_exit;
2110 
2111 	err = dp_register_genl();
2112 	if (err < 0)
2113 		goto error_unreg_notifier;
2114 
2115 	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2116 
2117 	return 0;
2118 
2119 error_unreg_notifier:
2120 	unregister_netdevice_notifier(&ovs_dp_device_notifier);
2121 error_netns_exit:
2122 	unregister_pernet_device(&ovs_net_ops);
2123 error_vport_exit:
2124 	ovs_vport_exit();
2125 error_flow_exit:
2126 	ovs_flow_exit();
2127 error:
2128 	return err;
2129 }
2130 
2131 static void dp_cleanup(void)
2132 {
2133 	cancel_delayed_work_sync(&rehash_flow_wq);
2134 	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2135 	unregister_netdevice_notifier(&ovs_dp_device_notifier);
2136 	unregister_pernet_device(&ovs_net_ops);
2137 	rcu_barrier();
2138 	ovs_vport_exit();
2139 	ovs_flow_exit();
2140 }
2141 
2142 module_init(dp_init);
2143 module_exit(dp_cleanup);
2144 
2145 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2146 MODULE_LICENSE("GPL");
2147