xref: /linux-6.15/net/ipv4/ip_input.c (revision 5df7ca0b)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
41da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
51da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  *		The Internet Protocol (IP) module.
81da177e4SLinus Torvalds  *
902c30a84SJesper Juhl  * Authors:	Ross Biro
101da177e4SLinus Torvalds  *		Fred N. van Kempen, <[email protected]>
111da177e4SLinus Torvalds  *		Donald Becker, <[email protected]>
12113aa838SAlan Cox  *		Alan Cox, <[email protected]>
131da177e4SLinus Torvalds  *		Richard Underwood
141da177e4SLinus Torvalds  *		Stefan Becker, <[email protected]>
151da177e4SLinus Torvalds  *		Jorge Cwik, <[email protected]>
161da177e4SLinus Torvalds  *		Arnt Gulbrandsen, <[email protected]>
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  * Fixes:
191da177e4SLinus Torvalds  *		Alan Cox	:	Commented a couple of minor bits of surplus code
201da177e4SLinus Torvalds  *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
211da177e4SLinus Torvalds  *					(just stops a compiler warning).
221da177e4SLinus Torvalds  *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
231da177e4SLinus Torvalds  *					are junked rather than corrupting things.
241da177e4SLinus Torvalds  *		Alan Cox	:	Frames to bad broadcast subnets are dumped
251da177e4SLinus Torvalds  *					We used to process them non broadcast and
261da177e4SLinus Torvalds  *					boy could that cause havoc.
271da177e4SLinus Torvalds  *		Alan Cox	:	ip_forward sets the free flag on the
281da177e4SLinus Torvalds  *					new frame it queues. Still crap because
291da177e4SLinus Torvalds  *					it copies the frame but at least it
301da177e4SLinus Torvalds  *					doesn't eat memory too.
311da177e4SLinus Torvalds  *		Alan Cox	:	Generic queue code and memory fixes.
321da177e4SLinus Torvalds  *		Fred Van Kempen :	IP fragment support (borrowed from NET2E)
331da177e4SLinus Torvalds  *		Gerhard Koerting:	Forward fragmented frames correctly.
341da177e4SLinus Torvalds  *		Gerhard Koerting: 	Fixes to my fix of the above 8-).
351da177e4SLinus Torvalds  *		Gerhard Koerting:	IP interface addressing fix.
361da177e4SLinus Torvalds  *		Linus Torvalds	:	More robustness checks
371da177e4SLinus Torvalds  *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
381da177e4SLinus Torvalds  *		Alan Cox	:	Save IP header pointer for later
391da177e4SLinus Torvalds  *		Alan Cox	:	ip option setting
401da177e4SLinus Torvalds  *		Alan Cox	:	Use ip_tos/ip_ttl settings
411da177e4SLinus Torvalds  *		Alan Cox	:	Fragmentation bogosity removed
421da177e4SLinus Torvalds  *					(Thanks to [email protected])
431da177e4SLinus Torvalds  *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
441da177e4SLinus Torvalds  *		Alan Cox	:	Silly ip bug when an overlength
451da177e4SLinus Torvalds  *					fragment turns up. Now frees the
461da177e4SLinus Torvalds  *					queue.
471da177e4SLinus Torvalds  *		Linus Torvalds/ :	Memory leakage on fragmentation
481da177e4SLinus Torvalds  *		Alan Cox	:	handling.
491da177e4SLinus Torvalds  *		Gerhard Koerting:	Forwarding uses IP priority hints
501da177e4SLinus Torvalds  *		Teemu Rantanen	:	Fragment problems.
511da177e4SLinus Torvalds  *		Alan Cox	:	General cleanup, comments and reformat
521da177e4SLinus Torvalds  *		Alan Cox	:	SNMP statistics
531da177e4SLinus Torvalds  *		Alan Cox	:	BSD address rule semantics. Also see
541da177e4SLinus Torvalds  *					UDP as there is a nasty checksum issue
551da177e4SLinus Torvalds  *					if you do things the wrong way.
561da177e4SLinus Torvalds  *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
571da177e4SLinus Torvalds  *		Alan Cox	: 	IP options adjust sk->priority.
581da177e4SLinus Torvalds  *		Pedro Roque	:	Fix mtu/length error in ip_forward.
591da177e4SLinus Torvalds  *		Alan Cox	:	Avoid ip_chk_addr when possible.
601da177e4SLinus Torvalds  *	Richard Underwood	:	IP multicasting.
611da177e4SLinus Torvalds  *		Alan Cox	:	Cleaned up multicast handlers.
621da177e4SLinus Torvalds  *		Alan Cox	:	RAW sockets demultiplex in the BSD style.
631da177e4SLinus Torvalds  *		Gunther Mayer	:	Fix the SNMP reporting typo
641da177e4SLinus Torvalds  *		Alan Cox	:	Always in group 224.0.0.1
651da177e4SLinus Torvalds  *	Pauline Middelink	:	Fast ip_checksum update when forwarding
661da177e4SLinus Torvalds  *					Masquerading support.
671da177e4SLinus Torvalds  *		Alan Cox	:	Multicast loopback error for 224.0.0.1
681da177e4SLinus Torvalds  *		Alan Cox	:	IP_MULTICAST_LOOP option.
691da177e4SLinus Torvalds  *		Alan Cox	:	Use notifiers.
701da177e4SLinus Torvalds  *		Bjorn Ekwall	:	Removed ip_csum (from slhc.c too)
711da177e4SLinus Torvalds  *		Bjorn Ekwall	:	Moved ip_fast_csum to ip.h (inline!)
721da177e4SLinus Torvalds  *		Stefan Becker   :       Send out ICMP HOST REDIRECT
731da177e4SLinus Torvalds  *	Arnt Gulbrandsen	:	ip_build_xmit
741da177e4SLinus Torvalds  *		Alan Cox	:	Per socket routing cache
751da177e4SLinus Torvalds  *		Alan Cox	:	Fixed routing cache, added header cache.
761da177e4SLinus Torvalds  *		Alan Cox	:	Loopback didn't work right in original ip_build_xmit - fixed it.
771da177e4SLinus Torvalds  *		Alan Cox	:	Only send ICMP_REDIRECT if src/dest are the same net.
781da177e4SLinus Torvalds  *		Alan Cox	:	Incoming IP option handling.
791da177e4SLinus Torvalds  *		Alan Cox	:	Set saddr on raw output frames as per BSD.
801da177e4SLinus Torvalds  *		Alan Cox	:	Stopped broadcast source route explosions.
811da177e4SLinus Torvalds  *		Alan Cox	:	Can disable source routing
821da177e4SLinus Torvalds  *		Takeshi Sone    :	Masquerading didn't work.
831da177e4SLinus Torvalds  *	Dave Bonn,Alan Cox	:	Faster IP forwarding whenever possible.
841da177e4SLinus Torvalds  *		Alan Cox	:	Memory leaks, tramples, misc debugging.
851da177e4SLinus Torvalds  *		Alan Cox	:	Fixed multicast (by popular demand 8))
861da177e4SLinus Torvalds  *		Alan Cox	:	Fixed forwarding (by even more popular demand 8))
871da177e4SLinus Torvalds  *		Alan Cox	:	Fixed SNMP statistics [I think]
881da177e4SLinus Torvalds  *	Gerhard Koerting	:	IP fragmentation forwarding fix
891da177e4SLinus Torvalds  *		Alan Cox	:	Device lock against page fault.
901da177e4SLinus Torvalds  *		Alan Cox	:	IP_HDRINCL facility.
911da177e4SLinus Torvalds  *	Werner Almesberger	:	Zero fragment bug
921da177e4SLinus Torvalds  *		Alan Cox	:	RAW IP frame length bug
931da177e4SLinus Torvalds  *		Alan Cox	:	Outgoing firewall on build_xmit
941da177e4SLinus Torvalds  *		A.N.Kuznetsov	:	IP_OPTIONS support throughout the kernel
951da177e4SLinus Torvalds  *		Alan Cox	:	Multicast routing hooks
961da177e4SLinus Torvalds  *		Jos Vos		:	Do accounting *before* call_in_firewall
971da177e4SLinus Torvalds  *	Willy Konynenberg	:	Transparent proxying support
981da177e4SLinus Torvalds  *
991da177e4SLinus Torvalds  * To Fix:
1001da177e4SLinus Torvalds  *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
1011da177e4SLinus Torvalds  *		and could be made very efficient with the addition of some virtual memory hacks to permit
1021da177e4SLinus Torvalds  *		the allocation of a buffer that can then be 'grown' by twiddling page tables.
1031da177e4SLinus Torvalds  *		Output fragmentation wants updating along with the buffer management to use a single
1041da177e4SLinus Torvalds  *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
1051da177e4SLinus Torvalds  *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
1061da177e4SLinus Torvalds  *		fragmentation anyway.
1071da177e4SLinus Torvalds  */
1081da177e4SLinus Torvalds 
109afd46503SJoe Perches #define pr_fmt(fmt) "IPv4: " fmt
110afd46503SJoe Perches 
1111da177e4SLinus Torvalds #include <linux/module.h>
1121da177e4SLinus Torvalds #include <linux/types.h>
1131da177e4SLinus Torvalds #include <linux/kernel.h>
1141da177e4SLinus Torvalds #include <linux/string.h>
1151da177e4SLinus Torvalds #include <linux/errno.h>
1165a0e3ad6STejun Heo #include <linux/slab.h>
1171da177e4SLinus Torvalds 
1181da177e4SLinus Torvalds #include <linux/net.h>
1191da177e4SLinus Torvalds #include <linux/socket.h>
1201da177e4SLinus Torvalds #include <linux/sockios.h>
1211da177e4SLinus Torvalds #include <linux/in.h>
1221da177e4SLinus Torvalds #include <linux/inet.h>
12314c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h>
1241da177e4SLinus Torvalds #include <linux/netdevice.h>
1251da177e4SLinus Torvalds #include <linux/etherdevice.h>
1260e219ae4SPaolo Abeni #include <linux/indirect_call_wrapper.h>
1271da177e4SLinus Torvalds 
1281da177e4SLinus Torvalds #include <net/snmp.h>
1291da177e4SLinus Torvalds #include <net/ip.h>
1301da177e4SLinus Torvalds #include <net/protocol.h>
1311da177e4SLinus Torvalds #include <net/route.h>
1321da177e4SLinus Torvalds #include <linux/skbuff.h>
1331da177e4SLinus Torvalds #include <net/sock.h>
1341da177e4SLinus Torvalds #include <net/arp.h>
1351da177e4SLinus Torvalds #include <net/icmp.h>
1361da177e4SLinus Torvalds #include <net/raw.h>
1371da177e4SLinus Torvalds #include <net/checksum.h>
1381f07d03eSEric Dumazet #include <net/inet_ecn.h>
1391da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
1401da177e4SLinus Torvalds #include <net/xfrm.h>
1411da177e4SLinus Torvalds #include <linux/mroute.h>
1421da177e4SLinus Torvalds #include <linux/netlink.h>
143f38a9eb1SThomas Graf #include <net/dst_metadata.h>
1441da177e4SLinus Torvalds 
1451da177e4SLinus Torvalds /*
14666018506SEric Dumazet  *	Process Router Attention IP option (RFC 2113)
1471da177e4SLinus Torvalds  */
ip_call_ra_chain(struct sk_buff * skb)148ba57b4dbSDavid S. Miller bool ip_call_ra_chain(struct sk_buff *skb)
1491da177e4SLinus Torvalds {
1501da177e4SLinus Torvalds 	struct ip_ra_chain *ra;
151eddc9ec5SArnaldo Carvalho de Melo 	u8 protocol = ip_hdr(skb)->protocol;
1521da177e4SLinus Torvalds 	struct sock *last = NULL;
153cb84663eSDenis V. Lunev 	struct net_device *dev = skb->dev;
15437fcbab6SEric W. Biederman 	struct net *net = dev_net(dev);
1551da177e4SLinus Torvalds 
1565796ef75SKirill Tkhai 	for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
1571da177e4SLinus Torvalds 		struct sock *sk = ra->sk;
1581da177e4SLinus Torvalds 
1591da177e4SLinus Torvalds 		/* If socket is bound to an interface, only report
1601da177e4SLinus Torvalds 		 * the packet if it came  from that interface.
1611da177e4SLinus Torvalds 		 */
162c720c7e8SEric Dumazet 		if (sk && inet_sk(sk)->inet_num == protocol &&
1631da177e4SLinus Torvalds 		    (!sk->sk_bound_dev_if ||
1645796ef75SKirill Tkhai 		     sk->sk_bound_dev_if == dev->ifindex)) {
16556f8a75cSPaul Gortmaker 			if (ip_is_fragment(ip_hdr(skb))) {
16619bcf9f2SEric W. Biederman 				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
167ba57b4dbSDavid S. Miller 					return true;
1681da177e4SLinus Torvalds 			}
1691da177e4SLinus Torvalds 			if (last) {
1701da177e4SLinus Torvalds 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1711da177e4SLinus Torvalds 				if (skb2)
1721da177e4SLinus Torvalds 					raw_rcv(last, skb2);
1731da177e4SLinus Torvalds 			}
1741da177e4SLinus Torvalds 			last = sk;
1751da177e4SLinus Torvalds 		}
1761da177e4SLinus Torvalds 	}
1771da177e4SLinus Torvalds 
1781da177e4SLinus Torvalds 	if (last) {
1791da177e4SLinus Torvalds 		raw_rcv(last, skb);
180ba57b4dbSDavid S. Miller 		return true;
1811da177e4SLinus Torvalds 	}
182ba57b4dbSDavid S. Miller 	return false;
1831da177e4SLinus Torvalds }
1841da177e4SLinus Torvalds 
1850e219ae4SPaolo Abeni INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
1860e219ae4SPaolo Abeni INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
ip_protocol_deliver_rcu(struct net * net,struct sk_buff * skb,int protocol)18768cb7d53SPaolo Abeni void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
1881da177e4SLinus Torvalds {
18932613090SAlexey Dobriyan 	const struct net_protocol *ipprot;
19068cb7d53SPaolo Abeni 	int raw, ret;
1911da177e4SLinus Torvalds 
1921da177e4SLinus Torvalds resubmit:
1937bc54c90SPavel Emelyanov 	raw = raw_local_deliver(skb, protocol);
1947bc54c90SPavel Emelyanov 
195f9242b6bSDavid S. Miller 	ipprot = rcu_dereference(inet_protos[protocol]);
19600db4124SIan Morris 	if (ipprot) {
197b59c2701SPatrick McHardy 		if (!ipprot->no_policy) {
198b59c2701SPatrick McHardy 			if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
19910580c47SMenglong Dong 				kfree_skb_reason(skb,
20010580c47SMenglong Dong 						 SKB_DROP_REASON_XFRM_POLICY);
20168cb7d53SPaolo Abeni 				return;
2021da177e4SLinus Torvalds 			}
203895b5c9fSFlorian Westphal 			nf_reset_ct(skb);
204b59c2701SPatrick McHardy 		}
2050e219ae4SPaolo Abeni 		ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
2060e219ae4SPaolo Abeni 				      skb);
2071da177e4SLinus Torvalds 		if (ret < 0) {
2081da177e4SLinus Torvalds 			protocol = -ret;
2091da177e4SLinus Torvalds 			goto resubmit;
2101da177e4SLinus Torvalds 		}
211b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
2121da177e4SLinus Torvalds 	} else {
2137bc54c90SPavel Emelyanov 		if (!raw) {
2141da177e4SLinus Torvalds 			if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
215b45386efSEric Dumazet 				__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
2161da177e4SLinus Torvalds 				icmp_send(skb, ICMP_DEST_UNREACH,
2171da177e4SLinus Torvalds 					  ICMP_PROT_UNREACH, 0);
2181da177e4SLinus Torvalds 			}
21910580c47SMenglong Dong 			kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO);
220d8c6f4b9SNeil Horman 		} else {
221b45386efSEric Dumazet 			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
222d8c6f4b9SNeil Horman 			consume_skb(skb);
223d8c6f4b9SNeil Horman 		}
2241da177e4SLinus Torvalds 	}
2251da177e4SLinus Torvalds }
22668cb7d53SPaolo Abeni 
ip_local_deliver_finish(struct net * net,struct sock * sk,struct sk_buff * skb)22768cb7d53SPaolo Abeni static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
22868cb7d53SPaolo Abeni {
229cd14e9b7SMartin KaFai Lau 	skb_clear_delivery_time(skb);
23068cb7d53SPaolo Abeni 	__skb_pull(skb, skb_network_header_len(skb));
23168cb7d53SPaolo Abeni 
23268cb7d53SPaolo Abeni 	rcu_read_lock();
23368cb7d53SPaolo Abeni 	ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
2341da177e4SLinus Torvalds 	rcu_read_unlock();
2351da177e4SLinus Torvalds 
2361da177e4SLinus Torvalds 	return 0;
2371da177e4SLinus Torvalds }
2381da177e4SLinus Torvalds 
2391da177e4SLinus Torvalds /*
2401da177e4SLinus Torvalds  * 	Deliver IP Packets to the higher protocol layers.
2411da177e4SLinus Torvalds  */
ip_local_deliver(struct sk_buff * skb)2421da177e4SLinus Torvalds int ip_local_deliver(struct sk_buff *skb)
2431da177e4SLinus Torvalds {
2441da177e4SLinus Torvalds 	/*
2451da177e4SLinus Torvalds 	 *	Reassemble IP fragments.
2461da177e4SLinus Torvalds 	 */
24719bcf9f2SEric W. Biederman 	struct net *net = dev_net(skb->dev);
2481da177e4SLinus Torvalds 
24956f8a75cSPaul Gortmaker 	if (ip_is_fragment(ip_hdr(skb))) {
25019bcf9f2SEric W. Biederman 		if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
2511da177e4SLinus Torvalds 			return 0;
2521da177e4SLinus Torvalds 	}
2531da177e4SLinus Torvalds 
25429a26a56SEric W. Biederman 	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
25519bcf9f2SEric W. Biederman 		       net, NULL, skb, skb->dev, NULL,
2561da177e4SLinus Torvalds 		       ip_local_deliver_finish);
2571da177e4SLinus Torvalds }
258e43b2190SBrian Vazquez EXPORT_SYMBOL(ip_local_deliver);
2591da177e4SLinus Torvalds 
ip_rcv_options(struct sk_buff * skb,struct net_device * dev)2608c83f2dfSStephen Suryaputra static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
261d245407eSThomas Graf {
262d245407eSThomas Graf 	struct ip_options *opt;
263b71d1d42SEric Dumazet 	const struct iphdr *iph;
264d245407eSThomas Graf 
265d245407eSThomas Graf 	/* It looks as overkill, because not all
266d245407eSThomas Graf 	   IP options require packet mangling.
267d245407eSThomas Graf 	   But it is the easiest for now, especially taking
268d245407eSThomas Graf 	   into account that combination of IP options
269d245407eSThomas Graf 	   and running sniffer is extremely rare condition.
270d245407eSThomas Graf 					      --ANK (980813)
271d245407eSThomas Graf 	*/
272d245407eSThomas Graf 	if (skb_cow(skb, skb_headroom(skb))) {
273b45386efSEric Dumazet 		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
274d245407eSThomas Graf 		goto drop;
275d245407eSThomas Graf 	}
276d245407eSThomas Graf 
277eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
27822aba383SDenis V. Lunev 	opt = &(IPCB(skb)->opt);
27922aba383SDenis V. Lunev 	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
280d245407eSThomas Graf 
281c346dca1SYOSHIFUJI Hideaki 	if (ip_options_compile(dev_net(dev), opt, skb)) {
282b45386efSEric Dumazet 		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
283d245407eSThomas Graf 		goto drop;
284d245407eSThomas Graf 	}
285d245407eSThomas Graf 
286d245407eSThomas Graf 	if (unlikely(opt->srr)) {
2876e8b11b4SEric Dumazet 		struct in_device *in_dev = __in_dev_get_rcu(dev);
2886e8b11b4SEric Dumazet 
289d245407eSThomas Graf 		if (in_dev) {
290d245407eSThomas Graf 			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
291e87cc472SJoe Perches 				if (IN_DEV_LOG_MARTIANS(in_dev))
292e87cc472SJoe Perches 					net_info_ratelimited("source route option %pI4 -> %pI4\n",
293e87cc472SJoe Perches 							     &iph->saddr,
294e87cc472SJoe Perches 							     &iph->daddr);
295d245407eSThomas Graf 				goto drop;
296d245407eSThomas Graf 			}
297d245407eSThomas Graf 		}
298d245407eSThomas Graf 
2998c83f2dfSStephen Suryaputra 		if (ip_options_rcv_srr(skb, dev))
300d245407eSThomas Graf 			goto drop;
301d245407eSThomas Graf 	}
302d245407eSThomas Graf 
3036a91395fSDavid S. Miller 	return false;
304d245407eSThomas Graf drop:
3056a91395fSDavid S. Miller 	return true;
306d245407eSThomas Graf }
307d245407eSThomas Graf 
ip_can_use_hint(const struct sk_buff * skb,const struct iphdr * iph,const struct sk_buff * hint)30802b24941SPaolo Abeni static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
30902b24941SPaolo Abeni 			    const struct sk_buff *hint)
31002b24941SPaolo Abeni {
31102b24941SPaolo Abeni 	return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
31202b24941SPaolo Abeni 	       ip_hdr(hint)->tos == iph->tos;
31302b24941SPaolo Abeni }
31402b24941SPaolo Abeni 
31511052589SKuniyuki Iwashima int tcp_v4_early_demux(struct sk_buff *skb);
31611052589SKuniyuki Iwashima int udp_v4_early_demux(struct sk_buff *skb);
ip_rcv_finish_core(struct net * net,struct sk_buff * skb,struct net_device * dev,const struct sk_buff * hint)317*5df7ca0bSYu Tian static int ip_rcv_finish_core(struct net *net,
31802b24941SPaolo Abeni 			      struct sk_buff *skb, struct net_device *dev,
31902b24941SPaolo Abeni 			      const struct sk_buff *hint)
3201da177e4SLinus Torvalds {
321eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
322c1f166d1SMenglong Dong 	int err, drop_reason;
3237487449cSPaolo Abeni 	struct rtable *rt;
324c1f166d1SMenglong Dong 
32502b24941SPaolo Abeni 	if (ip_can_use_hint(skb, iph, hint)) {
326479aed04SMenglong Dong 		drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr,
3272b78d306SGuillaume Nault 						ip4h_dscp(iph), dev, hint);
328479aed04SMenglong Dong 		if (unlikely(drop_reason))
32902b24941SPaolo Abeni 			goto drop_error;
33002b24941SPaolo Abeni 	}
33102b24941SPaolo Abeni 
332479aed04SMenglong Dong 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
33311052589SKuniyuki Iwashima 	if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
33463e51b6aSEric Dumazet 	    !skb_dst(skb) &&
33563e51b6aSEric Dumazet 	    !skb->sk &&
33663e51b6aSEric Dumazet 	    !ip_is_fragment(iph)) {
33711052589SKuniyuki Iwashima 		switch (iph->protocol) {
33811052589SKuniyuki Iwashima 		case IPPROTO_TCP:
33911052589SKuniyuki Iwashima 			if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
34011052589SKuniyuki Iwashima 				tcp_v4_early_demux(skb);
34141063e9dSDavid S. Miller 
3429cb429d6SEric Dumazet 				/* must reload iph, skb->head might have changed */
3439cb429d6SEric Dumazet 				iph = ip_hdr(skb);
3449cb429d6SEric Dumazet 			}
34511052589SKuniyuki Iwashima 			break;
34611052589SKuniyuki Iwashima 		case IPPROTO_UDP:
34711052589SKuniyuki Iwashima 			if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
34811052589SKuniyuki Iwashima 				err = udp_v4_early_demux(skb);
34911052589SKuniyuki Iwashima 				if (unlikely(err))
35011052589SKuniyuki Iwashima 					goto drop_error;
35111052589SKuniyuki Iwashima 
35211052589SKuniyuki Iwashima 				/* must reload iph, skb->head might have changed */
35311052589SKuniyuki Iwashima 				iph = ip_hdr(skb);
35411052589SKuniyuki Iwashima 			}
35511052589SKuniyuki Iwashima 			break;
35611052589SKuniyuki Iwashima 		}
3576648bd7eSAlexander Duyck 	}
35841063e9dSDavid S. Miller 
359160eb5a6SDavid S. Miller 	/*
360160eb5a6SDavid S. Miller 	 *	Initialise the virtual path cache for the packet. It describes
361160eb5a6SDavid S. Miller 	 *	how the packet travels inside Linux networking.
362160eb5a6SDavid S. Miller 	 */
363f38a9eb1SThomas Graf 	if (!skb_valid_dst(skb)) {
36482d9983eSMenglong Dong 		drop_reason = ip_route_input_noref(skb, iph->daddr, iph->saddr,
36566fb6386SGuillaume Nault 						   ip4h_dscp(iph), dev);
36682d9983eSMenglong Dong 		if (unlikely(drop_reason))
3677487449cSPaolo Abeni 			goto drop_error;
36882d9983eSMenglong Dong 		drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
3693a591318SEyal Birger 	} else {
3703a591318SEyal Birger 		struct in_device *in_dev = __in_dev_get_rcu(dev);
3713a591318SEyal Birger 
3723a591318SEyal Birger 		if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY))
3733a591318SEyal Birger 			IPCB(skb)->flags |= IPSKB_NOPOLICY;
3742c2910a4SDietmar Eggemann 	}
3751da177e4SLinus Torvalds 
376c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
377adf30907SEric Dumazet 	if (unlikely(skb_dst(skb)->tclassid)) {
3787a9b2d59SEric Dumazet 		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
379adf30907SEric Dumazet 		u32 idx = skb_dst(skb)->tclassid;
3801da177e4SLinus Torvalds 		st[idx&0xFF].o_packets++;
3811da177e4SLinus Torvalds 		st[idx&0xFF].o_bytes += skb->len;
3821da177e4SLinus Torvalds 		st[(idx>>16)&0xFF].i_packets++;
3831da177e4SLinus Torvalds 		st[(idx>>16)&0xFF].i_bytes += skb->len;
3841da177e4SLinus Torvalds 	}
3851da177e4SLinus Torvalds #endif
3861da177e4SLinus Torvalds 
3878c83f2dfSStephen Suryaputra 	if (iph->ihl > 5 && ip_rcv_options(skb, dev))
3881da177e4SLinus Torvalds 		goto drop;
3891da177e4SLinus Torvalds 
390511c3f92SEric Dumazet 	rt = skb_rtable(skb);
391edf391ffSNeil Horman 	if (rt->rt_type == RTN_MULTICAST) {
392b15084ecSEric Dumazet 		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
39312b74dfaSJohannes Berg 	} else if (rt->rt_type == RTN_BROADCAST) {
394b15084ecSEric Dumazet 		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
39512b74dfaSJohannes Berg 	} else if (skb->pkt_type == PACKET_BROADCAST ||
39612b74dfaSJohannes Berg 		   skb->pkt_type == PACKET_MULTICAST) {
397d6f64d72SMark Tomlinson 		struct in_device *in_dev = __in_dev_get_rcu(dev);
39812b74dfaSJohannes Berg 
39912b74dfaSJohannes Berg 		/* RFC 1122 3.3.6:
40012b74dfaSJohannes Berg 		 *
40112b74dfaSJohannes Berg 		 *   When a host sends a datagram to a link-layer broadcast
40212b74dfaSJohannes Berg 		 *   address, the IP destination address MUST be a legal IP
40312b74dfaSJohannes Berg 		 *   broadcast or IP multicast address.
40412b74dfaSJohannes Berg 		 *
40512b74dfaSJohannes Berg 		 *   A host SHOULD silently discard a datagram that is received
40612b74dfaSJohannes Berg 		 *   via a link-layer broadcast (see Section 2.4) but does not
40712b74dfaSJohannes Berg 		 *   specify an IP multicast or broadcast destination address.
40812b74dfaSJohannes Berg 		 *
40912b74dfaSJohannes Berg 		 * This doesn't explicitly say L2 *broadcast*, but broadcast is
41012b74dfaSJohannes Berg 		 * in a way a form of multicast and the most common use case for
41112b74dfaSJohannes Berg 		 * this is 802.11 protecting against cross-station spoofing (the
41212b74dfaSJohannes Berg 		 * so-called "hole-196" attack) so do it for both.
41312b74dfaSJohannes Berg 		 */
41412b74dfaSJohannes Berg 		if (in_dev &&
415c1f166d1SMenglong Dong 		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
416c1f166d1SMenglong Dong 			drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
41712b74dfaSJohannes Berg 			goto drop;
41812b74dfaSJohannes Berg 		}
419c1f166d1SMenglong Dong 	}
4205506b54bSMitsuru Chinen 
4215fa12739SEdward Cree 	return NET_RX_SUCCESS;
4221da177e4SLinus Torvalds 
4231da177e4SLinus Torvalds drop:
424c1f166d1SMenglong Dong 	kfree_skb_reason(skb, drop_reason);
4251da177e4SLinus Torvalds 	return NET_RX_DROP;
4267487449cSPaolo Abeni 
4277487449cSPaolo Abeni drop_error:
42837653a0bSMenglong Dong 	if (drop_reason == SKB_DROP_REASON_IP_RPFILTER)
4297487449cSPaolo Abeni 		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
4307487449cSPaolo Abeni 	goto drop;
4311da177e4SLinus Torvalds }
4321da177e4SLinus Torvalds 
ip_rcv_finish(struct net * net,struct sock * sk,struct sk_buff * skb)4335fa12739SEdward Cree static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
4345fa12739SEdward Cree {
435a1fd1ad2SDavid Ahern 	struct net_device *dev = skb->dev;
436efe6aacaSEdward Cree 	int ret;
4375fa12739SEdward Cree 
438efe6aacaSEdward Cree 	/* if ingress device is enslaved to an L3 master device pass the
439efe6aacaSEdward Cree 	 * skb to its handler for processing
440efe6aacaSEdward Cree 	 */
441efe6aacaSEdward Cree 	skb = l3mdev_ip_rcv(skb);
442efe6aacaSEdward Cree 	if (!skb)
443efe6aacaSEdward Cree 		return NET_RX_SUCCESS;
444efe6aacaSEdward Cree 
445*5df7ca0bSYu Tian 	ret = ip_rcv_finish_core(net, skb, dev, NULL);
4465fa12739SEdward Cree 	if (ret != NET_RX_DROP)
4475fa12739SEdward Cree 		ret = dst_input(skb);
4485fa12739SEdward Cree 	return ret;
4495fa12739SEdward Cree }
4505fa12739SEdward Cree 
4511da177e4SLinus Torvalds /*
4521da177e4SLinus Torvalds  * 	Main IP Receive routine.
4531da177e4SLinus Torvalds  */
ip_rcv_core(struct sk_buff * skb,struct net * net)45417266ee9SEdward Cree static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
4551da177e4SLinus Torvalds {
456b71d1d42SEric Dumazet 	const struct iphdr *iph;
45733cba429SMenglong Dong 	int drop_reason;
45858615242SThomas Graf 	u32 len;
4591da177e4SLinus Torvalds 
4601da177e4SLinus Torvalds 	/* When the interface is in promisc. mode, drop all the crap
4611da177e4SLinus Torvalds 	 * that it receives, do not try to analyse it.
4621da177e4SLinus Torvalds 	 */
46333cba429SMenglong Dong 	if (skb->pkt_type == PACKET_OTHERHOST) {
464794c24e9SJeffrey Ji 		dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
46533cba429SMenglong Dong 		drop_reason = SKB_DROP_REASON_OTHERHOST;
4661da177e4SLinus Torvalds 		goto drop;
46733cba429SMenglong Dong 	}
4681da177e4SLinus Torvalds 
469b15084ecSEric Dumazet 	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
4701da177e4SLinus Torvalds 
47151456b29SIan Morris 	skb = skb_share_check(skb, GFP_ATOMIC);
47251456b29SIan Morris 	if (!skb) {
473b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
4741da177e4SLinus Torvalds 		goto out;
4751da177e4SLinus Torvalds 	}
4761da177e4SLinus Torvalds 
47733cba429SMenglong Dong 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
4781da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
4791da177e4SLinus Torvalds 		goto inhdr_error;
4801da177e4SLinus Torvalds 
481eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
4821da177e4SLinus Torvalds 
4831da177e4SLinus Torvalds 	/*
484c67fa027SJ.H.M. Dassen (Ray) 	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
4851da177e4SLinus Torvalds 	 *
4861da177e4SLinus Torvalds 	 *	Is the datagram acceptable?
4871da177e4SLinus Torvalds 	 *
4881da177e4SLinus Torvalds 	 *	1.	Length at least the size of an ip header
4891da177e4SLinus Torvalds 	 *	2.	Version of 4
4901da177e4SLinus Torvalds 	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
4911da177e4SLinus Torvalds 	 *	4.	Doesn't have a bogus length
4921da177e4SLinus Torvalds 	 */
4931da177e4SLinus Torvalds 
4941da177e4SLinus Torvalds 	if (iph->ihl < 5 || iph->version != 4)
4951da177e4SLinus Torvalds 		goto inhdr_error;
4961da177e4SLinus Torvalds 
4971f07d03eSEric Dumazet 	BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
4981f07d03eSEric Dumazet 	BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
4991f07d03eSEric Dumazet 	BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
50098f61995SEric Dumazet 	__IP_ADD_STATS(net,
5011f07d03eSEric Dumazet 		       IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
5021f07d03eSEric Dumazet 		       max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
5031f07d03eSEric Dumazet 
5041da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, iph->ihl*4))
5051da177e4SLinus Torvalds 		goto inhdr_error;
5061da177e4SLinus Torvalds 
507eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
5081da177e4SLinus Torvalds 
509e9c60422SThomas Graf 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
5106a5dc9e5SEric Dumazet 		goto csum_error;
5111da177e4SLinus Torvalds 
512b1a78b9bSXin Long 	len = iph_totlen(skb, iph);
513704aed53SMitsuru Chinen 	if (skb->len < len) {
51433cba429SMenglong Dong 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
515b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
516704aed53SMitsuru Chinen 		goto drop;
517704aed53SMitsuru Chinen 	} else if (len < (iph->ihl*4))
5181da177e4SLinus Torvalds 		goto inhdr_error;
5191da177e4SLinus Torvalds 
5201da177e4SLinus Torvalds 	/* Our transport medium may have padded the buffer out. Now we know it
5211da177e4SLinus Torvalds 	 * is IP we can trim to the true length of the frame.
5221da177e4SLinus Torvalds 	 * Note this now means skb->len holds ntohs(iph->tot_len).
5231da177e4SLinus Torvalds 	 */
5241da177e4SLinus Torvalds 	if (pskb_trim_rcsum(skb, len)) {
525b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
5261da177e4SLinus Torvalds 		goto drop;
5271da177e4SLinus Torvalds 	}
5281da177e4SLinus Torvalds 
5296c57f045SRoss Lagerwall 	iph = ip_hdr(skb);
53021d1196aSEric Dumazet 	skb->transport_header = skb->network_header + iph->ihl*4;
53121d1196aSEric Dumazet 
53253602f92SStephen Hemminger 	/* Remove any debris in the socket control block */
533d569f1d7SGuillaume Chazarain 	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
5340b922b7aSDavid Ahern 	IPCB(skb)->iif = skb->skb_iif;
53553602f92SStephen Hemminger 
53671f9dacdSHerbert Xu 	/* Must drop socket now because of tproxy. */
537cf7fbe66SJoe Stringer 	if (!skb_sk_is_prefetched(skb))
53871f9dacdSHerbert Xu 		skb_orphan(skb);
53971f9dacdSHerbert Xu 
54017266ee9SEdward Cree 	return skb;
5411da177e4SLinus Torvalds 
5426a5dc9e5SEric Dumazet csum_error:
54333cba429SMenglong Dong 	drop_reason = SKB_DROP_REASON_IP_CSUM;
544b45386efSEric Dumazet 	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
5451da177e4SLinus Torvalds inhdr_error:
54633cba429SMenglong Dong 	if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED)
54733cba429SMenglong Dong 		drop_reason = SKB_DROP_REASON_IP_INHDR;
548b45386efSEric Dumazet 	__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
5491da177e4SLinus Torvalds drop:
55033cba429SMenglong Dong 	kfree_skb_reason(skb, drop_reason);
5511da177e4SLinus Torvalds out:
55217266ee9SEdward Cree 	return NULL;
55317266ee9SEdward Cree }
55417266ee9SEdward Cree 
55517266ee9SEdward Cree /*
55617266ee9SEdward Cree  * IP receive entry point
55717266ee9SEdward Cree  */
ip_rcv(struct sk_buff * skb,struct net_device * dev,struct packet_type * pt,struct net_device * orig_dev)55817266ee9SEdward Cree int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
55917266ee9SEdward Cree 	   struct net_device *orig_dev)
56017266ee9SEdward Cree {
56117266ee9SEdward Cree 	struct net *net = dev_net(dev);
56217266ee9SEdward Cree 
56317266ee9SEdward Cree 	skb = ip_rcv_core(skb, net);
56417266ee9SEdward Cree 	if (skb == NULL)
5651da177e4SLinus Torvalds 		return NET_RX_DROP;
566fb1b6999SYang Wei 
56717266ee9SEdward Cree 	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
56817266ee9SEdward Cree 		       net, NULL, skb, dev, NULL,
56917266ee9SEdward Cree 		       ip_rcv_finish);
57017266ee9SEdward Cree }
57117266ee9SEdward Cree 
ip_sublist_rcv_finish(struct list_head * head)5725fa12739SEdward Cree static void ip_sublist_rcv_finish(struct list_head *head)
57317266ee9SEdward Cree {
57417266ee9SEdward Cree 	struct sk_buff *skb, *next;
57517266ee9SEdward Cree 
5760761680dSJesper Dangaard Brouer 	list_for_each_entry_safe(skb, next, head, list) {
577992cba7eSDavid S. Miller 		skb_list_del_init(skb);
5785fa12739SEdward Cree 		dst_input(skb);
5795fa12739SEdward Cree 	}
5800761680dSJesper Dangaard Brouer }
5815fa12739SEdward Cree 
ip_extract_route_hint(const struct net * net,struct sk_buff * skb,int rt_type)58202b24941SPaolo Abeni static struct sk_buff *ip_extract_route_hint(const struct net *net,
58302b24941SPaolo Abeni 					     struct sk_buff *skb, int rt_type)
58402b24941SPaolo Abeni {
5856ac66cb0SSriram Yagnaraman 	if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
5866ac66cb0SSriram Yagnaraman 	    IPCB(skb)->flags & IPSKB_MULTIPATH)
58702b24941SPaolo Abeni 		return NULL;
58802b24941SPaolo Abeni 
58902b24941SPaolo Abeni 	return skb;
59002b24941SPaolo Abeni }
59102b24941SPaolo Abeni 
ip_list_rcv_finish(struct net * net,struct list_head * head)592*5df7ca0bSYu Tian static void ip_list_rcv_finish(struct net *net, struct list_head *head)
5935fa12739SEdward Cree {
59402b24941SPaolo Abeni 	struct sk_buff *skb, *next, *hint = NULL;
5955fa12739SEdward Cree 	struct dst_entry *curr_dst = NULL;
596cecbe5c8SHongbo Li 	LIST_HEAD(sublist);
5975fa12739SEdward Cree 
5985fa12739SEdward Cree 	list_for_each_entry_safe(skb, next, head, list) {
599a1fd1ad2SDavid Ahern 		struct net_device *dev = skb->dev;
6005fa12739SEdward Cree 		struct dst_entry *dst;
6015fa12739SEdward Cree 
60222f6bbb7SEdward Cree 		skb_list_del_init(skb);
603efe6aacaSEdward Cree 		/* if ingress device is enslaved to an L3 master device pass the
604efe6aacaSEdward Cree 		 * skb to its handler for processing
605efe6aacaSEdward Cree 		 */
606efe6aacaSEdward Cree 		skb = l3mdev_ip_rcv(skb);
607efe6aacaSEdward Cree 		if (!skb)
608efe6aacaSEdward Cree 			continue;
609*5df7ca0bSYu Tian 		if (ip_rcv_finish_core(net, skb, dev, hint) == NET_RX_DROP)
6105fa12739SEdward Cree 			continue;
6115fa12739SEdward Cree 
6125fa12739SEdward Cree 		dst = skb_dst(skb);
6135fa12739SEdward Cree 		if (curr_dst != dst) {
61402b24941SPaolo Abeni 			hint = ip_extract_route_hint(net, skb,
61505d6d492SEric Dumazet 						     dst_rtable(dst)->rt_type);
61602b24941SPaolo Abeni 
6175fa12739SEdward Cree 			/* dispatch old sublist */
6185fa12739SEdward Cree 			if (!list_empty(&sublist))
6195fa12739SEdward Cree 				ip_sublist_rcv_finish(&sublist);
6205fa12739SEdward Cree 			/* start new sublist */
621a4ca8b7dSEdward Cree 			INIT_LIST_HEAD(&sublist);
6225fa12739SEdward Cree 			curr_dst = dst;
6235fa12739SEdward Cree 		}
624a4ca8b7dSEdward Cree 		list_add_tail(&skb->list, &sublist);
6255fa12739SEdward Cree 	}
6265fa12739SEdward Cree 	/* dispatch final sublist */
627a4ca8b7dSEdward Cree 	ip_sublist_rcv_finish(&sublist);
6285fa12739SEdward Cree }
6295fa12739SEdward Cree 
ip_sublist_rcv(struct list_head * head,struct net_device * dev,struct net * net)6305fa12739SEdward Cree static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
6315fa12739SEdward Cree 			   struct net *net)
6325fa12739SEdward Cree {
63317266ee9SEdward Cree 	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
63417266ee9SEdward Cree 		     head, dev, NULL, ip_rcv_finish);
635*5df7ca0bSYu Tian 	ip_list_rcv_finish(net, head);
63617266ee9SEdward Cree }
63717266ee9SEdward Cree 
63817266ee9SEdward Cree /* Receive a list of IP packets */
ip_list_rcv(struct list_head * head,struct packet_type * pt,struct net_device * orig_dev)63917266ee9SEdward Cree void ip_list_rcv(struct list_head *head, struct packet_type *pt,
64017266ee9SEdward Cree 		 struct net_device *orig_dev)
64117266ee9SEdward Cree {
64217266ee9SEdward Cree 	struct net_device *curr_dev = NULL;
64317266ee9SEdward Cree 	struct net *curr_net = NULL;
64417266ee9SEdward Cree 	struct sk_buff *skb, *next;
645cecbe5c8SHongbo Li 	LIST_HEAD(sublist);
64617266ee9SEdward Cree 
64717266ee9SEdward Cree 	list_for_each_entry_safe(skb, next, head, list) {
64817266ee9SEdward Cree 		struct net_device *dev = skb->dev;
64917266ee9SEdward Cree 		struct net *net = dev_net(dev);
65017266ee9SEdward Cree 
65122f6bbb7SEdward Cree 		skb_list_del_init(skb);
65217266ee9SEdward Cree 		skb = ip_rcv_core(skb, net);
65317266ee9SEdward Cree 		if (skb == NULL)
65417266ee9SEdward Cree 			continue;
65517266ee9SEdward Cree 
65617266ee9SEdward Cree 		if (curr_dev != dev || curr_net != net) {
65717266ee9SEdward Cree 			/* dispatch old sublist */
65817266ee9SEdward Cree 			if (!list_empty(&sublist))
659a4ca8b7dSEdward Cree 				ip_sublist_rcv(&sublist, curr_dev, curr_net);
66017266ee9SEdward Cree 			/* start new sublist */
661a4ca8b7dSEdward Cree 			INIT_LIST_HEAD(&sublist);
66217266ee9SEdward Cree 			curr_dev = dev;
66317266ee9SEdward Cree 			curr_net = net;
66417266ee9SEdward Cree 		}
665a4ca8b7dSEdward Cree 		list_add_tail(&skb->list, &sublist);
66617266ee9SEdward Cree 	}
66717266ee9SEdward Cree 	/* dispatch final sublist */
66851210ad5SFlorian Westphal 	if (!list_empty(&sublist))
669a4ca8b7dSEdward Cree 		ip_sublist_rcv(&sublist, curr_dev, curr_net);
6701da177e4SLinus Torvalds }
671