1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4##############################################################################
5# Topology description. p1 looped back to p2, p3 to p4 and so on.
6
declare -A NETIFS=()

# Default mapping of test ports to the veth devices used for them.
NETIFS[p1]=veth0
NETIFS[p2]=veth1
NETIFS[p3]=veth2
NETIFS[p4]=veth3
NETIFS[p5]=veth4
NETIFS[p6]=veth5
NETIFS[p7]=veth6
NETIFS[p8]=veth7
NETIFS[p9]=veth8
NETIFS[p10]=veth9
19
# Port that does not have a cable connected.
NETIF_NO_CABLE=${NETIF_NO_CABLE:-eth8}

##############################################################################
# Defines
#
# Every setting below keeps a non-empty value provided by the environment or
# by the importer and falls back to the listed default otherwise.

# Networking utilities.
PING=${PING:-ping}
PING6=${PING6:-ping6}	# Some distros just use ping.
ARPING=${ARPING:-arping}
TROUTE6=${TROUTE6:-traceroute6}

# Packet generator.
MZ=${MZ:-mausezahn}	# Some distributions use 'mz'.
MZ_DELAY=${MZ_DELAY:-0}

# Host configuration tools.
TEAMD=${TEAMD:-teamd}
MCD=${MCD:-smcrouted}
MC_CLI=${MC_CLI:-smcroutectl}

# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
# and bailing out. Used during initial setup.
INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:-600}
# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts.
WAIT_TIMEOUT=${WAIT_TIMEOUT:-20}
# Time to wait after interfaces participating in the test are all UP.
WAIT_TIME=${WAIT_TIME:-5}

# Whether to pause after a failure, and whether to pause before cleanup.
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:-no}
PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:-no}

# Whether to create virtual interfaces, and what netdevice type they should be.
NETIF_CREATE=${NETIF_CREATE:-yes}
NETIF_TYPE=${NETIF_TYPE:-veth}

# Constants for ping tests:
# How many packets should be sent.
PING_COUNT=${PING_COUNT:-10}
# Timeout (in seconds) before ping exits regardless of how many packets have
# been sent or received
PING_TIMEOUT=${PING_TIMEOUT:-5}

# Minimum ageing_time (in centiseconds) supported by hardware
LOW_AGEING_TIME=${LOW_AGEING_TIME:-1000}

# Whether to check for availability of certain tools.
REQUIRE_JQ=${REQUIRE_JQ:-yes}
REQUIRE_MZ=${REQUIRE_MZ:-yes}
REQUIRE_MTOOLS=${REQUIRE_MTOOLS:-no}

# Whether to override MAC addresses on interfaces participating in the test.
STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:-no}

# Flags for tcpdump
TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:-}

# Flags for TC filters.
TC_FLAG=${TC_FLAG:-skip_hw}

# Whether the machine is "slow" -- i.e. might be incapable of running tests
# involving heavy traffic. This might be the case on a debug kernel, a VM, or
# e.g. a low-power board.
KSFT_MACHINE_SLOW=${KSFT_MACHINE_SLOW:-no}
86
87##############################################################################
88# Find netifs by test-specified driver name
89
# Print the name of the driver bound to netdevice $1, as given by the target
# of its sysfs "device/driver" link. Nothing is printed when that link does
# not exist.
driver_name_get()
{
	local dev=$1
	local link="/sys/class/net/$dev/device/driver"

	[[ -L $link ]] || return 0

	basename $(realpath $link)
}
99
# Populate NETIFS[p1], NETIFS[p2], ... with the names of all netdevices whose
# driver (see driver_name_get) matches $NETIF_FIND_DRIVER, in the order that
# "ip link show" lists them.
netif_find_driver()
{
	local ifnames
	local ifname
	local driver_name
	local count=0

	ifnames=$(ip -j link show | jq -r ".[].ifname")

	# ifname is declared local above so that the loop does not overwrite a
	# variable of the same name in the importing script.
	for ifname in $ifnames; do
		driver_name=$(driver_name_get $ifname)
		if [[ -n $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then
			count=$((count + 1))
			NETIFS[p$count]="$ifname"
		fi
	done
}
114
# Whether to find netdevices according to the driver specified by the importer.
NETIF_FIND_DRIVER=${NETIF_FIND_DRIVER:-}

if [[ -n $NETIF_FIND_DRIVER ]]; then
	# Drop the default topology and rebuild it from the matching devices.
	unset NETIFS
	declare -A NETIFS
	netif_find_driver
fi

# Directory containing this library, with symbolic links resolved.
net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")

# forwarding.config is optional; lib.sh from the parent directory is not.
if [[ -f $net_forwarding_dir/forwarding.config ]]; then
	source "$net_forwarding_dir/forwarding.config"
fi

source "$net_forwarding_dir/../lib.sh"
131
132##############################################################################
133# Sanity checks
134
# Exit with $ksft_skip unless "tc -j" succeeds.
check_tc_version()
{
	if ! tc -j &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing JSON support"
		exit $ksft_skip
	fi
}
143
# Old versions of tc don't understand "mpls_uc". Probe for it by installing a
# throw-away matchall filter on the ingress of device $1; return $ksft_skip
# when the filter cannot be added, otherwise remove it again.
check_tc_mpls_support()
{
	local dev=$1
	local -a probe=(dev $dev ingress protocol mpls_uc pref 1 handle 1 matchall)

	if ! tc filter add "${probe[@]}" action pipe &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing MPLS support"
		return $ksft_skip
	fi

	tc filter del "${probe[@]}"
}
158
# Old versions of tc produce invalid json output for mpls lse statistics.
# Install a flower filter matching on MPLS LSE depth 2 on the ingress of
# device $1, check that jq can parse the JSON dump of it, and remove the
# filter again. Returns $ksft_skip if the filter is rejected or the dump does
# not parse.
check_tc_mpls_lse_stats()
{
	local dev=$1
	local -a probe=(dev $dev ingress protocol mpls_uc pref 1 handle 1)
	local json_bad=0

	if ! tc filter add "${probe[@]}" flower mpls lse depth 2 \
			action continue &> /dev/null; then
		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
		return $ksft_skip
	fi

	# Remember the verdict, but delete the probe filter before reporting.
	tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null ||
		json_bad=1
	tc filter del "${probe[@]}" flower

	if ((json_bad)); then
		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
		return $ksft_skip
	fi
}
184
# Exit with $ksft_skip unless the "tc filter help" text mentions "block".
check_tc_shblock_support()
{
	if ! tc filter help 2>&1 | grep block &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing shared block support"
		exit $ksft_skip
	fi
}
193
# Exit with $ksft_skip unless the "tc help" text mentions "chain".
check_tc_chain_support()
{
	if ! tc help 2>&1 | grep chain &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing chain support"
		exit $ksft_skip
	fi
}
202
# Exit with $ksft_skip unless the "tc actions help" text mentions "hw_stats".
check_tc_action_hw_stats_support()
{
	if ! tc actions help 2>&1 | grep -q hw_stats; then
		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
		exit $ksft_skip
	fi
}
211
# Exit with $ksft_skip unless the mqprio qdisc help text mentions "fp ".
check_tc_fp_support()
{
	if ! tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp "; then
		echo "SKIP: iproute2 too old; tc is missing frame preemption support"
		exit $ksft_skip
	fi
}
220
# Exit with $ksft_skip unless the "ethtool --help" text mentions "lanes".
check_ethtool_lanes_support()
{
	if ! ethtool --help 2>&1 | grep lanes &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing lanes support"
		exit $ksft_skip
	fi
}
229
# Exit with $ksft_skip unless the "ethtool --help" text mentions "--show-mm".
check_ethtool_mm_support()
{
	if ! ethtool --help 2>&1 | grep -- '--show-mm' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing MAC Merge layer support"
		exit $ksft_skip
	fi
}
238
# Exit with $ksft_skip unless the "ethtool --help" text mentions
# "--all-groups".
check_ethtool_counter_group_support()
{
	if ! ethtool --help 2>&1 | grep -- '--all-groups' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing standard counter group support"
		exit $ksft_skip
	fi
}
247
# Succeed if "ethtool --json -S $1 --all-groups --src pmac" reports a
# non-empty counter group named $2.
#
# Arguments: $1 - netdevice, $2 - counter group name
# Returns:   0 when the group has at least one member, non-zero otherwise
#            (including when ethtool or jq produce no usable number).
check_ethtool_pmac_std_stats_support()
{
	local dev=$1; shift
	local grp=$1; shift
	local count

	count=$(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \
		| jq ".[].\"$grp\" | length")

	# Validate before comparing: an empty or multi-line result must read
	# as "unsupported" instead of tripping a test(1) syntax error.
	[[ $count =~ ^[0-9]+$ ]] && ((10#$count != 0))
}
256
# Return $ksft_skip unless "bridge -d link show" output contains " locked".
check_locked_port_support()
{
	bridge -d link show | grep -q " locked" && return 0

	echo "SKIP: iproute2 too old; Locked port feature not supported."
	return $ksft_skip
}
264
# Return $ksft_skip unless "bridge -d link show" output contains "mab".
check_port_mab_support()
{
	bridge -d link show | grep -q "mab" && return 0

	echo "SKIP: iproute2 too old; MacAuth feature not supported."
	return $ksft_skip
}
272
# The rest of the library is skipped unless running with user ID 0.
[[ "$(id -u)" -eq 0 ]] || {
	echo "SKIP: need root privileges"
	exit $ksft_skip
}
277
# Exit with $ksft_skip unless the driver of netdevice $1 (as reported by
# driver_name_get) matches $2.
check_driver()
{
	local dev=$1
	local expected=$2
	local actual

	actual=$(driver_name_get $dev)
	[[ $actual != $expected ]] || return 0

	echo "SKIP: expected driver $expected for $dev, got $actual instead"
	exit $ksft_skip
}
289
# The tc JSON check runs only when the importer sets CHECK_TC=yes.
case "$CHECK_TC" in
yes)
	check_tc_version
	;;
esac
293
# Exit with $ksft_skip unless "command -v $1" resolves to an executable file.
require_command()
{
	local cmd=$1
	local resolved

	resolved=$(command -v "$cmd")
	if [[ -x $resolved ]]; then
		return 0
	fi

	echo "SKIP: $cmd not installed"
	exit $ksft_skip
}
303
# IPv6 support was added in v3.0. Exit with $ksft_skip when the major version
# parsed from "msend -v" is lower than 3.
check_mtools_version()
{
	local reported="$(msend -v)"
	local release=${reported##msend version }
	local major

	major=$(echo $release | cut -d. -f1)

	if [ $major -lt 3 ]; then
		echo "SKIP: expected mtools version 3.0, got $release"
		exit $ksft_skip
	fi
}
318
# Each tool is checked only when its REQUIRE_* switch is "yes".
[[ $REQUIRE_JQ != yes ]] || require_command jq
[[ $REQUIRE_MZ != yes ]] || require_command $MZ
if [[ $REQUIRE_MTOOLS == yes ]]; then
	# https://github.com/troglobit/mtools
	require_command msend
	require_command mreceive
	check_mtools_version
fi
331
332##############################################################################
333# Command line options handling
334
count=0

# Any positional arguments replace the default topology: the N-th argument
# becomes NETIFS[pN]. All arguments are consumed.
if [[ $# -gt 0 ]]; then
	unset NETIFS
	declare -A NETIFS
fi

until [[ $# -eq 0 ]]; do
	count=$((count + 1))
	NETIFS[p$count]="$1"
	shift
done
346
347##############################################################################
348# Network interfaces configuration
349
# The importer must say how many interfaces it needs ...
[[ -v NUM_NETIFS ]] || {
	echo "SKIP: importer does not define \"NUM_NETIFS\""
	exit $ksft_skip
}

# ... that many must be defined ...
if (( ${#NETIFS[@]} < NUM_NETIFS )); then
	echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
	exit $ksft_skip
fi

# ... and the defined ones must be numbered p1..pN without gaps.
for i in $(seq ${#NETIFS[@]}); do
	[[ -n ${NETIFS[p$i]} ]] && continue

	echo "SKIP: NETIFS[p$i] not given"
	exit $ksft_skip
done
366
# Create veth pairs for ports p1..p$NUM_NETIFS, pairing each odd-numbered
# port with the following one. A pair is only created when its first
# interface does not exist yet. Exits the script on a missing name or when
# "ip link add" fails.
create_netif_veth()
{
	local i
	local peer

	for ((i = 1; i <= NUM_NETIFS; i += 2)); do
		peer=$((i + 1))

		if [ -z ${NETIFS[p$i]} ]; then
			echo "SKIP: Cannot create interface. Name not specified"
			exit $ksft_skip
		fi

		ip link show dev ${NETIFS[p$i]} &> /dev/null && continue

		if ! ip link add ${NETIFS[p$i]} type veth \
				peer name ${NETIFS[p$peer]}; then
			echo "Failed to create netif"
			exit 1
		fi
	done
}
391
# Create the test interfaces according to $NETIF_TYPE. Only "veth" is
# handled; any other type terminates the script with status 1.
create_netif()
{
	if [[ $NETIF_TYPE == veth ]]; then
		create_netif_veth
	else
		echo "Can not create interfaces of type \'$NETIF_TYPE\'"
		exit 1
	fi
}
402
# Original MAC address of each interface, keyed by interface name.
declare -A MAC_ADDR_ORIG

# Record the current MAC address of every interface p1..p$NUM_NETIFS in
# MAC_ADDR_ORIG and replace it with 00:01:02:03:04:NN, NN being the port
# number in hexadecimal.
mac_addr_prepare()
{
	local new_addr=
	local dev=
	# Keep the counter local so that a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		new_addr=$(printf "00:01:02:03:04:%02x" $i)

		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
		# Strip quotes
		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
		ip link set dev $dev address $new_addr
	done
}
419
# Set the MAC address of every interface p1..p$NUM_NETIFS back to the value
# stored for it in MAC_ADDR_ORIG.
mac_addr_restore()
{
	local dev=
	# Keep the counter local so that a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
	done
}
429
# Optional preparation steps, each enabled by its switch being "yes".
[[ $NETIF_CREATE != yes ]] || create_netif
[[ $STABLE_MAC_ADDRS != yes ]] || mac_addr_prepare

# All interfaces the importer asked for have to exist at this point.
for ((i = 1; i <= NUM_NETIFS; ++i)); do
	if ! ip link show dev ${NETIFS[p$i]} &> /dev/null; then
		echo "SKIP: could not find all required interfaces"
		exit $ksft_skip
	fi
done
445
446##############################################################################
447# Helpers
448
# Exit status to return at the end. Set in case one of the tests fails.
EXIT_STATUS=0
# Per-test return value. Clear at the beginning of each test.
RET=0

# Merge status $1 into RET via ksft_status_merge. When that helper exits
# non-zero, $2 is stored in retmsg.
ret_set_ksft_status()
{
	local new_status=$1
	local new_msg=$2

	if ! RET=$(ksft_status_merge $RET $new_status); then
		retmsg=$new_msg
	fi
}
464
# Whether FAILs should be interpreted as XFAILs. Internal.
FAIL_TO_XFAIL=

# When $1 is non-zero, record a failure with message $2 through
# ret_set_ksft_status: $ksft_xfail if FAIL_TO_XFAIL is "yes", $ksft_fail
# otherwise.
check_err()
{
	local err=$1
	local msg=$2
	local status=$ksft_fail

	((err)) || return 0

	if [[ $FAIL_TO_XFAIL = yes ]]; then
		status=$ksft_xfail
	fi
	ret_set_ksft_status $status "$msg"
}
481
# Inverse of check_err: the check trips with message $2 when $1 is zero.
check_fail()
{
	local err=$1
	local msg=$2
	local inverted=$((!err))

	check_err $inverted "$msg"
}
489
# Check status $2 of operation $3 against an expectation: with a non-zero $1
# the operation was supposed to fail (check_fail), otherwise to succeed
# (check_err).
check_err_fail()
{
	local should_fail=$1
	local err=$2
	local what=$3

	if ! ((should_fail)); then
		check_err $err "$what failed"
		return
	fi

	check_fail $err "$what succeeded, but should have failed"
}
502
# Run the given command. When KSFT_MACHINE_SLOW is "yes", FAIL_TO_XFAIL=yes
# is set for the duration of that command.
xfail_on_slow()
{
	if [[ $KSFT_MACHINE_SLOW != yes ]]; then
		"$@"
		return
	fi

	FAIL_TO_XFAIL=yes "$@"
}
511
# Run the command given after $1. When the link kind of netdevice $1 is
# "veth", FAIL_TO_XFAIL=yes is set for the duration of that command.
xfail_on_veth()
{
	local dev=$1; shift
	local kind

	kind=$(ip -j -d link show dev $dev | jq -r '.[].linkinfo.info_kind')

	if [[ $kind != veth ]]; then
		"$@"
		return
	fi

	FAIL_TO_XFAIL=yes "$@"
}
525
# Print one result line: test name $1 and option string $2 in a 60-column
# field, followed by result $3 in brackets. A non-empty message $4 is printed
# on a tab-indented line of its own.
log_test_result()
{
	local test_name=$1
	local opt_str=$2
	local result=$3
	local retmsg=$4

	printf "TEST: %-60s  [%s]\n" "$test_name $opt_str" "$result"
	[[ -z $retmsg ]] || printf "\t%s\n" "$retmsg"
}
538
# When PAUSE_ON_FAIL is "yes", prompt and read a line into "a". Answering "q"
# exits the script with status 1; any other answer returns 1.
pause_on_fail()
{
	[[ $PAUSE_ON_FAIL == yes ]] || return 0

	echo "Hit enter to continue, 'q' to quit"
	read a
	case $a in
	q)
		exit 1
		;;
	*)
		return 1
		;;
	esac
}
547
# Log test $1 with option string $2 as passed.
handle_test_result_pass()
{
	local name=$1
	local opts=$2

	log_test_result "$name" "$opts" " OK "
}
555
# Log test $1 with option string $2 as failed, together with the current
# $retmsg, then give the user a chance to pause.
handle_test_result_fail()
{
	local name=$1
	local opts=$2

	log_test_result "$name" "$opts" FAIL "$retmsg"
	pause_on_fail
}
564
# Log test $1 with option string $2 as an expected failure, together with the
# current $retmsg, then give the user a chance to pause.
handle_test_result_xfail()
{
	local name=$1
	local opts=$2

	log_test_result "$name" "$opts" XFAIL "$retmsg"
	pause_on_fail
}
573
# Log test $1 with option string $2 as skipped, together with the current
# $retmsg.
handle_test_result_skip()
{
	local name=$1
	local opts=$2

	log_test_result "$name" "$opts" SKIP "$retmsg"
}
581
# Report the outcome held in RET for test $1, fold RET into EXIT_STATUS and
# return RET. The option string $2 is wrapped in parentheses when exactly two
# arguments are given.
log_test()
{
	local test_name=$1
	local opt_str=$2
	local outcome=fail

	(($# != 2)) || opt_str="($opt_str)"

	# Anything that is not a pass, xfail or skip is reported as a failure.
	if ((RET == ksft_pass)); then
		outcome=pass
	elif ((RET == ksft_xfail)); then
		outcome=xfail
	elif ((RET == ksft_skip)); then
		outcome=skip
	fi
	handle_test_result_$outcome "$test_name" "$opt_str"

	EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET)
	return $RET
}
604
# Log the test described by the arguments (see log_test) as skipped.
log_test_skip()
{
	# RET and retmsg are given as assignments prefixed to the call, i.e.
	# they are set for this log_test invocation only.
	RET=$ksft_skip retmsg= log_test "$@"
}
609
# Log the test described by the arguments (see log_test) as an expected
# failure.
log_test_xfail()
{
	# RET and retmsg are given as assignments prefixed to the call, i.e.
	# they are set for this log_test invocation only.
	RET=$ksft_xfail retmsg= log_test "$@"
}
614
# Print message $1 prefixed with "INFO: ".
log_info()
{
	local text=$1

	printf '%s\n' "INFO: $text"
}
621
# Run the given command and invert its exit status: succeed if it failed,
# fail if it succeeded.
not()
{
	! "$@"
}
627
# Print the largest of the integer arguments. With no arguments an empty
# line is printed.
get_max()
{
	local -a values=("$@")
	# Both are local so that neither "max" nor "cur" of the calling
	# script is overwritten when this is called outside of $(...).
	local max=${values[0]}
	local cur

	for cur in "${values[@]}"; do
		if [[ $cur -gt $max ]]; then
			max=$cur
		fi
	done

	echo "$max"
}
641
# Usage: grep_bridge_fdb ADDR [self|master [-v]] COMMAND...
# Run COMMAND and keep the output lines containing ADDR. When "self" or
# "master" is given, additionally keep only lines that contain that word, or
# with "-v" only lines that do not.
grep_bridge_fdb()
{
	local addr=$1; shift
	local word
	local flag

	case $1 in
	self|master)
		word=$1; shift
		if [[ $1 == -v ]]; then
			flag=$1; shift
		fi
		;;
	esac

	$@ | grep $addr | grep $flag "$word"
}
657
# Succeed if the output of the given command contains "Link detected: yes".
wait_for_port_up()
{
	local wanted="Link detected: yes"

	"$@" | grep -q "$wanted"
}
662
# Succeed if the output of the given command contains "offload".
wait_for_offload()
{
	local wanted=offload

	"$@" | grep -q $wanted
}
667
# Succeed if the output of the given command contains "trap".
wait_for_trap()
{
	local wanted=trap

	"$@" | grep -q $wanted
}
672
# Wait up to $INTERFACE_TIMEOUT iterations for netdevice $1 to come up, then
# a further $2 seconds (default $WAIT_TIME). If the device does not come up,
# a failed test is logged and the script exits with status 1.
setup_wait_dev()
{
	local dev=$1
	local wait_time=${2:-$WAIT_TIME}

	if setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time; then
		return 0
	fi

	check_err 1
	log_test setup_wait_dev ": Interface $dev does not come up."
	exit 1
}
686
# Poll once per second, at most $2 times (default $WAIT_TIMEOUT), until
# "ip link show dev $1 up" reports "state UP". On success sleep a further $3
# seconds (default $WAIT_TIME) and return 0; return 1 once the polls are
# used up.
setup_wait_dev_with_timeout()
{
	local dev=$1
	local max_iterations=${2:-$WAIT_TIMEOUT}
	local wait_time=${3:-$WAIT_TIME}
	local i

	for ((i = 1; i <= $max_iterations; ++i)); do
		if ip link show dev $dev up | grep 'state UP' &> /dev/null; then
			sleep $wait_time
			return 0
		fi
		sleep 1
	done

	return 1
}
707
# Wait for the first $1 test interfaces (default: all $NUM_NETIFS) to come
# up, then sleep $WAIT_TIME seconds.
setup_wait()
{
	local num_netifs=${1:-$NUM_NETIFS}
	local i=1

	while ((i <= num_netifs)); do
		setup_wait_dev ${NETIFS[p$i]} 0
		i=$((i + 1))
	done

	# Make sure links are ready.
	sleep $WAIT_TIME
}
720
# Wait, using slowwait with a timeout of $2 (default $WAIT_TIMEOUT), for
# "ip link show dev $1" to succeed. Otherwise a failed test is logged and the
# script exits with $EXIT_STATUS.
wait_for_dev()
{
	local dev=$1
	local timeout=${2:-$WAIT_TIMEOUT}

	if slowwait $timeout ip link show dev $dev &> /dev/null; then
		return 0
	fi

	check_err 1
	log_test wait_for_dev "Interface $dev did not appear."
	exit $EXIT_STATUS
}
733
# Run command line $1 and filter its output through "jq -r $3 $2".
#
# Arguments: $1 - command line (word-split before execution)
#            $2 - jq filter expression
#            $3 - optional extra jq options (word-split)
# Outputs:   the jq result on a single line, words separated by one space
# Returns:   the command's or jq's exit status when either fails; otherwise
#            0 for non-empty output and 1 for empty output.
cmd_jq()
{
	local cmd=$1
	local jq_exp=$2
	local jq_opts=$3
	local ret
	local output

	output="$($cmd)"
	# if the command fails, return error right away
	ret=$?
	if [[ $ret -ne 0 ]]; then
		return $ret
	fi
	# Feed jq the output verbatim. Unquoted, the shell would treat JSON
	# such as [...] as a glob pattern and could replace it with the names
	# of matching files, besides squeezing whitespace inside strings.
	output=$(printf '%s\n' "$output" | jq -r $jq_opts "$jq_exp")
	ret=$?
	if [[ $ret -ne 0 ]]; then
		return $ret
	fi
	# Word splitting is wanted here, as it joins multi-line results into
	# one line; pathname expansion is not, hence "set -f" in a subshell.
	( set -f; echo $output )
	# return success only in case of non-empty output
	[ ! -z "$output" ]
}
757
# Hook to run before cleanup: optionally wait for the user to press enter
# (PAUSE_ON_CLEANUP=yes) and optionally restore the original MAC addresses
# (STABLE_MAC_ADDRS=yes).
pre_cleanup()
{
	if [[ ${PAUSE_ON_CLEANUP} == yes ]]; then
		echo "Pausing before cleanup, hit any key to continue"
		read
	fi

	[[ $STABLE_MAC_ADDRS != yes ]] || mac_addr_restore
}
769
# Move the rule for the "local" table from preference 0 to preference 32765,
# for IPv4 and then for IPv6.
vrf_prepare()
{
	local family

	for family in -4 -6; do
		ip $family rule add pref 32765 table local
		ip $family rule del pref 0
	done
}
777
# Undo vrf_prepare: move the rule for the "local" table back from preference
# 32765 to preference 0, for IPv6 and then for IPv4.
vrf_cleanup()
{
	local family

	for family in -6 -4; do
		ip $family rule add pref 0 table local
		ip $family rule del pref 32765
	done
}
785
# Most recently assigned VRF table ID, and the ID assigned to each VRF name.
__last_tb_id=0
declare -A __TB_IDS

# Assign the next table ID to VRF $1, record it in __TB_IDS and hand it back
# as the function's return status.
__vrf_td_id_assign()
{
	local vrf_name=$1

	__TB_IDS[$vrf_name]=$((++__last_tb_id))
	return $__last_tb_id
}
797
# Hand back, as the return status, the table ID recorded in __TB_IDS for
# VRF $1.
__vrf_td_id_lookup()
{
	local tb_id=${__TB_IDS[$1]}

	return $tb_id
}
804
# Create VRF device $1 bound to a freshly assigned routing table, and install
# unreachable IPv4 and IPv6 default routes with metric 4278198272 in that
# table.
vrf_create()
{
	local vrf_name=$1
	local tb_id

	# Read the ID back from __TB_IDS instead of from the helper's exit
	# status: an exit status is limited to 8 bits, so the ID would wrap
	# around once more than 255 tables have been assigned.
	__vrf_td_id_assign $vrf_name
	tb_id=${__TB_IDS[$vrf_name]}

	ip link add dev $vrf_name type vrf table $tb_id
	ip -4 route add table $tb_id unreachable default metric 4278198272
	ip -6 route add table $tb_id unreachable default metric 4278198272
}
817
# Remove the unreachable default routes from the routing table of VRF $1 and
# delete the VRF device.
vrf_destroy()
{
	local vrf_name=$1
	local tb_id

	# Take the ID straight from __TB_IDS: passing it through an exit
	# status would truncate it to 8 bits. Names without a recorded ID map
	# to 0.
	tb_id=${__TB_IDS[$vrf_name]:-0}

	ip -6 route del table $tb_id unreachable default metric 4278198272
	ip -4 route del table $tb_id unreachable default metric 4278198272
	ip link del dev $vrf_name
}
830
# Usage: __addr_add_del IF_NAME add|del [ADDR...]
# Run "ip address add|del ADDR dev IF_NAME" for each ADDR. An ADDR is
# word-split on purpose, so it may carry further "ip address" arguments.
__addr_add_del()
{
	local if_name=$1
	local add_del=$2
	local array
	# Local, so that the loop does not overwrite a caller's "addrstr".
	local addrstr

	shift
	shift
	array=("${@}")

	for addrstr in "${array[@]}"; do
		ip address $add_del $addrstr dev $if_name
	done
}
845
# Usage: __simple_if_init IF_NAME VRF_NAME [ADDR...]
# Enslave IF_NAME to VRF_NAME, bring it up and add the given addresses.
__simple_if_init()
{
	local if_name=$1
	local vrf_name=$2
	local -a addrs=("${@:3}")

	ip link set dev $if_name master $vrf_name
	ip link set dev $if_name up

	__addr_add_del $if_name add "${addrs[@]}"
}
857
# Usage: __simple_if_fini IF_NAME [ADDR...]
# Remove the given addresses from IF_NAME, bring it down and release it from
# its master.
__simple_if_fini()
{
	local if_name=$1
	local -a addrs=("${@:2}")

	__addr_add_del $if_name del "${addrs[@]}"

	ip link set dev $if_name down
	ip link set dev $if_name nomaster
}
868
# Usage: simple_if_init IF_NAME [ADDR...]
# Create and bring up a VRF named "v" + IF_NAME, then set up IF_NAME inside
# it with the given addresses.
simple_if_init()
{
	local if_name=$1
	local vrf_name=v$if_name
	local -a addrs=("${@:2}")

	vrf_create $vrf_name
	ip link set dev $vrf_name up
	__simple_if_init $if_name $vrf_name "${addrs[@]}"
}
883
# Usage: simple_if_fini IF_NAME [ADDR...]
# Tear down what simple_if_init set up: deconfigure IF_NAME and destroy the
# VRF named "v" + IF_NAME.
simple_if_fini()
{
	local if_name=$1
	local vrf_name=v$if_name
	local -a addrs=("${@:2}")

	__simple_if_fini $if_name "${addrs[@]}"
	vrf_destroy $vrf_name
}
897
898tunnel_create()
899{
900	local name=$1; shift
901	local type=$1; shift
902	local local=$1; shift
903	local remote=$1; shift
904
905	ip link add name $name type $type \
906	   local $local remote $remote "$@"
907	ip link set dev $name up
908}
909
# Delete tunnel netdevice $1.
tunnel_destroy()
{
	local tunnel=$1

	ip link del dev $tunnel
}
916
# Usage: vlan_create IF_NAME VID VRF [ADDR...]
# Create VLAN device IF_NAME.VID on top of IF_NAME, enslave it to VRF unless
# VRF is empty, bring it up and add the given addresses.
vlan_create()
{
	local if_name=$1
	local vid=$2
	local vrf=$3
	local -a ips=("${@:4}")
	local name=$if_name.$vid

	ip link add name $name link $if_name type vlan id $vid
	[[ -z $vrf ]] || ip link set dev $name master $vrf
	ip link set dev $name up
	__addr_add_del $name add "${ips[@]}"
}
932
# Delete VLAN device $1.$2 (interface name, VLAN ID).
vlan_destroy()
{
	local if_name=$1
	local vid=$2

	ip link del dev $if_name.$vid
}
941
# Usage: team_create IF_NAME MODE [PORT...]
# Start $TEAMD as a daemon for team device IF_NAME using runner MODE, enslave
# each PORT (taking it down first and bringing it up afterwards), and bring
# the team device up. Skips the test if $TEAMD is not installed.
team_create()
{
	local if_name=$1; shift
	local mode=$1; shift
	# Local, so that the loop does not overwrite a caller's "slave".
	local slave

	require_command $TEAMD
	$TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
	for slave in "$@"; do
		ip link set dev $slave down
		ip link set dev $slave master $if_name
		ip link set dev $slave up
	done
	ip link set dev $if_name up
}
956
# Run "$TEAMD -k" for team device $1.
team_destroy()
{
	local team=$1

	$TEAMD -t $team -k
}
963
# Print the "master" field that "ip -j link show" reports for netdevice $1.
master_name_get()
{
	local dev=$1

	ip -j link show dev $dev |
		jq -r '.[]["master"]'
}
970
# Usage: link_stats_get IF_NAME DIR STAT
# Print counter STAT of direction DIR ("rx" or "tx" in this file) from the
# stats64 section of "ip -j -s link show dev IF_NAME".
link_stats_get()
{
	local if_name=$1
	local dir=$2
	local stat=$3

	ip -j -s link show dev $if_name |
		jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
}
980
# Print the transmitted-packets link counter of netdevice $1.
link_stats_tx_packets_get()
{
	local -a counter=(tx packets)

	link_stats_get $1 "${counter[@]}"
}
985
# Print the receive-errors link counter of netdevice $1.
link_stats_rx_errors_get()
{
	local -a counter=(rx errors)

	link_stats_get $1 "${counter[@]}"
}
990
# Print the value of the first "ethtool -S $1" line whose name is $2, i.e.
# everything after the colon up to the next colon.
ethtool_stats_get()
{
	local dev=$1
	local stat=$2

	ethtool -S $dev |
		grep "^ *$stat:" |
		head -n 1 |
		cut -d: -f2
}
998
# Usage: ethtool_std_stats_get DEV GROUP NAME SRC
# Print counter NAME of standard statistics group GROUP, as reported by
# ethtool in JSON form for source SRC.
ethtool_std_stats_get()
{
	local dev=$1
	local grp=$2
	local name=$3
	local src=$4

	ethtool --json -S $dev --groups $grp -- --src $src |
		jq '.[]."'"$grp"'"."'$name'"'
}
1009
# Usage: qdisc_stats_get DEV HANDLE SELECTOR
# Apply jq expression SELECTOR to the qdisc of DEV whose handle is HANDLE, as
# listed by "tc -j -s qdisc show".
qdisc_stats_get()
{
	local dev=$1
	local handle=$2
	local selector=$3
	local filter='.[] | select(.handle == "'"$handle"'") | '"$selector"

	tc -j -s qdisc show dev "$dev" | jq "$filter"
}
1019
# Usage: qdisc_parent_stats_get DEV PARENT SELECTOR
# Apply jq expression SELECTOR to the qdisc of DEV whose parent is PARENT, as
# listed by "tc -j -s qdisc show ... invisible".
qdisc_parent_stats_get()
{
	local dev=$1
	local parent=$2
	local selector=$3
	local filter='.[] | select(.parent == "'"$parent"'") | '"$selector"

	tc -j -s qdisc show dev "$dev" invisible | jq "$filter"
}
1029
# Print the second tab-separated field of the lines in
# /proc/net/dev_snmp6/$1 that start with $2.
ipv6_stats_get()
{
	local dev=$1
	local stat=$2
	local snmp6=/proc/net/dev_snmp6/$dev

	cat $snmp6 | grep "^$stat" | cut -f2
}
1037
# Usage: hw_stats_get SUITE IF_NAME DIR STAT
# Print counter stats64.DIR.STAT of offload subgroup SUITE, as reported by
# "ip -j stats show" for IF_NAME.
hw_stats_get()
{
	local suite=$1
	local if_name=$2
	local dir=$3
	local stat=$4

	ip -j stats show dev $if_name group offload subgroup $suite |
		jq ".[0].stats64.$dir.$stat"
}
1048
# Usage: __nh_stats_get KEY GROUP_ID MEMBER_ID
# Print field KEY of the group_stats entry with ID MEMBER_ID in nexthop group
# GROUP_ID.
__nh_stats_get()
{
	local key=$1
	local group_id=$2
	local member_id=$3
	local filter='.[].group_stats[] | select(.id == $member_id) | .[$key]'

	ip -j -s -s nexthop show id $group_id |
	    jq --argjson member_id "$member_id" --arg key "$key" "$filter"
}
1059
# Print the "packets" counter of member $2 in nexthop group $1.
nh_stats_get()
{
	__nh_stats_get packets "$1" "$2"
}
1067
# Print the "packets_hw" counter of member $2 in nexthop group $1.
nh_stats_get_hw()
{
	__nh_stats_get packets_hw "$1" "$2"
}
1075
# Print speed $1, given in bps, scaled by powers of 1024 to the largest of
# bps, Kbps, Mbps and Gbps that leaves a value of at least 1, followed by
# that unit. Scaled values carry one decimal place. Requires bc.
humanize()
{
	local speed=$1; shift
	# Local, so that the loop does not overwrite a caller's "unit".
	local unit

	for unit in bps Kbps Mbps Gbps; do
		# Gbps is the largest unit on offer, so stop scaling there;
		# dividing once more would print a value 1024 times too small
		# under the Gbps label.
		if [[ $unit == Gbps ]] ||
		   (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${unit}"
}
1090
# Usage: rate T0 T1 INTERVAL
# Print 8 * (T1 - T0) / INTERVAL, using integer arithmetic.
rate()
{
	local t0=$1
	local t1=$2
	local interval=$3
	local bits=$((8 * (t1 - t0)))

	echo $((bits / interval))
}
1099
# Usage: packets_rate T0 T1 INTERVAL
# Print (T1 - T0) / INTERVAL, using integer arithmetic.
packets_rate()
{
	local t0=$1
	local t1=$2
	local interval=$3
	local delta=$((t1 - t0))

	echo $((delta / interval))
}
1108
# Print the "address" field that "ip -j link show" reports for netdevice $1.
mac_get()
{
	local dev=$1

	ip -j link show dev $dev |
		jq -r '.[]["address"]'
}
1115
# Print the first address of netdevice $1 whose scope is "link".
ipv6_lladdr_get()
{
	local dev=$1
	local filter='.[]["addr_info"][] | select(.scope == "link").local'

	ip -j addr show dev $dev |
		jq -r "$filter" |
		head -1
}
1124
# Print the ageing time of bridge $1 in seconds.
bridge_ageing_time_get()
{
	local bridge=$1
	local centisecs

	centisecs=$(ip -j -d link show dev $bridge |
		    jq '.[]["linkinfo"]["info_data"]["ageing_time"]')

	# Need to divide by 100 to convert to seconds.
	echo $((centisecs / 100))
}
1135
# Saved sysctl values, keyed by sysctl name.
declare -A SYSCTL_ORIG

# Store the current value of sysctl $1 in SYSCTL_ORIG.
sysctl_save()
{
	local name=$1

	SYSCTL_ORIG[$name]=$(sysctl -n $name)
}
1143
# Save the current value of sysctl $1 (see sysctl_save), then set it to $2.
sysctl_set()
{
	local name=$1
	local new_value=$2

	sysctl_save "$name"
	sysctl -qw $name="$new_value"
}
1152
# Set sysctl $1 back to the value stored for it in SYSCTL_ORIG.
sysctl_restore()
{
	local name=$1

	sysctl -qw $name="${SYSCTL_ORIG[$name]}"
}
1159
# Enable IPv4 and IPv6 forwarding on all interfaces, remembering the previous
# settings for forwarding_restore.
forwarding_enable()
{
	local family

	for family in ipv4 ipv6; do
		sysctl_set net.$family.conf.all.forwarding 1
	done
}
1165
# Restore the IPv6 and then the IPv4 forwarding setting saved by
# forwarding_enable.
forwarding_restore()
{
	local family

	for family in ipv6 ipv4; do
		sysctl_restore net.$family.conf.all.forwarding
	done
}
1171
# Saved MTU values, keyed by interface name.
declare -A MTU_ORIG

# Remember the current MTU of netdevice $1 in MTU_ORIG, then set it to $2.
mtu_set()
{
	local dev=$1
	local new_mtu=$2

	MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu')
	ip link set dev $dev mtu $new_mtu
}
1181
# Set the MTU of netdevice $1 back to the value stored in MTU_ORIG.
mtu_restore()
{
	local dev=$1
	local saved=${MTU_ORIG["$dev"]}

	ip link set dev $dev mtu $saved
}
1188
# Succeed if "ethtool -k" reports "hw-tc-offload: on" for each of the first
# $1 test interfaces (default: all $NUM_NETIFS).
tc_offload_check()
{
	local num_netifs=${1:-$NUM_NETIFS}
	# Keep the counter local so that a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= num_netifs; ++i)); do
		if ! ethtool -k ${NETIFS[p$i]} \
				| grep "hw-tc-offload: on" &> /dev/null; then
			return 1
		fi
	done

	return 0
}
1203
# Usage: trap_install DEV DIRECTION
# Install a pref-1 flower filter on DEV in the given direction that traps
# packets in hardware, falling back to a "continue" action if that filter is
# rejected.
trap_install()
{
	local dev=$1
	local direction=$2

	# Some devices may not support or need in-hardware trapping of traffic
	# (e.g. the veth pairs that this library creates for non-existent
	# loopbacks). Use continue instead, so that there is a filter in there
	# (some tests check counters), and so that other filters are still
	# processed.
	if ! tc filter add dev $dev $direction pref 1 \
			flower skip_sw action trap 2>/dev/null; then
		tc filter add dev $dev $direction pref 1 \
			flower action continue
	fi
}
1219
# Usage: trap_uninstall DEV DIRECTION
# Delete the pref-1 flower filter on DEV in the given direction.
trap_uninstall()
{
	local dev=$1
	local direction=$2

	tc filter del dev $dev $direction pref 1 flower
}
1227
# Call trap_install with the given arguments, but only when $tcflags contains
# "skip_hw".
slow_path_trap_install()
{
	# For slow-path testing, we need to install a trap to get to
	# slow path the packets that would otherwise be switched in HW.
	if [[ $tcflags == *skip_hw* ]]; then
		trap_install "$@"
	fi
}
1236
# Call trap_uninstall with the given arguments, but only when $tcflags
# contains "skip_hw".
slow_path_trap_uninstall()
{
	if [[ $tcflags == *skip_hw* ]]; then
		trap_uninstall "$@"
	fi
}
1243
# Usage: __icmp_capture_add_del add|del PREF VSUF TUNDEV FILTER
# Add or delete an ingress flower filter on TUNDEV that passes ICMP packets.
# VSUF is "" for IPv4 or "v6" for IPv6; FILTER holds further flower match
# keys and is word-split.
__icmp_capture_add_del()
{
	local add_del=$1
	local pref=$2
	local vsuf=$3
	local tundev=$4
	local filter=$5
	local -a match=(proto ip$vsuf pref $pref
			flower ip_proto icmp$vsuf $filter)

	tc filter $add_del dev "$tundev" ingress "${match[@]}" action pass
}
1257
# Install the pref-100 ICMP capture filter on device $1, with extra flower
# match $2.
icmp_capture_install()
{
	__icmp_capture_add_del add 100 "" "$1" "$2"
}
1265
# Remove the pref-100 ICMP capture filter from device $1, with extra flower
# match $2.
icmp_capture_uninstall()
{
	__icmp_capture_add_del del 100 "" "$1" "$2"
}
1273
# Install the pref-100 ICMPv6 capture filter on device $1, with extra flower
# match $2.
icmp6_capture_install()
{
	__icmp_capture_add_del add 100 v6 "$1" "$2"
}
1281
# Remove the pref-100 ICMPv6 capture filter from device $1, with extra flower
# match $2.
icmp6_capture_uninstall()
{
	__icmp_capture_add_del del 100 v6 "$1" "$2"
}
1289
# Usage: __vlan_capture_add_del add|del PREF DEV FILTER
# Add or delete an ingress flower filter on DEV that passes 802.1q packets
# matching FILTER (flower match keys, word-split).
__vlan_capture_add_del()
{
	local add_del=$1
	local pref=$2
	local dev=$3
	local filter=$4
	local -a match=(proto 802.1q pref $pref flower $filter)

	tc filter $add_del dev "$dev" ingress "${match[@]}" action pass
}
1302
# Install the pref-100 VLAN capture filter on device $1, with flower
# match $2.
vlan_capture_install()
{
	__vlan_capture_add_del add 100 "$1" "$2"
}
1310
# Remove the pref-100 VLAN capture filter from device $1, with flower
# match $2.
vlan_capture_uninstall()
{
	__vlan_capture_add_del del 100 "$1" "$2"
}
1318
# Usage: __dscp_capture_add_del add|del DEV BASE
# Add or delete eight software-only ICMP capture filters on DEV, one for each
# DSCP value BASE..BASE+7. The filter for DSCP value D has preference D + 100
# and matches ip_tos D << 2.
__dscp_capture_add_del()
{
	local add_del=$1; shift
	local dev=$1; shift
	local base=$1; shift
	local dscp;
	# Local, so that the loop does not overwrite a caller's "prio".
	local prio

	for prio in {0..7}; do
		dscp=$((base + prio))
		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
				       "skip_hw ip_tos $((dscp << 2))"
	done
}
1332
# Install the DSCP capture filters on device $1 for DSCP values starting
# at $2.
dscp_capture_install()
{
	__dscp_capture_add_del add $1 $2
}
1340
# Remove the DSCP capture filters from device $1 for DSCP values starting
# at $2.
dscp_capture_uninstall()
{
	__dscp_capture_add_del del $1 $2
}
1348
# Usage: dscp_fetch_stats DEV BASE
# For each DSCP value D in BASE..BASE+7, print a line "[D]=COUNT ", COUNT
# being what tc_rule_stats_get reports for preference D + 100 on DEV.
dscp_fetch_stats()
{
	local dev=$1; shift
	local base=$1; shift
	# Local, so that the loop does not overwrite a caller's "prio".
	local prio
	local dscp
	local t

	for prio in {0..7}; do
		dscp=$((base + prio))
		t=$(tc_rule_stats_get $dev $((dscp + 100)))
		echo "[$dscp]=$t "
	done
}
1360
# Attach a clsact qdisc to device $1 and drop everything on its ingress with
# a pref-10000 matchall filter.
matchall_sink_create()
{
	local dev=$1

	tc qdisc add dev $dev clsact
	tc filter add dev $dev ingress pref 10000 matchall action drop
}
1371
# Run, in order, every test named in $TESTS, or in $ALL_TESTS when TESTS is
# unset or empty.
tests_run()
{
	local current_test

	set -- ${TESTS:-$ALL_TESTS}
	for current_test; do
		$current_test
	done
}
1380
# Usage: multipath_eval DESC WEIGHT_RP12 WEIGHT_RP13 PACKETS_RP12 PACKETS_RP13
# Compare the ratio of packets seen on two paths with the ratio of their
# configured weights (heavier path over lighter one) and log a test result.
# A relative deviation of more than 0.15 is a failure; so is a zero packet
# count on either path. Resets RET. Requires bc.
multipath_eval()
{
	local desc="$1"
	local weight_rp12=$2
	local weight_rp13=$3
	local packets_rp12=$4
	local packets_rp13=$5
	local weights_ratio packets_ratio diff
	local w_hi w_lo p_hi p_lo

	RET=0

	# Always divide the heavier path by the lighter one.
	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		w_hi=$weight_rp12; w_lo=$weight_rp13
		p_hi=$packets_rp12; p_lo=$packets_rp13
	else
		w_hi=$weight_rp13; w_lo=$weight_rp12
		p_hi=$packets_rp13; p_lo=$packets_rp12
	fi

	weights_ratio=$(echo "scale=2; $w_hi / $w_lo" | bc -l)

	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
		check_err 1 "Packet difference is 0"
		log_test "Multipath"
		log_info "Expected ratio $weights_ratio"
		return
	fi

	packets_ratio=$(echo "scale=2; $p_hi / $p_lo" | bc -l)

	# Absolute difference of the two ratios.
	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
	diff=${diff#-}

	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
	check_err $? "Too large discrepancy between expected and measured ratios"
	log_test "$desc"
	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
}
1423
# Usage: in_ns NAME COMMAND [ARG...]
# Run COMMAND inside network namespace NAME. A bash process is started in the
# namespace and fed a script that sets NUM_NETIFS=0, sources lib.sh and then
# runs COMMAND with its arguments, each re-quoted with printf %q and followed
# by the first character of IFS. The script is a here-document written with
# "<<-", so its lines must stay indented with tabs.
in_ns()
{
	local name=$1; shift

	ip netns exec $name bash <<-EOF
		NUM_NETIFS=0
		source lib.sh
		$(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done)
	EOF
}
1434
1435##############################################################################
1436# Tests
1437
# Usage: ping_do IF_NAME DIP [ARGS]
# Ping DIP from within the VRF that IF_NAME is enslaved to, sending
# $PING_COUNT packets 0.1 s apart with an overall deadline of $PING_TIMEOUT
# seconds. ARGS holds extra ping options and is word-split. All output is
# discarded; the exit status is that of the "ip vrf exec" command.
ping_do()
{
	local if_name=$1
	local dip=$2
	local args=$3
	local vrf_name
	local -a ping_cmd

	vrf_name=$(master_name_get $if_name)
	ping_cmd=($PING $args $dip -c $PING_COUNT -i 0.1 -w $PING_TIMEOUT)

	ip vrf exec $vrf_name "${ping_cmd[@]}" &> /dev/null
}
1450
# Usage: ping_test IF_NAME DIP [SUFFIX]
# Test that DIP can be pinged through IF_NAME and log the result as
# "ping" + SUFFIX.
ping_test()
{
	local test_name="ping$3"

	RET=0

	ping_do $1 $2
	check_err $?
	log_test "$test_name"
}
1459
# Usage: ping_test_fails IF_NAME DIP [SUFFIX]
# Test that DIP can NOT be pinged through IF_NAME and log the result as
# "ping fails" + SUFFIX.
ping_test_fails()
{
	local test_name="ping fails$3"

	RET=0

	ping_do $1 $2
	check_fail $?
	log_test "$test_name"
}
1468
# Run $PING6 towards an address, wrapped in "ip vrf exec".
# $1 - interface whose master (as reported by master_name_get) names the VRF
# $2 - destination address
# $3 - optional extra arguments for $PING6
# All output is discarded; the exit status is that of the ping command line.
ping6_do()
{
	local dev=$1
	local target=$2
	local extra_args=$3
	local vrf

	vrf=$(master_name_get $dev)
	ip vrf exec $vrf $PING6 $extra_args $target \
		-c $PING_COUNT -i 0.1 -w $PING_TIMEOUT &> /dev/null
}
1481
# Test that an IPv6 ping succeeds.
# $1 - interface, $2 - destination address (both handed to ping6_do)
# $3 - optional suffix appended to the test name "ping6"
# Resets RET, feeds ping6_do's status to check_err and logs the result.
ping6_test()
{
	local if_name=$1 dip=$2 suffix=$3

	RET=0

	ping6_do $if_name $dip
	check_err $?
	log_test "ping6$suffix"
}
1490
# Test that an IPv6 ping does NOT succeed.
# $1 - interface, $2 - destination address (both handed to ping6_do)
# $3 - optional suffix appended to the test name "ping6 fails"
# Resets RET, feeds ping6_do's status to check_fail and logs the result.
ping6_test_fails()
{
	local if_name=$1 dip=$2 suffix=$3

	RET=0

	ping6_do $if_name $dip
	check_fail $?
	log_test "ping6 fails$suffix"
}
1499
# Test FDB learning, forwarding on a learned entry, ageing, and the per-port
# "learning off" setting.
# $1 - bridge device
# $2 - bridge port connected to `host1_if`
# $3 - host interface connected to `br_port1`; an ingress tc filter on it
#      counts packets destined to the test MAC
# $4 - second host interface, used to send traffic towards the test MAC
# Resets RET and reports the result through log_test as "FDB learning".
learning_test()
{
	local bridge=$1
	local br_port1=$2	# Connected to `host1_if`.
	local host1_if=$3
	local host2_if=$4
	local mac=de:ad:be:ef:13:37
	local ageing_time

	RET=0

	# The test MAC must not be known on the port before we start.
	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Disable unknown unicast flooding on `br_port1` to make sure
	# packets are only forwarded through the port after a matching
	# FDB entry was installed.
	bridge link set dev $br_port1 flood off

	# Count (and drop) packets addressed to the test MAC that arrive on
	# `host1_if`.
	ip link set $host1_if promisc on
	tc qdisc add dev $host1_if ingress
	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	# No FDB entry yet and flooding is off: the packet must not arrive.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	check_fail $? "Packet reached first host when should not"

	# Send from `host1_if` with the test MAC as source so that the
	# bridge learns it on `br_port1`.
	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_err $? "Did not find FDB record when should"

	# With the entry installed, the packet must now arrive on
	# `host1_if`.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	# The filter sits on `host1_if`, so a miss here means the packet did
	# not reach the first host.
	tc -j -s filter show dev $host1_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	check_err $? "Packet did not reach first host when should"

	# Wait for 10 seconds after the ageing time to make sure FDB
	# record was aged-out.
	ageing_time=$(bridge_ageing_time_get $bridge)
	sleep $((ageing_time + 10))

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# With learning disabled, the same source MAC must not be learned.
	bridge link set dev $br_port1 learning off

	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Restore the port and host interface settings changed above.
	bridge link set dev $br_port1 learning on

	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host1_if ingress
	ip link set $host1_if promisc off

	bridge link set dev $br_port1 flood on

	log_test "FDB learning"
}
1576
# Send one packet from a host interface and check whether it shows up on
# another one.
# $1 - "true" if the packet is expected to arrive, "false" if it is not
# $2 - destination MAC, $3 - destination IP of the generated packet
# $4 - interface the packet is sent from
# $5 - interface on which arrival is checked
# Returns 0 when the observation matches the expectation, 1 otherwise.
flood_test_do()
{
	local should_flood=$1
	local dmac=$2
	local dip=$3
	local tx_if=$4
	local rx_if=$5
	local matched
	local err=0

	# Add an ACL on the receiving interface which will tell us whether
	# the packet was flooded to it or not.
	ip link set $rx_if promisc on
	tc qdisc add dev $rx_if ingress
	tc filter add dev $rx_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $dmac action drop

	$MZ $tx_if -c 1 -p 64 -b $dmac -B $dip -t ip -q
	sleep 1

	# jq exits with 0 only if the filter counted exactly one packet.
	tc -j -s filter show dev $rx_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	matched=$?

	case $should_flood in
	true)
		[[ $matched -ne 0 ]] && err=1
		;;
	false)
		[[ $matched -eq 0 ]] && err=1
		;;
	esac

	tc filter del dev $rx_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $rx_if ingress
	ip link set $rx_if promisc off

	return $err
}
1610
# Test the per-port "flood" setting with a unicast packet to a fixed test MAC.
# $1 - bridge port whose flood flag is toggled
# $2 - host interface sending the packet
# $3 - host interface on which arrival is checked
# Resets RET and reports the result as "Unknown unicast flood".
flood_unicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local -a probe=(de:ad:be:ef:13:37 192.0.2.100 $host1_if $host2_if)

	RET=0

	# Flooding off: the packet must not arrive.
	bridge link set dev $br_port flood off
	flood_test_do false "${probe[@]}"
	check_err $? "Packet flooded when should not"

	# Flooding on: the packet must arrive.
	bridge link set dev $br_port flood on
	flood_test_do true "${probe[@]}"
	check_err $? "Packet was not flooded when should"

	log_test "Unknown unicast flood"
}
1633
# Test the per-port "mcast_flood" setting with a packet to a fixed multicast
# group (01:00:5e:00:00:01 / 239.0.0.1).
# $1 - bridge port whose mcast_flood flag is toggled
# $2 - host interface sending the packet
# $3 - host interface on which arrival is checked
# Resets RET and reports the result as "Unregistered multicast flood".
flood_multicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local -a probe=(01:00:5e:00:00:01 239.0.0.1 $host1_if $host2_if)

	RET=0

	# Multicast flooding off: the packet must not arrive.
	bridge link set dev $br_port mcast_flood off
	flood_test_do false "${probe[@]}"
	check_err $? "Packet flooded when should not"

	# Multicast flooding on: the packet must arrive.
	bridge link set dev $br_port mcast_flood on
	flood_test_do true "${probe[@]}"
	check_err $? "Packet was not flooded when should"

	log_test "Unregistered multicast flood"
}
1656
# Run the unicast and then the multicast flood test with the same arguments.
# $1 - bridge port; `br_port` is connected to `host2_if`
# $2 - host interface sending the packets
# $3 - host interface on which arrival is checked
flood_test()
{
	local br_port=$1 host1_if=$2 host2_if=$3
	local subtest

	for subtest in flood_unicast_test flood_multicast_test; do
		$subtest $br_port $host1_if $host2_if
	done
}
1667
# Start $MZ in the background with an unlimited packet count (-c 0).
# $1 - packet size, $2 - protocol passed to -t
# $3 - interface where the traffic egresses the host
# $4 - source IP, $5 - destination IP, $6 - destination MAC
# Remaining arguments are appended to the $MZ command line.
# The generator is left running as a background job; one second is slept
# before returning.
__start_traffic()
{
	local pktsize=$1; shift
	local proto=$1; shift
	local h_in=$1; shift
	local sip=$1; shift
	local dip=$1; shift
	local dmac=$1; shift
	local -a mz_cmd

	mz_cmd=($MZ $h_in -p $pktsize -A $sip -B $dip -c 0)
	mz_cmd+=(-a own -b $dmac -t "$proto" -q "$@")

	"${mz_cmd[@]}" &
	sleep 1
}
1682
# Start background UDP traffic with a given packet size.
# $1 - packet size, $2 - egress interface, $3 - source IP,
# $4 - destination IP, $5 - destination MAC; further arguments are passed
# through to __start_traffic.
start_traffic_pktsize()
{
	local pktsize=$1 h_in=$2 sip=$3 dip=$4 dmac=$5
	local -a mz_args=("${@:6}")

	__start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \
			"${mz_args[@]}"
}
1695
# Start background TCP traffic with a given packet size.
# $1 - packet size, $2 - egress interface, $3 - source IP,
# $4 - destination IP, $5 - destination MAC; further arguments are passed
# through to __start_traffic.
start_tcp_traffic_pktsize()
{
	local pktsize=$1 h_in=$2 sip=$3 dip=$4 dmac=$5
	local -a mz_args=("${@:6}")

	__start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \
			"${mz_args[@]}"
}
1708
# Start background UDP traffic with the default packet size of 8000.
# $1 - egress interface, $2 - source IP, $3 - destination IP,
# $4 - destination MAC; further arguments are passed through.
start_traffic()
{
	local h_in=$1 sip=$2 dip=$3 dmac=$4
	local -a mz_args=("${@:5}")

	start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
			      "${mz_args[@]}"
}
1720
# Start background TCP traffic with the default packet size of 8000.
# $1 - egress interface, $2 - source IP, $3 - destination IP,
# $4 - destination MAC; further arguments are passed through.
start_tcp_traffic()
{
	local h_in=$1 sip=$2 dip=$3 dmac=$4
	local -a mz_args=("${@:5}")

	start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
				  "${mz_args[@]}"
}
1732
# Kill the shell's current background job (%%) and wait for it to exit.
stop_traffic()
{
	# stderr is discarded for the whole group to suppress noise from
	# killing mausezahn.
	{
		kill %% && wait %%
	} 2>/dev/null
}
1738
# Capture state used by the tcpdump_* helpers, keyed by interface name:
# PID of the capture process, capture file, and file with tcpdump's output.
declare -A cappid capfile capout
1742
# Start capturing ingress packets on an interface with tcpdump.
# $1 - interface name
# $2 - optional network namespace to run tcpdump in
# The capture file, the file collecting tcpdump's own output and the PID of
# the background process are stored in capfile[], capout[] and cappid[].
# The globals ns_cmd and capuser are (re)set as a side effect; tcpdump_stop
# reads ns_cmd.
tcpdump_start()
{
	local if_name=$1; shift
	local ns=$1; shift
	local pcap_file log_file

	pcap_file=$(mktemp)
	log_file=$(mktemp)
	capfile[$if_name]=$pcap_file
	capout[$if_name]=$log_file

	# Prefix that enters the namespace; empty when none was given.
	ns_cmd=${ns:+ip netns exec ${ns}}

	# When SUDO_USER is set, pass it to tcpdump with -Z.
	capuser=${SUDO_USER:+-Z $SUDO_USER}

	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
		-s 65535 -B 32768 $capuser -w $pcap_file \
		> "$log_file" 2>&1 &
	cappid[$if_name]=$!

	sleep 1
}
1770
# Stop the capture previously started on an interface.
# $1 - interface name used with tcpdump_start
# The kill is prefixed with the global ns_cmd left behind by tcpdump_start.
tcpdump_stop()
{
	local if_name=$1
	local pid=${cappid[$if_name]}

	# Only wait for the process if the signal could be delivered.
	if $ns_cmd kill "$pid"; then
		wait "$pid"
	fi
	sleep 1
}
1779
# Remove the files that tcpdump_start created for an interface.
# $1 - interface name used with tcpdump_start
tcpdump_cleanup()
{
	local if_name=$1
	local pcap_file=${capfile[$if_name]}
	local log_file=${capout[$if_name]}

	rm $pcap_file $log_file
}
1786
# Print the packets captured on an interface by reading its capture file
# back with tcpdump; stderr is merged into stdout.
# $1 - interface name used with tcpdump_start
tcpdump_show()
{
	local if_name=$1
	local pcap_file=${capfile[$if_name]}

	tcpdump -e -n -r $pcap_file 2>&1
}
1793
# Send one UDP packet from `host1_if` and check whether it arrives on
# `host2_if`.
# $1 - destination MAC, $2 - source IP, $3 - destination IP
# $4 - sending interface, $5 - interface on which arrival is checked
# Return 0 if the packet wasn't seen on host2_if or 1 if it was.
mcast_packet_test()
{
	local mac=$1
	local src_ip=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local seen=0
	local tc_proto="ipv6"
	local mz_v6arg="-6"
	local ipv4_re='^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$'

	# Basic check to see if we were passed an IPv4 address; anything
	# else is assumed to be IPv6.
	if [[ $ip =~ $ipv4_re ]]; then
		tc_proto="ip"
		mz_v6arg=""
	fi

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was received by it or not.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
		flower ip_proto udp dst_mac $mac action drop

	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
	sleep 1

	# jq exits with 0 only if the filter counted exactly one packet.
	if tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	then
		seen=1
	fi

	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $seen
}
1833
# Check the MDB of br0 for the group $TEST_GROUP after a membership report.
# $1 - report name, used only in the failure message
# Remaining arguments - the expected source addresses
# Verifies that one entry for the group has a source list containing every
# given address, and that an entry with a matching .src exists for each
# source. Failures are recorded through check_err.
brmcast_check_sg_entries()
{
	local report=$1; shift
	local slist=("$@")
	local sarg=""
	# Loop variables are local so that a caller's variables of the same
	# name are not overwritten.
	local src sgent

	# Build one jq condition per expected source.
	for src in "${slist[@]}"; do
		sarg="${sarg} and .source_list[].address == \"$src\""
	done
	bridge -j -d -s mdb show dev br0 \
		| jq -e ".[].mdb[] | \
			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
	check_err $? "Wrong *,G entry source list after $report report"

	for sgent in "${slist[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
	done
}
1855
# Check whether traffic from each given source to $TEST_GROUP gets from $h2
# to $h1.
# $1 - 1 if the traffic is expected to be forwarded, anything else if not
# Remaining arguments - source addresses to test
# mcast_packet_test returns 1 when the packet was seen, so "forwarded" is
# checked with check_fail and "blocked" with check_err.
brmcast_check_sg_fwding()
{
	local should_fwd=$1; shift
	local sources=("$@")
	# Loop variable is local so that a caller's `src' is not overwritten.
	local src

	for src in "${sources[@]}"; do
		local retval=0

		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
		retval=$?
		if [ $should_fwd -eq 1 ]; then
			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
		else
			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
		fi
	done
}
1873
# Check the blocked state of the given sources of $TEST_GROUP in br0's MDB.
# $1 - 1 if the sources are expected to be blocked, anything else if not
# Remaining arguments - source addresses to check
# For each source, looks for a source-list entry with timer "0.00" and for
# an entry carrying the "blocked" flag; check_err_fail is told whether those
# lookups are expected to fail (not blocked) or to succeed (blocked).
brmcast_check_sg_state()
{
	local is_blocked=$1; shift
	local sources=("$@")
	local should_fail=1
	# Loop variable is local so that a caller's `src' is not overwritten.
	local src

	if [ $is_blocked -eq 1 ]; then
		should_fail=0
	fi

	for src in "${sources[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
				 .source_list[] |
				 select(.address == \"$src\") |
				 select(.timer == \"0.00\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has zero timer"

		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
				 .flags[] == \"blocked\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has blocked flag"
	done
}
1900
# Join a multicast group on an interface by running mreceive in the
# background under "ip vrf exec".
# $1 - interface name, $2 - multicast group
# The PID of the background process is stored in the global mreceive_pid,
# which mc_leave uses.
mc_join()
{
	local if_name=$1
	local group=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	# We don't care about actual reception, just about joining the
	# IP multicast group and adding the L2 address to the device's
	# MAC filtering table
	ip vrf exec $vrf_name mreceive -g $group -I $if_name \
		> /dev/null 2>&1 &
	mreceive_pid=$!

	sleep 1
}
1916
# Terminate the mreceive process started by mc_join and wait for it.
# If the process cannot be signalled, kill's failure status is returned and
# no wait is attempted.
mc_leave()
{
	local pid=$mreceive_pid

	kill "$pid" || return
	wait "$pid"
}
1921
# Send one multicast packet with msend under "ip vrf exec".
# $1 - interface name, $2 - value passed to msend's -g option
# Output is discarded; the exit status is that of the msend command line.
mc_send()
{
	local if_name=$1
	local groups=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name msend -g $groups -I $if_name -c 1 \
		> /dev/null 2>&1
}
1931
# Start "<ip> monitor <type>" in the background, writing to a temporary file.
# $1 - monitor type
# $2 - optional command to use instead of "ip"
# Prints "<pid> <file>", the arguments stop_ip_monitor expects first.
start_ip_monitor()
{
	local mtype=$1; shift
	local ip=${1-ip}; shift

	# start the monitor in the background
	tmpfile=$(mktemp /var/run/nexthoptestXXX)
	mpid=$( ($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null )
	sleep 0.2
	echo "$mpid $tmpfile"
}
1943
# Stop a monitor started by start_ip_monitor and check how many events it
# recorded.
# $1 - PID of the monitor, $2 - file with its output
# $3 - expected number of event lines
# $4 - description used in the failure message
# Only lines starting with a word character are counted. The result is
# recorded through check_err and the output file is removed.
stop_ip_monitor()
{
	local mpid=$1; shift
	local tmpfile=$1; shift
	local expected=$1; shift
	local what=$1; shift
	local lines

	sleep 0.2
	kill $mpid
	lines=$(grep '^\w' $tmpfile | wc -l)
	[ $lines -eq $expected ]
	check_err $? "$what: $lines lines of events, expected $expected"
	rm -rf $tmpfile
}
1958
# Test that exactly one "stats" monitor event is emitted for each of four
# steps: enabling <type>_stats on a device, running `make_suitable', running
# `make_unsuitable', and disabling <type>_stats again.
# $1 - device, $2 - stats type (prefix of "_stats")
# $3 - command expected to trigger an offload notification
# $4 - command expected to trigger a loss-of-offload notification
# $5 - optional command to use instead of "ip"
# Resets RET and logs the result as "<type>_stats notifications".
hw_stats_monitor_test()
{
	local dev=$1; shift
	local type=$1; shift
	local make_suitable=$1; shift
	local make_unsuitable=$1; shift
	local ip=${1-ip}; shift
	local stat=${type}_stats
	local ipmout

	RET=0

	# Expect a notification about enablement.
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev $stat on
	stop_ip_monitor $ipmout 1 "$stat enablement"

	# Expect a notification about offload.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_suitable
	stop_ip_monitor $ipmout 1 "$stat installation"

	# Expect a notification about loss of offload.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_unsuitable
	stop_ip_monitor $ipmout 1 "$stat deinstallation"

	# Expect a notification about disablement
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev $stat off
	stop_ip_monitor $ipmout 1 "$stat disablement"

	log_test "$stat notifications"
}
1991
# Convert a dotted IPv4 address into colon-separated two-digit hexadecimal
# bytes, e.g. 192.0.2.1 -> c0:00:02:01. No trailing newline is printed.
ipv4_to_bytes()
{
	local IP=$1; shift
	local bytes

	# The format is reused for every dot-separated component.
	bytes=$(printf '%02x:' ${IP//./ })

	# Drop the separator left after the last byte.
	printf '%s' "${bytes%:}"
}
1999
# Convert a given IPv6 address, `IP' such that the :: token, if present, is
# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal
# digits. An optional `BYTESEP' parameter can be given to further separate
# individual bytes of each 16-bit group.
# Example: 2001:db8::1 -> 2001:0db8:0000:0000:0000:0000:0000:0001
expand_ipv6()
{
	local IP=$1; shift
	local bytesep=$1; shift

	# Mark the position of :: and count the colons that remain.
	local marked=${IP/::/_}
	local present=${marked//[^:]/}
	local seven=:::::::
	# A full address has seven colons; the marker gets the missing ones.
	local filler=${seven:${#present}}
	local full=${marked/_/$filler}

	# One group per line: left-pad with zeroes, keep the last four digits
	# (split into two bytes by $bytesep), then join the lines again.
	echo $full | tr : '\n' |
	    sed -e 's/^/0000/' \
		-e 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
	    tr '\n' : |
	    sed 's/:$//'
}
2021
# Convert an IPv6 address into colon-separated bytes by calling expand_ipv6
# with ":" as the byte separator.
ipv6_to_bytes()
{
	local addr=$1; shift
	local sep=":"

	expand_ipv6 "$addr" "$sep"
}
2028
# Print a number as two colon-separated hexadecimal bytes, high byte first,
# e.g. 258 -> 01:02. Only the last four hexadecimal digits are used. No
# trailing newline is printed.
u16_to_bytes()
{
	local u16=$1; shift
	local hex

	hex=$(printf "%04x" $u16)
	# Pad on the left so that the two slices below always exist.
	hex="000$hex"

	printf '%s' "${hex: -4:2}:${hex: -2}"
}
2035
2036# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
2037# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
2038# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
2039# stands for 00:00.
2040payload_template_calc_checksum()
2041{
2042	local payload=$1; shift
2043
2044	(
2045	    # Set input radix.
2046	    echo "16i"
2047	    # Push zero for the initial checksum.
2048	    echo 0
2049
2050	    # Pad the payload with a terminating 00: in case we get an odd
2051	    # number of bytes.
2052	    echo "${payload%:}:00:" |
2053		sed 's/CHECKSUM/00:00/g' |
2054		tr '[:lower:]' '[:upper:]' |
2055		# Add the word to the checksum.
2056		sed 's/\(..\):\(..\):/\1\2+\n/g' |
2057		# Strip the extra odd byte we pushed if left unconverted.
2058		sed 's/\(..\):$//'
2059
2060	    echo "10000 ~ +"	# Calculate and add carry.
2061	    echo "FFFF r - p"	# Bit-flip and print.
2062	) |
2063	    dc |
2064	    tr '[:upper:]' '[:lower:]'
2065}
2066
# Print a payload template with every CHECKSUM keyword replaced by the given
# checksum, formatted by u16_to_bytes.
# $1 - payload template, $2 - checksum value
payload_template_expand_checksum()
{
	local payload=$1; shift
	local checksum=$1; shift
	local ckbytes

	ckbytes=$(u16_to_bytes $checksum)

	echo "${payload//CHECKSUM/$ckbytes}"
}
2076
# Print the number of bytes in a payload template. A trailing colon is
# ignored and CHECKSUM is expanded first, so it counts as its expanded bytes.
payload_template_nbytes()
{
	local payload=$1; shift

	# One byte per line, then count the lines.
	payload_template_expand_checksum "${payload%:}" 0 |
		tr ':' '\n' | wc -l
}
2084
# Print an IGMPv3 Membership Report payload holding one IS_IN group record.
# $1 - multicast group address
# Remaining arguments - source addresses listed in the record
# The checksum is calculated over the template and filled in.
igmpv3_is_in_get()
{
	local GRP=$1; shift
	local sources=("$@")

	local igmpv3
	local nsources=$(u16_to_bytes ${#sources[@]})
	local src

	# IS_IN ( $sources )
	igmpv3="22:"				# Type - Membership Report
	igmpv3+="00:"				# Reserved
	igmpv3+="CHECKSUM:"			# Checksum
	igmpv3+="00:00:"			# Reserved
	igmpv3+="00:01:"			# Number of Group Records
	igmpv3+="01:"				# Record Type - IS_IN
	igmpv3+="00:"				# Aux Data Len
	igmpv3+="${nsources}:"			# Number of Sources
	igmpv3+="$(ipv4_to_bytes $GRP):"	# Multicast Address
	for src in "${sources[@]}"; do		# Source Addresses
		igmpv3+="$(ipv4_to_bytes $src):"
	done

	local checksum=$(payload_template_calc_checksum "$igmpv3")

	payload_template_expand_checksum "$igmpv3" $checksum
}
2113
# Print an IGMPv2 Leave Group payload for the given group address, with the
# checksum calculated and filled in.
# $1 - multicast group address
igmpv2_leave_get()
{
	local GRP=$1; shift
	local payload

	payload="17:"				# Type - Leave Group
	payload+="00:"				# Max Resp Time - not meaningful
	payload+="CHECKSUM:"			# Checksum
	payload+="$(ipv4_to_bytes $GRP)"	# Group Address

	local checksum=$(payload_template_calc_checksum "$payload")

	payload_template_expand_checksum "$payload" $checksum
}
2128
# Print a payload made of a Hop-by-Hop header followed by an MLDv2 Report
# holding one IS_IN group record.
# $1 - source IP address, used only for the checksum pseudo-header
# $2 - multicast group address
# Remaining arguments - source addresses listed in the record
# The checksum is calculated over the pseudo-header plus the ICMPv6 part.
mldv2_is_in_get()
{
	local SIP=$1; shift
	local GRP=$1; shift
	local sources=("$@")

	local hbh
	local icmpv6
	local nsources=$(u16_to_bytes ${#sources[@]})
	local src

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="8f:"				# Type - MLDv2 Report
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Reserved
	icmpv6+="00:01:"			# Number of Group Records
	icmpv6+="01:"				# Record Type - IS_IN
	icmpv6+="00:"				# Aux Data Len
	icmpv6+="${nsources}:"			# Number of Sources
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address
	for src in "${sources[@]}"; do		# Source Addresses
		icmpv6+="$(ipv6_to_bytes $src):"
	done

	local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
	local sudohdr

	sudohdr="$(ipv6_to_bytes $SIP):"	# SIP
	sudohdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	sudohdr+="${len}:"			# Upper-layer length
	sudohdr+="00:3a:"			# Zero and next-header

	local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2172
# Print a payload made of a Hop-by-Hop header followed by an MLDv1 Done
# message for the given group.
# $1 - source IP address, used only for the checksum pseudo-header
# $2 - multicast group address
# The checksum is calculated over the pseudo-header plus the ICMPv6 part.
mldv1_done_get()
{
	local SIP=$1; shift
	local GRP=$1; shift

	local hbh
	local icmpv6

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="84:"				# Type - MLDv1 Done
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Max Resp Delay - not meaningful
	icmpv6+="00:00:"			# Reserved
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address

	local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
	local sudohdr

	sudohdr="$(ipv6_to_bytes $SIP):"	# SIP
	sudohdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	sudohdr+="${len}:"			# Upper-layer length
	sudohdr+="00:3a:"			# Zero and next-header

	local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2207
# Warn, and normally skip the calling test, when lldpad is running.
# $1 - what lldpad will likely do (inserted into the warning)
# $2 - what the test will do (inserted into the warning)
# If systemctl reports lldpad as inactive, returns 0 silently. Otherwise a
# warning is printed to stderr; unless ALLOW_LLDPAD is non-empty, the test
# is then logged as skipped under "<source file>:<calling function>" and
# the shell exits with $EXIT_STATUS. With ALLOW_LLDPAD set, returns 0.
bail_on_lldpad()
{
	local reason1="$1"; shift
	local reason2="$1"; shift
	local caller=${FUNCNAME[1]}
	local src=${BASH_SOURCE[1]}

	if ! systemctl is-active --quiet lldpad; then
		return 0
	fi

	# Write to the existing stderr descriptor instead of reopening
	# /dev/stderr.
	cat >&2 <<-EOF
	WARNING: lldpad is running

		lldpad will likely $reason1, and this test will
		$reason2. Both are not supported at the same time,
		one of them is arbitrarily going to overwrite the
		other. That will cause spurious failures (or, unlikely,
		passes) of this test.
	EOF

	if [[ -z $ALLOW_LLDPAD ]]; then
		cat >&2 <<-EOF

			If you want to run the test anyway, please set
			an environment variable ALLOW_LLDPAD to a
			non-empty string.
		EOF
		log_test_skip $src:$caller
		exit $EXIT_STATUS
	fi

	# The user explicitly allowed running next to lldpad; report success
	# rather than leaking the status of the test above.
	return 0
}
2241
# Print the absolute value of an integer (evaluated as a shell arithmetic
# expression).
absval()
{
	local v=$1; shift
	local magnitude=$((v))

	if ((magnitude < 0)); then
		magnitude=$((-magnitude))
	fi

	echo $magnitude
}
2248