1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# A test for switch behavior under MC overload. An issue in Spectrum chips
5# causes throughput of UC traffic to drop severely when a switch is under heavy
6# MC load. This issue can be overcome by putting the switch to MC-aware mode.
7# This test verifies that UC performance stays intact even as the switch is
8# under MC flood, and therefore that the MC-aware mode is enabled and correctly
9# configured.
10#
11# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
12# at full speed. That makes it impossible to use iperf3 to simply measure the
13# throughput, because many packets (that reach $h3) don't get to the kernel at
14# all even in UDP mode (the situation is even worse in TCP mode, where one can't
15# hope to see more than a couple Mbps).
16#
17# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
18# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
19# each gets a different priority and we can use per-prio ethtool counters to
20# measure the throughput. In order to avoid prioritizing unicast traffic, prio
21# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
22# thus TC 0).
23#
24# Mausezahn can't actually saturate the links unless it's using large frames.
25# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
26# multicast traffic uses 8K frames.
27#
28# +---------------------------+            +----------------------------------+
29# | H1                        |            |                               H2 |
30# |                           |            |  unicast --> + $h2.111           |
31# |                 multicast |            |  traffic     | 192.0.2.129/28    |
32# |                 traffic   |            |              | e-qos-map 0:1     |
33# |           $h1 + <-----    |            |              |                   |
34# | 192.0.2.65/28 |           |            |              + $h2               |
35# +---------------|-----------+            +--------------|-------------------+
36#                 |                                       |
37# +---------------|---------------------------------------|-------------------+
38# |         $swp1 +                                       + $swp2             |
39# |        >1Gbps |                                       | >1Gbps            |
40# | +-------------|------+                     +----------|----------------+  |
41# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
42# | |                BR1 |             SW      | BR111                     |  |
43# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
44# | +-------------|------+                     +----------|----------------+  |
45# |               \_______________________________________/                   |
46# |                                    |                                      |
47# |                                    + $swp3                                |
48# |                                    | 1Gbps bottleneck                     |
49# |                                    | prio qdisc: {0..7} -> 7              |
50# +------------------------------------|--------------------------------------+
51#                                      |
52#                                   +--|-----------------+
53#                                   |  + $h3          H3 |
54#                                   |  | 192.0.2.66/28   |
55#                                   |  |                 |
56#                                   |  + $h3.111         |
57#                                   |    192.0.2.130/28  |
58#                                   +--------------------+
59
# Test cases in the order they are run; the name suggests the list is consumed
# by the sourced library's test runner.
ALL_TESTS="
	ping_ipv4
	test_mc_aware
	test_uc_aware
"

# Location of the shared forwarding selftest library, relative to this script.
# Quoted so that the script still works from a path containing whitespace.
lib_dir="$(dirname "$0")/../../../net/forwarding"

# Number of interfaces used by this test (three host/switch port pairs; see
# the NETIFS[p1]..NETIFS[p6] lookups in setup_prepare). Set before sourcing
# lib.sh.
NUM_NETIFS=6
source "$lib_dir/lib.sh"
70
# Configure host interface $h1 with its IPv4 address and raise its MTU to
# 10000 so that the large frames used by this test fit.
h1_create()
{
	local addr=192.0.2.65/28
	local mtu=10000

	simple_if_init $h1 $addr
	mtu_set $h1 $mtu
}
76
# Undo h1_create in reverse order: restore the MTU first, then deconfigure the
# interface and its address.
h1_destroy()
{
	local addr=192.0.2.65/28

	mtu_restore $h1
	simple_if_fini $h1 $addr
}
82
# Configure host interface $h2 (no address on the base device), raise its MTU
# and stack VLAN 111 on top of it. The VLAN device carries the unicast source
# address and an egress QoS map of 0:1, which is how unicast traffic ends up
# tagged with PCP 1.
h2_create()
{
	local vid=111
	local mtu=10000

	simple_if_init $h2
	mtu_set $h2 $mtu

	vlan_create $h2 $vid v$h2 192.0.2.129/28
	ip link set dev $h2.$vid type vlan egress-qos-map 0:1
}
91
# Undo h2_create in reverse order: drop the VLAN 111 upper, restore the MTU
# and deconfigure $h2.
h2_destroy()
{
	local vid=111

	vlan_destroy $h2 $vid

	mtu_restore $h2
	simple_if_fini $h2
}
99
# Configure the receiving host interface $h3: an address on the base device,
# MTU 10000, and a VLAN 111 upper with its own address.
h3_create()
{
	local vid=111
	local mtu=10000

	simple_if_init $h3 192.0.2.66/28
	mtu_set $h3 $mtu

	vlan_create $h3 $vid v$h3 192.0.2.130/28
}
107
# Undo h3_create in reverse order: drop the VLAN 111 upper, restore the MTU
# and deconfigure $h3 together with its address.
h3_destroy()
{
	local vid=111
	local addr=192.0.2.66/28

	vlan_destroy $h3 $vid

	mtu_restore $h3
	simple_if_fini $h3 $addr
}
115
# Build the switch side of the topology:
#  - bring up all three switch ports with MTU 10000,
#  - create VLAN 111 uppers on $swp2 and $swp3,
#  - force $swp3 to 1Gbps and install a prio qdisc that maps every priority to
#    band 7, so that neither traffic class is preferred,
#  - bridge $swp1 with $swp3 (br1) and $swp2.111 with $swp3.111 (br111).
switch_create()
{
	local port
	local vid=111

	for port in $swp1 $swp2 $swp3; do
		ip link set dev $port up
		mtu_set $port 10000
	done

	for port in $swp2 $swp3; do
		vlan_create $port $vid
	done

	# $swp3 is the bottleneck link.
	ethtool -s $swp3 speed 1000 autoneg off
	tc qdisc replace dev $swp3 root handle 3: prio bands 8 \
		priomap 7 7 7 7 7 7 7 7

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev br1 up
	for port in $swp1 $swp3; do
		ip link set dev $port master br1
	done

	ip link add name br111 type bridge vlan_filtering 0
	ip link set dev br111 up
	for port in $swp2.$vid $swp3.$vid; do
		ip link set dev $port master br111
	done
}
144
# Tear down what switch_create built, in reverse order: bridges, the qdisc and
# forced speed on $swp3, the VLAN uppers, and finally MTU and link state of
# each switch port.
switch_destroy()
{
	local bridge
	local port

	for bridge in br111 br1; do
		ip link del dev $bridge
	done

	tc qdisc del dev $swp3 root handle 3:
	ethtool -s $swp3 autoneg on

	for port in $swp3 $swp2; do
		vlan_destroy $port 111
	done

	for port in $swp3 $swp2 $swp1; do
		mtu_restore $port
		ip link set dev $port down
	done
}
165
# Resolve the interface names from NETIFS, remember the MAC address of $h3
# (used as the unicast destination MAC), and build the hosts and the switch.
# All variables assigned here are intentionally global: the rest of the script
# reads them.
setup_prepare()
{
	local create_fn

	# Host-facing ends of the three links.
	h1=${NETIFS[p1]}
	h2=${NETIFS[p4]}
	h3=${NETIFS[p6]}

	# Switch-facing ends of the same links.
	swp1=${NETIFS[p2]}
	swp2=${NETIFS[p3]}
	swp3=${NETIFS[p5]}

	h3mac=$(mac_get $h3)

	vrf_prepare

	for create_fn in h1_create h2_create h3_create switch_create; do
		$create_fn
	done
}
186
# EXIT handler: dismantle the topology in the reverse order of setup_prepare.
cleanup()
{
	local destroy_fn

	pre_cleanup

	for destroy_fn in switch_destroy h3_destroy h2_destroy h1_destroy; do
		$destroy_fn
	done

	vrf_cleanup
}
198
# Connectivity check over the VLAN 111 path: ping the address of $h3.111 from
# $h2.
ping_ipv4()
{
	local dip=192.0.2.130

	ping_test $h2 $dip
}
203
# Format a bit rate for display.
#
# $1: rate in bits per second.
# Outputs: the rate scaled by powers of 1024 to the largest fitting unit out
#          of bps/Kbps/Mbps/Gbps, followed by that unit (e.g. "2.0Gbps").
#          Scaled values carry one decimal place; rates below 1024 are printed
#          as given.
humanize()
{
	local speed=$1; shift
	# Keep the loop variable out of the global namespace.
	local unit

	for unit in bps Kbps Mbps Gbps; do
		# Gbps is the largest unit available. Stop here without dividing
		# again, otherwise a rate of 1024 Gbps or more would be scaled
		# one step further while still being labelled "Gbps".
		if [[ $unit == Gbps ]]; then
			break
		fi

		if (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${unit}"
}
218
# Convert two octet counter samples into a bit rate.
#
# $1: counter value at the start of the interval.
# $2: counter value at the end of the interval.
# $3: interval length in seconds.
# Outputs: 8 * ($2 - $1) / $3, using integer arithmetic.
rate()
{
	local start=$1
	local end=$2
	local seconds=$3
	local bits

	bits=$((8 * (end - start)))
	echo $((bits / seconds))
}
227
# Check that a measured rate exceeds a required minimum.
#
# $1: measured rate.
# $2: minimum that the rate has to exceed (strictly).
# $3: description of the rate, used as the prefix of the diagnostic.
# Returns: 0 if $1 > $2; otherwise prints "<what> <rate> < <min>" to stderr
#          and returns 1.
check_rate()
{
	local rate=$1; shift
	local min=$1; shift
	local what=$1; shift

	if ((rate > min)); then
		return 0
	fi

	# Report this function's own arguments rather than relying on
	# identically-purposed variables that happen to exist in the caller.
	# Use >&2 so that stderr redirected to a regular file is appended to,
	# not truncated.
	echo "$what $(humanize $rate) < $(humanize $min)" >&2
	return 1
}
241
# Measure unicast throughput through the switch.
#
# Starts a continuous UDP stream from $h2.111 towards $h3 (destination MAC
# $h3mac) and samples the prio-1 RX octet counters of $swp2 (ingress) and $h3
# (egress) over 10-second windows. Sampling stops at the first window whose
# ingress rate exceeds the required minimum; at most six windows are tried.
#
# $1: label used as the prefix of diagnostics (e.g. "UC-only").
# Outputs: "<ingress rate> <egress rate>" of the last sampled window, in bits
#          per second, on stdout.
# Returns: 0 if a window with sufficient ingress rate was seen, 1 otherwise.
measure_uc_rate()
{
	local what=$1; shift

	local interval=10
	local i
	local ret=0

	# Dips in performance might cause momentary ingress rate to drop below
	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
	# average ingress rate to somewhat mitigate this.
	local min_ingress=2147483648

	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &
	# Give the generator a moment to get going before sampling.
	sleep 1

	for i in {5..0}; do
		local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
		local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
		sleep $interval
		local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
		local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

		local ir=$(rate $u0 $u1 $interval)
		local er=$(rate $t0 $t1 $interval)

		if check_rate $ir $min_ingress "$what ingress rate"; then
			break
		fi

		# Fail the test if we can't get the throughput.
		if ((i == 0)); then
			ret=1
		fi
	done

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	echo $ir $er
	# Report the status with return rather than exit. Inside $(...) the
	# two are equivalent, but exit would terminate the whole script if
	# this function were ever called directly.
	return $ret
}
286
# Verify that unicast throughput survives a multicast overload.
#
# Measures UC egress throughput at $h3 on its own, then again while $h1 floods
# frames towards the same bottleneck, and fails if throughput degrades by more
# than 25 %. Also prints a report of all measured rates. The result is
# recorded through RET / check_err / log_test.
test_mc_aware()
{
	RET=0

	# Baseline: unicast only. Index 0 is the ingress rate, index 1 the
	# egress rate (see measure_uc_rate).
	local -a uc_rate
	uc_rate=($(measure_uc_rate "UC-only"))
	check_err $? "Could not get high enough UC-only ingress rate"
	local ucth1=${uc_rate[1]}

	# Start the competing flood from $h1; it keeps running in the
	# background until it is killed below.
	$MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	local -a uc_rate_2
	uc_rate_2=($(measure_uc_rate "UC+MC"))
	check_err $? "Could not get high enough UC+MC ingress rate"
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	# Relative degradation in percent, clamped at 0. Without a positive
	# baseline the division below would fail inside bc and the result
	# would read as "0 % degradation", i.e. a pass. Treat that case as a
	# failure instead.
	local deg=0
	if ((ucth1 > 0)); then
		deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		    ")
		check_err "$(bc <<< "$deg > 25")" \
			"UC throughput degraded by more than 25% under MC load"
	else
		check_err 1 "No UC-only egress throughput measured"
	fi

	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	log_test "UC performace under MC overload"

	echo "UC-only throughput  $(humanize $ucth1)"
	echo "UC+MC throughput    $(humanize $ucth2)"
	echo "Degradation         $deg %"
	echo
	echo "Full report:"
	echo "  UC only:"
	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
	echo "  UC+MC:"
	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
	echo "    ingress MC throughput $(humanize $mc_ir)"
	echo "    egress MC throughput  $(humanize $mc_er)"
	echo
}
342
# Verify that broadcast traffic still gets through under unicast overload.
#
# Saturates the bottleneck with unicast traffic from $h2.111 and, while that
# is running, sends 50 broadcast ARP requests for 192.0.2.66 from $h1. The
# test passes only if every request is answered. The result is recorded
# through RET / check_err / log_test, followed by a short report.
test_uc_aware()
{
	RET=0

	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
	sleep 1

	local attempts=50
	local passes=0
	local i

	for ((i = 0; i < attempts; ++i)); do
		# The deadline has to be an integer: arping from newer iputils
		# rejects fractional -w values, which would make every attempt
		# fail regardless of what the switch does.
		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then
			((passes++))
		fi

		sleep 0.1
	done

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

	local interval=$((d1 - d0))
	local uc_ir=$(rate $u0 $u1 $interval)
	local uc_er=$(rate $t0 $t1 $interval)

	((attempts == passes))
	check_err $? "Only $passes of $attempts broadcast ARPs were answered"

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	log_test "MC performace under UC overload"
	echo "    ingress UC throughput $(humanize ${uc_ir})"
	echo "    egress UC throughput  $(humanize ${uc_er})"
	echo "    sent $attempts BC ARPs, got $passes responses"
}
386
387trap cleanup EXIT
388
389setup_prepare
390setup_wait
391
392tests_run
393
394exit $EXIT_STATUS
395