1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# A test for switch behavior under MC overload. An issue in Spectrum chips
5# causes throughput of UC traffic to drop severely when a switch is under heavy
6# MC load. This issue can be overcome by putting the switch to MC-aware mode.
7# This test verifies that UC performance stays intact even as the switch is
8# under MC flood, and therefore that the MC-aware mode is enabled and correctly
9# configured.
10#
11# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
12# at full speed. That makes it impossible to use iperf3 to simply measure the
13# throughput, because many packets (that reach $h3) don't get to the kernel at
14# all even in UDP mode (the situation is even worse in TCP mode, where one can't
15# hope to see more than a couple Mbps).
16#
17# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
18# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
19# each gets a different priority and we can use per-prio ethtool counters to
20# measure the throughput. In order to avoid prioritizing unicast traffic, prio
21# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
22# thus TC 0).
23#
24# Mausezahn can't actually saturate the links unless it's using large frames.
25# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
26# multicast traffic uses 8K frames.
27#
28# +-----------------------+                +----------------------------------+
29# | H1                    |                |                               H2 |
30# |                       |                |  unicast --> + $h2.111           |
31# |                       |                |  traffic     | 192.0.2.129/28    |
32# |          multicast    |                |              | e-qos-map 0:1     |
33# |          traffic      |                |              |                   |
34# | $h1 + <-----          |                |              + $h2               |
35# +-----|-----------------+                +--------------|-------------------+
36#       |                                                 |
37# +-----|-------------------------------------------------|-------------------+
38# |     + $swp1                                           + $swp2             |
39# |     | >1Gbps                                          | >1Gbps            |
40# | +---|----------------+                     +----------|----------------+  |
41# | |   + $swp1.1        |                     |          + $swp2.111      |  |
42# | |                BR1 |             SW      | BR111                     |  |
43# | |   + $swp3.1        |                     |          + $swp3.111      |  |
44# | +---|----------------+                     +----------|----------------+  |
45# |     \_________________________________________________/                   |
46# |                                    |                                      |
47# |                                    + $swp3                                |
48# |                                    | 1Gbps bottleneck                     |
49# |                                    | prio qdisc: {0..7} -> 7              |
50# +------------------------------------|--------------------------------------+
51#                                      |
52#                                   +--|-----------------+
53#                                   |  + $h3          H3 |
54#                                   |  |                 |
55#                                   |  + $h3.111         |
56#                                   |    192.0.2.130/28  |
57#                                   +--------------------+
58
# Test functions making up this selftest (both are defined in this file).
# NOTE(review): presumably consumed by tests_run from the sourced library.
ALL_TESTS="
	ping_ipv4
	test_mc_aware
"

# Directory of the shared forwarding library, relative to this script. Quoted
# so that a source tree path containing whitespace does not break the lookup.
lib_dir="$(dirname "$0")/../../../net/forwarding"

# Number of interfaces this test uses (p1..p6, see setup_prepare). Assigned
# before lib.sh is sourced.
NUM_NETIFS=6
source "$lib_dir/lib.sh"
68
# Set up host H1: initialize $h1 and raise its MTU so that the large frames
# used by the test fit.
h1_create()
{
	local mtu=10000

	simple_if_init $h1
	mtu_set $h1 $mtu
}
74
# Undo h1_create: restore the MTU of $h1, then finalize the interface.
h1_destroy()
{
	local dev=$h1

	mtu_restore $dev
	simple_if_fini $dev
}
80
# Set up host H2, the unicast sender: initialize $h2 with a large MTU and
# create VLAN 111 on top of it with address 192.0.2.129/28.
h2_create()
{
	local vid=111
	local vlan_dev=$h2.$vid

	simple_if_init $h2
	mtu_set $h2 10000

	vlan_create $h2 $vid v$h2 192.0.2.129/28
	# Egress QoS map 0:1, so that traffic sent through the VLAN device is
	# tagged with PCP 1 (see the description at the top of the file).
	ip link set dev $vlan_dev type vlan egress-qos-map 0:1
}
89
# Undo h2_create: remove VLAN 111 from $h2, restore the MTU and finalize the
# interface.
h2_destroy()
{
	local vid=111

	vlan_destroy $h2 $vid

	mtu_restore $h2
	simple_if_fini $h2
}
97
# Set up host H3, the receiver: initialize $h3 with a large MTU and create
# VLAN 111 on top of it with address 192.0.2.130/28.
h3_create()
{
	local vid=111
	local mtu=10000

	simple_if_init $h3
	mtu_set $h3 $mtu

	vlan_create $h3 $vid v$h3 192.0.2.130/28
}
105
# Undo h3_create: remove VLAN 111 from $h3, restore the MTU and finalize the
# interface.
h3_destroy()
{
	local vid=111

	vlan_destroy $h3 $vid

	mtu_restore $h3
	simple_if_fini $h3
}
113
# Configure the switch side of the topology:
#  - bring up $swp1..$swp3 with a large MTU,
#  - create VLAN 111 on $swp2 and $swp3,
#  - force $swp3 to 1Gbps and install a prio qdisc on it that maps every
#    priority to band 7,
#  - bridge $swp1 with $swp3 (br1) and $swp2.111 with $swp3.111 (br111).
switch_create()
{
	local mtu=10000
	local vid=111
	local port

	for port in "$swp1" "$swp2" "$swp3"; do
		ip link set dev $port up
		mtu_set $port $mtu
	done

	for port in "$swp2" "$swp3"; do
		vlan_create $port $vid
	done

	# $swp3 is the bottleneck: fixed 1Gbps, all priorities in one band so
	# that unicast is not prioritized over multicast by the qdisc.
	ethtool -s $swp3 speed 1000 autoneg off
	tc qdisc replace dev $swp3 root handle 3: prio bands 8 \
	   priomap 7 7 7 7 7 7 7 7

	# Bridge carrying the untagged traffic.
	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev br1 up
	for port in "$swp1" "$swp3"; do
		ip link set dev $port master br1
	done

	# Bridge carrying the VLAN 111 traffic.
	ip link add name br111 type bridge vlan_filtering 0
	ip link set dev br111 up
	for port in "$swp2" "$swp3"; do
		ip link set dev $port.$vid master br111
	done
}
142
# Undo switch_create: delete both bridges, remove the qdisc and re-enable
# autonegotiation on $swp3, remove the VLAN devices, then restore the MTU of
# each port and take it down.
switch_destroy()
{
	local vid=111
	local bridge
	local port

	for bridge in br111 br1; do
		ip link del dev $bridge
	done

	tc qdisc del dev $swp3 root handle 3:
	ethtool -s $swp3 autoneg on

	for port in "$swp3" "$swp2"; do
		vlan_destroy $port $vid
	done

	for port in "$swp3" "$swp2" "$swp1"; do
		mtu_restore $port
		ip link set dev $port down
	done
}
163
# Resolve the interface names from NETIFS into the globals used throughout
# the test, record the MAC address of $h3, and build the whole topology.
setup_prepare()
{
	local step

	# Host-side ends of the three links.
	h1=${NETIFS[p1]}
	h2=${NETIFS[p4]}
	h3=${NETIFS[p6]}

	# Switch-side ends of the three links.
	swp1=${NETIFS[p2]}
	swp2=${NETIFS[p3]}
	swp3=${NETIFS[p5]}

	# Destination MAC for the unicast stream (see measure_uc_rate).
	h3mac=$(mac_get $h3)

	vrf_prepare

	for step in h1_create h2_create h3_create switch_create; do
		$step
	done
}
184
# Tear the topology down in the reverse order of setup_prepare. Installed as
# the EXIT trap at the bottom of the file.
cleanup()
{
	local step

	pre_cleanup

	for step in switch_destroy h3_destroy h2_destroy h1_destroy; do
		$step
	done

	vrf_cleanup
}
196
# Connectivity check: ping the VLAN 111 address of H3 from $h2.
ping_ipv4()
{
	local h3_vlan_ip=192.0.2.130

	ping_test $h2 $h3_vlan_ip
}
201
# Print a rate given in bits per second in a human-readable form, using
# 1024-based units up to Gbps.
#
# Arguments: $1 - rate in bps
# Outputs:   the scaled rate immediately followed by its unit, e.g. "2.0Gbps".
#            Rates below 1024 bps are printed unmodified; scaled rates carry
#            one decimal digit (bc truncates, it does not round).
#
# Gbps is the largest unit: a rate of 1024 Gbps or more is still printed in
# Gbps instead of being divided once more and mislabeled.
humanize()
{
	local speed=$1; shift
	local -a units=(bps Kbps Mbps Gbps)
	local last=$((${#units[@]} - 1))
	local i

	# Only scale down while there is a larger unit left to move to.
	for ((i = 0; i < last; i++)); do
		if (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${units[i]}"
}
216
# Convert the difference between two octet counter readings into a rate.
#
# Arguments: $1 - counter value at the start of the interval
#            $2 - counter value at the end of the interval
#            $3 - length of the interval in seconds
# Outputs:   the rate in bits per second (integer division)
rate()
{
	local start=$1
	local end=$2
	local secs=$3
	local bits=$((8 * (end - start)))

	echo $((bits / secs))
}
225
# Check that a measured rate exceeds a required minimum.
#
# Arguments: $1 - measured rate in bps
#            $2 - minimum rate in bps; the measured rate must be greater
#            $3 - description of what was measured, used in the diagnostic
# Outputs:   on failure, a "<what> <rate> < <min>" line on stderr
# Returns:   0 if the rate is above the minimum, 1 otherwise
check_rate()
{
	local rate=$1; shift
	local min=$1; shift
	local what=$1; shift

	if ((rate > min)); then
		return 0
	fi

	# Report this function's own arguments rather than whatever variables
	# happen to be visible from the caller's scope.
	echo "$what $(humanize $rate) < $(humanize $min)" >&2
	return 1
}
239
# Send a unicast UDP stream from $h2.111 towards $h3 and measure its rate on
# ingress to the switch ($swp2) and on arrival at $h3, using the
# rx_octets_prio_1 ethtool counters.
#
# The measurement is taken over a 10-second window and repeated up to six
# times until the ingress rate exceeds the required minimum.
#
# Arguments: $1 - label of the scenario, used in diagnostics
# Globals:   h2, h3, swp2, h3mac (read)
# Outputs:   "<ingress rate> <egress rate>" in bps of the last window
# Returns:   0 if a window with sufficient ingress rate was seen, 1 otherwise.
#            The function returns instead of exiting, so it is safe to call
#            both directly and from a command substitution.
measure_uc_rate()
{
	local what=$1; shift

	local interval=10
	local mz_pid
	local t0 t1 u0 u1
	local ir er
	local i
	local ret=0

	# Dips in performance might cause momentary ingress rate to drop below
	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
	# average ingress rate to somewhat mitigate this.
	local min_ingress=2147483648

	mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &
	# Remember exactly which process was started, so that only it is
	# stopped below regardless of other background jobs.
	mz_pid=$!
	sleep 1

	for i in {5..0}; do
		t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
		u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
		sleep $interval
		t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
		u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

		ir=$(rate $u0 $u1 $interval)
		er=$(rate $t0 $t1 $interval)

		if check_rate $ir $min_ingress "$what ingress rate"; then
			break
		fi

		# Fail the test if we can't get the throughput.
		if ((i == 0)); then
			ret=1
		fi
	done

	# Suppress noise from killing mausezahn.
	{ kill $mz_pid && wait $mz_pid; } 2>/dev/null

	echo $ir $er
	return $ret
}
284
# Verify that unicast throughput does not degrade under a flood from $h1.
#
# The unicast egress rate is measured twice with measure_uc_rate: once on its
# own and once while mausezahn floods from $h1 (destination "bc"). The test
# fails if either measurement could not reach the required ingress rate, if
# there was no UC-only egress throughput to compare against, or if the
# throughput dropped by more than 10%. A full report is printed afterwards.
#
# Globals: h1, h3, swp1 (read); RET (written)
test_mc_aware()
{
	RET=0

	local -a uc_rate
	uc_rate=($(measure_uc_rate "UC-only"))
	check_err $? "Could not get high enough UC-only ingress rate"
	local ucth1=${uc_rate[1]}

	mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
	# Remember which process to stop later; relying on "the current job"
	# breaks as soon as another background job is started.
	local mc_pid=$!

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	local -a uc_rate_2
	uc_rate_2=($(measure_uc_rate "UC+MC"))
	check_err $? "Could not get high enough UC+MC ingress rate"
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	# Relative throughput loss in percent, clamped at 0. Without a non-zero
	# UC-only baseline the division is undefined; fail explicitly instead
	# of letting the check pass on an empty result.
	local deg=0
	if ((ucth1 > 0)); then
		deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		    ")
		check_err "$(bc <<< "$deg > 10")" \
			"UC throughput degraded by more than 10%"
	else
		check_err 1 "No UC-only egress throughput to compare against"
	fi

	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	# Suppress noise from killing mausezahn.
	{ kill $mc_pid && wait $mc_pid; } 2>/dev/null

	log_test "UC performance under MC overload"

	echo "UC-only throughput  $(humanize $ucth1)"
	echo "UC+MC throughput    $(humanize $ucth2)"
	echo "Degradation         $deg %"
	echo
	echo "Full report:"
	echo "  UC only:"
	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
	echo "  UC+MC:"
	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
	echo "    ingress MC throughput $(humanize $mc_ir)"
	echo "    egress MC throughput  $(humanize $mc_er)"
}
339
# Register the teardown first, so that the topology is dismantled however the
# script ends, including a failure part-way through setup.
trap cleanup EXIT

# Build the topology. setup_wait and tests_run are not defined in this file.
# NOTE(review): presumably provided by the sourced lib.sh; tests_run is
# expected to run the functions listed in ALL_TESTS.
setup_prepare
setup_wait

tests_run

# Report the overall result as the exit status of the script.
exit $EXIT_STATUS
348