xref: /openbmc/linux/drivers/net/ethernet/sfc/tc_counters.c (revision 6486c0f44ed8e91073c1b08e83075e3832618ae5)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2022 Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10 
11 #include "tc_counters.h"
12 #include "tc_encap_actions.h"
13 #include "mae_counter_format.h"
14 #include "mae.h"
15 #include "rx_common.h"
16 
17 /* Counter-management hashtables */
18 
19 static const struct rhashtable_params efx_tc_counter_id_ht_params = {
20 	.key_len	= offsetof(struct efx_tc_counter_index, linkage),
21 	.key_offset	= 0,
22 	.head_offset	= offsetof(struct efx_tc_counter_index, linkage),
23 };
24 
25 static const struct rhashtable_params efx_tc_counter_ht_params = {
26 	.key_len	= offsetof(struct efx_tc_counter, linkage),
27 	.key_offset	= 0,
28 	.head_offset	= offsetof(struct efx_tc_counter, linkage),
29 };
30 
31 static void efx_tc_counter_free(void *ptr, void *__unused)
32 {
33 	struct efx_tc_counter *cnt = ptr;
34 
35 	WARN_ON(!list_empty(&cnt->users));
36 	/* We'd like to synchronize_rcu() here, but unfortunately we aren't
37 	 * removing the element from the hashtable (it's not clear that's a
38 	 * safe thing to do in an rhashtable_free_and_destroy free_fn), so
39 	 * threads could still be obtaining new pointers to *cnt if they can
40 	 * race against this function at all.
41 	 */
42 	flush_work(&cnt->work);
43 	EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
44 	kfree(cnt);
45 }
46 
47 static void efx_tc_counter_id_free(void *ptr, void *__unused)
48 {
49 	struct efx_tc_counter_index *ctr = ptr;
50 
51 	WARN_ON(refcount_read(&ctr->ref));
52 	kfree(ctr);
53 }
54 
55 int efx_tc_init_counters(struct efx_nic *efx)
56 {
57 	int rc;
58 
59 	rc = rhashtable_init(&efx->tc->counter_id_ht, &efx_tc_counter_id_ht_params);
60 	if (rc < 0)
61 		goto fail_counter_id_ht;
62 	rc = rhashtable_init(&efx->tc->counter_ht, &efx_tc_counter_ht_params);
63 	if (rc < 0)
64 		goto fail_counter_ht;
65 	return 0;
66 fail_counter_ht:
67 	rhashtable_destroy(&efx->tc->counter_id_ht);
68 fail_counter_id_ht:
69 	return rc;
70 }
71 
72 /* Only call this in init failure teardown.
73  * Normal exit should fini instead as there may be entries in the table.
74  */
75 void efx_tc_destroy_counters(struct efx_nic *efx)
76 {
77 	rhashtable_destroy(&efx->tc->counter_ht);
78 	rhashtable_destroy(&efx->tc->counter_id_ht);
79 }
80 
81 void efx_tc_fini_counters(struct efx_nic *efx)
82 {
83 	rhashtable_free_and_destroy(&efx->tc->counter_id_ht, efx_tc_counter_id_free, NULL);
84 	rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL);
85 }
86 
87 static void efx_tc_counter_work(struct work_struct *work)
88 {
89 	struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work);
90 	struct efx_tc_encap_action *encap;
91 	struct efx_tc_action_set *act;
92 	unsigned long touched;
93 	struct neighbour *n;
94 
95 	spin_lock_bh(&cnt->lock);
96 	touched = READ_ONCE(cnt->touched);
97 
98 	list_for_each_entry(act, &cnt->users, count_user) {
99 		encap = act->encap_md;
100 		if (!encap)
101 			continue;
102 		if (!encap->neigh) /* can't happen */
103 			continue;
104 		if (time_after_eq(encap->neigh->used, touched))
105 			continue;
106 		encap->neigh->used = touched;
107 		/* We have passed traffic using this ARP entry, so
108 		 * indicate to the ARP cache that it's still active
109 		 */
110 		if (encap->neigh->dst_ip)
111 			n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip,
112 					 encap->neigh->egdev);
113 		else
114 #if IS_ENABLED(CONFIG_IPV6)
115 			n = neigh_lookup(ipv6_stub->nd_tbl,
116 					 &encap->neigh->dst_ip6,
117 					 encap->neigh->egdev);
118 #else
119 			n = NULL;
120 #endif
121 		if (!n)
122 			continue;
123 
124 		neigh_event_send(n, NULL);
125 		neigh_release(n);
126 	}
127 	spin_unlock_bh(&cnt->lock);
128 }
129 
130 /* Counter allocation */
131 
132 struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
133 						      int type)
134 {
135 	struct efx_tc_counter *cnt;
136 	int rc, rc2;
137 
138 	cnt = kzalloc(sizeof(*cnt), GFP_USER);
139 	if (!cnt)
140 		return ERR_PTR(-ENOMEM);
141 
142 	spin_lock_init(&cnt->lock);
143 	INIT_WORK(&cnt->work, efx_tc_counter_work);
144 	cnt->touched = jiffies;
145 	cnt->type = type;
146 
147 	rc = efx_mae_allocate_counter(efx, cnt);
148 	if (rc)
149 		goto fail1;
150 	INIT_LIST_HEAD(&cnt->users);
151 	rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage,
152 				    efx_tc_counter_ht_params);
153 	if (rc)
154 		goto fail2;
155 	return cnt;
156 fail2:
157 	/* If we get here, it implies that we couldn't insert into the table,
158 	 * which in turn probably means that the fw_id was already taken.
159 	 * In that case, it's unclear whether we really 'own' the fw_id; but
160 	 * the firmware seemed to think we did, so it's proper to free it.
161 	 */
162 	rc2 = efx_mae_free_counter(efx, cnt);
163 	if (rc2)
164 		netif_warn(efx, hw, efx->net_dev,
165 			   "Failed to free MAE counter %u, rc %d\n",
166 			   cnt->fw_id, rc2);
167 fail1:
168 	kfree(cnt);
169 	return ERR_PTR(rc > 0 ? -EIO : rc);
170 }
171 
172 void efx_tc_flower_release_counter(struct efx_nic *efx,
173 				   struct efx_tc_counter *cnt)
174 {
175 	int rc;
176 
177 	rhashtable_remove_fast(&efx->tc->counter_ht, &cnt->linkage,
178 			       efx_tc_counter_ht_params);
179 	rc = efx_mae_free_counter(efx, cnt);
180 	if (rc)
181 		netif_warn(efx, hw, efx->net_dev,
182 			   "Failed to free MAE counter %u, rc %d\n",
183 			   cnt->fw_id, rc);
184 	WARN_ON(!list_empty(&cnt->users));
185 	/* This doesn't protect counter updates coming in arbitrarily long
186 	 * after we deleted the counter.  The RCU just ensures that we won't
187 	 * free the counter while another thread has a pointer to it.
188 	 * Ensuring we don't update the wrong counter if the ID gets re-used
189 	 * is handled by the generation count.
190 	 */
191 	synchronize_rcu();
192 	flush_work(&cnt->work);
193 	EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
194 	kfree(cnt);
195 }
196 
197 static struct efx_tc_counter *efx_tc_flower_find_counter_by_fw_id(
198 				struct efx_nic *efx, int type, u32 fw_id)
199 {
200 	struct efx_tc_counter key = {};
201 
202 	key.fw_id = fw_id;
203 	key.type = type;
204 
205 	return rhashtable_lookup_fast(&efx->tc->counter_ht, &key,
206 				      efx_tc_counter_ht_params);
207 }
208 
209 /* TC cookie to counter mapping */
210 
211 void efx_tc_flower_put_counter_index(struct efx_nic *efx,
212 				     struct efx_tc_counter_index *ctr)
213 {
214 	if (!refcount_dec_and_test(&ctr->ref))
215 		return; /* still in use */
216 	rhashtable_remove_fast(&efx->tc->counter_id_ht, &ctr->linkage,
217 			       efx_tc_counter_id_ht_params);
218 	efx_tc_flower_release_counter(efx, ctr->cnt);
219 	kfree(ctr);
220 }
221 
222 struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
223 				struct efx_nic *efx, unsigned long cookie,
224 				enum efx_tc_counter_type type)
225 {
226 	struct efx_tc_counter_index *ctr, *old;
227 	struct efx_tc_counter *cnt;
228 
229 	ctr = kzalloc(sizeof(*ctr), GFP_USER);
230 	if (!ctr)
231 		return ERR_PTR(-ENOMEM);
232 	ctr->cookie = cookie;
233 	old = rhashtable_lookup_get_insert_fast(&efx->tc->counter_id_ht,
234 						&ctr->linkage,
235 						efx_tc_counter_id_ht_params);
236 	if (old) {
237 		/* don't need our new entry */
238 		kfree(ctr);
239 		if (!refcount_inc_not_zero(&old->ref))
240 			return ERR_PTR(-EAGAIN);
241 		/* existing entry found */
242 		ctr = old;
243 	} else {
244 		cnt = efx_tc_flower_allocate_counter(efx, type);
245 		if (IS_ERR(cnt)) {
246 			rhashtable_remove_fast(&efx->tc->counter_id_ht,
247 					       &ctr->linkage,
248 					       efx_tc_counter_id_ht_params);
249 			kfree(ctr);
250 			return (void *)cnt; /* it's an ERR_PTR */
251 		}
252 		ctr->cnt = cnt;
253 		refcount_set(&ctr->ref, 1);
254 	}
255 	return ctr;
256 }
257 
258 struct efx_tc_counter_index *efx_tc_flower_find_counter_index(
259 				struct efx_nic *efx, unsigned long cookie)
260 {
261 	struct efx_tc_counter_index key = {};
262 
263 	key.cookie = cookie;
264 	return rhashtable_lookup_fast(&efx->tc->counter_id_ht, &key,
265 				      efx_tc_counter_id_ht_params);
266 }
267 
268 /* TC Channel.  Counter updates are delivered on this channel's RXQ. */
269 
270 static void efx_tc_handle_no_channel(struct efx_nic *efx)
271 {
272 	netif_warn(efx, drv, efx->net_dev,
273 		   "MAE counters require MSI-X and 1 additional interrupt vector.\n");
274 }
275 
276 static int efx_tc_probe_channel(struct efx_channel *channel)
277 {
278 	struct efx_rx_queue *rx_queue = &channel->rx_queue;
279 
280 	channel->irq_moderation_us = 0;
281 	rx_queue->core_index = 0;
282 
283 	INIT_WORK(&rx_queue->grant_work, efx_mae_counters_grant_credits);
284 
285 	return 0;
286 }
287 
288 static int efx_tc_start_channel(struct efx_channel *channel)
289 {
290 	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
291 	struct efx_nic *efx = channel->efx;
292 
293 	return efx_mae_start_counters(efx, rx_queue);
294 }
295 
296 static void efx_tc_stop_channel(struct efx_channel *channel)
297 {
298 	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
299 	struct efx_nic *efx = channel->efx;
300 	int rc;
301 
302 	rc = efx_mae_stop_counters(efx, rx_queue);
303 	if (rc)
304 		netif_warn(efx, drv, efx->net_dev,
305 			   "Failed to stop MAE counters streaming, rc=%d.\n",
306 			   rc);
307 	rx_queue->grant_credits = false;
308 	flush_work(&rx_queue->grant_work);
309 }
310 
/* .post_remove hook of efx_tc_channel_type */
static void efx_tc_remove_channel(struct efx_channel *channel)
{
	/* intentionally empty: nothing is done here on removal */
}
314 
315 static void efx_tc_get_channel_name(struct efx_channel *channel,
316 				    char *buf, size_t len)
317 {
318 	snprintf(buf, len, "%s-mae", channel->efx->name);
319 }
320 
321 static void efx_tc_counter_update(struct efx_nic *efx,
322 				  enum efx_tc_counter_type counter_type,
323 				  u32 counter_idx, u64 packets, u64 bytes,
324 				  u32 mark)
325 {
326 	struct efx_tc_counter *cnt;
327 
328 	rcu_read_lock(); /* Protect against deletion of 'cnt' */
329 	cnt = efx_tc_flower_find_counter_by_fw_id(efx, counter_type, counter_idx);
330 	if (!cnt) {
331 		/* This can legitimately happen when a counter is removed,
332 		 * with updates for the counter still in-flight; however this
333 		 * should be an infrequent occurrence.
334 		 */
335 		if (net_ratelimit())
336 			netif_dbg(efx, drv, efx->net_dev,
337 				  "Got update for unwanted MAE counter %u type %u\n",
338 				  counter_idx, counter_type);
339 		goto out;
340 	}
341 
342 	spin_lock_bh(&cnt->lock);
343 	if ((s32)mark - (s32)cnt->gen < 0) {
344 		/* This counter update packet is from before the counter was
345 		 * allocated; thus it must be for a previous counter with
346 		 * the same ID that has since been freed, and it should be
347 		 * ignored.
348 		 */
349 	} else {
350 		/* Update latest seen generation count.  This ensures that
351 		 * even a long-lived counter won't start getting ignored if
352 		 * the generation count wraps around, unless it somehow
353 		 * manages to go 1<<31 generations without an update.
354 		 */
355 		cnt->gen = mark;
356 		/* update counter values */
357 		cnt->packets += packets;
358 		cnt->bytes += bytes;
359 		cnt->touched = jiffies;
360 	}
361 	spin_unlock_bh(&cnt->lock);
362 	schedule_work(&cnt->work);
363 out:
364 	rcu_read_unlock();
365 }
366 
367 static void efx_tc_rx_version_1(struct efx_nic *efx, const u8 *data, u32 mark)
368 {
369 	u16 n_counters, i;
370 
371 	/* Header format:
372 	 * + |   0    |   1    |   2    |   3    |
373 	 * 0 |version |         reserved         |
374 	 * 4 |    seq_index    |   n_counters    |
375 	 */
376 
377 	n_counters = le16_to_cpu(*(const __le16 *)(data + 6));
378 
379 	/* Counter update entry format:
380 	 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f |
381 	 * |  counter_idx  |     packet_count      |      byte_count       |
382 	 */
383 	for (i = 0; i < n_counters; i++) {
384 		const void *entry = data + 8 + 16 * i;
385 		u64 packet_count, byte_count;
386 		u32 counter_idx;
387 
388 		counter_idx = le32_to_cpu(*(const __le32 *)entry);
389 		packet_count = le32_to_cpu(*(const __le32 *)(entry + 4)) |
390 			       ((u64)le16_to_cpu(*(const __le16 *)(entry + 8)) << 32);
391 		byte_count = le16_to_cpu(*(const __le16 *)(entry + 10)) |
392 			     ((u64)le32_to_cpu(*(const __le32 *)(entry + 12)) << 16);
393 		efx_tc_counter_update(efx, EFX_TC_COUNTER_TYPE_AR, counter_idx,
394 				      packet_count, byte_count, mark);
395 	}
396 }
397 
/* Accessors for version-2 counter packets.  Field positions come from the
 * ERF_SC_PACKETISER_* bit-offset (_LBN) and width (_WIDTH) definitions;
 * each accessor fails the build if the field does not have the alignment
 * or width that the access assumes.
 */

/* Pointer to a byte-aligned header field of packet @pkt */
#define TCV2_HDR_PTR(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_HEADER_##field##_LBN / 8)
/* Value of an 8-bit header field */
#define TCV2_HDR_BYTE(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 8),\
	 *TCV2_HDR_PTR(pkt, field))
/* Raw (little-endian) value of a 16-bit, 16-bit-aligned header field */
#define TCV2_HDR_WORD(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 16),\
	 (void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 15),	\
	 *(__force const __le16 *)TCV2_HDR_PTR(pkt, field))
/* Pointer to a byte-aligned field of payload entry @i, where the payload
 * starts @poff bytes into @pkt.  @poff and @i are parenthesised so that
 * expression arguments are evaluated as a unit.
 */
#define TCV2_PKT_PTR(pkt, poff, i, field)					\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_PAYLOAD_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_PAYLOAD_##field##_LBN/8 + (poff) +		\
	 (i) * ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)
412 
413 /* Read a little-endian 48-bit field with 16-bit alignment */
414 static u64 efx_tc_read48(const __le16 *field)
415 {
416 	u64 out = 0;
417 	int i;
418 
419 	for (i = 0; i < 3; i++)
420 		out |= (u64)le16_to_cpu(field[i]) << (i * 16);
421 	return out;
422 }
423 
424 static enum efx_tc_counter_type efx_tc_rx_version_2(struct efx_nic *efx,
425 						    const u8 *data, u32 mark)
426 {
427 	u8 payload_offset, header_offset, ident;
428 	enum efx_tc_counter_type type;
429 	u16 n_counters, i;
430 
431 	ident = TCV2_HDR_BYTE(data, IDENTIFIER);
432 	switch (ident) {
433 	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR:
434 		type = EFX_TC_COUNTER_TYPE_AR;
435 		break;
436 	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_CT:
437 		type = EFX_TC_COUNTER_TYPE_CT;
438 		break;
439 	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_OR:
440 		type = EFX_TC_COUNTER_TYPE_OR;
441 		break;
442 	default:
443 		if (net_ratelimit())
444 			netif_err(efx, drv, efx->net_dev,
445 				  "ignored v2 MAE counter packet (bad identifier %u"
446 				  "), counters may be inaccurate\n", ident);
447 		return EFX_TC_COUNTER_TYPE_MAX;
448 	}
449 	header_offset = TCV2_HDR_BYTE(data, HEADER_OFFSET);
450 	/* mae_counter_format.h implies that this offset is fixed, since it
451 	 * carries on with SOP-based LBNs for the fields in this header
452 	 */
453 	if (header_offset != ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT) {
454 		if (net_ratelimit())
455 			netif_err(efx, drv, efx->net_dev,
456 				  "choked on v2 MAE counter packet (bad header_offset %u"
457 				  "), counters may be inaccurate\n", header_offset);
458 		return EFX_TC_COUNTER_TYPE_MAX;
459 	}
460 	payload_offset = TCV2_HDR_BYTE(data, PAYLOAD_OFFSET);
461 	n_counters = le16_to_cpu(TCV2_HDR_WORD(data, COUNT));
462 
463 	for (i = 0; i < n_counters; i++) {
464 		const void *counter_idx_p, *packet_count_p, *byte_count_p;
465 		u64 packet_count, byte_count;
466 		u32 counter_idx;
467 
468 		/* 24-bit field with 32-bit alignment */
469 		counter_idx_p = TCV2_PKT_PTR(data, payload_offset, i, COUNTER_INDEX);
470 		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_WIDTH != 24);
471 		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_LBN & 31);
472 		counter_idx = le32_to_cpu(*(const __le32 *)counter_idx_p) & 0xffffff;
473 		/* 48-bit field with 16-bit alignment */
474 		packet_count_p = TCV2_PKT_PTR(data, payload_offset, i, PACKET_COUNT);
475 		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_WIDTH != 48);
476 		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LBN & 15);
477 		packet_count = efx_tc_read48((const __le16 *)packet_count_p);
478 		/* 48-bit field with 16-bit alignment */
479 		byte_count_p = TCV2_PKT_PTR(data, payload_offset, i, BYTE_COUNT);
480 		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_WIDTH != 48);
481 		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LBN & 15);
482 		byte_count = efx_tc_read48((const __le16 *)byte_count_p);
483 
484 		if (type == EFX_TC_COUNTER_TYPE_CT) {
485 			/* CT counters are 1-bit saturating counters to update
486 			 * the lastuse time in CT stats. A received CT counter
487 			 * should have packet counter to 0 and only LSB bit on
488 			 * in byte counter.
489 			 */
490 			if (packet_count || byte_count != 1)
491 				netdev_warn_once(efx->net_dev,
492 						 "CT counter with inconsistent state (%llu, %llu)\n",
493 						 packet_count, byte_count);
494 			/* Do not increment the driver's byte counter */
495 			byte_count = 0;
496 		}
497 
498 		efx_tc_counter_update(efx, type, counter_idx, packet_count,
499 				      byte_count, mark);
500 	}
501 	return type;
502 }
503 
504 /* We always swallow the packet, whether successful or not, since it's not
505  * a network packet and shouldn't ever be forwarded to the stack.
506  * @mark is the generation count for counter allocations.
507  */
508 static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark)
509 {
510 	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
511 	struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue,
512 						     channel->rx_pkt_index);
513 	const u8 *data = efx_rx_buf_va(rx_buf);
514 	struct efx_nic *efx = rx_queue->efx;
515 	enum efx_tc_counter_type type;
516 	u8 version;
517 
518 	/* version is always first byte of packet */
519 	version = *data;
520 	switch (version) {
521 	case 1:
522 		type = EFX_TC_COUNTER_TYPE_AR;
523 		efx_tc_rx_version_1(efx, data, mark);
524 		break;
525 	case ERF_SC_PACKETISER_HEADER_VERSION_VALUE: // 2
526 		type = efx_tc_rx_version_2(efx, data, mark);
527 		break;
528 	default:
529 		if (net_ratelimit())
530 			netif_err(efx, drv, efx->net_dev,
531 				  "choked on MAE counter packet (bad version %u"
532 				  "); counters may be inaccurate\n",
533 				  version);
534 		goto out;
535 	}
536 
537 	if (type < EFX_TC_COUNTER_TYPE_MAX) {
538 		/* Update seen_gen unconditionally, to avoid a missed wakeup if
539 		 * we race with efx_mae_stop_counters().
540 		 */
541 		efx->tc->seen_gen[type] = mark;
542 		if (efx->tc->flush_counters &&
543 		    (s32)(efx->tc->flush_gen[type] - mark) <= 0)
544 			wake_up(&efx->tc->flush_wq);
545 	}
546 out:
547 	efx_free_rx_buffers(rx_queue, rx_buf, 1);
548 	channel->rx_pkt_n_frags = 0;
549 	return true;
550 }
551 
552 const struct efx_channel_type efx_tc_channel_type = {
553 	.handle_no_channel	= efx_tc_handle_no_channel,
554 	.pre_probe		= efx_tc_probe_channel,
555 	.start			= efx_tc_start_channel,
556 	.stop			= efx_tc_stop_channel,
557 	.post_remove		= efx_tc_remove_channel,
558 	.get_name		= efx_tc_get_channel_name,
559 	.receive_raw		= efx_tc_rx,
560 	.keep_eventq		= true,
561 };
562