// SPDX-License-Identifier: GPL-2.0-only

#ifndef _NFT_SET_PIPAPO_H
#define _NFT_SET_PIPAPO_H

#include <linux/log2.h>
#include <net/ipv6.h>			/* For the maximum length of a field */

/* Count of concatenated fields depends on count of 32-bit nftables registers */
#define NFT_PIPAPO_MAX_FIELDS		NFT_REG32_COUNT

/* Restrict usage to multiple fields, make sure rbtree is used otherwise */
#define NFT_PIPAPO_MIN_FIELDS		2

/* Largest supported field size */
#define NFT_PIPAPO_MAX_BYTES		(sizeof(struct in6_addr))
#define NFT_PIPAPO_MAX_BITS		(NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)

/* Bits to be grouped together in table buckets depending on set size */
#define NFT_PIPAPO_GROUP_BITS_INIT	NFT_PIPAPO_GROUP_BITS_SMALL_SET
#define NFT_PIPAPO_GROUP_BITS_SMALL_SET	8
#define NFT_PIPAPO_GROUP_BITS_LARGE_SET	4
#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4				\
	BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) ||		\
		     (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4))
#define NFT_PIPAPO_GROUPS_PER_BYTE(f)	(BITS_PER_BYTE / (f)->bb)

/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the
 * small group width, and switch to the big group width if the table gets
 * smaller than NFT_PIPAPO_LT_SIZE_LOW.
 *
 * Picking 2MiB as threshold (for a single table) avoids as much as possible
 * crossing page boundaries on most architectures (x86-64 and MIPS huge pages,
 * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which
 * keeps performance nice in case kvmalloc() gives us non-contiguous areas.
 */
#define NFT_PIPAPO_LT_SIZE_THRESHOLD	(1 << 21)
#define NFT_PIPAPO_LT_SIZE_HYSTERESIS	(1 << 16)
#define NFT_PIPAPO_LT_SIZE_HIGH		NFT_PIPAPO_LT_SIZE_THRESHOLD
#define NFT_PIPAPO_LT_SIZE_LOW		NFT_PIPAPO_LT_SIZE_THRESHOLD -	\
					NFT_PIPAPO_LT_SIZE_HYSTERESIS
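
/* For reference (illustrative arithmetic, not part of the original source):
 * NFT_PIPAPO_LT_SIZE_HIGH is 2MiB (2097152 bytes) and NFT_PIPAPO_LT_SIZE_LOW
 * is 2MiB - 64KiB (2031616 bytes), so a lookup table has to shrink at least
 * 64KiB below the threshold before we switch back to the wider group size.
 */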

/* Fields are padded to 32 bits in input registers */
#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f)				\
	(round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32)))
#define NFT_PIPAPO_GROUPS_PADDING(f)					\
	(NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups /		\
					    NFT_PIPAPO_GROUPS_PER_BYTE(f))
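
/* Worked example (not from the original source): a 6-byte field such as an
 * Ethernet address, stored with 8-bit groups (one group per byte), has
 * (f)->groups == 6, so NFT_PIPAPO_GROUPS_PADDED_SIZE() rounds up to 8 and
 * NFT_PIPAPO_GROUPS_PADDING() evaluates to 2 padding bytes.
 */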

/* Number of buckets given by 2 ^ n, with n bucket bits */
#define NFT_PIPAPO_BUCKETS(bb)		(1 << (bb))

/* Each n-bit range maps to up to n * 2 rules */
#define NFT_PIPAPO_MAP_NBITS		(const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
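
/* Derived from the definitions above (illustrative, not in the original
 * source): NFT_PIPAPO_MAX_BITS is 128, so the widest possible range can
 * expand to 128 * 2 == 256 rules, and NFT_PIPAPO_MAP_NBITS is
 * const_ilog2(256) == 8.
 */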

/* Use the rest of mapping table buckets for rule indices, but it makes no sense
 * to exceed 32 bits
 */
#if BITS_PER_LONG == 64
#define NFT_PIPAPO_MAP_TOBITS		32
#else
#define NFT_PIPAPO_MAP_TOBITS		(BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
#endif

/* ...which gives us the highest allowed index for a rule */
#define NFT_PIPAPO_RULE0_MAX		((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
					- (1UL << NFT_PIPAPO_MAP_NBITS))
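
/* For instance, on a 64-bit platform (NFT_PIPAPO_MAP_TOBITS == 32,
 * NFT_PIPAPO_MAP_NBITS == 8) this evaluates to (1UL << 31) - (1UL << 8),
 * i.e. 2147483392 (illustrative arithmetic, not part of the original source).
 */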

/* Definitions for vectorised implementations */
#ifdef NFT_PIPAPO_ALIGN
#define NFT_PIPAPO_ALIGN_HEADROOM					\
	(NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
#define NFT_PIPAPO_LT_ALIGN(lt)		(PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
#define NFT_PIPAPO_LT_ASSIGN(field, x)					\
	do {								\
		(field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x);		\
		(field)->lt = (x);					\
	} while (0)
#else
#define NFT_PIPAPO_ALIGN_HEADROOM	0
#define NFT_PIPAPO_LT_ALIGN(lt)		(lt)
#define NFT_PIPAPO_LT_ASSIGN(field, x)	((field)->lt = (x))
#endif /* NFT_PIPAPO_ALIGN */
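
/* Usage sketch (hedged, not a verbatim excerpt of the callers; "new_lt" and
 * "lt_size" are illustrative names): allocate the lookup table with
 * NFT_PIPAPO_ALIGN_HEADROOM spare bytes, keep the raw pointer for freeing,
 * and only ever dereference the aligned view:
 *
 *	new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL);
 *	if (!new_lt)
 *		return -ENOMEM;
 *	NFT_PIPAPO_LT_ASSIGN(f, new_lt);
 *	operate on NFT_PIPAPO_LT_ALIGN(f->lt), kvfree(f->lt) when done.
 */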

#define nft_pipapo_for_each_field(field, index, match)		\
	for ((field) = (match)->f, (index) = 0;			\
	     (index) < (match)->field_count;			\
	     (index)++, (field)++)
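
/* Example (illustrative only, "m" being some struct nft_pipapo_match *):
 * iterate all fields of a match to find the largest bucket size:
 *
 *	struct nft_pipapo_field *f;
 *	size_t bsize_max = 0;
 *	int i;
 *
 *	nft_pipapo_for_each_field(f, i, m)
 *		bsize_max = max(bsize_max, f->bsize);
 */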

/**
 * union nft_pipapo_map_bucket - Bucket of mapping table
 * @to:		First rule number (in next field) this rule maps to
 * @n:		Number of rules (in next field) this rule maps to
 * @e:		If there's no next field, pointer to element this rule maps to
 */
union nft_pipapo_map_bucket {
	struct {
#if BITS_PER_LONG == 64
		static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
		u32 to;

		static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
		u32 n;
#else
		unsigned long to:NFT_PIPAPO_MAP_TOBITS;
		unsigned long  n:NFT_PIPAPO_MAP_NBITS;
#endif
	};
	struct nft_pipapo_elem *e;
};

/**
 * struct nft_pipapo_field - Lookup, mapping tables and related data for a field
 * @groups:	Amount of bit groups
 * @rules:	Number of inserted rules
 * @bsize:	Size of each bucket in lookup table, in longs
 * @bb:		Number of bits grouped together in lookup table buckets
 * @lt:		Lookup table: 'groups' rows of buckets
 * @lt_aligned:	Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
 * @mt:		Mapping table: one bucket per rule
 */
struct nft_pipapo_field {
	int groups;
	unsigned long rules;
	size_t bsize;
	int bb;
#ifdef NFT_PIPAPO_ALIGN
	unsigned long *lt_aligned;
#endif
	unsigned long *lt;
	union nft_pipapo_map_bucket *mt;
};

/**
 * struct nft_pipapo_scratch - percpu data used for lookup and matching
 * @map_index:	Current working bitmap index, toggled between field matches
 * @align_off:	Offset to get the originally allocated address
 * @map:	store partial matching results during lookup
 */
struct nft_pipapo_scratch {
	u8 map_index;
	u32 align_off;
	unsigned long map[];
};

/**
 * struct nft_pipapo_match - Data used for lookup and matching
 * @field_count:	Amount of fields in set
 * @scratch:		Preallocated per-CPU maps for partial matching results
 * @bsize_max:		Maximum lookup table bucket size of all fields, in longs
 * @rcu:		Matching data is swapped on commits
 * @f:			Fields, with lookup and mapping tables
 */
struct nft_pipapo_match {
	int field_count;
	struct nft_pipapo_scratch * __percpu *scratch;
	size_t bsize_max;
	struct rcu_head rcu;
	struct nft_pipapo_field f[] __counted_by(field_count);
};

/**
 * struct nft_pipapo - Representation of a set
 * @match:	Currently in-use matching data
 * @clone:	Copy where pending insertions and deletions are kept
 * @width:	Total bytes to be matched for one packet, including padding
 * @dirty:	Working copy has pending insertions or deletions
 * @last_gc:	Timestamp of last garbage collection run, jiffies
 */
struct nft_pipapo {
	struct nft_pipapo_match __rcu *match;
	struct nft_pipapo_match *clone;
	int width;
	bool dirty;
	unsigned long last_gc;
};

struct nft_pipapo_elem;

/**
 * struct nft_pipapo_elem - API-facing representation of single set element
 * @ext:	nftables API extensions
 */
struct nft_pipapo_elem {
	struct nft_set_ext ext;
};

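/* Defined in nft_set_pipapo.c. Rough summary (hedged, see the definition for
 * the authoritative behaviour): for each bit set in @map, use the mapping
 * table @mt to set the bits of the corresponding rules for the next field in
 * @dst; with @match_only, report the first matching rule index instead of
 * filling @dst. Returns -1 if no bit is set in @map.
 */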
int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
		  union nft_pipapo_map_bucket *mt, bool match_only);

/**
 * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
 * @f:		Field including lookup table
 * @dst:	Area to store result
 * @data:	Input data selecting table buckets
 */
static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
						 unsigned long *dst,
						 const u8 *data)
{
	unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
	int group;

	for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) {
		u8 v;

		v = *data >> 4;
		__bitmap_and(dst, dst, lt + v * f->bsize,
			     f->bsize * BITS_PER_LONG);
		lt += f->bsize * NFT_PIPAPO_BUCKETS(4);

		v = *data & 0x0f;
		__bitmap_and(dst, dst, lt + v * f->bsize,
			     f->bsize * BITS_PER_LONG);
		lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
	}
}

/**
 * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets
 * @f:		Field including lookup table
 * @dst:	Area to store result
 * @data:	Input data selecting table buckets
 */
static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f,
						 unsigned long *dst,
						 const u8 *data)
{
	unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
	int group;

	for (group = 0; group < f->groups; group++, data++) {
		__bitmap_and(dst, dst, lt + *data * f->bsize,
			     f->bsize * BITS_PER_LONG);
		lt += f->bsize * NFT_PIPAPO_BUCKETS(8);
	}
}
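
/* Note (hedged, based on how the lookup path is expected to use these
 * helpers): both bucket-intersection functions AND into @dst, so the caller
 * is responsible for initialising the result map to all-ones, e.g. with
 * memset(res_map, 0xff, f->bsize * sizeof(unsigned long)), before processing
 * the first field of a lookup.
 */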

/**
 * pipapo_estimate_size() - Estimate worst-case for set size
 * @desc:	Set description, element count and field description used here
 *
 * The size for this set type can vary dramatically, as it depends on the number
 * of rules (composing netmasks) the entries expand to. We compute the worst
 * case here.
 *
 * In general, for a non-ranged entry or a single composing netmask, we need
 * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that
 * is, each input bit needs four bits of matching data), plus a bucket in the
 * mapping table for each field.
 *
 * Return: worst-case set size in bytes, 0 on any overflow
 */
static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
{
	unsigned long entry_size;
	u64 size;
	int i;

	for (i = 0, entry_size = 0; i < desc->field_count; i++) {
		unsigned long rules;

		if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
			return 0;

		/* Worst-case ranges for each concatenated field: each n-bit
		 * field can expand to up to n * 2 rules in each bucket, and
		 * each rule also needs a mapping bucket.
		 */
		rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
		entry_size += rules *
			      NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) /
			      BITS_PER_BYTE;
		entry_size += rules * sizeof(union nft_pipapo_map_bucket);
	}

	/* Rules in lookup and mapping tables are needed for each entry */
	size = desc->size * entry_size;
	if (size && div_u64(size, desc->size) != entry_size)
		return 0;

	size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2;

	size += sizeof(struct nft_pipapo_field) * desc->field_count;

	return size;
}
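
/* Worked example (illustrative arithmetic, not part of the original source):
 * for an IPv4 address (4 bytes) and port (2 bytes) concatenation on a 64-bit
 * machine, the loop above gives ilog2(32) * 2 == 10 and ilog2(16) * 2 == 8
 * worst-case rules, i.e. (10 + 8) * 256 / 8 == 576 bytes of lookup table and
 * (10 + 8) * 8 == 144 bytes of mapping table per entry, roughly 720 bytes
 * per element before the fixed struct overhead is added.
 */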

#endif /* _NFT_SET_PIPAPO_H */