xref: /openbmc/linux/net/netfilter/xt_HMARK.c (revision f66501dc)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * xt_HMARK - Netfilter module to set mark by means of hashing
4  *
5  * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
6  * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
7  */
8 
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 
11 #include <linux/module.h>
12 #include <linux/skbuff.h>
13 #include <linux/icmp.h>
14 
15 #include <linux/netfilter/x_tables.h>
16 #include <linux/netfilter/xt_HMARK.h>
17 
18 #include <net/ip.h>
19 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
20 #include <net/netfilter/nf_conntrack.h>
21 #endif
22 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
23 #include <net/ipv6.h>
24 #include <linux/netfilter_ipv6/ip6_tables.h>
25 #endif
26 
/* Module metadata; the aliases let iptables/ip6tables autoload this target. */
27 MODULE_LICENSE("GPL");
28 MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
29 MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
30 MODULE_ALIAS("ipt_HMARK");
31 MODULE_ALIAS("ip6t_HMARK");
32 
/* Canonical flow tuple the mark is hashed from: masked (and for IPv6,
 * XOR-folded) L3 addresses, the normalized L4 port pair, and the
 * transport protocol number (left zero in L3-only mode).
 */
33 struct hmark_tuple {
34 	__be32			src;
35 	__be32			dst;
36 	union hmark_ports	uports;
37 	u8			proto;
38 };
39 
40 static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
41 {
42 	return (addr32[0] & mask[0]) ^
43 	       (addr32[1] & mask[1]) ^
44 	       (addr32[2] & mask[2]) ^
45 	       (addr32[3] & mask[3]);
46 }
47 
48 static inline __be32
49 hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
50 {
51 	switch (l3num) {
52 	case AF_INET:
53 		return *addr32 & *mask;
54 	case AF_INET6:
55 		return hmark_addr6_mask(addr32, mask);
56 	}
57 	return 0;
58 }
59 
60 static inline void hmark_swap_ports(union hmark_ports *uports,
61 				    const struct xt_hmark_info *info)
62 {
63 	union hmark_ports hp;
64 	u16 src, dst;
65 
66 	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
67 	src = ntohs(hp.b16.src);
68 	dst = ntohs(hp.b16.dst);
69 
70 	if (dst > src)
71 		uports->v32 = (dst << 16) | src;
72 	else
73 		uports->v32 = (src << 16) | dst;
74 }
75 
76 /* Fill the hash tuple from the packet's conntrack entry, so NATed flows
77  * hash on their conntrack-tracked endpoints.  Returns 0 on success, -1
78  * when there is no conntrack entry (or conntrack is compiled out).
79  */
static int
hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
		    const struct xt_hmark_info *info)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	struct nf_conntrack_tuple *otuple;
	struct nf_conntrack_tuple *rtuple;

	if (ct == NULL)
		return -1;

	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	/* Passing u3.ip6 for both families works because hmark_addr_mask()
	 * switches on l3num and reads only the first word for AF_INET.
	 */
	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
				 info->src_mask.ip6);
	/* Reply-direction source is the flow's destination (post-NAT view). */
	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
				 info->dst_mask.ip6);

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = nf_ct_protonum(ct);
	/* ICMP has no ports; uports stays as the caller zeroed it. */
	if (t->proto != IPPROTO_ICMP) {
		t->uports.b16.src = otuple->src.u.all;
		t->uports.b16.dst = rtuple->src.u.all;
		hmark_swap_ports(&t->uports, info);
	}

	return 0;
#else
	return -1;
#endif
}
112 
113 /* This hash function is endian independent, to ensure consistent hashing if
114  * the cluster is composed of big and little endian systems. */
115 static inline u32
116 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
117 {
118 	u32 hash;
119 	u32 src = ntohl(t->src);
120 	u32 dst = ntohl(t->dst);
121 
122 	if (dst < src)
123 		swap(src, dst);
124 
125 	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
126 	hash = hash ^ (t->proto & info->proto_mask);
127 
128 	return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
129 }
130 
131 static void
132 hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
133 		      struct hmark_tuple *t, const struct xt_hmark_info *info)
134 {
135 	int protoff;
136 
137 	protoff = proto_ports_offset(t->proto);
138 	if (protoff < 0)
139 		return;
140 
141 	nhoff += protoff;
142 	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
143 		return;
144 
145 	hmark_swap_ports(&t->uports, info);
146 }
147 
148 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
149 static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
150 {
151 	struct icmp6hdr *icmp6h, _ih6;
152 
153 	icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
154 	if (icmp6h == NULL)
155 		return 0;
156 
157 	if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
158 		*offset += sizeof(struct icmp6hdr);
159 		return 1;
160 	}
161 	return 0;
162 }
163 
164 /* Build the hash tuple from an IPv6 packet.  ICMPv6 errors are hashed on
165  * the embedded (inner) packet so errors follow their flow; fragments are
166  * hashed on addresses only.  Returns 0 on success, -1 when the inner
167  * headers cannot be read.
168  */
static int
hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
			  const struct xt_hmark_info *info)
{
	struct ipv6hdr *ip6, _ip6;
	int flag = IP6_FH_F_AUTH;	/* stop at AH so its SPI is hashed */
	unsigned int nhoff = 0;
	u16 fragoff = 0;
	int nexthdr;

	/* NOTE(review): direct skb->data access assumes the network header
	 * is in the linear area at this hook — confirm for all entry paths.
	 */
	ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
	nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
	if (nexthdr < 0)
		return 0;
	/* No need to check for icmp errors on fragments */
	if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
		goto noicmp;
	/* Use inner header in case of ICMP errors */
	if (get_inner6_hdr(skb, &nhoff)) {
		ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
		if (ip6 == NULL)
			return -1;
		/* If AH present, use SPI like in ESP. */
		flag = IP6_FH_F_AUTH;
		/* Re-walk the extension headers of the embedded packet;
		 * nhoff/flag now describe the inner header chain.
		 */
		nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
		if (nexthdr < 0)
			return -1;
	}
noicmp:
	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);

	/* L3-only mode hashes addresses alone. */
	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = nexthdr;
	/* Plain ICMPv6 (non-error) carries no ports. */
	if (t->proto == IPPROTO_ICMPV6)
		return 0;

	/* Fragments may not carry the transport header. */
	if (flag & IP6_FH_F_FRAG)
		return 0;

	hmark_set_tuple_ports(skb, nhoff, t, info);
	return 0;
}
209 
210 static unsigned int
211 hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
212 {
213 	const struct xt_hmark_info *info = par->targinfo;
214 	struct hmark_tuple t;
215 
216 	memset(&t, 0, sizeof(struct hmark_tuple));
217 
218 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
219 		if (hmark_ct_set_htuple(skb, &t, info) < 0)
220 			return XT_CONTINUE;
221 	} else {
222 		if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
223 			return XT_CONTINUE;
224 	}
225 
226 	skb->mark = hmark_hash(&t, info);
227 	return XT_CONTINUE;
228 }
229 #endif
230 
231 static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
232 {
233 	const struct icmphdr *icmph;
234 	struct icmphdr _ih;
235 
236 	/* Not enough header? */
237 	icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
238 	if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
239 		return 0;
240 
241 	/* Error message? */
242 	if (icmph->type != ICMP_DEST_UNREACH &&
243 	    icmph->type != ICMP_SOURCE_QUENCH &&
244 	    icmph->type != ICMP_TIME_EXCEEDED &&
245 	    icmph->type != ICMP_PARAMETERPROB &&
246 	    icmph->type != ICMP_REDIRECT)
247 		return 0;
248 
249 	*nhoff += iphsz + sizeof(_ih);
250 	return 1;
251 }
252 
253 /* Build the hash tuple from an IPv4 packet.  ICMP errors are hashed on
254  * the embedded (inner) packet; fragments are hashed on addresses only.
255  * Returns 0 on success, -1 when the inner header cannot be read.
256  */
static int
hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
			  const struct xt_hmark_info *info)
{
	struct iphdr *ip, _ip;
	int nhoff = skb_network_offset(skb);

	/* NOTE(review): direct skb->data access assumes the network header
	 * is in the linear area at this hook — confirm for all entry paths.
	 */
	ip = (struct iphdr *) (skb->data + nhoff);
	if (ip->protocol == IPPROTO_ICMP) {
		/* Use inner header in case of ICMP errors */
		if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
			/* nhoff now points at the embedded IP header. */
			ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
			if (ip == NULL)
				return -1;
		}
	}

	t->src = ip->saddr & info->src_mask.ip;
	t->dst = ip->daddr & info->dst_mask.ip;

	/* L3-only mode hashes addresses alone. */
	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = ip->protocol;

	/* ICMP has no ports, skip */
	if (t->proto == IPPROTO_ICMP)
		return 0;

	/* follow-up fragments don't contain ports, skip all fragments */
	if (ip->frag_off & htons(IP_MF | IP_OFFSET))
		return 0;

	/* Transport header follows the (possibly inner) IP header. */
	hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);

	return 0;
}
290 
291 static unsigned int
292 hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
293 {
294 	const struct xt_hmark_info *info = par->targinfo;
295 	struct hmark_tuple t;
296 
297 	memset(&t, 0, sizeof(struct hmark_tuple));
298 
299 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
300 		if (hmark_ct_set_htuple(skb, &t, info) < 0)
301 			return XT_CONTINUE;
302 	} else {
303 		if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
304 			return XT_CONTINUE;
305 	}
306 
307 	skb->mark = hmark_hash(&t, info);
308 	return XT_CONTINUE;
309 }
310 
311 static int hmark_tg_check(const struct xt_tgchk_param *par)
312 {
313 	const struct xt_hmark_info *info = par->targinfo;
314 	const char *errmsg = "proto mask must be zero with L3 mode";
315 
316 	if (!info->hmodulus)
317 		return -EINVAL;
318 
319 	if (info->proto_mask &&
320 	    (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
321 		goto err;
322 
323 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
324 	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
325 			     XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
326 		return -EINVAL;
327 
328 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
329 	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
330 			     XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
331 		errmsg = "spi-set and port-set can't be combined";
332 		goto err;
333 	}
334 	return 0;
335 err:
336 	pr_info_ratelimited("%s\n", errmsg);
337 	return -EINVAL;
338 }
339 
340 /* One target registration per address family; the IPv6 entry exists only
341  * when ip6tables support is built in or as a module.
342  */
static struct xt_target hmark_tg_reg[] __read_mostly = {
	{
		.name		= "HMARK",
		.family		= NFPROTO_IPV4,
		.target		= hmark_tg_v4,
		.targetsize	= sizeof(struct xt_hmark_info),
		.checkentry	= hmark_tg_check,
		.me		= THIS_MODULE,
	},
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
	{
		.name		= "HMARK",
		.family		= NFPROTO_IPV6,
		.target		= hmark_tg_v6,
		.targetsize	= sizeof(struct xt_hmark_info),
		.checkentry	= hmark_tg_check,
		.me		= THIS_MODULE,
	},
#endif
};
360 
361 /* Register all HMARK targets with the xtables core on module load. */
static int __init hmark_tg_init(void)
{
	return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
}
365 
366 /* Unregister the HMARK targets on module unload. */
static void __exit hmark_tg_exit(void)
{
	xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
}

module_init(hmark_tg_init);
module_exit(hmark_tg_exit);
373