xref: /openbmc/linux/net/netfilter/xt_HMARK.c (revision e657c18a)
1 /*
2  * xt_HMARK - Netfilter module to set mark by means of hashing
3  *
4  * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
5  * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License version 2 as published by
9  * the Free Software Foundation.
10  */
11 
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 
14 #include <linux/module.h>
15 #include <linux/skbuff.h>
16 #include <linux/icmp.h>
17 
18 #include <linux/netfilter/x_tables.h>
19 #include <linux/netfilter/xt_HMARK.h>
20 
21 #include <net/ip.h>
22 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
23 #include <net/netfilter/nf_conntrack.h>
24 #endif
25 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
26 #include <net/ipv6.h>
27 #include <linux/netfilter_ipv6/ip6_tables.h>
28 #endif
29 
30 MODULE_LICENSE("GPL");
31 MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
32 MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
33 MODULE_ALIAS("ipt_HMARK");
34 MODULE_ALIAS("ip6t_HMARK");
35 
36 struct hmark_tuple {
37 	__be32			src;
38 	__be32			dst;
39 	union hmark_ports	uports;
40 	u8			proto;
41 };
42 
43 static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
44 {
45 	return (addr32[0] & mask[0]) ^
46 	       (addr32[1] & mask[1]) ^
47 	       (addr32[2] & mask[2]) ^
48 	       (addr32[3] & mask[3]);
49 }
50 
51 static inline __be32
52 hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
53 {
54 	switch (l3num) {
55 	case AF_INET:
56 		return *addr32 & *mask;
57 	case AF_INET6:
58 		return hmark_addr6_mask(addr32, mask);
59 	}
60 	return 0;
61 }
62 
63 static inline void hmark_swap_ports(union hmark_ports *uports,
64 				    const struct xt_hmark_info *info)
65 {
66 	union hmark_ports hp;
67 	u16 src, dst;
68 
69 	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
70 	src = ntohs(hp.b16.src);
71 	dst = ntohs(hp.b16.dst);
72 
73 	if (dst > src)
74 		uports->v32 = (dst << 16) | src;
75 	else
76 		uports->v32 = (src << 16) | dst;
77 }
78 
79 static int
80 hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
81 		    const struct xt_hmark_info *info)
82 {
83 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
84 	enum ip_conntrack_info ctinfo;
85 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
86 	struct nf_conntrack_tuple *otuple;
87 	struct nf_conntrack_tuple *rtuple;
88 
89 	if (ct == NULL)
90 		return -1;
91 
92 	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
93 	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
94 
95 	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
96 				 info->src_mask.ip6);
97 	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
98 				 info->dst_mask.ip6);
99 
100 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
101 		return 0;
102 
103 	t->proto = nf_ct_protonum(ct);
104 	if (t->proto != IPPROTO_ICMP) {
105 		t->uports.b16.src = otuple->src.u.all;
106 		t->uports.b16.dst = rtuple->src.u.all;
107 		hmark_swap_ports(&t->uports, info);
108 	}
109 
110 	return 0;
111 #else
112 	return -1;
113 #endif
114 }
115 
116 /* This hash function is endian independent, to ensure consistent hashing if
117  * the cluster is composed of big and little endian systems. */
118 static inline u32
119 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
120 {
121 	u32 hash;
122 	u32 src = ntohl(t->src);
123 	u32 dst = ntohl(t->dst);
124 
125 	if (dst < src)
126 		swap(src, dst);
127 
128 	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
129 	hash = hash ^ (t->proto & info->proto_mask);
130 
131 	return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
132 }
133 
134 static void
135 hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
136 		      struct hmark_tuple *t, const struct xt_hmark_info *info)
137 {
138 	int protoff;
139 
140 	protoff = proto_ports_offset(t->proto);
141 	if (protoff < 0)
142 		return;
143 
144 	nhoff += protoff;
145 	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
146 		return;
147 
148 	hmark_swap_ports(&t->uports, info);
149 }
150 
151 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
152 static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
153 {
154 	struct icmp6hdr *icmp6h, _ih6;
155 
156 	icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
157 	if (icmp6h == NULL)
158 		return 0;
159 
160 	if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
161 		*offset += sizeof(struct icmp6hdr);
162 		return 1;
163 	}
164 	return 0;
165 }
166 
167 static int
168 hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
169 			  const struct xt_hmark_info *info)
170 {
171 	struct ipv6hdr *ip6, _ip6;
172 	int flag = IP6_FH_F_AUTH;
173 	unsigned int nhoff = 0;
174 	u16 fragoff = 0;
175 	int nexthdr;
176 
177 	ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
178 	nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
179 	if (nexthdr < 0)
180 		return 0;
181 	/* No need to check for icmp errors on fragments */
182 	if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
183 		goto noicmp;
184 	/* Use inner header in case of ICMP errors */
185 	if (get_inner6_hdr(skb, &nhoff)) {
186 		ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
187 		if (ip6 == NULL)
188 			return -1;
189 		/* If AH present, use SPI like in ESP. */
190 		flag = IP6_FH_F_AUTH;
191 		nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
192 		if (nexthdr < 0)
193 			return -1;
194 	}
195 noicmp:
196 	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
197 	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
198 
199 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
200 		return 0;
201 
202 	t->proto = nexthdr;
203 	if (t->proto == IPPROTO_ICMPV6)
204 		return 0;
205 
206 	if (flag & IP6_FH_F_FRAG)
207 		return 0;
208 
209 	hmark_set_tuple_ports(skb, nhoff, t, info);
210 	return 0;
211 }
212 
213 static unsigned int
214 hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
215 {
216 	const struct xt_hmark_info *info = par->targinfo;
217 	struct hmark_tuple t;
218 
219 	memset(&t, 0, sizeof(struct hmark_tuple));
220 
221 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
222 		if (hmark_ct_set_htuple(skb, &t, info) < 0)
223 			return XT_CONTINUE;
224 	} else {
225 		if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
226 			return XT_CONTINUE;
227 	}
228 
229 	skb->mark = hmark_hash(&t, info);
230 	return XT_CONTINUE;
231 }
232 #endif
233 
234 static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
235 {
236 	const struct icmphdr *icmph;
237 	struct icmphdr _ih;
238 
239 	/* Not enough header? */
240 	icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
241 	if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
242 		return 0;
243 
244 	/* Error message? */
245 	if (icmph->type != ICMP_DEST_UNREACH &&
246 	    icmph->type != ICMP_SOURCE_QUENCH &&
247 	    icmph->type != ICMP_TIME_EXCEEDED &&
248 	    icmph->type != ICMP_PARAMETERPROB &&
249 	    icmph->type != ICMP_REDIRECT)
250 		return 0;
251 
252 	*nhoff += iphsz + sizeof(_ih);
253 	return 1;
254 }
255 
256 static int
257 hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
258 			  const struct xt_hmark_info *info)
259 {
260 	struct iphdr *ip, _ip;
261 	int nhoff = skb_network_offset(skb);
262 
263 	ip = (struct iphdr *) (skb->data + nhoff);
264 	if (ip->protocol == IPPROTO_ICMP) {
265 		/* Use inner header in case of ICMP errors */
266 		if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
267 			ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
268 			if (ip == NULL)
269 				return -1;
270 		}
271 	}
272 
273 	t->src = ip->saddr & info->src_mask.ip;
274 	t->dst = ip->daddr & info->dst_mask.ip;
275 
276 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
277 		return 0;
278 
279 	t->proto = ip->protocol;
280 
281 	/* ICMP has no ports, skip */
282 	if (t->proto == IPPROTO_ICMP)
283 		return 0;
284 
285 	/* follow-up fragments don't contain ports, skip all fragments */
286 	if (ip->frag_off & htons(IP_MF | IP_OFFSET))
287 		return 0;
288 
289 	hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);
290 
291 	return 0;
292 }
293 
294 static unsigned int
295 hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
296 {
297 	const struct xt_hmark_info *info = par->targinfo;
298 	struct hmark_tuple t;
299 
300 	memset(&t, 0, sizeof(struct hmark_tuple));
301 
302 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
303 		if (hmark_ct_set_htuple(skb, &t, info) < 0)
304 			return XT_CONTINUE;
305 	} else {
306 		if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
307 			return XT_CONTINUE;
308 	}
309 
310 	skb->mark = hmark_hash(&t, info);
311 	return XT_CONTINUE;
312 }
313 
314 static int hmark_tg_check(const struct xt_tgchk_param *par)
315 {
316 	const struct xt_hmark_info *info = par->targinfo;
317 	const char *errmsg = "proto mask must be zero with L3 mode";
318 
319 	if (!info->hmodulus)
320 		return -EINVAL;
321 
322 	if (info->proto_mask &&
323 	    (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
324 		goto err;
325 
326 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
327 	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
328 			     XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
329 		return -EINVAL;
330 
331 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
332 	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
333 			     XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
334 		errmsg = "spi-set and port-set can't be combined";
335 		goto err;
336 	}
337 	return 0;
338 err:
339 	pr_info_ratelimited("%s\n", errmsg);
340 	return -EINVAL;
341 }
342 
343 static struct xt_target hmark_tg_reg[] __read_mostly = {
344 	{
345 		.name		= "HMARK",
346 		.family		= NFPROTO_IPV4,
347 		.target		= hmark_tg_v4,
348 		.targetsize	= sizeof(struct xt_hmark_info),
349 		.checkentry	= hmark_tg_check,
350 		.me		= THIS_MODULE,
351 	},
352 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
353 	{
354 		.name		= "HMARK",
355 		.family		= NFPROTO_IPV6,
356 		.target		= hmark_tg_v6,
357 		.targetsize	= sizeof(struct xt_hmark_info),
358 		.checkentry	= hmark_tg_check,
359 		.me		= THIS_MODULE,
360 	},
361 #endif
362 };
363 
364 static int __init hmark_tg_init(void)
365 {
366 	return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
367 }
368 
369 static void __exit hmark_tg_exit(void)
370 {
371 	xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
372 }
373 
/* Hook the registration helpers into module load and unload. */
module_init(hmark_tg_init);
module_exit(hmark_tg_exit);
376