/*
 * xt_HMARK - Netfilter module to set mark by means of hashing
 *
 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_HMARK.h>

#include <net/ip.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#include <net/ipv6.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#endif

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
MODULE_ALIAS("ipt_HMARK");
MODULE_ALIAS("ip6t_HMARK");

/* Flow descriptor that is hashed to compute the mark: masked L3
 * addresses (IPv6 addresses are pre-folded to 32 bits), the
 * masked/ordered L4 ports (or SPI) and the L4 protocol number.
 */
struct hmark_tuple {
	__be32 src;			/* masked (IPv6: folded) source address */
	__be32 dst;			/* masked (IPv6: folded) destination address */
	union hmark_ports uports;	/* masked, direction-normalised ports */
	u8 proto;			/* L4 protocol number */
};

/* Fold a masked 128-bit IPv6 address into 32 bits by XOR-ing its words. */
static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
{
	return (addr32[0] & mask[0]) ^
	       (addr32[1] & mask[1]) ^
	       (addr32[2] & mask[2]) ^
	       (addr32[3] & mask[3]);
}

/* Mask an address of either family down to 32 bits.  For IPv4 only the
 * first word of @addr32/@mask is used; unknown families fold to zero.
 */
static inline __be32
hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
{
	switch (l3num) {
	case AF_INET:
		return *addr32 & *mask;
	case AF_INET6:
		return hmark_addr6_mask(addr32, mask);
	}
	return 0;
}

/* Apply the configured port mask/set, then store the two 16-bit ports in
 * numerically descending order (host byte order) so that both directions
 * of a flow yield the same uports value — and hence the same hash.
 */
static inline void hmark_swap_ports(union hmark_ports *uports,
				    const struct xt_hmark_info *info)
{
	union hmark_ports hp;
	u16 src, dst;

	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
	src = ntohs(hp.b16.src);
	dst = ntohs(hp.b16.dst);

	if (dst > src)
		uports->v32 = (dst << 16) | src;
	else
		uports->v32 = (src << 16) | dst;
}

/* Build the hash tuple from the packet's conntrack entry instead of its
 * headers.  Returns 0 on success, -1 when no conntrack is attached (or
 * conntrack support is compiled out); the caller then leaves the packet
 * unmarked.
 */
static int
hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
		    const struct xt_hmark_info *info)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	struct nf_conntrack_tuple *otuple;
	struct nf_conntrack_tuple *rtuple;

	if (ct == NULL)
		return -1;

	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	/* u3.ip6 is passed for both families; hmark_addr_mask() selects
	 * how many words to use based on l3num.  The reply tuple's source
	 * is the (possibly NAT-ed) destination of the original direction.
	 */
	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
				 info->src_mask.ip6);
	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
				 info->dst_mask.ip6);

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = nf_ct_protonum(ct);
	/* ICMP carries no ports; uports stays zero from the caller's memset */
	if (t->proto != IPPROTO_ICMP) {
		t->uports.b16.src = otuple->src.u.all;
		t->uports.b16.dst = rtuple->src.u.all;
		hmark_swap_ports(&t->uports, info);
	}

	return 0;
#else
	return -1;
#endif
}

/* This hash function is endian independent, to ensure consistent hashing if
 * the cluster is composed of big and little endian systems.
*/
static inline u32
hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
{
	u32 hash;
	u32 src = ntohl(t->src);	/* host order: same value on any endianness */
	u32 dst = ntohl(t->dst);

	/* Order the addresses so both flow directions hash identically. */
	if (dst < src)
		swap(src, dst);

	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
	hash = hash ^ (t->proto & info->proto_mask);

	/* Scale into [0, hmodulus) and add the configured base offset. */
	return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
}

/* Copy the L4 ports found at @nhoff into the tuple and normalise them via
 * hmark_swap_ports().  For protocols without a known port offset, or when
 * the packet is too short, uports is left untouched (zero from the
 * caller's memset).
 */
static void
hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
		      struct hmark_tuple *t, const struct xt_hmark_info *info)
{
	int protoff;

	protoff = proto_ports_offset(t->proto);
	if (protoff < 0)
		return;

	nhoff += protoff;
	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
		return;

	hmark_swap_ports(&t->uports, info);
}

#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
/* If the ICMPv6 header at @*offset is an error message, advance @*offset
 * past it (to the embedded original packet) and return 1; otherwise
 * return 0.  ICMPv6 types 1..127 are errors; informational types have
 * the high bit set.
 */
static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
{
	struct icmp6hdr *icmp6h, _ih6;

	icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
	if (icmp6h == NULL)
		return 0;

	if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
		*offset += sizeof(struct icmp6hdr);
		return 1;
	}
	return 0;
}

/* Fill the hash tuple from an IPv6 packet's headers.  ICMPv6 errors are
 * hashed on the embedded (inner) packet so they get the same mark as the
 * flow that triggered them.  Returns 0 on success, -1 if the inner
 * headers cannot be parsed.
 */
static int
hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
			  const struct xt_hmark_info *info)
{
	struct ipv6hdr *ip6, _ip6;
	int flag = IP6_FH_F_AUTH;	/* if AH present, hash on its SPI like ESP */
	unsigned int nhoff = 0;
	u16 fragoff = 0;
	int nexthdr;

	ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
	nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
	if (nexthdr < 0)
		return 0;
	/* No need to check for icmp errors on fragments */
	if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
		goto noicmp;
	/* Use inner header in case of ICMP errors */
	if (get_inner6_hdr(skb, &nhoff)) {
		ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
		if (ip6 == NULL)
			return -1;
		/* If AH present, use SPI like in ESP. */
		flag = IP6_FH_F_AUTH;
		nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
		if (nexthdr < 0)
			return -1;
	}
noicmp:
	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = nexthdr;
	/* ICMPv6 has no ports; hash on L3 + proto only */
	if (t->proto == IPPROTO_ICMPV6)
		return 0;

	/* fragments carry no reliable L4 ports */
	if (flag & IP6_FH_F_FRAG)
		return 0;

	hmark_set_tuple_ports(skb, nhoff, t, info);
	return 0;
}

/* IPv6 target entry point: compute the hash and set skb->mark.
 * Always returns XT_CONTINUE; on tuple-build failure the mark is
 * simply left untouched.
 */
static unsigned int
hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct xt_hmark_info *info = par->targinfo;
	struct hmark_tuple t;

	memset(&t, 0, sizeof(struct hmark_tuple));

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
		if (hmark_ct_set_htuple(skb, &t, info) < 0)
			return XT_CONTINUE;
	} else {
		if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
			return XT_CONTINUE;
	}

	skb->mark = hmark_hash(&t, info);
	return XT_CONTINUE;
}
#endif

/* If the ICMP header following an IP header of @iphsz bytes is an error
 * message, advance @*nhoff to the embedded original packet and return 1;
 * otherwise return 0.
 */
static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
{
	const struct icmphdr *icmph;
	struct icmphdr _ih;

	/* Not enough header? */
	icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
	if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
		return 0;

	/* Error message? */
	if (icmph->type != ICMP_DEST_UNREACH &&
	    icmph->type != ICMP_SOURCE_QUENCH &&
	    icmph->type != ICMP_TIME_EXCEEDED &&
	    icmph->type != ICMP_PARAMETERPROB &&
	    icmph->type != ICMP_REDIRECT)
		return 0;

	*nhoff += iphsz + sizeof(_ih);
	return 1;
}

/* Fill the hash tuple from an IPv4 packet's headers.  ICMP errors are
 * hashed on the embedded (inner) packet.  Returns 0 on success, -1 if
 * the inner header cannot be read.
 */
static int
hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
			  const struct xt_hmark_info *info)
{
	struct iphdr *ip, _ip;
	int nhoff = skb_network_offset(skb);

	ip = (struct iphdr *) (skb->data + nhoff);
	if (ip->protocol == IPPROTO_ICMP) {
		/* Use inner header in case of ICMP errors */
		if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
			ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
			if (ip == NULL)
				return -1;
		}
	}

	t->src = ip->saddr & info->src_mask.ip;
	t->dst = ip->daddr & info->dst_mask.ip;

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = ip->protocol;

	/* ICMP has no ports, skip */
	if (t->proto == IPPROTO_ICMP)
		return 0;

	/* follow-up fragments don't contain ports, skip all fragments */
	if (ip->frag_off & htons(IP_MF | IP_OFFSET))
		return 0;

	hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);

	return 0;
}

/* IPv4 target entry point: compute the hash and set skb->mark.
 * Always returns XT_CONTINUE; on tuple-build failure the mark is
 * simply left untouched.
 */
static unsigned int
hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct xt_hmark_info *info = par->targinfo;
	struct hmark_tuple t;

	memset(&t, 0, sizeof(struct hmark_tuple));

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
		if (hmark_ct_set_htuple(skb, &t, info) < 0)
			return XT_CONTINUE;
	} else {
		if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
			return XT_CONTINUE;
	}

	skb->mark = hmark_hash(&t, info);
	return XT_CONTINUE;
}

/* Validate user-supplied target options at rule insertion time:
 * the modulus must be non-zero, L3 mode excludes a protocol mask, and
 * SPI masking/setting is mutually exclusive with port masking/setting.
 * Returns 0 if the options are acceptable, -EINVAL otherwise.
 */
static int hmark_tg_check(const struct xt_tgchk_param *par)
{
	const struct xt_hmark_info *info = par->targinfo;
	const char *errmsg = "proto mask must be zero with L3 mode";

	if (!info->hmodulus)
		return -EINVAL;

	if (info->proto_mask &&
	    (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
		goto err;

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
			    XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
		return -EINVAL;

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
			    XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
		errmsg = "spi-set and port-set can't be combined";
		goto err;
	}
	return 0;
err:
	pr_info_ratelimited("%s\n", errmsg);
	return -EINVAL;
}

/* One xt_target registration per supported address family. */
static struct xt_target hmark_tg_reg[] __read_mostly = {
	{
		.name = "HMARK",
		.family = NFPROTO_IPV4,
		.target = hmark_tg_v4,
		.targetsize = sizeof(struct xt_hmark_info),
		.checkentry = hmark_tg_check,
		.me = THIS_MODULE,
	},
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
	{
		.name = "HMARK",
		.family = NFPROTO_IPV6,
		.target = hmark_tg_v6,
		.targetsize = sizeof(struct xt_hmark_info),
		.checkentry = hmark_tg_check,
		.me = THIS_MODULE,
	},
#endif
};

static int __init hmark_tg_init(void)
{
	return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
}

static void __exit hmark_tg_exit(void)
{
	xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
}

module_init(hmark_tg_init);
module_exit(hmark_tg_exit);