1 /* 2 * xt_HMARK - Netfilter module to set mark by means of hashing 3 * 4 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com> 5 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 as published by 9 * the Free Software Foundation. 10 */ 11 12 #include <linux/module.h> 13 #include <linux/skbuff.h> 14 #include <linux/icmp.h> 15 16 #include <linux/netfilter/x_tables.h> 17 #include <linux/netfilter/xt_HMARK.h> 18 19 #include <net/ip.h> 20 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 21 #include <net/netfilter/nf_conntrack.h> 22 #endif 23 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 24 #include <net/ipv6.h> 25 #include <linux/netfilter_ipv6/ip6_tables.h> 26 #endif 27 28 MODULE_LICENSE("GPL"); 29 MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>"); 30 MODULE_DESCRIPTION("Xtables: packet marking using hash calculation"); 31 MODULE_ALIAS("ipt_HMARK"); 32 MODULE_ALIAS("ip6t_HMARK"); 33 34 struct hmark_tuple { 35 __be32 src; 36 __be32 dst; 37 union hmark_ports uports; 38 u8 proto; 39 }; 40 41 static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask) 42 { 43 return (addr32[0] & mask[0]) ^ 44 (addr32[1] & mask[1]) ^ 45 (addr32[2] & mask[2]) ^ 46 (addr32[3] & mask[3]); 47 } 48 49 static inline __be32 50 hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask) 51 { 52 switch (l3num) { 53 case AF_INET: 54 return *addr32 & *mask; 55 case AF_INET6: 56 return hmark_addr6_mask(addr32, mask); 57 } 58 return 0; 59 } 60 61 static inline void hmark_swap_ports(union hmark_ports *uports, 62 const struct xt_hmark_info *info) 63 { 64 union hmark_ports hp; 65 u16 src, dst; 66 67 hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32; 68 src = ntohs(hp.b16.src); 69 dst = ntohs(hp.b16.dst); 70 71 if (dst > src) 72 uports->v32 = (dst << 16) | src; 73 else 74 uports->v32 = (src << 16) | dst; 75 } 76 77 static int 78 hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, 79 const struct xt_hmark_info *info) 80 { 81 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 82 enum ip_conntrack_info ctinfo; 83 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 84 struct nf_conntrack_tuple *otuple; 85 struct nf_conntrack_tuple *rtuple; 86 87 if (ct == NULL || nf_ct_is_untracked(ct)) 88 return -1; 89 90 otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 91 rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; 92 93 t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6, 94 info->src_mask.ip6); 95 t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6, 96 info->dst_mask.ip6); 97 98 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 99 return 0; 100 101 t->proto = nf_ct_protonum(ct); 102 if (t->proto != IPPROTO_ICMP) { 103 t->uports.b16.src = otuple->src.u.all; 104 t->uports.b16.dst = rtuple->src.u.all; 105 hmark_swap_ports(&t->uports, info); 106 } 107 108 return 0; 109 #else 110 return -1; 111 #endif 112 } 113 114 /* This hash function is endian independent, to ensure consistent hashing if 115 * the cluster is composed of big and little endian systems. */ 116 static inline u32 117 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) 118 { 119 u32 hash; 120 u32 src = ntohl(t->src); 121 u32 dst = ntohl(t->dst); 122 123 if (dst < src) 124 swap(src, dst); 125 126 hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); 127 hash = hash ^ (t->proto & info->proto_mask); 128 129 return reciprocal_scale(hash, info->hmodulus) + info->hoffset; 130 } 131 132 static void 133 hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, 134 struct hmark_tuple *t, const struct xt_hmark_info *info) 135 { 136 int protoff; 137 138 protoff = proto_ports_offset(t->proto); 139 if (protoff < 0) 140 return; 141 142 nhoff += protoff; 143 if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) 144 return; 145 146 hmark_swap_ports(&t->uports, info); 147 } 148 149 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 150 static int get_inner6_hdr(const struct sk_buff *skb, int *offset) 151 { 152 struct icmp6hdr *icmp6h, _ih6; 153 154 icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6); 155 if (icmp6h == NULL) 156 return 0; 157 158 if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) { 159 *offset += sizeof(struct icmp6hdr); 160 return 1; 161 } 162 return 0; 163 } 164 165 static int 166 hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, 167 const struct xt_hmark_info *info) 168 { 169 struct ipv6hdr *ip6, _ip6; 170 int flag = IP6_FH_F_AUTH; 171 unsigned int nhoff = 0; 172 u16 fragoff = 0; 173 int nexthdr; 174 175 ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb)); 176 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); 177 if (nexthdr < 0) 178 return 0; 179 /* No need to check for icmp errors on fragments */ 180 if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) 181 goto noicmp; 182 /* Use inner header in case of ICMP errors */ 183 if (get_inner6_hdr(skb, &nhoff)) { 184 ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6); 185 if (ip6 == NULL) 186 return -1; 187 /* If AH present, use SPI like in ESP. */ 188 flag = IP6_FH_F_AUTH; 189 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); 190 if (nexthdr < 0) 191 return -1; 192 } 193 noicmp: 194 t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6); 195 t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6); 196 197 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 198 return 0; 199 200 t->proto = nexthdr; 201 if (t->proto == IPPROTO_ICMPV6) 202 return 0; 203 204 if (flag & IP6_FH_F_FRAG) 205 return 0; 206 207 hmark_set_tuple_ports(skb, nhoff, t, info); 208 return 0; 209 } 210 211 static unsigned int 212 hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par) 213 { 214 const struct xt_hmark_info *info = par->targinfo; 215 struct hmark_tuple t; 216 217 memset(&t, 0, sizeof(struct hmark_tuple)); 218 219 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { 220 if (hmark_ct_set_htuple(skb, &t, info) < 0) 221 return XT_CONTINUE; 222 } else { 223 if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0) 224 return XT_CONTINUE; 225 } 226 227 skb->mark = hmark_hash(&t, info); 228 return XT_CONTINUE; 229 } 230 #endif 231 232 static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff) 233 { 234 const struct icmphdr *icmph; 235 struct icmphdr _ih; 236 237 /* Not enough header? */ 238 icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih); 239 if (icmph == NULL || icmph->type > NR_ICMP_TYPES) 240 return 0; 241 242 /* Error message? */ 243 if (icmph->type != ICMP_DEST_UNREACH && 244 icmph->type != ICMP_SOURCE_QUENCH && 245 icmph->type != ICMP_TIME_EXCEEDED && 246 icmph->type != ICMP_PARAMETERPROB && 247 icmph->type != ICMP_REDIRECT) 248 return 0; 249 250 *nhoff += iphsz + sizeof(_ih); 251 return 1; 252 } 253 254 static int 255 hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, 256 const struct xt_hmark_info *info) 257 { 258 struct iphdr *ip, _ip; 259 int nhoff = skb_network_offset(skb); 260 261 ip = (struct iphdr *) (skb->data + nhoff); 262 if (ip->protocol == IPPROTO_ICMP) { 263 /* Use inner header in case of ICMP errors */ 264 if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) { 265 ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip); 266 if (ip == NULL) 267 return -1; 268 } 269 } 270 271 t->src = ip->saddr & info->src_mask.ip; 272 t->dst = ip->daddr & info->dst_mask.ip; 273 274 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 275 return 0; 276 277 t->proto = ip->protocol; 278 279 /* ICMP has no ports, skip */ 280 if (t->proto == IPPROTO_ICMP) 281 return 0; 282 283 /* follow-up fragments don't contain ports, skip all fragments */ 284 if (ip->frag_off & htons(IP_MF | IP_OFFSET)) 285 return 0; 286 287 hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info); 288 289 return 0; 290 } 291 292 static unsigned int 293 hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) 294 { 295 const struct xt_hmark_info *info = par->targinfo; 296 struct hmark_tuple t; 297 298 memset(&t, 0, sizeof(struct hmark_tuple)); 299 300 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { 301 if (hmark_ct_set_htuple(skb, &t, info) < 0) 302 return XT_CONTINUE; 303 } else { 304 if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0) 305 return XT_CONTINUE; 306 } 307 308 skb->mark = hmark_hash(&t, info); 309 return XT_CONTINUE; 310 } 311 312 static int hmark_tg_check(const struct xt_tgchk_param *par) 313 { 314 const struct xt_hmark_info *info = par->targinfo; 315 316 if (!info->hmodulus) { 317 pr_info("xt_HMARK: hash modulus can't be zero\n"); 318 return -EINVAL; 319 } 320 if (info->proto_mask && 321 (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { 322 pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); 323 return -EINVAL; 324 } 325 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && 326 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | 327 XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) { 328 pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n"); 329 return -EINVAL; 330 } 331 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && 332 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | 333 XT_HMARK_FLAG(XT_HMARK_DPORT)))) { 334 pr_info("xt_HMARK: spi-set and port-set can't be combined\n"); 335 return -EINVAL; 336 } 337 return 0; 338 } 339 340 static struct xt_target hmark_tg_reg[] __read_mostly = { 341 { 342 .name = "HMARK", 343 .family = NFPROTO_IPV4, 344 .target = hmark_tg_v4, 345 .targetsize = sizeof(struct xt_hmark_info), 346 .checkentry = hmark_tg_check, 347 .me = THIS_MODULE, 348 }, 349 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 350 { 351 .name = "HMARK", 352 .family = NFPROTO_IPV6, 353 .target = hmark_tg_v6, 354 .targetsize = sizeof(struct xt_hmark_info), 355 .checkentry = hmark_tg_check, 356 .me = THIS_MODULE, 357 }, 358 #endif 359 }; 360 361 static int __init hmark_tg_init(void) 362 { 363 return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); 364 } 365 366 static void __exit hmark_tg_exit(void) 367 { 368 xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); 369 } 370 371 module_init(hmark_tg_init); 372 module_exit(hmark_tg_exit); 373