1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * xt_HMARK - Netfilter module to set mark by means of hashing 4 * 5 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com> 6 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> 7 */ 8 9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10 11 #include <linux/module.h> 12 #include <linux/skbuff.h> 13 #include <linux/icmp.h> 14 15 #include <linux/netfilter/x_tables.h> 16 #include <linux/netfilter/xt_HMARK.h> 17 18 #include <net/ip.h> 19 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 20 #include <net/netfilter/nf_conntrack.h> 21 #endif 22 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 23 #include <net/ipv6.h> 24 #include <linux/netfilter_ipv6/ip6_tables.h> 25 #endif 26 27 MODULE_LICENSE("GPL"); 28 MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>"); 29 MODULE_DESCRIPTION("Xtables: packet marking using hash calculation"); 30 MODULE_ALIAS("ipt_HMARK"); 31 MODULE_ALIAS("ip6t_HMARK"); 32 33 struct hmark_tuple { 34 __be32 src; 35 __be32 dst; 36 union hmark_ports uports; 37 u8 proto; 38 }; 39 40 static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask) 41 { 42 return (addr32[0] & mask[0]) ^ 43 (addr32[1] & mask[1]) ^ 44 (addr32[2] & mask[2]) ^ 45 (addr32[3] & mask[3]); 46 } 47 48 static inline __be32 49 hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask) 50 { 51 switch (l3num) { 52 case AF_INET: 53 return *addr32 & *mask; 54 case AF_INET6: 55 return hmark_addr6_mask(addr32, mask); 56 } 57 return 0; 58 } 59 60 static inline void hmark_swap_ports(union hmark_ports *uports, 61 const struct xt_hmark_info *info) 62 { 63 union hmark_ports hp; 64 u16 src, dst; 65 66 hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32; 67 src = ntohs(hp.b16.src); 68 dst = ntohs(hp.b16.dst); 69 70 if (dst > src) 71 uports->v32 = (dst << 16) | src; 72 else 73 uports->v32 = (src << 16) | dst; 74 } 75 76 static int 77 hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, 78 const struct xt_hmark_info *info) 79 { 80 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 81 enum ip_conntrack_info ctinfo; 82 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 83 struct nf_conntrack_tuple *otuple; 84 struct nf_conntrack_tuple *rtuple; 85 86 if (ct == NULL) 87 return -1; 88 89 otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 90 rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; 91 92 t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6, 93 info->src_mask.ip6); 94 t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6, 95 info->dst_mask.ip6); 96 97 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 98 return 0; 99 100 t->proto = nf_ct_protonum(ct); 101 if (t->proto != IPPROTO_ICMP) { 102 t->uports.b16.src = otuple->src.u.all; 103 t->uports.b16.dst = rtuple->src.u.all; 104 hmark_swap_ports(&t->uports, info); 105 } 106 107 return 0; 108 #else 109 return -1; 110 #endif 111 } 112 113 /* This hash function is endian independent, to ensure consistent hashing if 114 * the cluster is composed of big and little endian systems. */ 115 static inline u32 116 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) 117 { 118 u32 hash; 119 u32 src = ntohl(t->src); 120 u32 dst = ntohl(t->dst); 121 122 if (dst < src) 123 swap(src, dst); 124 125 hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); 126 hash = hash ^ (t->proto & info->proto_mask); 127 128 return reciprocal_scale(hash, info->hmodulus) + info->hoffset; 129 } 130 131 static void 132 hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, 133 struct hmark_tuple *t, const struct xt_hmark_info *info) 134 { 135 int protoff; 136 137 protoff = proto_ports_offset(t->proto); 138 if (protoff < 0) 139 return; 140 141 nhoff += protoff; 142 if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) 143 return; 144 145 hmark_swap_ports(&t->uports, info); 146 } 147 148 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 149 static int get_inner6_hdr(const struct sk_buff *skb, int *offset) 150 { 151 struct icmp6hdr *icmp6h, _ih6; 152 153 icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6); 154 if (icmp6h == NULL) 155 return 0; 156 157 if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) { 158 *offset += sizeof(struct icmp6hdr); 159 return 1; 160 } 161 return 0; 162 } 163 164 static int 165 hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, 166 const struct xt_hmark_info *info) 167 { 168 struct ipv6hdr *ip6, _ip6; 169 int flag = IP6_FH_F_AUTH; 170 unsigned int nhoff = 0; 171 u16 fragoff = 0; 172 int nexthdr; 173 174 ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb)); 175 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); 176 if (nexthdr < 0) 177 return 0; 178 /* No need to check for icmp errors on fragments */ 179 if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) 180 goto noicmp; 181 /* Use inner header in case of ICMP errors */ 182 if (get_inner6_hdr(skb, &nhoff)) { 183 ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6); 184 if (ip6 == NULL) 185 return -1; 186 /* If AH present, use SPI like in ESP. */ 187 flag = IP6_FH_F_AUTH; 188 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); 189 if (nexthdr < 0) 190 return -1; 191 } 192 noicmp: 193 t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6); 194 t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6); 195 196 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 197 return 0; 198 199 t->proto = nexthdr; 200 if (t->proto == IPPROTO_ICMPV6) 201 return 0; 202 203 if (flag & IP6_FH_F_FRAG) 204 return 0; 205 206 hmark_set_tuple_ports(skb, nhoff, t, info); 207 return 0; 208 } 209 210 static unsigned int 211 hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par) 212 { 213 const struct xt_hmark_info *info = par->targinfo; 214 struct hmark_tuple t; 215 216 memset(&t, 0, sizeof(struct hmark_tuple)); 217 218 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { 219 if (hmark_ct_set_htuple(skb, &t, info) < 0) 220 return XT_CONTINUE; 221 } else { 222 if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0) 223 return XT_CONTINUE; 224 } 225 226 skb->mark = hmark_hash(&t, info); 227 return XT_CONTINUE; 228 } 229 #endif 230 231 static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff) 232 { 233 const struct icmphdr *icmph; 234 struct icmphdr _ih; 235 236 /* Not enough header? */ 237 icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih); 238 if (icmph == NULL || icmph->type > NR_ICMP_TYPES) 239 return 0; 240 241 /* Error message? */ 242 if (icmph->type != ICMP_DEST_UNREACH && 243 icmph->type != ICMP_SOURCE_QUENCH && 244 icmph->type != ICMP_TIME_EXCEEDED && 245 icmph->type != ICMP_PARAMETERPROB && 246 icmph->type != ICMP_REDIRECT) 247 return 0; 248 249 *nhoff += iphsz + sizeof(_ih); 250 return 1; 251 } 252 253 static int 254 hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, 255 const struct xt_hmark_info *info) 256 { 257 struct iphdr *ip, _ip; 258 int nhoff = skb_network_offset(skb); 259 260 ip = (struct iphdr *) (skb->data + nhoff); 261 if (ip->protocol == IPPROTO_ICMP) { 262 /* Use inner header in case of ICMP errors */ 263 if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) { 264 ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip); 265 if (ip == NULL) 266 return -1; 267 } 268 } 269 270 t->src = ip->saddr & info->src_mask.ip; 271 t->dst = ip->daddr & info->dst_mask.ip; 272 273 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 274 return 0; 275 276 t->proto = ip->protocol; 277 278 /* ICMP has no ports, skip */ 279 if (t->proto == IPPROTO_ICMP) 280 return 0; 281 282 /* follow-up fragments don't contain ports, skip all fragments */ 283 if (ip->frag_off & htons(IP_MF | IP_OFFSET)) 284 return 0; 285 286 hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info); 287 288 return 0; 289 } 290 291 static unsigned int 292 hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) 293 { 294 const struct xt_hmark_info *info = par->targinfo; 295 struct hmark_tuple t; 296 297 memset(&t, 0, sizeof(struct hmark_tuple)); 298 299 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { 300 if (hmark_ct_set_htuple(skb, &t, info) < 0) 301 return XT_CONTINUE; 302 } else { 303 if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0) 304 return XT_CONTINUE; 305 } 306 307 skb->mark = hmark_hash(&t, info); 308 return XT_CONTINUE; 309 } 310 311 static int hmark_tg_check(const struct xt_tgchk_param *par) 312 { 313 const struct xt_hmark_info *info = par->targinfo; 314 const char *errmsg = "proto mask must be zero with L3 mode"; 315 316 if (!info->hmodulus) 317 return -EINVAL; 318 319 if (info->proto_mask && 320 (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) 321 goto err; 322 323 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && 324 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | 325 XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) 326 return -EINVAL; 327 328 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && 329 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | 330 XT_HMARK_FLAG(XT_HMARK_DPORT)))) { 331 errmsg = "spi-set and port-set can't be combined"; 332 goto err; 333 } 334 return 0; 335 err: 336 pr_info_ratelimited("%s\n", errmsg); 337 return -EINVAL; 338 } 339 340 static struct xt_target hmark_tg_reg[] __read_mostly = { 341 { 342 .name = "HMARK", 343 .family = NFPROTO_IPV4, 344 .target = hmark_tg_v4, 345 .targetsize = sizeof(struct xt_hmark_info), 346 .checkentry = hmark_tg_check, 347 .me = THIS_MODULE, 348 }, 349 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 350 { 351 .name = "HMARK", 352 .family = NFPROTO_IPV6, 353 .target = hmark_tg_v6, 354 .targetsize = sizeof(struct xt_hmark_info), 355 .checkentry = hmark_tg_check, 356 .me = THIS_MODULE, 357 }, 358 #endif 359 }; 360 361 static int __init hmark_tg_init(void) 362 { 363 return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); 364 } 365 366 static void __exit hmark_tg_exit(void) 367 { 368 xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); 369 } 370 371 module_init(hmark_tg_init); 372 module_exit(hmark_tg_exit); 373