1 /* nf_nat_helper.c - generic support functions for NAT helpers 2 * 3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org> 4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 #include <linux/module.h> 11 #include <linux/gfp.h> 12 #include <linux/types.h> 13 #include <linux/skbuff.h> 14 #include <linux/tcp.h> 15 #include <linux/udp.h> 16 #include <net/tcp.h> 17 18 #include <net/netfilter/nf_conntrack.h> 19 #include <net/netfilter/nf_conntrack_helper.h> 20 #include <net/netfilter/nf_conntrack_ecache.h> 21 #include <net/netfilter/nf_conntrack_expect.h> 22 #include <net/netfilter/nf_nat.h> 23 #include <net/netfilter/nf_nat_l3proto.h> 24 #include <net/netfilter/nf_nat_l4proto.h> 25 #include <net/netfilter/nf_nat_core.h> 26 #include <net/netfilter/nf_nat_helper.h> 27 28 #define DUMP_OFFSET(x) \ 29 pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ 30 x->offset_before, x->offset_after, x->correction_pos); 31 32 static DEFINE_SPINLOCK(nf_nat_seqofs_lock); 33 34 /* Setup TCP sequence correction given this change at this sequence */ 35 static inline void 36 adjust_tcp_sequence(u32 seq, 37 int sizediff, 38 struct nf_conn *ct, 39 enum ip_conntrack_info ctinfo) 40 { 41 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 42 struct nf_conn_nat *nat = nfct_nat(ct); 43 struct nf_nat_seq *this_way = &nat->seq[dir]; 44 45 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", 46 seq, sizediff); 47 48 pr_debug("adjust_tcp_sequence: Seq_offset before: "); 49 DUMP_OFFSET(this_way); 50 51 spin_lock_bh(&nf_nat_seqofs_lock); 52 53 /* SYN adjust. 
If it's uninitialized, or this is after last 54 * correction, record it: we don't handle more than one 55 * adjustment in the window, but do deal with common case of a 56 * retransmit */ 57 if (this_way->offset_before == this_way->offset_after || 58 before(this_way->correction_pos, seq)) { 59 this_way->correction_pos = seq; 60 this_way->offset_before = this_way->offset_after; 61 this_way->offset_after += sizediff; 62 } 63 spin_unlock_bh(&nf_nat_seqofs_lock); 64 65 pr_debug("adjust_tcp_sequence: Seq_offset after: "); 66 DUMP_OFFSET(this_way); 67 } 68 69 /* Get the offset value, for conntrack */ 70 s16 nf_nat_get_offset(const struct nf_conn *ct, 71 enum ip_conntrack_dir dir, 72 u32 seq) 73 { 74 struct nf_conn_nat *nat = nfct_nat(ct); 75 struct nf_nat_seq *this_way; 76 s16 offset; 77 78 if (!nat) 79 return 0; 80 81 this_way = &nat->seq[dir]; 82 spin_lock_bh(&nf_nat_seqofs_lock); 83 offset = after(seq, this_way->correction_pos) 84 ? this_way->offset_after : this_way->offset_before; 85 spin_unlock_bh(&nf_nat_seqofs_lock); 86 87 return offset; 88 } 89 90 /* Frobs data inside this packet, which is linear. 
 * Caller must already have made the skb linear and writable, and
 * ensured enough tailroom for any growth (see enlarge_skb()).
 */
static void mangle_contents(struct sk_buff *skb,
			    unsigned int dataoff,
			    unsigned int match_offset,
			    unsigned int match_len,
			    const char *rep_buffer,
			    unsigned int rep_len)
{
	unsigned char *data;

	BUG_ON(skb_is_nonlinear(skb));
	/* dataoff is relative to the network (L3) header */
	data = skb_network_header(skb) + dataoff;

	/* move post-replacement: shift everything after the matched
	 * region so the replacement fits exactly; memmove because the
	 * regions overlap */
	memmove(data + match_offset + rep_len,
		data + match_offset + match_len,
		skb->tail - (skb->network_header + dataoff +
			     match_offset + match_len));

	/* insert data from buffer */
	memcpy(data + match_offset, rep_buffer, rep_len);

	/* update skb info to reflect the grown or shrunk payload */
	if (rep_len > match_len) {
		pr_debug("nf_nat_mangle_packet: Extending packet by "
			 "%u from %u bytes\n", rep_len - match_len, skb->len);
		skb_put(skb, rep_len - match_len);
	} else {
		pr_debug("nf_nat_mangle_packet: Shrinking packet from "
			 "%u from %u bytes\n", match_len - rep_len, skb->len);
		__skb_trim(skb, skb->len + rep_len - match_len);
	}

	/* Total length changed, so the L3 header must be updated; only
	 * IPv4 carries a header checksum of its own. */
	if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) {
		/* fix IP hdr checksum information */
		ip_hdr(skb)->tot_len = htons(skb->len);
		ip_send_check(ip_hdr(skb));
	} else
		ipv6_hdr(skb)->payload_len =
			htons(skb->len - sizeof(struct ipv6hdr));
}

/* Unusual, but possible case.
*/ 133 static int enlarge_skb(struct sk_buff *skb, unsigned int extra) 134 { 135 if (skb->len + extra > 65535) 136 return 0; 137 138 if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) 139 return 0; 140 141 return 1; 142 } 143 144 void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, 145 __be32 seq, s16 off) 146 { 147 if (!off) 148 return; 149 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); 150 adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); 151 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); 152 } 153 EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); 154 155 void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, 156 u32 ctinfo, int off) 157 { 158 const struct tcphdr *th; 159 160 if (nf_ct_protonum(ct) != IPPROTO_TCP) 161 return; 162 163 th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); 164 nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); 165 } 166 EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); 167 168 /* Generic function for mangling variable-length address changes inside 169 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX 170 * command in FTP). 171 * 172 * Takes care about all the nasty sequence number changes, checksumming, 173 * skb enlargement, ... 
 *
 * Returns 1 on success, 0 if the skb could not be made writable or
 * enlarged.  When @adjust is true and the payload length changed, a
 * TCP sequence number correction is recorded as well.
 */
int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
			       struct nf_conn *ct,
			       enum ip_conntrack_info ctinfo,
			       unsigned int protoff,
			       unsigned int match_offset,
			       unsigned int match_len,
			       const char *rep_buffer,
			       unsigned int rep_len, bool adjust)
{
	const struct nf_nat_l3proto *l3proto;
	struct tcphdr *tcph;
	int oldlen, datalen;

	/* need exclusive, linear access to the whole packet */
	if (!skb_make_writable(skb, skb->len))
		return 0;

	/* make room up front if the replacement is longer than the match */
	if (rep_len > match_len &&
	    rep_len - match_len > skb_tailroom(skb) &&
	    !enlarge_skb(skb, rep_len - match_len))
		return 0;

	SKB_LINEAR_ASSERT(skb);

	tcph = (void *)skb->data + protoff;

	oldlen = skb->len - protoff;
	/* splice the replacement into the TCP payload (past the options) */
	mangle_contents(skb, protoff + tcph->doff*4,
			match_offset, match_len, rep_buffer, rep_len);

	datalen = skb->len - protoff;

	/* recompute the transport checksum for the new payload length */
	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
	l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check,
			     datalen, oldlen);

	if (adjust && rep_len != match_len)
		nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
				      (int)rep_len - (int)match_len);

	return 1;
}
EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);

/* Generic function for mangling variable-length address changes inside
 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
 * command in the Amanda protocol)
 *
 * Takes care about all the nasty sequence number changes, checksumming,
 * skb enlargement, ...
 *
 * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
 * should be fairly easy to do.
 *
 * Returns 1 on success, 0 on failure.
 */
int
nf_nat_mangle_udp_packet(struct sk_buff *skb,
			 struct nf_conn *ct,
			 enum ip_conntrack_info ctinfo,
			 unsigned int protoff,
			 unsigned int match_offset,
			 unsigned int match_len,
			 const char *rep_buffer,
			 unsigned int rep_len)
{
	const struct nf_nat_l3proto *l3proto;
	struct udphdr *udph;
	int datalen, oldlen;

	/* need exclusive, linear access to the whole packet */
	if (!skb_make_writable(skb, skb->len))
		return 0;

	/* make room up front if the replacement is longer than the match */
	if (rep_len > match_len &&
	    rep_len - match_len > skb_tailroom(skb) &&
	    !enlarge_skb(skb, rep_len - match_len))
		return 0;

	udph = (void *)skb->data + protoff;

	oldlen = skb->len - protoff;
	mangle_contents(skb, protoff + sizeof(*udph),
			match_offset, match_len, rep_buffer, rep_len);

	/* update the length of the UDP packet */
	datalen = skb->len - protoff;
	udph->len = htons(datalen);

	/* fix udp checksum if udp checksum was previously calculated
	 * (a zero checksum means "none" for UDP, so there is nothing to
	 * fix unless hardware offload still has to fill it in) */
	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
		return 1;

	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
	l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
			     datalen, oldlen);

	return 1;
}
EXPORT_SYMBOL(nf_nat_mangle_udp_packet);

/* Adjust one found SACK option including checksum correction */
static void
sack_adjust(struct sk_buff *skb,
	    struct tcphdr *tcph,
	    unsigned int sackoff,
	    unsigned int sackend,
	    struct nf_nat_seq *natseq)
{
	while (sackoff < sackend) {
		struct tcp_sack_block_wire *sack;
		__be32 new_start_seq, new_end_seq;

		sack = (void *)skb->data + sackoff;
		/* Each SACK edge is in the peer's sequence space; pick
		 * the offset that was in effect at that position. */
		if (after(ntohl(sack->start_seq) - natseq->offset_before,
			  natseq->correction_pos))
			new_start_seq = htonl(ntohl(sack->start_seq)
					- natseq->offset_after);
		else
			new_start_seq = htonl(ntohl(sack->start_seq)
					- natseq->offset_before);

		if (after(ntohl(sack->end_seq) - natseq->offset_before,
			  natseq->correction_pos))
			new_end_seq = htonl(ntohl(sack->end_seq)
				      - natseq->offset_after);
		else
			new_end_seq = htonl(ntohl(sack->end_seq)
				      - natseq->offset_before);

		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
			 ntohl(sack->start_seq), new_start_seq,
			 ntohl(sack->end_seq), new_end_seq);

		/* incremental checksum fixup for both rewritten fields */
		inet_proto_csum_replace4(&tcph->check, skb,
					 sack->start_seq, new_start_seq, 0);
		inet_proto_csum_replace4(&tcph->check, skb,
					 sack->end_seq, new_end_seq, 0);
		sack->start_seq = new_start_seq;
		sack->end_seq = new_end_seq;
		sackoff += sizeof(*sack);
	}
}

/* TCP SACK sequence number adjustment: walk the TCP options of this
 * packet and rewrite every well-formed SACK block.  Returns 1 on
 * success (including "no SACK present"), 0 on malformed options or
 * unwritable skb. */
static inline unsigned int
nf_nat_sack_adjust(struct sk_buff *skb,
		   unsigned int protoff,
		   struct tcphdr *tcph,
		   struct nf_conn *ct,
		   enum ip_conntrack_info ctinfo)
{
	unsigned int dir, optoff, optend;
	struct nf_conn_nat *nat = nfct_nat(ct);

	optoff = protoff + sizeof(struct tcphdr);
	optend = protoff + tcph->doff * 4;

	if (!skb_make_writable(skb, optend))
		return 0;

	dir = CTINFO2DIR(ctinfo);

	while (optoff < optend) {
		/* Usually: option, length. */
		unsigned char *op = skb->data + optoff;

		switch (op[0]) {
		case TCPOPT_EOL:
			return 1;
		case TCPOPT_NOP:
			optoff++;
			continue;
		default:
			/* no partial options; op[1] < 2 would also make
			 * no forward progress */
			if (optoff + 1 == optend ||
			    optoff + op[1] > optend ||
			    op[1] < 2)
				return 0;
			/* SACK blocks acknowledge the peer's data, so
			 * they use the *other* direction's offsets */
			if (op[0] == TCPOPT_SACK &&
			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
				sack_adjust(skb, tcph, optoff+2,
					    optoff+op[1], &nat->seq[!dir]);
			optoff += op[1];
		}
	}
	return 1;
}

/* TCP sequence number adjustment.
 * Returns 1 on success, 0 on failure */
int
nf_nat_seq_adjust(struct sk_buff *skb,
		  struct nf_conn *ct,
		  enum ip_conntrack_info ctinfo,
		  unsigned int protoff)
{
	struct tcphdr *tcph;
	int dir;
	__be32 newseq, newack;
	s16 seqoff, ackoff;
	struct nf_conn_nat *nat = nfct_nat(ct);
	struct nf_nat_seq *this_way, *other_way;

	dir = CTINFO2DIR(ctinfo);

	/* seq is rewritten with this direction's offset, ack with the
	 * reply direction's */
	this_way = &nat->seq[dir];
	other_way = &nat->seq[!dir];

	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
		return 0;

	tcph = (void *)skb->data + protoff;
	if (after(ntohl(tcph->seq), this_way->correction_pos))
		seqoff = this_way->offset_after;
	else
		seqoff = this_way->offset_before;

	/* ack_seq is in the peer's (already adjusted) sequence space,
	 * so undo the peer's offset before comparing */
	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
		  other_way->correction_pos))
		ackoff = other_way->offset_after;
	else
		ackoff = other_way->offset_before;

	newseq = htonl(ntohl(tcph->seq) + seqoff);
	newack = htonl(ntohl(tcph->ack_seq) - ackoff);

	/* incremental checksum fixup for both rewritten fields */
	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);

	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
		 ntohl(newack));

	tcph->seq = newseq;
	tcph->ack_seq = newack;

	/* SACK options carry sequence numbers too; fix them up as well */
	return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
}

/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void nf_nat_follow_master(struct nf_conn *ct,
			  struct nf_conntrack_expect *exp)
{
	struct nf_nat_range range;

	/* This must be a fresh one. */
	BUG_ON(ct->status & IPS_NAT_DONE_MASK);

	/* Change src to where master sends to */
	range.flags = NF_NAT_RANGE_MAP_IPS;
	range.min_addr = range.max_addr
		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);

	/* For DST manip, map port here to where it's expected. */
	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
	range.min_proto = range.max_proto = exp->saved_proto;
	range.min_addr = range.max_addr
		= ct->master->tuplehash[!exp->dir].tuple.src.u3;
	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}
EXPORT_SYMBOL(nf_nat_follow_master);