/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load
 *					balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
 *		Alan Cox	:	destructor hook for AF_UNIX etc.
 *		Linus Torvalds	:	Better skb_clone.
 *		Alan Cox	:	Added skb_copy.
 *		Alan Cox	:	Added all the changed routines Linus
 *					only put in the headers
 *		Ray VanTassle	:	Fixed --skb->lock in free
 *		Alan Cox	:	skb_copy copy arp field
 *		Andi Kleen	:	slabified it.
 *		Robert Olsson	:	Removed skb_head_pool
 *
 *	NOTE:
 *		The __skb_ routines should be called with interrupts
 *	disabled, or you better be *real* sure that the operation is atomic
 *	with respect to whatever list is being frobbed (e.g. via lock_sock()
 *	or via disabling bottom half handlers, etc).
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 *	The functions in this file will not compile correctly with gcc 2.4.x
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/xfrm.h>

#include <asm/uaccess.h>
#include <asm/system.h>

static kmem_cache_t *skbuff_head_cache;

/*
 *	Keep out-of-line to prevent kernel bloat.
 *	__builtin_return_address is not used because it is not always
 *	reliable.
 */

/**
 *	skb_over_panic	-	private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_put(). Not user callable.
 */
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
	printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s\n",
		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

/**
 *	skb_under_panic	-	private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_push(). Not user callable.
 */

void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
	printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s\n",
		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

/*	Allocate a new skbuff. We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
 *	[BEEP] leaks.
 *
 */

/**
 *	alloc_skb	-	allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *
 *	Allocate a new &sk_buff.
 *	The returned buffer has no headroom and a
 *	tail room of size bytes. The object has a reference count of one.
 *	The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	/* Get the HEAD */
	skb = kmem_cache_alloc(skbuff_head_cache,
			       gfp_mask & ~__GFP_DMA);
	if (!skb)
		goto out;

	/* Get the DATA. Size must match skb_add_mtu(). */
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (!data)
		goto nodata;

	memset(skb, 0, offsetof(struct sk_buff, truesize));
	skb->truesize = size + sizeof(struct sk_buff);
	atomic_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end  = data + size;

	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags  = 0;
	skb_shinfo(skb)->tso_size  = 0;
	skb_shinfo(skb)->tso_segs  = 0;
	skb_shinfo(skb)->frag_list = NULL;
out:
	return skb;
nodata:
	kmem_cache_free(skbuff_head_cache, skb);
	skb = NULL;
	goto out;
}
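
/*
 * Illustrative sketch only (not part of this file's API): the usual pattern
 * a caller follows after alloc_skb() - reserve some headroom so protocol
 * headers can later be pushed cheaply, then skb_put() the payload.  The
 * 16-byte headroom, the zero-filled payload and the helper's name are
 * assumptions made for the example, not values used by this file.
 */
#if 0
static struct sk_buff *example_build_frame(unsigned int payload_len)
{
	struct sk_buff *skb;

	skb = alloc_skb(payload_len + 16, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, 16);		/* headroom for headers pushed later */
	memset(skb_put(skb, payload_len), 0, payload_len); /* tail room -> data */
	return skb;
}
#endif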

/**
 *	alloc_skb_from_cache	-	allocate a network buffer
 *	@cp: kmem_cache from which to allocate the data area
 *	     (object size must be big enough for @size bytes + skb overheads)
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and
 *	tail room of size bytes. The object has a reference count of one.
 *	The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
				     unsigned int size, int gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	/* Get the HEAD */
	skb = kmem_cache_alloc(skbuff_head_cache,
			       gfp_mask & ~__GFP_DMA);
	if (!skb)
		goto out;

	/* Get the DATA. */
	size = SKB_DATA_ALIGN(size);
	data = kmem_cache_alloc(cp, gfp_mask);
	if (!data)
		goto nodata;

	memset(skb, 0, offsetof(struct sk_buff, truesize));
	skb->truesize = size + sizeof(struct sk_buff);
	atomic_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end  = data + size;

	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags  = 0;
	skb_shinfo(skb)->tso_size  = 0;
	skb_shinfo(skb)->tso_segs  = 0;
	skb_shinfo(skb)->frag_list = NULL;
out:
	return skb;
nodata:
	kmem_cache_free(skbuff_head_cache, skb);
	skb = NULL;
	goto out;
}


static void skb_drop_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list = skb_shinfo(skb)->frag_list;

	skb_shinfo(skb)->frag_list = NULL;

	do {
		struct sk_buff *this = list;
		list = list->next;
		kfree_skb(this);
	} while (list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
		skb_get(list);
}

void skb_release_data(struct sk_buff *skb)
{
	if (!skb->cloned ||
	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
			       &skb_shinfo(skb)->dataref)) {
		if (skb_shinfo(skb)->nr_frags) {
			int i;
			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				put_page(skb_shinfo(skb)->frags[i].page);
		}

		if (skb_shinfo(skb)->frag_list)
			skb_drop_fraglist(skb);

		kfree(skb->head);
	}
}

/*
 *	Free an skbuff by memory without cleaning the state.
 */
void kfree_skbmem(struct sk_buff *skb)
{
	skb_release_data(skb);
	kmem_cache_free(skbuff_head_cache, skb);
}

/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb.
 */

void __kfree_skb(struct sk_buff *skb)
{
	if (skb->list) {
		printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
		       "on a list (from %p).\n", NET_CALLER(skb));
		BUG();
	}

	dst_release(skb->dst);
#ifdef CONFIG_XFRM
	secpath_put(skb->sp);
#endif
	if (skb->destructor) {
		if (in_irq())
			printk(KERN_WARNING "Warning: kfree_skb on "
					    "hard IRQ %p\n", NET_CALLER(skb));
		skb->destructor(skb);
	}
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(skb->nf_bridge);
#endif
#endif
/* XXX: IS this still necessary? - JHS */
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = 0;
	skb->tc_classid = 0;
#endif
#endif

	kfree_skbmem(skb);
}

/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt @gfp_mask must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);

	if (!n)
		return NULL;

#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(real_dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(skb->dst);
	C(sp);
#ifdef CONFIG_INET
	secpath_get(skb->sp);
#endif
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	C(local_df);
	n->cloned = 1;
	n->nohdr = 0;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	C(protocol);
	C(security);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
	nf_conntrack_get(skb->nfct);
	C(nfctinfo);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	C(nf_bridge);
	nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	n->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
	C(input_dev);
	C(tc_classid);
#endif

#endif
	C(truesize);
	atomic_set(&n->users, 1);
	C(head);
	C(data);
	C(tail);
	C(end);

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
}
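
/*
 * Illustrative sketch only: delivering one packet to two consumers.  Since
 * skb_clone() shares the data area, it is the cheap choice when neither
 * consumer modifies the payload.  example_deliver() is a hypothetical
 * callback, not something defined in this file.
 */
#if 0
static void example_tee(struct sk_buff *skb)
{
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (clone)
		example_deliver(clone);	/* hypothetical second consumer */
	example_deliver(skb);
}
#endif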

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	/*
	 *	Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;

	new->list	= NULL;
	new->sk		= NULL;
	new->dev	= old->dev;
	new->real_dev	= old->real_dev;
	new->priority	= old->priority;
	new->protocol	= old->protocol;
	new->dst	= dst_clone(old->dst);
#ifdef CONFIG_INET
	new->sp		= secpath_get(old->sp);
#endif
	new->h.raw	= old->h.raw + offset;
	new->nh.raw	= old->nh.raw + offset;
	new->mac.raw	= old->mac.raw + offset;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	new->local_df	= old->local_df;
	new->pkt_type	= old->pkt_type;
	new->stamp	= old->stamp;
	new->destructor = NULL;
	new->security	= old->security;
#ifdef CONFIG_NETFILTER
	new->nfmark	= old->nfmark;
	new->nfcache	= old->nfcache;
	new->nfct	= old->nfct;
	nf_conntrack_get(old->nfct);
	new->nfctinfo	= old->nfctinfo;
#ifdef CONFIG_NETFILTER_DEBUG
	new->nf_debug	= old->nf_debug;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	new->nf_bridge	= old->nf_bridge;
	nf_bridge_get(old->nf_bridge);
#endif
#endif
#ifdef CONFIG_NET_SCHED
#ifdef CONFIG_NET_CLS_ACT
	new->tc_verd	= old->tc_verd;
#endif
	new->tc_index	= old->tc_index;
#endif
	atomic_set(&new->users, 1);
	skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
	skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
}

/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As a by-product this function converts a non-linear &sk_buff into a
 *	linear one, so that the &sk_buff becomes completely private and the
 *	caller is allowed to modify all of the data in the returned buffer.
 *	This means the function is not recommended when only the header is
 *	going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	int headerlen = skb->data - skb->head;
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
				      gfp_mask);
	if (!n)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, headerlen);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);
	n->csum	     = skb->csum;
	n->ip_summed = skb->ip_summed;

	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}


/**
 *	pskb_copy	-	create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in the header. Fragmented data remains shared. This is used when
 *	the caller wishes to modify only the header of an &sk_buff and needs
 *	a private copy of the header to alter. Returns %NULL on failure
 *	or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);

	if (!n)
		goto out;

	/* Set the data pointer */
	skb_reserve(n, skb->data - skb->head);
	/* Set the tail pointer and length */
	skb_put(n, skb_headlen(skb));
	/* Copy the bytes */
	memcpy(n->data, skb->data, n->len);
	n->csum	     = skb->csum;
	n->ip_summed = skb->ip_summed;

	n->data_len  = skb->data_len;
	n->len	     = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			get_page(skb_shinfo(n)->frags[i].page);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_shinfo(skb)->frag_list) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);
out:
	return n;
}
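
/*
 * Illustrative sketch only: choosing between skb_copy() and pskb_copy().
 * If the caller will rewrite payload bytes it needs the fully linearized
 * private copy from skb_copy(); if only headers are touched, pskb_copy()
 * avoids copying the paged fragments.  The helper below is an assumption
 * for the example, not an API of this file.
 */
#if 0
static struct sk_buff *example_private_copy(struct sk_buff *skb,
					    int will_touch_payload)
{
	if (will_touch_payload)
		return skb_copy(skb, GFP_ATOMIC);	/* full private copy */
	return pskb_copy(skb, GFP_ATOMIC);		/* private head only */
}
#endif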

/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates identical copy, if @nhead and @ntail are zero)
 *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
 *	reference count of 1. Returns zero in the case of success, or an
 *	error if the expansion failed. In the latter case the &sk_buff is
 *	not changed.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
{
	int i;
	u8 *data;
	int size = nhead + (skb->end - skb->head) + ntail;
	long off;

	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (!data)
		goto nodata;

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void. */
	memcpy(data + nhead, skb->head, skb->tail - skb->head);
	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		get_page(skb_shinfo(skb)->frags[i].page);

	if (skb_shinfo(skb)->frag_list)
		skb_clone_fraglist(skb);

	skb_release_data(skb);

	off = (data + nhead) - skb->head;

	skb->head     = data;
	skb->end      = data + size;
	skb->data    += off;
	skb->tail    += off;
	skb->mac.raw += off;
	skb->h.raw   += off;
	skb->nh.raw  += off;
	skb->cloned   = 0;
	skb->nohdr    = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;

nodata:
	return -ENOMEM;
}

/* Make private copy of skb with writable head and some headroom */

struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
	struct sk_buff *skb2;
	int delta = headroom - skb_headroom(skb);

	if (delta <= 0)
		skb2 = pskb_copy(skb, GFP_ATOMIC);
	else {
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
					     GFP_ATOMIC)) {
			kfree_skb(skb2);
			skb2 = NULL;
		}
	}
	return skb2;
}


/**
 *	skb_copy_expand	-	copy and expand sk_buff
 *	@skb: buffer to copy
 *	@newheadroom: new free bytes at head
 *	@newtailroom: new free bytes at tail
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data and while doing so
 *	allocate additional space.
 *
 *	This is used when the caller wishes to modify the data and needs a
 *	private copy of the data to alter as well as more space for new fields.
 *	Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	You must pass %GFP_ATOMIC as the allocation priority if this function
 *	is called from an interrupt.
 *
 *	BUG ALERT: ip_summed is not copied. Why does this work? Is it used
 *	only by netfilter in the cases when checksum is recalculated? --ANK
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom, int newtailroom, int gfp_mask)
{
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
				      gfp_mask);
	int head_copy_len, head_copy_off;

	if (!n)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	head_copy_len = skb_headroom(skb);
	head_copy_off = 0;
	if (newheadroom <= head_copy_len)
		head_copy_len = newheadroom;
	else
		head_copy_off = newheadroom - head_copy_len;

	/* Copy the linear header and data. */
	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
			  skb->len + head_copy_len))
		BUG();

	copy_skb_header(n, skb);

	return n;
}
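
/*
 * Illustrative sketch only: making room for an extra header in front of a
 * buffer that may have arrived without enough headroom, using
 * skb_realloc_headroom() from above.  The 16-byte header size and the
 * "consume the skb on failure" convention are assumptions of the example,
 * and the sketch assumes the resulting skb is private and writable.
 */
#if 0
static struct sk_buff *example_push_outer_header(struct sk_buff *skb)
{
	if (skb_headroom(skb) < 16) {
		struct sk_buff *nskb = skb_realloc_headroom(skb, 16);

		kfree_skb(skb);		/* the copy replaces the original */
		if (!nskb)
			return NULL;
		skb = nskb;
	}
	skb_push(skb, 16);		/* caller now fills in the header */
	return skb;
}
#endif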

/**
 *	skb_pad			-	zero pad the tail of an skb
 *	@skb: buffer to pad
 *	@pad: space to pad
 *
 *	Ensure that a buffer is followed by a padding area that is zero
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
 *	May return NULL in out of memory cases.
 */

struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
{
	struct sk_buff *nskb;

	/* If the skbuff is non linear tailroom is always zero.. */
	if (skb_tailroom(skb) >= pad) {
		memset(skb->data + skb->len, 0, pad);
		return skb;
	}

	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad,
			       GFP_ATOMIC);
	kfree_skb(skb);
	if (nskb)
		memset(nskb->data + nskb->len, 0, pad);
	return nskb;
}
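
/*
 * Illustrative sketch only: how a driver might use skb_pad() before
 * transmitting, padding short frames up to an assumed 60-byte hardware
 * minimum.  Note that skb_pad() as defined above frees the original buffer
 * when it has to reallocate and may return NULL; the zeroed pad bytes live
 * in the tailroom, skb->len is not extended.
 */
#if 0
static struct sk_buff *example_pad_to_min(struct sk_buff *skb)
{
	if (skb->len < 60) {
		skb = skb_pad(skb, 60 - skb->len);
		if (!skb)
			return NULL;	/* original skb is already gone */
	}
	return skb;
}
#endif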

/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
 * If realloc==0 and trimming is impossible without change of data,
 * it is BUG().
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;

	for (i = 0; i < nfrags; i++) {
		int end = offset + skb_shinfo(skb)->frags[i].size;
		if (end > len) {
			if (skb_cloned(skb)) {
				if (!realloc)
					BUG();
				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
					return -ENOMEM;
			}
			if (len <= offset) {
				put_page(skb_shinfo(skb)->frags[i].page);
				skb_shinfo(skb)->nr_frags--;
			} else {
				skb_shinfo(skb)->frags[i].size = len - offset;
			}
		}
		offset = end;
	}

	if (offset < len) {
		skb->data_len -= skb->len - len;
		skb->len       = len;
	} else {
		if (len <= skb_headlen(skb)) {
			skb->len      = len;
			skb->data_len = 0;
			skb->tail     = skb->data + len;
			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
				skb_drop_fraglist(skb);
		} else {
			skb->data_len -= skb->len - len;
			skb->len       = len;
		}
	}

	return 0;
}

/**
 *	__pskb_pull_tail - advance tail of skb header
 *	@skb: buffer to reallocate
 *	@delta: number of bytes to advance tail
 *
 *	The function makes sense only on a fragmented &sk_buff; it expands
 *	the header, moving its tail forward and copying the necessary data
 *	from the fragmented part.
 *
 *	&sk_buff MUST have reference count of 1.
 *
 *	Returns %NULL (and &sk_buff does not change) if pull failed
 *	or value of new tail of skb in the case of success.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
	/* If skb has not enough free space at tail, get new one
	 * plus 128 bytes for future expansions. If we have enough
	 * room at tail, reallocate without expansion only if skb is cloned.
	 */
	int i, k, eat = (skb->tail + delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
				     GFP_ATOMIC))
			return NULL;
	}

	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
		BUG();

	/* Optimization: no fragments, no reasons to preestimate
	 * size of pulled pages. Superb.
	 */
	if (!skb_shinfo(skb)->frag_list)
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size >= eat)
			goto pull_pages;
		eat -= skb_shinfo(skb)->frags[i].size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to the skb data,
	 * but, taking into account that pulling is expected to
	 * be a very rare operation, it is worth fighting against
	 * further bloating of the skb head and crucifying ourselves
	 * here instead. Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;

		do {
			if (!list)
				BUG();

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (!clone)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (!pskb_pull(list, eat)) {
					if (clone)
						kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb->tail     += delta;
	skb->data_len -= delta;

	return skb->tail;
}
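
/*
 * Illustrative sketch only: __pskb_pull_tail() is normally reached through
 * pskb_may_pull(), which callers use to make sure the first N bytes of a
 * packet are in the linear header before dereferencing them.  The 20-byte
 * figure below is just an assumed minimum IPv4 header length.
 */
#if 0
static int example_check_header(struct sk_buff *skb)
{
	if (!pskb_may_pull(skb, 20))
		return -EINVAL;	/* packet too short, or reallocation failed */

	/* skb->data now points at least 20 linear, readable bytes. */
	return 0;
}
#endif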

/* Copy some data bits from skb to kernel buffer. */

int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int i, copy;
	int start = skb_headlen(skb);

	if (offset > (int)skb->len - len)
		goto fault;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		memcpy(to, skb->data + offset, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to     += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
			memcpy(to,
			       vaddr + skb_shinfo(skb)->frags[i].page_offset +
			       offset - start, copy);
			kunmap_skb_frag(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to     += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_bits(list, offset - start,
						  to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to     += copy;
			}
			start = end;
		}
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
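
/*
 * Illustrative sketch only: skb_copy_bits() copies an arbitrary byte range
 * out of a possibly non-linear skb into a flat kernel buffer, so the caller
 * does not have to care whether the range sits in the linear head, in page
 * fragments or on the frag_list.  A negative offset reaches back into the
 * headroom, which is what skb_copy() above relies on.
 */
#if 0
static int example_peek_bytes(const struct sk_buff *skb, void *buf, int len)
{
	return skb_copy_bits(skb, 0, buf, len);	/* 0 = start of skb->data */
}
#endif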

/* Checksum skb data. */

unsigned int skb_checksum(const struct sk_buff *skb, int offset,
			  int len, unsigned int csum)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	int pos = 0;

	/* Checksum header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial(skb->data + offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos	= copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial(vaddr + frag->page_offset +
					     offset - start, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			pos    += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				unsigned int csum2;
				if (copy > len)
					copy = len;
				csum2 = skb_checksum(list, offset - start,
						     copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				pos    += copy;
			}
			start = end;
		}
	}
	if (len)
		BUG();

	return csum;
}

/* Both of above in one bottle. */

unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
				    u8 *to, int len, unsigned int csum)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	int pos = 0;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data + offset, to,
						 copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to     += copy;
		pos	= copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial_copy_nocheck(vaddr +
							  frag->page_offset +
							  offset - start, to,
							  copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			to     += copy;
			pos    += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			unsigned int csum2;
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				csum2 = skb_copy_and_csum_bits(list,
							       offset - start,
							       to, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				to     += copy;
				pos    += copy;
			}
			start = end;
		}
	}
	if (len)
		BUG();
	return csum;
}

void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
	unsigned int csum;
	long csstart;

	if (skb->ip_summed == CHECKSUM_HW)
		csstart = skb->h.raw - skb->data;
	else
		csstart = skb_headlen(skb);

	if (csstart > skb_headlen(skb))
		BUG();

	memcpy(to, skb->data, csstart);

	csum = 0;
	if (csstart != skb->len)
		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
					      skb->len - csstart, 0);

	if (skb->ip_summed == CHECKSUM_HW) {
		long csstuff = csstart + skb->csum;

		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
	}
}
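
/*
 * Illustrative sketch only: a driver without scatter-gather support copying
 * an outgoing packet into a DMA bounce buffer while folding in the checksum,
 * which is what skb_copy_and_csum_dev() exists for.  The bounce buffer and
 * the hardware hand-off are assumptions of the example.
 */
#if 0
static void example_xmit_copy(struct sk_buff *skb, u8 *bounce)
{
	skb_copy_and_csum_dev(skb, bounce);
	/* ... hand 'bounce' and skb->len to the hardware, then free skb ... */
}
#endif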

/**
 *	skb_dequeue - remove from the head of the queue
 *	@list: list to dequeue from
 *
 *	Remove the head of the list. The list lock is taken so the function
 *	may be used safely with other locking list functions. The head item is
 *	returned or %NULL if the list is empty.
 */

struct sk_buff *skb_dequeue(struct sk_buff_head *list)
{
	unsigned long flags;
	struct sk_buff *result;

	spin_lock_irqsave(&list->lock, flags);
	result = __skb_dequeue(list);
	spin_unlock_irqrestore(&list->lock, flags);
	return result;
}

/**
 *	skb_dequeue_tail - remove from the tail of the queue
 *	@list: list to dequeue from
 *
 *	Remove the tail of the list. The list lock is taken so the function
 *	may be used safely with other locking list functions. The tail item is
 *	returned or %NULL if the list is empty.
 */
struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
{
	unsigned long flags;
	struct sk_buff *result;

	spin_lock_irqsave(&list->lock, flags);
	result = __skb_dequeue_tail(list);
	spin_unlock_irqrestore(&list->lock, flags);
	return result;
}

/**
 *	skb_queue_purge - empty a list
 *	@list: list to empty
 *
 *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 *	the list and one reference dropped. This function takes the list
 *	lock and is atomic with respect to other list locking functions.
 */
void skb_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;
	while ((skb = skb_dequeue(list)) != NULL)
		kfree_skb(skb);
}

/**
 *	skb_queue_head - queue a buffer at the list head
 *	@list: list to use
 *	@newsk: buffer to queue
 *
 *	Queue a buffer at the start of the list. This function takes the
 *	list lock and can be used safely with other locking &sk_buff functions.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_head(list, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}

/**
 *	skb_queue_tail - queue a buffer at the list tail
 *	@list: list to use
 *	@newsk: buffer to queue
 *
 *	Queue a buffer at the tail of the list. This function takes the
 *	list lock and can be used safely with other locking &sk_buff functions.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_tail(list, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}
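
/*
 * Illustrative sketch only: the usual producer/consumer pattern built on the
 * locked queue helpers above.  The queue and the function names are
 * assumptions for the example; a real user initializes the head with
 * skb_queue_head_init() before any of these calls.
 */
#if 0
static struct sk_buff_head example_rxq;

static void example_enqueue(struct sk_buff *skb)
{
	skb_queue_tail(&example_rxq, skb);	/* takes the queue lock */
}

static void example_drain(void)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&example_rxq)) != NULL)
		kfree_skb(skb);
}
#endif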

/**
 *	skb_unlink	-	remove a buffer from a list
 *	@skb: buffer to remove
 *
 *	Remove a packet from a list. The list locks are taken and this
 *	function is atomic with respect to other list locked calls.
 *
 *	Works even without knowing the list it is sitting on, which can be
 *	handy at times. It also means that THE LIST MUST EXIST when you
 *	unlink. Thus a list must have its contents unlinked before it is
 *	destroyed.
 */
void skb_unlink(struct sk_buff *skb)
{
	struct sk_buff_head *list = skb->list;

	if (list) {
		unsigned long flags;

		spin_lock_irqsave(&list->lock, flags);
		if (skb->list == list)
			__skb_unlink(skb, skb->list);
		spin_unlock_irqrestore(&list->lock, flags);
	}
}


/**
 *	skb_append	-	append a buffer
 *	@old: buffer to insert after
 *	@newsk: buffer to insert
 *
 *	Place a packet after a given packet in a list. The list locks are
 *	taken and this function is atomic with respect to other list locked
 *	calls.
 *	A buffer cannot be placed on two lists at the same time.
 */

void skb_append(struct sk_buff *old, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&old->list->lock, flags);
	__skb_append(old, newsk);
	spin_unlock_irqrestore(&old->list->lock, flags);
}


/**
 *	skb_insert	-	insert a buffer
 *	@old: buffer to insert before
 *	@newsk: buffer to insert
 *
 *	Place a packet before a given packet in a list. The list locks are
 *	taken and this function is atomic with respect to other list locked
 *	calls.
 *	A buffer cannot be placed on two lists at the same time.
 */

void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&old->list->lock, flags);
	__skb_insert(newsk, old->prev, old, old->list);
	spin_unlock_irqrestore(&old->list->lock, flags);
}

#if 0
/*
 *	Tune the memory allocator for a new MTU size.
 */
void skb_add_mtu(int mtu)
{
	/* Must match allocation in alloc_skb */
	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);

	kmem_add_cache_size(mtu);
}
#endif

static inline void skb_split_inside_header(struct sk_buff *skb,
					   struct sk_buff *skb1,
					   const u32 len, const int pos)
{
	int i;

	memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);

	/* And move data appendix as is. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];

	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
	skb_shinfo(skb)->nr_frags  = 0;
	skb1->data_len = skb->data_len;
	skb1->len     += skb1->data_len;
	skb->data_len  = 0;
	skb->len       = len;
	skb->tail      = skb->data + len;
}

static inline void skb_split_no_header(struct sk_buff *skb,
				       struct sk_buff *skb1,
				       const u32 len, int pos)
{
	int i, k = 0;
	const int nfrags = skb_shinfo(skb)->nr_frags;

	skb_shinfo(skb)->nr_frags = 0;
	skb1->len      = skb1->data_len = skb->len - len;
	skb->len       = len;
	skb->data_len  = len - pos;

	for (i = 0; i < nfrags; i++) {
		int size = skb_shinfo(skb)->frags[i].size;

		if (pos + size > len) {
			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];

			if (pos < len) {
				/* Split frag.
				 * We have two variants in this case:
				 * 1. Move all the frag to the second
				 *    part, if it is possible. F.e.
				 *    this approach is mandatory for TUX,
				 *    where splitting is expensive.
				 * 2. Split accurately. This is what we do.
				 */
				get_page(skb_shinfo(skb)->frags[i].page);
				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
				skb_shinfo(skb1)->frags[0].size -= len - pos;
				skb_shinfo(skb)->frags[i].size = len - pos;
				skb_shinfo(skb)->nr_frags++;
			}
			k++;
		} else
			skb_shinfo(skb)->nr_frags++;
		pos += size;
	}
	skb_shinfo(skb1)->nr_frags = k;
}

/**
 * skb_split - Split fragmented skb to two parts at length len.
 * @skb: the buffer to split
 * @skb1: the buffer to receive the second part
 * @len: new length for skb
 */
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
	int pos = skb_headlen(skb);

	if (len < pos)	/* Split line is inside header. */
		skb_split_inside_header(skb, skb1, len, pos);
	else		/* Second chunk has no header, nothing to copy. */
		skb_split_no_header(skb, skb1, len, pos);
}

void __init skb_init(void)
{
	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN,
					      NULL, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");
}

EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
EXPORT_SYMBOL(skb_clone);
EXPORT_SYMBOL(skb_clone_fraglist);
EXPORT_SYMBOL(skb_copy);
EXPORT_SYMBOL(skb_copy_and_csum_bits);
EXPORT_SYMBOL(skb_copy_and_csum_dev);
EXPORT_SYMBOL(skb_copy_bits);
EXPORT_SYMBOL(skb_copy_expand);
EXPORT_SYMBOL(skb_over_panic);
EXPORT_SYMBOL(skb_pad);
EXPORT_SYMBOL(skb_realloc_headroom);
EXPORT_SYMBOL(skb_under_panic);
EXPORT_SYMBOL(skb_dequeue);
EXPORT_SYMBOL(skb_dequeue_tail);
EXPORT_SYMBOL(skb_insert);
EXPORT_SYMBOL(skb_queue_purge);
EXPORT_SYMBOL(skb_queue_head);
EXPORT_SYMBOL(skb_queue_tail);
EXPORT_SYMBOL(skb_unlink);
EXPORT_SYMBOL(skb_append);
EXPORT_SYMBOL(skb_split);