1 /* 2 * Common framework for low-level network console, dump, and debugger code 3 * 4 * Sep 8 2003 Matt Mackall <mpm@selenic.com> 5 * 6 * based on the netconsole code from: 7 * 8 * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com> 9 * Copyright (C) 2002 Red Hat, Inc. 10 */ 11 12 #include <linux/smp_lock.h> 13 #include <linux/netdevice.h> 14 #include <linux/etherdevice.h> 15 #include <linux/string.h> 16 #include <linux/inetdevice.h> 17 #include <linux/inet.h> 18 #include <linux/interrupt.h> 19 #include <linux/netpoll.h> 20 #include <linux/sched.h> 21 #include <linux/delay.h> 22 #include <linux/rcupdate.h> 23 #include <linux/workqueue.h> 24 #include <net/tcp.h> 25 #include <net/udp.h> 26 #include <asm/unaligned.h> 27 28 /* 29 * We maintain a small pool of fully-sized skbs, to make sure the 30 * message gets out even in extreme OOM situations. 31 */ 32 33 #define MAX_UDP_CHUNK 1460 34 #define MAX_SKBS 32 35 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) 36 37 static DEFINE_SPINLOCK(skb_list_lock); 38 static int nr_skbs; 39 static struct sk_buff *skbs; 40 41 static DEFINE_SPINLOCK(queue_lock); 42 static int queue_depth; 43 static struct sk_buff *queue_head, *queue_tail; 44 45 static atomic_t trapped; 46 47 #define NETPOLL_RX_ENABLED 1 48 #define NETPOLL_RX_DROP 2 49 50 #define MAX_SKB_SIZE \ 51 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ 52 sizeof(struct iphdr) + sizeof(struct ethhdr)) 53 54 static void zap_completion_queue(void); 55 56 static void queue_process(void *p) 57 { 58 unsigned long flags; 59 struct sk_buff *skb; 60 61 while (queue_head) { 62 spin_lock_irqsave(&queue_lock, flags); 63 64 skb = queue_head; 65 queue_head = skb->next; 66 if (skb == queue_tail) 67 queue_head = NULL; 68 69 queue_depth--; 70 71 spin_unlock_irqrestore(&queue_lock, flags); 72 73 dev_queue_xmit(skb); 74 } 75 } 76 77 static DECLARE_WORK(send_queue, queue_process, NULL); 78 79 void netpoll_queue(struct sk_buff *skb) 80 { 81 unsigned long flags; 82 83 if (queue_depth == MAX_QUEUE_DEPTH) { 84 __kfree_skb(skb); 85 return; 86 } 87 88 spin_lock_irqsave(&queue_lock, flags); 89 if (!queue_head) 90 queue_head = skb; 91 else 92 queue_tail->next = skb; 93 queue_tail = skb; 94 queue_depth++; 95 spin_unlock_irqrestore(&queue_lock, flags); 96 97 schedule_work(&send_queue); 98 } 99 100 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, 101 unsigned short ulen, u32 saddr, u32 daddr) 102 { 103 if (uh->check == 0) 104 return 0; 105 106 if (skb->ip_summed == CHECKSUM_HW) 107 return csum_tcpudp_magic( 108 saddr, daddr, ulen, IPPROTO_UDP, skb->csum); 109 110 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); 111 112 return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); 113 } 114 115 /* 116 * Check whether delayed processing was scheduled for our NIC. If so, 117 * we attempt to grab the poll lock and use ->poll() to pump the card. 118 * If this fails, either we've recursed in ->poll() or it's already 119 * running on another CPU. 120 * 121 * Note: we don't mask interrupts with this lock because we're using 122 * trylock here and interrupts are already disabled in the softirq 123 * case. Further, we test the poll_owner to avoid recursion on UP 124 * systems where the lock doesn't exist. 125 * 126 * In cases where there is bi-directional communications, reading only 127 * one message at a time can lead to packets being dropped by the 128 * network adapter, forcing superfluous retries and possibly timeouts. 129 * Thus, we set our budget to greater than 1. 130 */ 131 static void poll_napi(struct netpoll *np) 132 { 133 int budget = 16; 134 135 if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && 136 np->poll_owner != smp_processor_id() && 137 spin_trylock(&np->poll_lock)) { 138 np->rx_flags |= NETPOLL_RX_DROP; 139 atomic_inc(&trapped); 140 141 np->dev->poll(np->dev, &budget); 142 143 atomic_dec(&trapped); 144 np->rx_flags &= ~NETPOLL_RX_DROP; 145 spin_unlock(&np->poll_lock); 146 } 147 } 148 149 void netpoll_poll(struct netpoll *np) 150 { 151 if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) 152 return; 153 154 /* Process pending work on NIC */ 155 np->dev->poll_controller(np->dev); 156 if (np->dev->poll) 157 poll_napi(np); 158 159 zap_completion_queue(); 160 } 161 162 static void refill_skbs(void) 163 { 164 struct sk_buff *skb; 165 unsigned long flags; 166 167 spin_lock_irqsave(&skb_list_lock, flags); 168 while (nr_skbs < MAX_SKBS) { 169 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); 170 if (!skb) 171 break; 172 173 skb->next = skbs; 174 skbs = skb; 175 nr_skbs++; 176 } 177 spin_unlock_irqrestore(&skb_list_lock, flags); 178 } 179 180 static void zap_completion_queue(void) 181 { 182 unsigned long flags; 183 struct softnet_data *sd = &get_cpu_var(softnet_data); 184 185 if (sd->completion_queue) { 186 struct sk_buff *clist; 187 188 local_irq_save(flags); 189 clist = sd->completion_queue; 190 sd->completion_queue = NULL; 191 local_irq_restore(flags); 192 193 while (clist != NULL) { 194 struct sk_buff *skb = clist; 195 clist = clist->next; 196 if(skb->destructor) 197 dev_kfree_skb_any(skb); /* put this one back */ 198 else 199 __kfree_skb(skb); 200 } 201 } 202 203 put_cpu_var(softnet_data); 204 } 205 206 static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve) 207 { 208 int once = 1, count = 0; 209 unsigned long flags; 210 struct sk_buff *skb = NULL; 211 212 zap_completion_queue(); 213 repeat: 214 if (nr_skbs < MAX_SKBS) 215 refill_skbs(); 216 217 skb = alloc_skb(len, GFP_ATOMIC); 218 219 if (!skb) { 220 spin_lock_irqsave(&skb_list_lock, flags); 221 skb = skbs; 222 if (skb) { 223 skbs = skb->next; 224 skb->next = NULL; 225 nr_skbs--; 226 } 227 spin_unlock_irqrestore(&skb_list_lock, flags); 228 } 229 230 if(!skb) { 231 count++; 232 if (once && (count == 1000000)) { 233 printk("out of netpoll skbs!\n"); 234 once = 0; 235 } 236 netpoll_poll(np); 237 goto repeat; 238 } 239 240 atomic_set(&skb->users, 1); 241 skb_reserve(skb, reserve); 242 return skb; 243 } 244 245 static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) 246 { 247 int status; 248 249 repeat: 250 if(!np || !np->dev || !netif_running(np->dev)) { 251 __kfree_skb(skb); 252 return; 253 } 254 255 /* avoid recursion */ 256 if(np->poll_owner == smp_processor_id() || 257 np->dev->xmit_lock_owner == smp_processor_id()) { 258 if (np->drop) 259 np->drop(skb); 260 else 261 __kfree_skb(skb); 262 return; 263 } 264 265 spin_lock(&np->dev->xmit_lock); 266 np->dev->xmit_lock_owner = smp_processor_id(); 267 268 /* 269 * network drivers do not expect to be called if the queue is 270 * stopped. 271 */ 272 if (netif_queue_stopped(np->dev)) { 273 np->dev->xmit_lock_owner = -1; 274 spin_unlock(&np->dev->xmit_lock); 275 276 netpoll_poll(np); 277 goto repeat; 278 } 279 280 status = np->dev->hard_start_xmit(skb, np->dev); 281 np->dev->xmit_lock_owner = -1; 282 spin_unlock(&np->dev->xmit_lock); 283 284 /* transmit busy */ 285 if(status) { 286 netpoll_poll(np); 287 goto repeat; 288 } 289 } 290 291 void netpoll_send_udp(struct netpoll *np, const char *msg, int len) 292 { 293 int total_len, eth_len, ip_len, udp_len; 294 struct sk_buff *skb; 295 struct udphdr *udph; 296 struct iphdr *iph; 297 struct ethhdr *eth; 298 299 udp_len = len + sizeof(*udph); 300 ip_len = eth_len = udp_len + sizeof(*iph); 301 total_len = eth_len + ETH_HLEN + NET_IP_ALIGN; 302 303 skb = find_skb(np, total_len, total_len - len); 304 if (!skb) 305 return; 306 307 memcpy(skb->data, msg, len); 308 skb->len += len; 309 310 udph = (struct udphdr *) skb_push(skb, sizeof(*udph)); 311 udph->source = htons(np->local_port); 312 udph->dest = htons(np->remote_port); 313 udph->len = htons(udp_len); 314 udph->check = 0; 315 316 iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); 317 318 /* iph->version = 4; iph->ihl = 5; */ 319 put_unaligned(0x45, (unsigned char *)iph); 320 iph->tos = 0; 321 put_unaligned(htons(ip_len), &(iph->tot_len)); 322 iph->id = 0; 323 iph->frag_off = 0; 324 iph->ttl = 64; 325 iph->protocol = IPPROTO_UDP; 326 iph->check = 0; 327 put_unaligned(htonl(np->local_ip), &(iph->saddr)); 328 put_unaligned(htonl(np->remote_ip), &(iph->daddr)); 329 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 330 331 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); 332 333 eth->h_proto = htons(ETH_P_IP); 334 memcpy(eth->h_source, np->local_mac, 6); 335 memcpy(eth->h_dest, np->remote_mac, 6); 336 337 skb->dev = np->dev; 338 339 netpoll_send_skb(np, skb); 340 } 341 342 static void arp_reply(struct sk_buff *skb) 343 { 344 struct arphdr *arp; 345 unsigned char *arp_ptr; 346 int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; 347 u32 sip, tip; 348 struct sk_buff *send_skb; 349 struct netpoll *np = skb->dev->np; 350 351 if (!np) return; 352 353 /* No arp on this interface */ 354 if (skb->dev->flags & IFF_NOARP) 355 return; 356 357 if (!pskb_may_pull(skb, (sizeof(struct arphdr) + 358 (2 * skb->dev->addr_len) + 359 (2 * sizeof(u32))))) 360 return; 361 362 skb->h.raw = skb->nh.raw = skb->data; 363 arp = skb->nh.arph; 364 365 if ((arp->ar_hrd != htons(ARPHRD_ETHER) && 366 arp->ar_hrd != htons(ARPHRD_IEEE802)) || 367 arp->ar_pro != htons(ETH_P_IP) || 368 arp->ar_op != htons(ARPOP_REQUEST)) 369 return; 370 371 arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len; 372 memcpy(&sip, arp_ptr, 4); 373 arp_ptr += 4 + skb->dev->addr_len; 374 memcpy(&tip, arp_ptr, 4); 375 376 /* Should we ignore arp? */ 377 if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip)) 378 return; 379 380 size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4); 381 send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev), 382 LL_RESERVED_SPACE(np->dev)); 383 384 if (!send_skb) 385 return; 386 387 send_skb->nh.raw = send_skb->data; 388 arp = (struct arphdr *) skb_put(send_skb, size); 389 send_skb->dev = skb->dev; 390 send_skb->protocol = htons(ETH_P_ARP); 391 392 /* Fill the device header for the ARP frame */ 393 394 if (np->dev->hard_header && 395 np->dev->hard_header(send_skb, skb->dev, ptype, 396 np->remote_mac, np->local_mac, 397 send_skb->len) < 0) { 398 kfree_skb(send_skb); 399 return; 400 } 401 402 /* 403 * Fill out the arp protocol part. 404 * 405 * we only support ethernet device type, 406 * which (according to RFC 1390) should always equal 1 (Ethernet). 407 */ 408 409 arp->ar_hrd = htons(np->dev->type); 410 arp->ar_pro = htons(ETH_P_IP); 411 arp->ar_hln = np->dev->addr_len; 412 arp->ar_pln = 4; 413 arp->ar_op = htons(type); 414 415 arp_ptr=(unsigned char *)(arp + 1); 416 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); 417 arp_ptr += np->dev->addr_len; 418 memcpy(arp_ptr, &tip, 4); 419 arp_ptr += 4; 420 memcpy(arp_ptr, np->remote_mac, np->dev->addr_len); 421 arp_ptr += np->dev->addr_len; 422 memcpy(arp_ptr, &sip, 4); 423 424 netpoll_send_skb(np, send_skb); 425 } 426 427 int __netpoll_rx(struct sk_buff *skb) 428 { 429 int proto, len, ulen; 430 struct iphdr *iph; 431 struct udphdr *uh; 432 struct netpoll *np = skb->dev->np; 433 434 if (!np->rx_hook) 435 goto out; 436 if (skb->dev->type != ARPHRD_ETHER) 437 goto out; 438 439 /* check if netpoll clients need ARP */ 440 if (skb->protocol == __constant_htons(ETH_P_ARP) && 441 atomic_read(&trapped)) { 442 arp_reply(skb); 443 return 1; 444 } 445 446 proto = ntohs(eth_hdr(skb)->h_proto); 447 if (proto != ETH_P_IP) 448 goto out; 449 if (skb->pkt_type == PACKET_OTHERHOST) 450 goto out; 451 if (skb_shared(skb)) 452 goto out; 453 454 iph = (struct iphdr *)skb->data; 455 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 456 goto out; 457 if (iph->ihl < 5 || iph->version != 4) 458 goto out; 459 if (!pskb_may_pull(skb, iph->ihl*4)) 460 goto out; 461 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) 462 goto out; 463 464 len = ntohs(iph->tot_len); 465 if (skb->len < len || len < iph->ihl*4) 466 goto out; 467 468 if (iph->protocol != IPPROTO_UDP) 469 goto out; 470 471 len -= iph->ihl*4; 472 uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); 473 ulen = ntohs(uh->len); 474 475 if (ulen != len) 476 goto out; 477 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0) 478 goto out; 479 if (np->local_ip && np->local_ip != ntohl(iph->daddr)) 480 goto out; 481 if (np->remote_ip && np->remote_ip != ntohl(iph->saddr)) 482 goto out; 483 if (np->local_port && np->local_port != ntohs(uh->dest)) 484 goto out; 485 486 np->rx_hook(np, ntohs(uh->source), 487 (char *)(uh+1), 488 ulen - sizeof(struct udphdr)); 489 490 kfree_skb(skb); 491 return 1; 492 493 out: 494 if (atomic_read(&trapped)) { 495 kfree_skb(skb); 496 return 1; 497 } 498 499 return 0; 500 } 501 502 int netpoll_parse_options(struct netpoll *np, char *opt) 503 { 504 char *cur=opt, *delim; 505 506 if(*cur != '@') { 507 if ((delim = strchr(cur, '@')) == NULL) 508 goto parse_failed; 509 *delim=0; 510 np->local_port=simple_strtol(cur, NULL, 10); 511 cur=delim; 512 } 513 cur++; 514 printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); 515 516 if(*cur != '/') { 517 if ((delim = strchr(cur, '/')) == NULL) 518 goto parse_failed; 519 *delim=0; 520 np->local_ip=ntohl(in_aton(cur)); 521 cur=delim; 522 523 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", 524 np->name, HIPQUAD(np->local_ip)); 525 } 526 cur++; 527 528 if ( *cur != ',') { 529 /* parse out dev name */ 530 if ((delim = strchr(cur, ',')) == NULL) 531 goto parse_failed; 532 *delim=0; 533 strlcpy(np->dev_name, cur, sizeof(np->dev_name)); 534 cur=delim; 535 } 536 cur++; 537 538 printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); 539 540 if ( *cur != '@' ) { 541 /* dst port */ 542 if ((delim = strchr(cur, '@')) == NULL) 543 goto parse_failed; 544 *delim=0; 545 np->remote_port=simple_strtol(cur, NULL, 10); 546 cur=delim; 547 } 548 cur++; 549 printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); 550 551 /* dst ip */ 552 if ((delim = strchr(cur, '/')) == NULL) 553 goto parse_failed; 554 *delim=0; 555 np->remote_ip=ntohl(in_aton(cur)); 556 cur=delim+1; 557 558 printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", 559 np->name, HIPQUAD(np->remote_ip)); 560 561 if( *cur != 0 ) 562 { 563 /* MAC address */ 564 if ((delim = strchr(cur, ':')) == NULL) 565 goto parse_failed; 566 *delim=0; 567 np->remote_mac[0]=simple_strtol(cur, NULL, 16); 568 cur=delim+1; 569 if ((delim = strchr(cur, ':')) == NULL) 570 goto parse_failed; 571 *delim=0; 572 np->remote_mac[1]=simple_strtol(cur, NULL, 16); 573 cur=delim+1; 574 if ((delim = strchr(cur, ':')) == NULL) 575 goto parse_failed; 576 *delim=0; 577 np->remote_mac[2]=simple_strtol(cur, NULL, 16); 578 cur=delim+1; 579 if ((delim = strchr(cur, ':')) == NULL) 580 goto parse_failed; 581 *delim=0; 582 np->remote_mac[3]=simple_strtol(cur, NULL, 16); 583 cur=delim+1; 584 if ((delim = strchr(cur, ':')) == NULL) 585 goto parse_failed; 586 *delim=0; 587 np->remote_mac[4]=simple_strtol(cur, NULL, 16); 588 cur=delim+1; 589 np->remote_mac[5]=simple_strtol(cur, NULL, 16); 590 } 591 592 printk(KERN_INFO "%s: remote ethernet address " 593 "%02x:%02x:%02x:%02x:%02x:%02x\n", 594 np->name, 595 np->remote_mac[0], 596 np->remote_mac[1], 597 np->remote_mac[2], 598 np->remote_mac[3], 599 np->remote_mac[4], 600 np->remote_mac[5]); 601 602 return 0; 603 604 parse_failed: 605 printk(KERN_INFO "%s: couldn't parse config at %s!\n", 606 np->name, cur); 607 return -1; 608 } 609 610 int netpoll_setup(struct netpoll *np) 611 { 612 struct net_device *ndev = NULL; 613 struct in_device *in_dev; 614 615 np->poll_lock = SPIN_LOCK_UNLOCKED; 616 np->poll_owner = -1; 617 618 if (np->dev_name) 619 ndev = dev_get_by_name(np->dev_name); 620 if (!ndev) { 621 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", 622 np->name, np->dev_name); 623 return -1; 624 } 625 626 np->dev = ndev; 627 ndev->np = np; 628 629 if (!ndev->poll_controller) { 630 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", 631 np->name, np->dev_name); 632 goto release; 633 } 634 635 if (!netif_running(ndev)) { 636 unsigned long atmost, atleast; 637 638 printk(KERN_INFO "%s: device %s not up yet, forcing it\n", 639 np->name, np->dev_name); 640 641 rtnl_shlock(); 642 if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) { 643 printk(KERN_ERR "%s: failed to open %s\n", 644 np->name, np->dev_name); 645 rtnl_shunlock(); 646 goto release; 647 } 648 rtnl_shunlock(); 649 650 atleast = jiffies + HZ/10; 651 atmost = jiffies + 4*HZ; 652 while (!netif_carrier_ok(ndev)) { 653 if (time_after(jiffies, atmost)) { 654 printk(KERN_NOTICE 655 "%s: timeout waiting for carrier\n", 656 np->name); 657 break; 658 } 659 cond_resched(); 660 } 661 662 /* If carrier appears to come up instantly, we don't 663 * trust it and pause so that we don't pump all our 664 * queued console messages into the bitbucket. 665 */ 666 667 if (time_before(jiffies, atleast)) { 668 printk(KERN_NOTICE "%s: carrier detect appears" 669 " untrustworthy, waiting 4 seconds\n", 670 np->name); 671 msleep(4000); 672 } 673 } 674 675 if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr) 676 memcpy(np->local_mac, ndev->dev_addr, 6); 677 678 if (!np->local_ip) { 679 rcu_read_lock(); 680 in_dev = __in_dev_get(ndev); 681 682 if (!in_dev || !in_dev->ifa_list) { 683 rcu_read_unlock(); 684 printk(KERN_ERR "%s: no IP address for %s, aborting\n", 685 np->name, np->dev_name); 686 goto release; 687 } 688 689 np->local_ip = ntohl(in_dev->ifa_list->ifa_local); 690 rcu_read_unlock(); 691 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", 692 np->name, HIPQUAD(np->local_ip)); 693 } 694 695 if(np->rx_hook) 696 np->rx_flags = NETPOLL_RX_ENABLED; 697 698 return 0; 699 700 release: 701 ndev->np = NULL; 702 np->dev = NULL; 703 dev_put(ndev); 704 return -1; 705 } 706 707 void netpoll_cleanup(struct netpoll *np) 708 { 709 if (np->dev) 710 np->dev->np = NULL; 711 dev_put(np->dev); 712 np->dev = NULL; 713 } 714 715 int netpoll_trap(void) 716 { 717 return atomic_read(&trapped); 718 } 719 720 void netpoll_set_trap(int trap) 721 { 722 if (trap) 723 atomic_inc(&trapped); 724 else 725 atomic_dec(&trapped); 726 } 727 728 EXPORT_SYMBOL(netpoll_set_trap); 729 EXPORT_SYMBOL(netpoll_trap); 730 EXPORT_SYMBOL(netpoll_parse_options); 731 EXPORT_SYMBOL(netpoll_setup); 732 EXPORT_SYMBOL(netpoll_cleanup); 733 EXPORT_SYMBOL(netpoll_send_udp); 734 EXPORT_SYMBOL(netpoll_poll); 735 EXPORT_SYMBOL(netpoll_queue); 736