11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Pseudo-driver for the loopback interface. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Version: @(#)loopback.c 1.0.4b 08/16/93 91da177e4SLinus Torvalds * 1002c30a84SJesper Juhl * Authors: Ross Biro 111da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 121da177e4SLinus Torvalds * Donald Becker, <becker@scyld.com> 131da177e4SLinus Torvalds * 141da177e4SLinus Torvalds * Alan Cox : Fixed oddments for NET3.014 151da177e4SLinus Torvalds * Alan Cox : Rejig for NET3.029 snap #3 161da177e4SLinus Torvalds * Alan Cox : Fixed NET3.029 bugs and sped up 171da177e4SLinus Torvalds * Larry McVoy : Tiny tweak to double performance 181da177e4SLinus Torvalds * Alan Cox : Backed out LMV's tweak - the linux mm 191da177e4SLinus Torvalds * can't take it... 201da177e4SLinus Torvalds * Michael Griffith: Don't bother computing the checksums 211da177e4SLinus Torvalds * on packets received on the loopback 221da177e4SLinus Torvalds * interface. 231da177e4SLinus Torvalds * Alexey Kuznetsov: Potential hang under some extreme 241da177e4SLinus Torvalds * cases removed. 251da177e4SLinus Torvalds * 261da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 271da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 281da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 291da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 301da177e4SLinus Torvalds */ 311da177e4SLinus Torvalds #include <linux/kernel.h> 321da177e4SLinus Torvalds #include <linux/jiffies.h> 331da177e4SLinus Torvalds #include <linux/module.h> 341da177e4SLinus Torvalds #include <linux/interrupt.h> 351da177e4SLinus Torvalds #include <linux/fs.h> 361da177e4SLinus Torvalds #include <linux/types.h> 371da177e4SLinus Torvalds #include <linux/string.h> 381da177e4SLinus Torvalds #include <linux/socket.h> 391da177e4SLinus Torvalds #include <linux/errno.h> 401da177e4SLinus Torvalds #include <linux/fcntl.h> 411da177e4SLinus Torvalds #include <linux/in.h> 421da177e4SLinus Torvalds #include <linux/init.h> 431da177e4SLinus Torvalds 441da177e4SLinus Torvalds #include <asm/system.h> 451da177e4SLinus Torvalds #include <asm/uaccess.h> 461da177e4SLinus Torvalds #include <asm/io.h> 471da177e4SLinus Torvalds 481da177e4SLinus Torvalds #include <linux/inet.h> 491da177e4SLinus Torvalds #include <linux/netdevice.h> 501da177e4SLinus Torvalds #include <linux/etherdevice.h> 511da177e4SLinus Torvalds #include <linux/skbuff.h> 521da177e4SLinus Torvalds #include <linux/ethtool.h> 531da177e4SLinus Torvalds #include <net/sock.h> 541da177e4SLinus Torvalds #include <net/checksum.h> 551da177e4SLinus Torvalds #include <linux/if_ether.h> /* For the statistics structure. */ 561da177e4SLinus Torvalds #include <linux/if_arp.h> /* For ARPHRD_ETHER */ 571da177e4SLinus Torvalds #include <linux/ip.h> 581da177e4SLinus Torvalds #include <linux/tcp.h> 591da177e4SLinus Torvalds #include <linux/percpu.h> 602774c7abSEric W. Biederman #include <net/net_namespace.h> 611da177e4SLinus Torvalds 625175c378SEric Dumazet struct pcpu_lstats { 635175c378SEric Dumazet unsigned long packets; 645175c378SEric Dumazet unsigned long bytes; 655175c378SEric Dumazet }; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16) 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds /* KISS: just allocate small chunks and copy bits. 701da177e4SLinus Torvalds * 711da177e4SLinus Torvalds * So, in fact, this is documentation, explaining what we expect 721da177e4SLinus Torvalds * of largesending device modulo TCP checksum, which is ignored for loopback. 731da177e4SLinus Torvalds */ 741da177e4SLinus Torvalds 75d2ae1d2fSChuck Ebbert #ifdef LOOPBACK_TSO 761da177e4SLinus Torvalds static void emulate_large_send_offload(struct sk_buff *skb) 771da177e4SLinus Torvalds { 78eddc9ec5SArnaldo Carvalho de Melo struct iphdr *iph = ip_hdr(skb); 79d56f90a7SArnaldo Carvalho de Melo struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) + 80d56f90a7SArnaldo Carvalho de Melo (iph->ihl * 4)); 811da177e4SLinus Torvalds unsigned int doffset = (iph->ihl + th->doff) * 4; 827967168cSHerbert Xu unsigned int mtu = skb_shinfo(skb)->gso_size + doffset; 831da177e4SLinus Torvalds unsigned int offset = 0; 841da177e4SLinus Torvalds u32 seq = ntohl(th->seq); 851da177e4SLinus Torvalds u16 id = ntohs(iph->id); 861da177e4SLinus Torvalds 871da177e4SLinus Torvalds while (offset + doffset < skb->len) { 881da177e4SLinus Torvalds unsigned int frag_size = min(mtu, skb->len - offset) - doffset; 891da177e4SLinus Torvalds struct sk_buff *nskb = alloc_skb(mtu + 32, GFP_ATOMIC); 901da177e4SLinus Torvalds 911da177e4SLinus Torvalds if (!nskb) 921da177e4SLinus Torvalds break; 931da177e4SLinus Torvalds skb_reserve(nskb, 32); 9448d49d0cSArnaldo Carvalho de Melo skb_set_mac_header(nskb, -ETH_HLEN); 95c1d2bbe1SArnaldo Carvalho de Melo skb_reset_network_header(nskb); 96eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(nskb); 9727d7ff46SArnaldo Carvalho de Melo skb_copy_to_linear_data(nskb, skb_network_header(skb), 9827d7ff46SArnaldo Carvalho de Melo doffset); 991da177e4SLinus Torvalds if (skb_copy_bits(skb, 1001da177e4SLinus Torvalds doffset + offset, 1011da177e4SLinus Torvalds nskb->data + doffset, 1021da177e4SLinus Torvalds frag_size)) 1031da177e4SLinus Torvalds BUG(); 1041da177e4SLinus Torvalds skb_put(nskb, doffset + frag_size); 1051da177e4SLinus Torvalds nskb->ip_summed = CHECKSUM_UNNECESSARY; 1061da177e4SLinus Torvalds nskb->dev = skb->dev; 1071da177e4SLinus Torvalds nskb->priority = skb->priority; 1081da177e4SLinus Torvalds nskb->protocol = skb->protocol; 1091da177e4SLinus Torvalds nskb->dst = dst_clone(skb->dst); 1101da177e4SLinus Torvalds memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); 1111da177e4SLinus Torvalds nskb->pkt_type = skb->pkt_type; 1121da177e4SLinus Torvalds 113d56f90a7SArnaldo Carvalho de Melo th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4); 1141da177e4SLinus Torvalds iph->tot_len = htons(frag_size + doffset); 1151da177e4SLinus Torvalds iph->id = htons(id); 1161da177e4SLinus Torvalds iph->check = 0; 1171da177e4SLinus Torvalds iph->check = ip_fast_csum((unsigned char *) iph, iph->ihl); 1181da177e4SLinus Torvalds th->seq = htonl(seq); 1191da177e4SLinus Torvalds if (offset + doffset + frag_size < skb->len) 1201da177e4SLinus Torvalds th->fin = th->psh = 0; 1211da177e4SLinus Torvalds netif_rx(nskb); 1221da177e4SLinus Torvalds offset += frag_size; 1231da177e4SLinus Torvalds seq += frag_size; 1241da177e4SLinus Torvalds id++; 1251da177e4SLinus Torvalds } 1261da177e4SLinus Torvalds 1271da177e4SLinus Torvalds dev_kfree_skb(skb); 1281da177e4SLinus Torvalds } 129d2ae1d2fSChuck Ebbert #endif /* LOOPBACK_TSO */ 1301da177e4SLinus Torvalds 1311da177e4SLinus Torvalds /* 1321da177e4SLinus Torvalds * The higher levels take care of making this non-reentrant (it's 1331da177e4SLinus Torvalds * called with bh's disabled). 1341da177e4SLinus Torvalds */ 1351da177e4SLinus Torvalds static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) 1361da177e4SLinus Torvalds { 1375f6d88b9SEric W. Biederman struct pcpu_lstats *pcpu_lstats, *lb_stats; 1381da177e4SLinus Torvalds 1391da177e4SLinus Torvalds skb_orphan(skb); 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds skb->protocol = eth_type_trans(skb,dev); 1421da177e4SLinus Torvalds #ifndef LOOPBACK_MUST_CHECKSUM 1431da177e4SLinus Torvalds skb->ip_summed = CHECKSUM_UNNECESSARY; 1441da177e4SLinus Torvalds #endif 1451da177e4SLinus Torvalds 146d2ae1d2fSChuck Ebbert #ifdef LOOPBACK_TSO 14789114afdSHerbert Xu if (skb_is_gso(skb)) { 1481da177e4SLinus Torvalds BUG_ON(skb->protocol != htons(ETH_P_IP)); 149eddc9ec5SArnaldo Carvalho de Melo BUG_ON(ip_hdr(skb)->protocol != IPPROTO_TCP); 1501da177e4SLinus Torvalds 1511da177e4SLinus Torvalds emulate_large_send_offload(skb); 1521da177e4SLinus Torvalds return 0; 1531da177e4SLinus Torvalds } 154d2ae1d2fSChuck Ebbert #endif 1551da177e4SLinus Torvalds dev->last_rx = jiffies; 1561da177e4SLinus Torvalds 1579e0db4b1SEric W. Biederman /* it's OK to use per_cpu_ptr() because BHs are off */ 1585f6d88b9SEric W. Biederman pcpu_lstats = netdev_priv(dev); 1595f6d88b9SEric W. Biederman lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id()); 1605175c378SEric Dumazet lb_stats->bytes += skb->len; 1615175c378SEric Dumazet lb_stats->packets++; 1621da177e4SLinus Torvalds 1631da177e4SLinus Torvalds netif_rx(skb); 1641da177e4SLinus Torvalds 16558f53974SEric Dumazet return 0; 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds 1681da177e4SLinus Torvalds static struct net_device_stats *get_stats(struct net_device *dev) 1691da177e4SLinus Torvalds { 1705f6d88b9SEric W. Biederman const struct pcpu_lstats *pcpu_lstats; 17133036807SEric Dumazet struct net_device_stats *stats = &dev->stats; 1725175c378SEric Dumazet unsigned long bytes = 0; 1735175c378SEric Dumazet unsigned long packets = 0; 1741da177e4SLinus Torvalds int i; 1751da177e4SLinus Torvalds 1765f6d88b9SEric W. Biederman pcpu_lstats = netdev_priv(dev); 1770fed4846SKAMEZAWA Hiroyuki for_each_possible_cpu(i) { 1785175c378SEric Dumazet const struct pcpu_lstats *lb_stats; 1791da177e4SLinus Torvalds 1805f6d88b9SEric W. Biederman lb_stats = per_cpu_ptr(pcpu_lstats, i); 1815175c378SEric Dumazet bytes += lb_stats->bytes; 1825175c378SEric Dumazet packets += lb_stats->packets; 1831da177e4SLinus Torvalds } 1845175c378SEric Dumazet stats->rx_packets = packets; 1855175c378SEric Dumazet stats->tx_packets = packets; 1865175c378SEric Dumazet stats->rx_bytes = bytes; 1875175c378SEric Dumazet stats->tx_bytes = bytes; 1881da177e4SLinus Torvalds return stats; 1891da177e4SLinus Torvalds } 1901da177e4SLinus Torvalds 1917fa6b066SStephen Hemminger static u32 always_on(struct net_device *dev) 1921da177e4SLinus Torvalds { 1931da177e4SLinus Torvalds return 1; 1941da177e4SLinus Torvalds } 1951da177e4SLinus Torvalds 1967282d491SJeff Garzik static const struct ethtool_ops loopback_ethtool_ops = { 1977fa6b066SStephen Hemminger .get_link = always_on, 1981da177e4SLinus Torvalds .set_tso = ethtool_op_set_tso, 1997fa6b066SStephen Hemminger .get_tx_csum = always_on, 2007fa6b066SStephen Hemminger .get_sg = always_on, 2017fa6b066SStephen Hemminger .get_rx_csum = always_on, 2021da177e4SLinus Torvalds }; 2031da177e4SLinus Torvalds 2045f6d88b9SEric W. Biederman static int loopback_dev_init(struct net_device *dev) 2055f6d88b9SEric W. Biederman { 2065f6d88b9SEric W. Biederman struct pcpu_lstats *lstats; 2075f6d88b9SEric W. Biederman 2085f6d88b9SEric W. Biederman lstats = alloc_percpu(struct pcpu_lstats); 2095f6d88b9SEric W. Biederman if (!lstats) 2105f6d88b9SEric W. Biederman return -ENOMEM; 2115f6d88b9SEric W. Biederman 2125f6d88b9SEric W. Biederman dev->priv = lstats; 2135f6d88b9SEric W. Biederman return 0; 2145f6d88b9SEric W. Biederman } 2155f6d88b9SEric W. Biederman 2165f6d88b9SEric W. Biederman static void loopback_dev_free(struct net_device *dev) 2175f6d88b9SEric W. Biederman { 2185f6d88b9SEric W. Biederman struct pcpu_lstats *lstats = netdev_priv(dev); 2195f6d88b9SEric W. Biederman 2205f6d88b9SEric W. Biederman free_percpu(lstats); 2215f6d88b9SEric W. Biederman free_netdev(dev); 2225f6d88b9SEric W. Biederman } 2235f6d88b9SEric W. Biederman 2247fa6b066SStephen Hemminger /* 2259e0db4b1SEric W. Biederman * The loopback device is special. There is only one instance 2269e0db4b1SEric W. Biederman * per network namespace. 2277fa6b066SStephen Hemminger */ 228854d8363SDaniel Lezcano static void loopback_setup(struct net_device *dev) 229854d8363SDaniel Lezcano { 230854d8363SDaniel Lezcano dev->get_stats = &get_stats; 231854d8363SDaniel Lezcano dev->mtu = (16 * 1024) + 20 + 20 + 12; 232854d8363SDaniel Lezcano dev->hard_start_xmit = loopback_xmit; 233854d8363SDaniel Lezcano dev->hard_header_len = ETH_HLEN; /* 14 */ 234854d8363SDaniel Lezcano dev->addr_len = ETH_ALEN; /* 6 */ 235854d8363SDaniel Lezcano dev->tx_queue_len = 0; 236854d8363SDaniel Lezcano dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ 237854d8363SDaniel Lezcano dev->flags = IFF_LOOPBACK; 238854d8363SDaniel Lezcano dev->features = NETIF_F_SG | NETIF_F_FRAGLIST 239d2ae1d2fSChuck Ebbert #ifdef LOOPBACK_TSO 240d2ae1d2fSChuck Ebbert | NETIF_F_TSO 241d2ae1d2fSChuck Ebbert #endif 242854d8363SDaniel Lezcano | NETIF_F_NO_CSUM 243854d8363SDaniel Lezcano | NETIF_F_HIGHDMA 244ce286d32SEric W. Biederman | NETIF_F_LLTX 245*2d2c54e3SEmil Medve | NETIF_F_NETNS_LOCAL; 246854d8363SDaniel Lezcano dev->ethtool_ops = &loopback_ethtool_ops; 2473b04dddeSStephen Hemminger dev->header_ops = ð_header_ops; 2485f6d88b9SEric W. Biederman dev->init = loopback_dev_init; 2495f6d88b9SEric W. Biederman dev->destructor = loopback_dev_free; 250854d8363SDaniel Lezcano } 251de3cb747SDaniel Lezcano 25222783649SRalf Baechle /* Setup and register the loopback device. */ 2534665079cSPavel Emelyanov static __net_init int loopback_net_init(struct net *net) 2541da177e4SLinus Torvalds { 255854d8363SDaniel Lezcano struct net_device *dev; 256854d8363SDaniel Lezcano int err; 257aeed9e82SHerbert Xu 258854d8363SDaniel Lezcano err = -ENOMEM; 259854d8363SDaniel Lezcano dev = alloc_netdev(0, "lo", loopback_setup); 260854d8363SDaniel Lezcano if (!dev) 261854d8363SDaniel Lezcano goto out; 262854d8363SDaniel Lezcano 2632774c7abSEric W. Biederman dev->nd_net = net; 264854d8363SDaniel Lezcano err = register_netdev(dev); 265854d8363SDaniel Lezcano if (err) 266854d8363SDaniel Lezcano goto out_free_netdev; 267854d8363SDaniel Lezcano 2682774c7abSEric W. Biederman net->loopback_dev = dev; 2699d6dda32SPavel Emelyanov return 0; 270854d8363SDaniel Lezcano 2711da177e4SLinus Torvalds 272854d8363SDaniel Lezcano out_free_netdev: 273854d8363SDaniel Lezcano free_netdev(dev); 2749d6dda32SPavel Emelyanov out: 2759d6dda32SPavel Emelyanov if (net == &init_net) 2769d6dda32SPavel Emelyanov panic("loopback: Failed to register netdevice: %d\n", err); 2779d6dda32SPavel Emelyanov return err; 278854d8363SDaniel Lezcano } 27960903f2cSAdrian Bunk 2804665079cSPavel Emelyanov static __net_exit void loopback_net_exit(struct net *net) 2812774c7abSEric W. Biederman { 2822774c7abSEric W. Biederman struct net_device *dev = net->loopback_dev; 283854d8363SDaniel Lezcano 2842774c7abSEric W. Biederman unregister_netdev(dev); 2852774c7abSEric W. Biederman } 2862774c7abSEric W. Biederman 287022cbae6SDenis V. Lunev static struct pernet_operations __net_initdata loopback_net_ops = { 2882774c7abSEric W. Biederman .init = loopback_net_init, 2892774c7abSEric W. Biederman .exit = loopback_net_exit, 2902774c7abSEric W. Biederman }; 2912774c7abSEric W. Biederman 2922774c7abSEric W. Biederman static int __init loopback_init(void) 2932774c7abSEric W. Biederman { 2942774c7abSEric W. Biederman return register_pernet_device(&loopback_net_ops); 2952774c7abSEric W. Biederman } 2962774c7abSEric W. Biederman 297070ac3a2SDenis V. Lunev /* Loopback is special. It should be initialized before any other network 298070ac3a2SDenis V. Lunev * device and network subsystem. 299070ac3a2SDenis V. Lunev */ 3002774c7abSEric W. Biederman fs_initcall(loopback_init); 301