/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *	Alexey Kuznetsov	:	Untied from IPv4 stack.
 *	Cyrus Durgin		:	Fixed kerneld for kmod.
 *	Michal Ostrowski	:	Module initialization cleanup.
 *	Ulises Alonso		:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *	Eric Biederman		:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *	Johann Baudy		:	Added TX RING.
 *	Chetan Loke		:	Implemented TPACKET_V3 block abstraction
 *					layer.
 *					Copyright (C) 2011, <lokec@ccs.neu.edu>
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - If a device has no dev->hard_header routine, it adds and removes the ll
     header inside itself. In this case the ll header is invisible outside of
     the device, but higher levels should still reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header will
     not fit into the reserved space (tunnels); others are silly (PPP).
   - A packet socket receives packets with the ll header already pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header != NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header == NULL
   mac_header -> UNKNOWN position. It very likely points to the ll header.
                 PPP does this, which is wrong because it introduces
                 asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header == NULL
   mac_header -> data. The ll header is still not built!
   data       -> data

In summary:
  If dev->hard_header == NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */

/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);


#define V3_ALIGNMENT	(8)

#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

/* kbdq - kernel block descriptor queue */
struct tpacket_kbdq_core {
	struct pgv	*pkbdq;
	unsigned int	feature_req_word;
	unsigned int	hdrlen;
	unsigned char	reset_pending_on_curr_blk;
	unsigned char	delete_blk_timer;
	unsigned short	kactive_blk_num;
	unsigned short	blk_sizeof_priv;

	/* last_kactive_blk_num:
	 * trick to see if user-space has caught up
	 * in order to avoid refreshing timer when every single pkt arrives.
	 */
	unsigned short	last_kactive_blk_num;

	char		*pkblk_start;
	char		*pkblk_end;
	int		kblk_size;
	unsigned int	knum_blocks;
	uint64_t	knxt_seq_num;
	char		*prev;
	char		*nxt_offset;
	struct sk_buff	*skb;

	atomic_t	blk_fill_in_prog;

	/* Default is set to 8ms */
#define DEFAULT_PRB_RETIRE_TOV	(8)

	unsigned short	retire_blk_tov;
	unsigned short	version;
	unsigned long	tov_in_jiffies;

	/* timer to retire an outstanding block */
	struct timer_list retire_blk_timer;
};

#define PGV_FROM_VMALLOC 1
struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv		*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	struct tpacket_kbdq_core	prb_bdqc;
	atomic_t		pending;
};

#define BLOCK_STATUS(x)		((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x)		((x)->offset_to_priv)
#define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
			struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
			struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
		struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
		struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(unsigned long);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_init_blk_timer(struct packet_sock *,
		struct tpacket_kbdq_core *,
		void (*func) (unsigned long));
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);

struct packet_fanout;
struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct packet_fanout	*fanout;
	struct tpacket_stats	stats;
	union  tpacket_stats_u	stats_u;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

#define PACKET_FANOUT_MAX	256

struct packet_fanout {
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	unsigned int		num_members;
	u16			id;
	u8			type;
	u8			defrag;
	atomic_t		rr_cur;
	struct list_head	list;
	struct sock		*arr[PACKET_FANOUT_MAX];
	spinlock_t		lock;
	atomic_t		sk_ref;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};
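
/*
 * The ring and fanout structures above are driven from user space via
 * setsockopt() on an AF_PACKET socket.  The fragment below is an
 * illustrative sketch only -- it is not part of this file and is not built
 * with the kernel.  It shows one plausible way a user-space reader could
 * request a TPACKET_V3 RX ring and map it, assuming "fd" is an AF_PACKET
 * socket; the block/frame sizes and the 60 ms retire timeout are arbitrary
 * example values.
 *
 *	int ver = TPACKET_V3;
 *	struct tpacket_req3 req;
 *	void *ring;
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *
 *	memset(&req, 0, sizeof(req));
 *	req.tp_block_size = 1 << 20;		(1 MB blocks, page aligned)
 *	req.tp_block_nr = 8;
 *	req.tp_frame_size = 2048;
 *	req.tp_frame_nr = (req.tp_block_size / req.tp_frame_size) * req.tp_block_nr;
 *	req.tp_retire_blk_tov = 60;		(in ms; 0 lets the kernel derive it)
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *
 *	ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
 *		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */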
struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
	((x)->kactive_blk_num+1) : 0)

static struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);

/* register_prot_hook must be invoked with the po->bind_lock held,
 * or from a context in which asynchronous accesses to the packet
 * socket are not possible (packet_create()).
 */
static void register_prot_hook(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	if (!po->running) {
		if (po->fanout)
			__fanout_link(sk, po);
		else
			dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}
}

/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
 * held.  If the sync parameter is true, we will temporarily drop
 * the po->bind_lock and do a synchronize_net to make sure no
 * asynchronous packet processing paths still refer to the elements
 * of po->prot_hook.  If the sync parameter is false, it is the
 * caller's responsibility to take care of this.
 */
static void __unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	po->running = 0;
	if (po->fanout)
		__fanout_unlink(sk, po);
	else
		__dev_remove_pack(&po->prot_hook);
	__sock_put(sk);

	if (sync) {
		spin_unlock(&po->bind_lock);
		synchronize_net();
		spin_lock(&po->bind_lock);
	}
}

static void unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	if (po->running)
		__unregister_prot_hook(sk, sync);
}

static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	case TPACKET_V3:
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	case TPACKET_V3:
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
		return 0;
	}
}

static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	del_timer_sync(&pkc->retire_blk_timer);
}

static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
		int tx_ring,
		struct sk_buff_head *rb_queue)
{
	struct tpacket_kbdq_core *pkc;

	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;

	spin_lock(&rb_queue->lock);
	pkc->delete_blk_timer = 1;
	spin_unlock(&rb_queue->lock);

	prb_del_retire_blk_timer(pkc);
}

static void prb_init_blk_timer(struct packet_sock *po,
		struct tpacket_kbdq_core *pkc,
		void (*func) (unsigned long))
{
	init_timer(&pkc->retire_blk_timer);
	pkc->retire_blk_timer.data = (long)po;
	pkc->retire_blk_timer.function = func;
	pkc->retire_blk_timer.expires = jiffies;
}

static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
{
	struct tpacket_kbdq_core *pkc;

	if (tx_ring)
		BUG();

	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
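	/* tx_ring was rejected above, so pkc always points at the RX ring's
	 * block descriptor queue; only the RX side arms the block retire
	 * timer set up below.
	 */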
	prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
}

static int prb_calc_retire_blk_tmo(struct packet_sock *po,
				int blk_size_in_bytes)
{
	struct net_device *dev;
	unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
	struct ethtool_cmd ecmd;
	int err;
	u32 speed;

	rtnl_lock();
	dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
	if (unlikely(!dev)) {
		rtnl_unlock();
		return DEFAULT_PRB_RETIRE_TOV;
	}
	err = __ethtool_get_settings(dev, &ecmd);
	speed = ethtool_cmd_speed(&ecmd);
	rtnl_unlock();
	if (!err) {
		/*
		 * If the link speed is so slow you don't really
		 * need to worry about perf anyways
		 */
		if (speed < SPEED_1000 || speed == SPEED_UNKNOWN) {
			return DEFAULT_PRB_RETIRE_TOV;
		} else {
			msec = 1;
			div = speed / 1000;
		}
	}

	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);

	if (div)
		mbits /= div;

	tmo = mbits * msec;

	if (div)
		return tmo+1;
	return tmo;
}

static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
			union tpacket_req_u *req_u)
{
	p1->feature_req_word = req_u->req3.tp_feature_req_word;
}

static void init_prb_bdqc(struct packet_sock *po,
			struct packet_ring_buffer *rb,
			struct pgv *pg_vec,
			union tpacket_req_u *req_u, int tx_ring)
{
	struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
	struct tpacket_block_desc *pbd;

	memset(p1, 0x0, sizeof(*p1));

	p1->knxt_seq_num = 1;
	p1->pkbdq = pg_vec;
	pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
	p1->pkblk_start	= pg_vec[0].buffer;
	p1->kblk_size = req_u->req3.tp_block_size;
	p1->knum_blocks	= req_u->req3.tp_block_nr;
	p1->hdrlen = po->tp_hdrlen;
	p1->version = po->tp_version;
	p1->last_kactive_blk_num = 0;
	po->stats_u.stats3.tp_freeze_q_cnt = 0;
	if (req_u->req3.tp_retire_blk_tov)
		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
	else
		p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
						req_u->req3.tp_block_size);
	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;

	prb_init_ft_ops(p1, req_u);
	prb_setup_retire_blk_timer(po, tx_ring);
	prb_open_block(p1, pbd);
}

/* Do NOT update the last_blk_num first.
 * Assumes sk_buff_head lock is held.
 */
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	mod_timer(&pkc->retire_blk_timer,
			jiffies + pkc->tov_in_jiffies);
	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
}

/*
 * Timer logic:
 * 1) We refresh the timer only when we open a block.
 *    By doing this we don't waste cycles refreshing the timer
 *    on a packet-by-packet basis.
 *
 * With a 1MB block-size, on a 1Gbps line, it will take
 * i) ~8 ms to fill a block + ii) memcpy etc.
 * In this cut we are not accounting for the memcpy time.
 *
 * So, if the user sets the 'tmo' to 10ms then the timer
 * will never fire while the block is still getting filled
 * (which is what we want). However, the user could choose
 * to close a block early and that's fine.
 *
 * But when the timer does fire, we check whether or not to refresh it.
 * Since the tmo granularity is in msecs, it is not too expensive
 * to refresh the timer, let's say every '8' msecs.
 * Either the user can set the 'tmo' or we can derive it based on
 * a) line-speed and b) block-size.
 * prb_calc_retire_blk_tmo() calculates the tmo.
 *
 */
static void prb_retire_rx_blk_timer_expired(unsigned long data)
{
	struct packet_sock *po = (struct packet_sock *)data;
	struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
	unsigned int frozen;
	struct tpacket_block_desc *pbd;

	spin_lock(&po->sk.sk_receive_queue.lock);

	frozen = prb_queue_frozen(pkc);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	if (unlikely(pkc->delete_blk_timer))
		goto out;

	/* We only need to plug the race when the block is partially filled.
	 * tpacket_rcv:
	 *		lock(); increment BLOCK_NUM_PKTS; unlock()
	 *		copy_bits() is in progress ...
	 *		timer fires on other cpu:
	 *		we can't retire the current block because copy_bits
	 *		is in progress.
	 *
	 */
	if (BLOCK_NUM_PKTS(pbd)) {
		while (atomic_read(&pkc->blk_fill_in_prog)) {
			/* Waiting for skb_copy_bits to finish... */
			cpu_relax();
		}
	}

	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
		if (!frozen) {
			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
			if (!prb_dispatch_next_block(pkc, po))
				goto refresh_timer;
			else
				goto out;
		} else {
			/* Case 1. Queue was frozen because user-space was
			 *	   lagging behind.
			 */
			if (prb_curr_blk_in_use(pkc, pbd)) {
				/*
				 * Ok, user-space is still behind.
				 * So just refresh the timer.
				 */
				goto refresh_timer;
			} else {
				/* Case 2. Queue was frozen, user-space caught up,
				 * now the link went idle && the timer fired.
				 * We don't have a block to close. So we open this
				 * block and restart the timer.
				 * Opening a block thaws the queue and restarts the
				 * timer; thawing/timer-refresh is a side effect.
				 */
				prb_open_block(pkc, pbd);
				goto out;
			}
		}
	}

refresh_timer:
	_prb_refresh_rx_retire_blk_timer(pkc);

out:
	spin_unlock(&po->sk.sk_receive_queue.lock);
}

static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1, __u32 status)
{
	/* Flush everything minus the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	u8 *start, *end;

	start = (u8 *)pbd1;

	/* Skip the block header (we know the header WILL fit in 4K) */
	start += PAGE_SIZE;

	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
	for (; start < end; start += PAGE_SIZE)
		flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif
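	/* The data pages are flushed above before the status word below is
	 * published: user space polls block_status, so it must not observe
	 * the new status before the block contents are visible.
	 */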
	/* Now update the block status. */

	BLOCK_STATUS(pbd1) = status;

	/* Flush the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	start = (u8 *)pbd1;
	flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif
}

/*
 * Side effect:
 *
 * 1) flush the block
 * 2) Increment active_blk_num
 *
 * Note: We DON'T refresh the timer on purpose.
 *	Because almost always the next block will be opened.
 */
static void prb_close_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1,
		struct packet_sock *po, unsigned int stat)
{
	__u32 status = TP_STATUS_USER | stat;

	struct tpacket3_hdr *last_pkt;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;

	if (po->stats.tp_drops)
		status |= TP_STATUS_LOSING;

	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
	last_pkt->tp_next_offset = 0;

	/* Get the ts of the last pkt */
	if (BLOCK_NUM_PKTS(pbd1)) {
		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
	} else {
		/* Ok, we tmo'd - so get the current time */
		struct timespec ts;
		getnstimeofday(&ts);
		h1->ts_last_pkt.ts_sec = ts.tv_sec;
		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
	}

	smp_wmb();

	/* Flush the block */
	prb_flush_block(pkc1, pbd1, status);

	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
}

static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
{
	pkc->reset_pending_on_curr_blk = 0;
}

/*
 * Side effect of opening a block:
 *
 * 1) prb_queue is thawed.
 * 2) retire_blk_timer is refreshed.
 *
 */
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1)
{
	struct timespec ts;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;

	smp_rmb();

	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {

		/* We could have just memset this but we will lose the
		 * flexibility of making the priv area sticky
		 */
		BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
		BLOCK_NUM_PKTS(pbd1) = 0;
		BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
		getnstimeofday(&ts);
		h1->ts_first_pkt.ts_sec = ts.tv_sec;
		h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
		pkc1->pkblk_start = (char *)pbd1;
		pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
		BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
		BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
		pbd1->version = pkc1->version;
		pkc1->prev = pkc1->nxt_offset;
		pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
		prb_thaw_queue(pkc1);
		_prb_refresh_rx_retire_blk_timer(pkc1);

		smp_wmb();

		return;
	}

	WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
		pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
	dump_stack();
	BUG();
}

/*
 * Queue freeze logic:
 * 1) Assume tp_block_nr = 8 blocks.
 * 2) At time 't0', user opens Rx ring.
 * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
 * 4) user-space is either sleeping or processing block '0'.
 * 5) tpacket_rcv is currently filling block '7'; since there is no space left,
 *    it will close block-7, loop around and try to fill block '0'.
 *    call-flow:
 *    __packet_lookup_frame_in_block
 *      prb_retire_current_block()
 *      prb_dispatch_next_block()
 *        |->(BLOCK_STATUS == USER) evaluates to true
 *    5.1) Since block-0 is currently in-use, we just freeze the queue.
 * 6) Now there are two cases:
 *    6.1) Link goes idle right after the queue is frozen.
 *         But remember, the last open_block() refreshed the timer.
 *         When this timer expires, it will refresh itself so that we can
 *         re-open block-0 in the near future.
 *    6.2) Link is busy and keeps on receiving packets. This is a simple
 *         case and __packet_lookup_frame_in_block will check if block-0
 *         is free and can now be re-used.
 */
static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
			struct packet_sock *po)
{
	pkc->reset_pending_on_curr_blk = 1;
	po->stats_u.stats3.tp_freeze_q_cnt++;
}

#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))

/*
 * If the next block is free then we will dispatch it
 * and return a good offset.
 * Else, we will freeze the queue.
 * So, caller must check the return value.
 */
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po)
{
	struct tpacket_block_desc *pbd;

	smp_rmb();

	/* 1. Get current block num */
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* 2. If this block is currently in_use then freeze the queue */
	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
		prb_freeze_queue(pkc, po);
		return NULL;
	}

	/*
	 * 3.
	 * open this block and return the offset where the first packet
	 * needs to get stored.
	 */
	prb_open_block(pkc, pbd);
	return (void *)pkc->nxt_offset;
}

static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po, unsigned int status)
{
	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* retire/close the current block */
	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
		/*
		 * Plug the case where copy_bits() is in progress on
		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
		 * have space to copy the pkt in the current block and
		 * called prb_retire_current_block()
		 *
		 * We don't need to worry about the TMO case because
		 * the timer-handler already handled this case.
		 */
		if (!(status & TP_STATUS_BLK_TMO)) {
			while (atomic_read(&pkc->blk_fill_in_prog)) {
				/* Waiting for skb_copy_bits to finish... */
				cpu_relax();
			}
		}
		prb_close_block(pkc, pbd, po, status);
		return;
	}

	WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
	dump_stack();
	BUG();
}

static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
				struct tpacket_block_desc *pbd)
{
	return TP_STATUS_USER & BLOCK_STATUS(pbd);
}

static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
{
	return pkc->reset_pending_on_curr_blk;
}

static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
	atomic_dec(&pkc->blk_fill_in_prog);
}

static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
}

static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = 0;
}

static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	if (vlan_tx_tag_present(pkc->skb)) {
		ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
		ppd->tp_status = TP_STATUS_VLAN_VALID;
	} else {
		ppd->hv1.tp_vlan_tci = ppd->tp_status = 0;
	}
}

static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	prb_fill_vlan_info(pkc, ppd);

	if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
		prb_fill_rxhash(pkc, ppd);
	else
		prb_clear_rxhash(pkc, ppd);
}

static void prb_fill_curr_block(char *curr,
				struct tpacket_kbdq_core *pkc,
				struct tpacket_block_desc *pbd,
				unsigned int len)
{
	struct tpacket3_hdr *ppd;

	ppd = (struct tpacket3_hdr *)curr;
	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
	pkc->prev = curr;
	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
	BLOCK_NUM_PKTS(pbd) += 1;
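	/* Mark the block as being filled: prb_retire_current_block() and the
	 * retire timer spin on blk_fill_in_prog until the copy into this
	 * frame completes (cleared by prb_clear_blk_fill_status()).
	 */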
	atomic_inc(&pkc->blk_fill_in_prog);
	prb_run_all_ft_ops(pkc, ppd);
}

/* Assumes caller has the sk->rx_queue.lock */
static void *__packet_lookup_frame_in_block(struct packet_sock *po,
					    struct sk_buff *skb,
					    int status,
					    unsigned int len
					    )
{
	struct tpacket_kbdq_core *pkc;
	struct tpacket_block_desc *pbd;
	char *curr, *end;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* Queue is frozen when user space is lagging behind */
	if (prb_queue_frozen(pkc)) {
		/*
		 * Check if that last block which caused the queue to freeze
		 * is still in_use by user-space.
		 */
		if (prb_curr_blk_in_use(pkc, pbd)) {
			/* Can't record this packet */
			return NULL;
		} else {
			/*
			 * Ok, the block was released by user-space.
			 * Now let's open that block.
			 * Opening a block also thaws the queue.
			 * Thawing is a side effect.
			 */
			prb_open_block(pkc, pbd);
		}
	}

	smp_mb();
	curr = pkc->nxt_offset;
	pkc->skb = skb;
	end = (char *)pbd + pkc->kblk_size;

	/* first try the current block */
	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/* Ok, close the current block */
	prb_retire_current_block(pkc, po, 0);

	/* Now, try to dispatch the next block */
	curr = (char *)prb_dispatch_next_block(pkc, po);
	if (curr) {
		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/*
	 * No free blocks are available; user_space hasn't caught up yet.
	 * Queue was just frozen and now this packet will get dropped.
	 */
	return NULL;
}

static void *packet_current_rx_frame(struct packet_sock *po,
					    struct sk_buff *skb,
					    int status, unsigned int len)
{
	char *curr = NULL;
	switch (po->tp_version) {
	case TPACKET_V1:
	case TPACKET_V2:
		curr = packet_lookup_frame(po, &po->rx_ring,
					po->rx_ring.head, status);
		return curr;
	case TPACKET_V3:
		return __packet_lookup_frame_in_block(po, skb, status, len);
	default:
		WARN(1, "TPACKET version not supported\n");
		BUG();
		return NULL;
	}
}

static void *prb_lookup_block(struct packet_sock *po,
				     struct packet_ring_buffer *rb,
				     unsigned int previous,
				     int status)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);

	if (status != BLOCK_STATUS(pbd))
		return NULL;
	return pbd;
}

static int prb_previous_blk_num(struct packet_ring_buffer *rb)
{
	unsigned int prev;
	if (rb->prb_bdqc.kactive_blk_num)
		prev = rb->prb_bdqc.kactive_blk_num-1;
	else
		prev = rb->prb_bdqc.knum_blocks-1;
	return prev;
}

/* Assumes caller has held the rx_queue.lock */
static void *__prb_previous_block(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	unsigned int previous = prb_previous_blk_num(rb);
	return prb_lookup_block(po, rb, previous, status);
}

static void *packet_previous_rx_frame(struct packet_sock *po,
					     struct packet_ring_buffer *rb,
					     int status)
{
	if (po->tp_version <= TPACKET_V2)
		return packet_previous_frame(po, rb, status);

	return __prb_previous_block(po, rb, status);
}

static void packet_increment_rx_head(struct packet_sock *po,
					     struct packet_ring_buffer *rb)
{
	switch (po->tp_version) {
	case TPACKET_V1:
	case TPACKET_V2:
		return packet_increment_head(rb);
	case TPACKET_V3:
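	/* TPACKET_V3 manages the rx "head" per block via the prb_* helpers
	 * above, so callers are expected never to reach here with V3;
	 * hence the WARN()/BUG() below.
	 */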
default: 1137f6fb8f10Schetan loke WARN(1, "TPACKET version not supported.\n"); 1138f6fb8f10Schetan loke BUG(); 1139f6fb8f10Schetan loke return; 1140f6fb8f10Schetan loke } 1141f6fb8f10Schetan loke } 1142f6fb8f10Schetan loke 1143eea49cc9SOlof Johansson static void *packet_previous_frame(struct packet_sock *po, 114469e3c75fSJohann Baudy struct packet_ring_buffer *rb, 114569e3c75fSJohann Baudy int status) 114669e3c75fSJohann Baudy { 114769e3c75fSJohann Baudy unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max; 114869e3c75fSJohann Baudy return packet_lookup_frame(po, rb, previous, status); 114969e3c75fSJohann Baudy } 115069e3c75fSJohann Baudy 1151eea49cc9SOlof Johansson static void packet_increment_head(struct packet_ring_buffer *buff) 115269e3c75fSJohann Baudy { 115369e3c75fSJohann Baudy buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; 115469e3c75fSJohann Baudy } 115569e3c75fSJohann Baudy 11561da177e4SLinus Torvalds static void packet_sock_destruct(struct sock *sk) 11571da177e4SLinus Torvalds { 1158ed85b565SRichard Cochran skb_queue_purge(&sk->sk_error_queue); 1159ed85b565SRichard Cochran 1160547b792cSIlpo Järvinen WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 1161547b792cSIlpo Järvinen WARN_ON(atomic_read(&sk->sk_wmem_alloc)); 11621da177e4SLinus Torvalds 11631da177e4SLinus Torvalds if (!sock_flag(sk, SOCK_DEAD)) { 116440d4e3dfSEric Dumazet pr_err("Attempt to release alive packet socket: %p\n", sk); 11651da177e4SLinus Torvalds return; 11661da177e4SLinus Torvalds } 11671da177e4SLinus Torvalds 116817ab56a2SPavel Emelyanov sk_refcnt_debug_dec(sk); 11691da177e4SLinus Torvalds } 11701da177e4SLinus Torvalds 1171dc99f600SDavid S. Miller static int fanout_rr_next(struct packet_fanout *f, unsigned int num) 1172dc99f600SDavid S. Miller { 1173dc99f600SDavid S. Miller int x = atomic_read(&f->rr_cur) + 1; 1174dc99f600SDavid S. Miller 1175dc99f600SDavid S. Miller if (x >= num) 1176dc99f600SDavid S. Miller x = 0; 1177dc99f600SDavid S. Miller 1178dc99f600SDavid S. Miller return x; 1179dc99f600SDavid S. Miller } 1180dc99f600SDavid S. Miller 1181dc99f600SDavid S. Miller static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 1182dc99f600SDavid S. Miller { 1183dc99f600SDavid S. Miller u32 idx, hash = skb->rxhash; 1184dc99f600SDavid S. Miller 1185dc99f600SDavid S. Miller idx = ((u64)hash * num) >> 32; 1186dc99f600SDavid S. Miller 1187dc99f600SDavid S. Miller return f->arr[idx]; 1188dc99f600SDavid S. Miller } 1189dc99f600SDavid S. Miller 1190dc99f600SDavid S. Miller static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 1191dc99f600SDavid S. Miller { 1192dc99f600SDavid S. Miller int cur, old; 1193dc99f600SDavid S. Miller 1194dc99f600SDavid S. Miller cur = atomic_read(&f->rr_cur); 1195dc99f600SDavid S. Miller while ((old = atomic_cmpxchg(&f->rr_cur, cur, 1196dc99f600SDavid S. Miller fanout_rr_next(f, num))) != cur) 1197dc99f600SDavid S. Miller cur = old; 1198dc99f600SDavid S. Miller return f->arr[cur]; 1199dc99f600SDavid S. Miller } 1200dc99f600SDavid S. Miller 120195ec3eb4SDavid S. Miller static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 120295ec3eb4SDavid S. Miller { 120395ec3eb4SDavid S. Miller unsigned int cpu = smp_processor_id(); 120495ec3eb4SDavid S. Miller 120595ec3eb4SDavid S. Miller return f->arr[cpu % num]; 120695ec3eb4SDavid S. Miller } 120795ec3eb4SDavid S. Miller 120895ec3eb4SDavid S. 
Miller static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, 1209dc99f600SDavid S. Miller struct packet_type *pt, struct net_device *orig_dev) 1210dc99f600SDavid S. Miller { 1211dc99f600SDavid S. Miller struct packet_fanout *f = pt->af_packet_priv; 1212dc99f600SDavid S. Miller unsigned int num = f->num_members; 1213dc99f600SDavid S. Miller struct packet_sock *po; 1214dc99f600SDavid S. Miller struct sock *sk; 1215dc99f600SDavid S. Miller 1216dc99f600SDavid S. Miller if (!net_eq(dev_net(dev), read_pnet(&f->net)) || 1217dc99f600SDavid S. Miller !num) { 1218dc99f600SDavid S. Miller kfree_skb(skb); 1219dc99f600SDavid S. Miller return 0; 1220dc99f600SDavid S. Miller } 1221dc99f600SDavid S. Miller 122295ec3eb4SDavid S. Miller switch (f->type) { 122395ec3eb4SDavid S. Miller case PACKET_FANOUT_HASH: 122495ec3eb4SDavid S. Miller default: 12257736d33fSDavid S. Miller if (f->defrag) { 1226bc416d97SEric Dumazet skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); 12277736d33fSDavid S. Miller if (!skb) 12287736d33fSDavid S. Miller return 0; 12297736d33fSDavid S. Miller } 1230dc99f600SDavid S. Miller skb_get_rxhash(skb); 1231dc99f600SDavid S. Miller sk = fanout_demux_hash(f, skb, num); 123295ec3eb4SDavid S. Miller break; 123395ec3eb4SDavid S. Miller case PACKET_FANOUT_LB: 1234dc99f600SDavid S. Miller sk = fanout_demux_lb(f, skb, num); 123595ec3eb4SDavid S. Miller break; 123695ec3eb4SDavid S. Miller case PACKET_FANOUT_CPU: 123795ec3eb4SDavid S. Miller sk = fanout_demux_cpu(f, skb, num); 123895ec3eb4SDavid S. Miller break; 123995ec3eb4SDavid S. Miller } 124095ec3eb4SDavid S. Miller 1241dc99f600SDavid S. Miller po = pkt_sk(sk); 1242dc99f600SDavid S. Miller 1243dc99f600SDavid S. Miller return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); 1244dc99f600SDavid S. Miller } 1245dc99f600SDavid S. Miller 1246dc99f600SDavid S. Miller static DEFINE_MUTEX(fanout_mutex); 1247dc99f600SDavid S. Miller static LIST_HEAD(fanout_list); 1248dc99f600SDavid S. Miller 1249dc99f600SDavid S. Miller static void __fanout_link(struct sock *sk, struct packet_sock *po) 1250dc99f600SDavid S. Miller { 1251dc99f600SDavid S. Miller struct packet_fanout *f = po->fanout; 1252dc99f600SDavid S. Miller 1253dc99f600SDavid S. Miller spin_lock(&f->lock); 1254dc99f600SDavid S. Miller f->arr[f->num_members] = sk; 1255dc99f600SDavid S. Miller smp_wmb(); 1256dc99f600SDavid S. Miller f->num_members++; 1257dc99f600SDavid S. Miller spin_unlock(&f->lock); 1258dc99f600SDavid S. Miller } 1259dc99f600SDavid S. Miller 1260dc99f600SDavid S. Miller static void __fanout_unlink(struct sock *sk, struct packet_sock *po) 1261dc99f600SDavid S. Miller { 1262dc99f600SDavid S. Miller struct packet_fanout *f = po->fanout; 1263dc99f600SDavid S. Miller int i; 1264dc99f600SDavid S. Miller 1265dc99f600SDavid S. Miller spin_lock(&f->lock); 1266dc99f600SDavid S. Miller for (i = 0; i < f->num_members; i++) { 1267dc99f600SDavid S. Miller if (f->arr[i] == sk) 1268dc99f600SDavid S. Miller break; 1269dc99f600SDavid S. Miller } 1270dc99f600SDavid S. Miller BUG_ON(i >= f->num_members); 1271dc99f600SDavid S. Miller f->arr[i] = f->arr[f->num_members - 1]; 1272dc99f600SDavid S. Miller f->num_members--; 1273dc99f600SDavid S. Miller spin_unlock(&f->lock); 1274dc99f600SDavid S. Miller } 1275dc99f600SDavid S. Miller 12767736d33fSDavid S. Miller static int fanout_add(struct sock *sk, u16 id, u16 type_flags) 1277dc99f600SDavid S. Miller { 1278dc99f600SDavid S. Miller struct packet_sock *po = pkt_sk(sk); 1279dc99f600SDavid S. 
Miller struct packet_fanout *f, *match; 12807736d33fSDavid S. Miller u8 type = type_flags & 0xff; 12817736d33fSDavid S. Miller u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; 1282dc99f600SDavid S. Miller int err; 1283dc99f600SDavid S. Miller 1284dc99f600SDavid S. Miller switch (type) { 1285dc99f600SDavid S. Miller case PACKET_FANOUT_HASH: 1286dc99f600SDavid S. Miller case PACKET_FANOUT_LB: 128795ec3eb4SDavid S. Miller case PACKET_FANOUT_CPU: 1288dc99f600SDavid S. Miller break; 1289dc99f600SDavid S. Miller default: 1290dc99f600SDavid S. Miller return -EINVAL; 1291dc99f600SDavid S. Miller } 1292dc99f600SDavid S. Miller 1293dc99f600SDavid S. Miller if (!po->running) 1294dc99f600SDavid S. Miller return -EINVAL; 1295dc99f600SDavid S. Miller 1296dc99f600SDavid S. Miller if (po->fanout) 1297dc99f600SDavid S. Miller return -EALREADY; 1298dc99f600SDavid S. Miller 1299dc99f600SDavid S. Miller mutex_lock(&fanout_mutex); 1300dc99f600SDavid S. Miller match = NULL; 1301dc99f600SDavid S. Miller list_for_each_entry(f, &fanout_list, list) { 1302dc99f600SDavid S. Miller if (f->id == id && 1303dc99f600SDavid S. Miller read_pnet(&f->net) == sock_net(sk)) { 1304dc99f600SDavid S. Miller match = f; 1305dc99f600SDavid S. Miller break; 1306dc99f600SDavid S. Miller } 1307dc99f600SDavid S. Miller } 1308afe62c68SEric Dumazet err = -EINVAL; 13097736d33fSDavid S. Miller if (match && match->defrag != defrag) 1310afe62c68SEric Dumazet goto out; 1311dc99f600SDavid S. Miller if (!match) { 1312afe62c68SEric Dumazet err = -ENOMEM; 1313dc99f600SDavid S. Miller match = kzalloc(sizeof(*match), GFP_KERNEL); 1314afe62c68SEric Dumazet if (!match) 1315afe62c68SEric Dumazet goto out; 1316dc99f600SDavid S. Miller write_pnet(&match->net, sock_net(sk)); 1317dc99f600SDavid S. Miller match->id = id; 1318dc99f600SDavid S. Miller match->type = type; 13197736d33fSDavid S. Miller match->defrag = defrag; 1320dc99f600SDavid S. Miller atomic_set(&match->rr_cur, 0); 1321dc99f600SDavid S. Miller INIT_LIST_HEAD(&match->list); 1322dc99f600SDavid S. Miller spin_lock_init(&match->lock); 1323dc99f600SDavid S. Miller atomic_set(&match->sk_ref, 0); 1324dc99f600SDavid S. Miller match->prot_hook.type = po->prot_hook.type; 1325dc99f600SDavid S. Miller match->prot_hook.dev = po->prot_hook.dev; 132695ec3eb4SDavid S. Miller match->prot_hook.func = packet_rcv_fanout; 1327dc99f600SDavid S. Miller match->prot_hook.af_packet_priv = match; 1328dc99f600SDavid S. Miller dev_add_pack(&match->prot_hook); 1329dc99f600SDavid S. Miller list_add(&match->list, &fanout_list); 1330dc99f600SDavid S. Miller } 1331dc99f600SDavid S. Miller err = -EINVAL; 1332dc99f600SDavid S. Miller if (match->type == type && 1333dc99f600SDavid S. Miller match->prot_hook.type == po->prot_hook.type && 1334dc99f600SDavid S. Miller match->prot_hook.dev == po->prot_hook.dev) { 1335dc99f600SDavid S. Miller err = -ENOSPC; 1336dc99f600SDavid S. Miller if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) { 1337dc99f600SDavid S. Miller __dev_remove_pack(&po->prot_hook); 1338dc99f600SDavid S. Miller po->fanout = match; 1339dc99f600SDavid S. Miller atomic_inc(&match->sk_ref); 1340dc99f600SDavid S. Miller __fanout_link(sk, po); 1341dc99f600SDavid S. Miller err = 0; 1342dc99f600SDavid S. Miller } 1343dc99f600SDavid S. Miller } 1344afe62c68SEric Dumazet out: 1345dc99f600SDavid S. Miller mutex_unlock(&fanout_mutex); 1346dc99f600SDavid S. Miller return err; 1347dc99f600SDavid S. Miller } 1348dc99f600SDavid S. Miller 1349dc99f600SDavid S. 
Miller static void fanout_release(struct sock *sk) 1350dc99f600SDavid S. Miller { 1351dc99f600SDavid S. Miller struct packet_sock *po = pkt_sk(sk); 1352dc99f600SDavid S. Miller struct packet_fanout *f; 1353dc99f600SDavid S. Miller 1354dc99f600SDavid S. Miller f = po->fanout; 1355dc99f600SDavid S. Miller if (!f) 1356dc99f600SDavid S. Miller return; 1357dc99f600SDavid S. Miller 1358dc99f600SDavid S. Miller po->fanout = NULL; 1359dc99f600SDavid S. Miller 1360dc99f600SDavid S. Miller mutex_lock(&fanout_mutex); 1361dc99f600SDavid S. Miller if (atomic_dec_and_test(&f->sk_ref)) { 1362dc99f600SDavid S. Miller list_del(&f->list); 1363dc99f600SDavid S. Miller dev_remove_pack(&f->prot_hook); 1364dc99f600SDavid S. Miller kfree(f); 1365dc99f600SDavid S. Miller } 1366dc99f600SDavid S. Miller mutex_unlock(&fanout_mutex); 1367dc99f600SDavid S. Miller } 13681da177e4SLinus Torvalds 136990ddc4f0SEric Dumazet static const struct proto_ops packet_ops; 13701da177e4SLinus Torvalds 137190ddc4f0SEric Dumazet static const struct proto_ops packet_ops_spkt; 13721da177e4SLinus Torvalds 137340d4e3dfSEric Dumazet static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, 137440d4e3dfSEric Dumazet struct packet_type *pt, struct net_device *orig_dev) 13751da177e4SLinus Torvalds { 13761da177e4SLinus Torvalds struct sock *sk; 13771da177e4SLinus Torvalds struct sockaddr_pkt *spkt; 13781da177e4SLinus Torvalds 13791da177e4SLinus Torvalds /* 13801da177e4SLinus Torvalds * When we registered the protocol we saved the socket in the data 13811da177e4SLinus Torvalds * field for just this event. 13821da177e4SLinus Torvalds */ 13831da177e4SLinus Torvalds 13841da177e4SLinus Torvalds sk = pt->af_packet_priv; 13851da177e4SLinus Torvalds 13861da177e4SLinus Torvalds /* 13871da177e4SLinus Torvalds * Yank back the headers [hope the device set this 13881da177e4SLinus Torvalds * right or kerboom...] 13891da177e4SLinus Torvalds * 13901da177e4SLinus Torvalds * Incoming packets have ll header pulled, 13911da177e4SLinus Torvalds * push it back. 13921da177e4SLinus Torvalds * 139398e399f8SArnaldo Carvalho de Melo * For outgoing ones skb->data == skb_mac_header(skb) 13941da177e4SLinus Torvalds * so that this procedure is noop. 13951da177e4SLinus Torvalds */ 13961da177e4SLinus Torvalds 13971da177e4SLinus Torvalds if (skb->pkt_type == PACKET_LOOPBACK) 13981da177e4SLinus Torvalds goto out; 13991da177e4SLinus Torvalds 140009ad9bc7SOctavian Purdila if (!net_eq(dev_net(dev), sock_net(sk))) 1401d12d01d6SDenis V. Lunev goto out; 1402d12d01d6SDenis V. Lunev 140340d4e3dfSEric Dumazet skb = skb_share_check(skb, GFP_ATOMIC); 140440d4e3dfSEric Dumazet if (skb == NULL) 14051da177e4SLinus Torvalds goto oom; 14061da177e4SLinus Torvalds 14071da177e4SLinus Torvalds /* drop any routing info */ 1408adf30907SEric Dumazet skb_dst_drop(skb); 14091da177e4SLinus Torvalds 141084531c24SPhil Oester /* drop conntrack reference */ 141184531c24SPhil Oester nf_reset(skb); 141284531c24SPhil Oester 1413ffbc6111SHerbert Xu spkt = &PACKET_SKB_CB(skb)->sa.pkt; 14141da177e4SLinus Torvalds 141598e399f8SArnaldo Carvalho de Melo skb_push(skb, skb->data - skb_mac_header(skb)); 14161da177e4SLinus Torvalds 14171da177e4SLinus Torvalds /* 14181da177e4SLinus Torvalds * The SOCK_PACKET socket receives _all_ frames. 
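 *
 * The address filled in just below is what a legacy SOCK_PACKET reader
 * gets back from recvfrom().  A rough sketch of such a reader (buffer
 * size and error handling are illustrative only):
 *
 *	int fd = socket(AF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
 *	struct sockaddr_pkt spkt;
 *	socklen_t alen = sizeof(spkt);
 *	char buf[2048];
 *	ssize_t n = recvfrom(fd, buf, sizeof(buf), 0,
 *			     (struct sockaddr *)&spkt, &alen);
 *
 *	buf then holds the complete link-level frame and spkt.spkt_device
 *	names the interface it arrived on.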
14191da177e4SLinus Torvalds */ 14201da177e4SLinus Torvalds 14211da177e4SLinus Torvalds spkt->spkt_family = dev->type; 14221da177e4SLinus Torvalds strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); 14231da177e4SLinus Torvalds spkt->spkt_protocol = skb->protocol; 14241da177e4SLinus Torvalds 14251da177e4SLinus Torvalds /* 14261da177e4SLinus Torvalds * Charge the memory to the socket. This is done specifically 14271da177e4SLinus Torvalds * to prevent sockets using all the memory up. 14281da177e4SLinus Torvalds */ 14291da177e4SLinus Torvalds 14301da177e4SLinus Torvalds if (sock_queue_rcv_skb(sk, skb) == 0) 14311da177e4SLinus Torvalds return 0; 14321da177e4SLinus Torvalds 14331da177e4SLinus Torvalds out: 14341da177e4SLinus Torvalds kfree_skb(skb); 14351da177e4SLinus Torvalds oom: 14361da177e4SLinus Torvalds return 0; 14371da177e4SLinus Torvalds } 14381da177e4SLinus Torvalds 14391da177e4SLinus Torvalds 14401da177e4SLinus Torvalds /* 14411da177e4SLinus Torvalds * Output a raw packet to a device layer. This bypasses all the other 14421da177e4SLinus Torvalds * protocol layers and you must therefore supply it with a complete frame 14431da177e4SLinus Torvalds */ 14441da177e4SLinus Torvalds 14451da177e4SLinus Torvalds static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, 14461da177e4SLinus Torvalds struct msghdr *msg, size_t len) 14471da177e4SLinus Torvalds { 14481da177e4SLinus Torvalds struct sock *sk = sock->sk; 14491da177e4SLinus Torvalds struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name; 14501a35ca80SEric Dumazet struct sk_buff *skb = NULL; 14511da177e4SLinus Torvalds struct net_device *dev; 14520e11c91eSAl Viro __be16 proto = 0; 14531da177e4SLinus Torvalds int err; 14543bdc0ebaSBen Greear int extra_len = 0; 14551da177e4SLinus Torvalds 14561da177e4SLinus Torvalds /* 14571da177e4SLinus Torvalds * Get and verify the address. 14581da177e4SLinus Torvalds */ 14591da177e4SLinus Torvalds 146040d4e3dfSEric Dumazet if (saddr) { 14611da177e4SLinus Torvalds if (msg->msg_namelen < sizeof(struct sockaddr)) 146240d4e3dfSEric Dumazet return -EINVAL; 14631da177e4SLinus Torvalds if (msg->msg_namelen == sizeof(struct sockaddr_pkt)) 14641da177e4SLinus Torvalds proto = saddr->spkt_protocol; 146540d4e3dfSEric Dumazet } else 146640d4e3dfSEric Dumazet return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */ 14671da177e4SLinus Torvalds 14681da177e4SLinus Torvalds /* 14691da177e4SLinus Torvalds * Find the device first to size check it 14701da177e4SLinus Torvalds */ 14711da177e4SLinus Torvalds 1472de74e92aSdanborkmann@iogearbox.net saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0; 14731a35ca80SEric Dumazet retry: 1474654d1f8aSEric Dumazet rcu_read_lock(); 1475654d1f8aSEric Dumazet dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); 14761da177e4SLinus Torvalds err = -ENODEV; 14771da177e4SLinus Torvalds if (dev == NULL) 14781da177e4SLinus Torvalds goto out_unlock; 14791da177e4SLinus Torvalds 1480d5e76b0aSDavid S. Miller err = -ENETDOWN; 1481d5e76b0aSDavid S. Miller if (!(dev->flags & IFF_UP)) 1482d5e76b0aSDavid S. Miller goto out_unlock; 1483d5e76b0aSDavid S. Miller 14841da177e4SLinus Torvalds /* 14851da177e4SLinus Torvalds * You may not queue a frame bigger than the mtu. This is the lowest level 14861da177e4SLinus Torvalds * raw protocol and you must do your own fragmentation at this level. 
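 *
 * Put differently, the caller hands us a complete, pre-built link-level
 * frame and names the output device in the sockaddr.  A rough sketch of
 * the sending side (device name, frame and frame_len are illustrative):
 *
 *	struct sockaddr_pkt spkt = { .spkt_family = AF_PACKET,
 *				     .spkt_protocol = htons(ETH_P_IP) };
 *	strncpy(spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *	sendto(fd, frame, frame_len, 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 *
 *	frame_len must stay within dev->mtu plus the hard header (plus
 *	VLAN_HLEN for a tagged frame), as checked right below.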
14871da177e4SLinus Torvalds */ 14881da177e4SLinus Torvalds 14893bdc0ebaSBen Greear if (unlikely(sock_flag(sk, SOCK_NOFCS))) { 14903bdc0ebaSBen Greear if (!netif_supports_nofcs(dev)) { 14913bdc0ebaSBen Greear err = -EPROTONOSUPPORT; 14923bdc0ebaSBen Greear goto out_unlock; 14933bdc0ebaSBen Greear } 14943bdc0ebaSBen Greear extra_len = 4; /* We're doing our own CRC */ 14953bdc0ebaSBen Greear } 14963bdc0ebaSBen Greear 14971da177e4SLinus Torvalds err = -EMSGSIZE; 14983bdc0ebaSBen Greear if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) 14991da177e4SLinus Torvalds goto out_unlock; 15001da177e4SLinus Torvalds 15011a35ca80SEric Dumazet if (!skb) { 15021a35ca80SEric Dumazet size_t reserved = LL_RESERVED_SPACE(dev); 15034ce40912SHerbert Xu int tlen = dev->needed_tailroom; 15041a35ca80SEric Dumazet unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; 15051da177e4SLinus Torvalds 15061a35ca80SEric Dumazet rcu_read_unlock(); 15074ce40912SHerbert Xu skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL); 15081da177e4SLinus Torvalds if (skb == NULL) 15091a35ca80SEric Dumazet return -ENOBUFS; 15101a35ca80SEric Dumazet /* FIXME: Save some space for broken drivers that write a hard 15111a35ca80SEric Dumazet * header at transmission time by themselves. PPP is the notable 15121a35ca80SEric Dumazet * one here. This should really be fixed at the driver level. 15131da177e4SLinus Torvalds */ 15141a35ca80SEric Dumazet skb_reserve(skb, reserved); 1515c1d2bbe1SArnaldo Carvalho de Melo skb_reset_network_header(skb); 15161da177e4SLinus Torvalds 15171da177e4SLinus Torvalds /* Try to align data part correctly */ 15181a35ca80SEric Dumazet if (hhlen) { 15191a35ca80SEric Dumazet skb->data -= hhlen; 15201a35ca80SEric Dumazet skb->tail -= hhlen; 15211a35ca80SEric Dumazet if (len < hhlen) 1522c1d2bbe1SArnaldo Carvalho de Melo skb_reset_network_header(skb); 15231da177e4SLinus Torvalds } 15241da177e4SLinus Torvalds err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 15251a35ca80SEric Dumazet if (err) 15261a35ca80SEric Dumazet goto out_free; 15271a35ca80SEric Dumazet goto retry; 15281a35ca80SEric Dumazet } 15291a35ca80SEric Dumazet 15303bdc0ebaSBen Greear if (len > (dev->mtu + dev->hard_header_len + extra_len)) { 153157f89bfaSBen Greear /* Earlier code assumed this would be a VLAN pkt, 153257f89bfaSBen Greear * double-check this now that we have the actual 153357f89bfaSBen Greear * packet in hand. 
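 *
 * The size check above granted an extra VLAN_HLEN bytes on the assumption
 * that an oversized frame carries an 802.1Q tag; if the Ethertype read
 * back here is not ETH_P_8021Q, that allowance was unjustified and the
 * frame is rejected with -EMSGSIZE after all.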
153457f89bfaSBen Greear */ 153557f89bfaSBen Greear struct ethhdr *ehdr; 153657f89bfaSBen Greear skb_reset_mac_header(skb); 153757f89bfaSBen Greear ehdr = eth_hdr(skb); 153857f89bfaSBen Greear if (ehdr->h_proto != htons(ETH_P_8021Q)) { 153957f89bfaSBen Greear err = -EMSGSIZE; 154057f89bfaSBen Greear goto out_unlock; 154157f89bfaSBen Greear } 154257f89bfaSBen Greear } 15431a35ca80SEric Dumazet 15441da177e4SLinus Torvalds skb->protocol = proto; 15451da177e4SLinus Torvalds skb->dev = dev; 15461da177e4SLinus Torvalds skb->priority = sk->sk_priority; 15472d37a186SEric Dumazet skb->mark = sk->sk_mark; 15482244d07bSOliver Hartkopp err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 1549ed85b565SRichard Cochran if (err < 0) 1550ed85b565SRichard Cochran goto out_unlock; 15511da177e4SLinus Torvalds 15523bdc0ebaSBen Greear if (unlikely(extra_len == 4)) 15533bdc0ebaSBen Greear skb->no_fcs = 1; 15543bdc0ebaSBen Greear 15551da177e4SLinus Torvalds dev_queue_xmit(skb); 1556654d1f8aSEric Dumazet rcu_read_unlock(); 155740d4e3dfSEric Dumazet return len; 15581da177e4SLinus Torvalds 15591da177e4SLinus Torvalds out_unlock: 1560654d1f8aSEric Dumazet rcu_read_unlock(); 15611a35ca80SEric Dumazet out_free: 15621a35ca80SEric Dumazet kfree_skb(skb); 15631da177e4SLinus Torvalds return err; 15641da177e4SLinus Torvalds } 15651da177e4SLinus Torvalds 1566eea49cc9SOlof Johansson static unsigned int run_filter(const struct sk_buff *skb, 156762ab0812SEric Dumazet const struct sock *sk, 1568dbcb5855SDavid S. Miller unsigned int res) 15691da177e4SLinus Torvalds { 15701da177e4SLinus Torvalds struct sk_filter *filter; 15711da177e4SLinus Torvalds 157280f8f102SEric Dumazet rcu_read_lock(); 157380f8f102SEric Dumazet filter = rcu_dereference(sk->sk_filter); 1574dbcb5855SDavid S. Miller if (filter != NULL) 15750a14842fSEric Dumazet res = SK_RUN_FILTER(filter, skb); 157680f8f102SEric Dumazet rcu_read_unlock(); 15771da177e4SLinus Torvalds 1578dbcb5855SDavid S. Miller return res; 15791da177e4SLinus Torvalds } 15801da177e4SLinus Torvalds 15811da177e4SLinus Torvalds /* 158262ab0812SEric Dumazet * This function makes lazy skb cloning in hope that most of packets 158362ab0812SEric Dumazet * are discarded by BPF. 158462ab0812SEric Dumazet * 158562ab0812SEric Dumazet * Note tricky part: we DO mangle shared skb! skb->data, skb->len 158662ab0812SEric Dumazet * and skb->cb are mangled. It works because (and until) packets 158762ab0812SEric Dumazet * falling here are owned by current CPU. Output packets are cloned 158862ab0812SEric Dumazet * by dev_queue_xmit_nit(), input packets are processed by net_bh 158962ab0812SEric Dumazet * sequencially, so that if we return skb to original state on exit, 159062ab0812SEric Dumazet * we will not harm anyone. 15911da177e4SLinus Torvalds */ 15921da177e4SLinus Torvalds 159340d4e3dfSEric Dumazet static int packet_rcv(struct sk_buff *skb, struct net_device *dev, 159440d4e3dfSEric Dumazet struct packet_type *pt, struct net_device *orig_dev) 15951da177e4SLinus Torvalds { 15961da177e4SLinus Torvalds struct sock *sk; 15971da177e4SLinus Torvalds struct sockaddr_ll *sll; 15981da177e4SLinus Torvalds struct packet_sock *po; 15991da177e4SLinus Torvalds u8 *skb_head = skb->data; 16001da177e4SLinus Torvalds int skb_len = skb->len; 1601dbcb5855SDavid S. 
Miller unsigned int snaplen, res; 16021da177e4SLinus Torvalds 16031da177e4SLinus Torvalds if (skb->pkt_type == PACKET_LOOPBACK) 16041da177e4SLinus Torvalds goto drop; 16051da177e4SLinus Torvalds 16061da177e4SLinus Torvalds sk = pt->af_packet_priv; 16071da177e4SLinus Torvalds po = pkt_sk(sk); 16081da177e4SLinus Torvalds 160909ad9bc7SOctavian Purdila if (!net_eq(dev_net(dev), sock_net(sk))) 1610d12d01d6SDenis V. Lunev goto drop; 1611d12d01d6SDenis V. Lunev 16121da177e4SLinus Torvalds skb->dev = dev; 16131da177e4SLinus Torvalds 16143b04dddeSStephen Hemminger if (dev->header_ops) { 16151da177e4SLinus Torvalds /* The device has an explicit notion of ll header, 161662ab0812SEric Dumazet * exported to higher levels. 161762ab0812SEric Dumazet * 161862ab0812SEric Dumazet * Otherwise, the device hides details of its frame 161962ab0812SEric Dumazet * structure, so that corresponding packet head is 162062ab0812SEric Dumazet * never delivered to user. 16211da177e4SLinus Torvalds */ 16221da177e4SLinus Torvalds if (sk->sk_type != SOCK_DGRAM) 162398e399f8SArnaldo Carvalho de Melo skb_push(skb, skb->data - skb_mac_header(skb)); 16241da177e4SLinus Torvalds else if (skb->pkt_type == PACKET_OUTGOING) { 16251da177e4SLinus Torvalds /* Special case: outgoing packets have ll header at head */ 1626bbe735e4SArnaldo Carvalho de Melo skb_pull(skb, skb_network_offset(skb)); 16271da177e4SLinus Torvalds } 16281da177e4SLinus Torvalds } 16291da177e4SLinus Torvalds 16301da177e4SLinus Torvalds snaplen = skb->len; 16311da177e4SLinus Torvalds 1632dbcb5855SDavid S. Miller res = run_filter(skb, sk, snaplen); 1633dbcb5855SDavid S. Miller if (!res) 16341da177e4SLinus Torvalds goto drop_n_restore; 1635dbcb5855SDavid S. Miller if (snaplen > res) 1636dbcb5855SDavid S. Miller snaplen = res; 16371da177e4SLinus Torvalds 16380fd7bac6SEric Dumazet if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 16391da177e4SLinus Torvalds goto drop_n_acct; 16401da177e4SLinus Torvalds 16411da177e4SLinus Torvalds if (skb_shared(skb)) { 16421da177e4SLinus Torvalds struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); 16431da177e4SLinus Torvalds if (nskb == NULL) 16441da177e4SLinus Torvalds goto drop_n_acct; 16451da177e4SLinus Torvalds 16461da177e4SLinus Torvalds if (skb_head != skb->data) { 16471da177e4SLinus Torvalds skb->data = skb_head; 16481da177e4SLinus Torvalds skb->len = skb_len; 16491da177e4SLinus Torvalds } 1650abc4e4faSEric Dumazet consume_skb(skb); 16511da177e4SLinus Torvalds skb = nskb; 16521da177e4SLinus Torvalds } 16531da177e4SLinus Torvalds 1654ffbc6111SHerbert Xu BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 > 1655ffbc6111SHerbert Xu sizeof(skb->cb)); 1656ffbc6111SHerbert Xu 1657ffbc6111SHerbert Xu sll = &PACKET_SKB_CB(skb)->sa.ll; 16581da177e4SLinus Torvalds sll->sll_family = AF_PACKET; 16591da177e4SLinus Torvalds sll->sll_hatype = dev->type; 16601da177e4SLinus Torvalds sll->sll_protocol = skb->protocol; 16611da177e4SLinus Torvalds sll->sll_pkttype = skb->pkt_type; 16628032b464SPeter P Waskiewicz Jr if (unlikely(po->origdev)) 166380feaacbSPeter P. Waskiewicz Jr sll->sll_ifindex = orig_dev->ifindex; 166480feaacbSPeter P. 
Waskiewicz Jr else 16651da177e4SLinus Torvalds sll->sll_ifindex = dev->ifindex; 16661da177e4SLinus Torvalds 1667b95cce35SStephen Hemminger sll->sll_halen = dev_parse_header(skb, sll->sll_addr); 16681da177e4SLinus Torvalds 1669ffbc6111SHerbert Xu PACKET_SKB_CB(skb)->origlen = skb->len; 16708dc41944SHerbert Xu 16711da177e4SLinus Torvalds if (pskb_trim(skb, snaplen)) 16721da177e4SLinus Torvalds goto drop_n_acct; 16731da177e4SLinus Torvalds 16741da177e4SLinus Torvalds skb_set_owner_r(skb, sk); 16751da177e4SLinus Torvalds skb->dev = NULL; 1676adf30907SEric Dumazet skb_dst_drop(skb); 16771da177e4SLinus Torvalds 167884531c24SPhil Oester /* drop conntrack reference */ 167984531c24SPhil Oester nf_reset(skb); 168084531c24SPhil Oester 16811da177e4SLinus Torvalds spin_lock(&sk->sk_receive_queue.lock); 16821da177e4SLinus Torvalds po->stats.tp_packets++; 16833b885787SNeil Horman skb->dropcount = atomic_read(&sk->sk_drops); 16841da177e4SLinus Torvalds __skb_queue_tail(&sk->sk_receive_queue, skb); 16851da177e4SLinus Torvalds spin_unlock(&sk->sk_receive_queue.lock); 16861da177e4SLinus Torvalds sk->sk_data_ready(sk, skb->len); 16871da177e4SLinus Torvalds return 0; 16881da177e4SLinus Torvalds 16891da177e4SLinus Torvalds drop_n_acct: 16907091fbd8SWillem de Bruijn spin_lock(&sk->sk_receive_queue.lock); 16917091fbd8SWillem de Bruijn po->stats.tp_drops++; 16927091fbd8SWillem de Bruijn atomic_inc(&sk->sk_drops); 16937091fbd8SWillem de Bruijn spin_unlock(&sk->sk_receive_queue.lock); 16941da177e4SLinus Torvalds 16951da177e4SLinus Torvalds drop_n_restore: 16961da177e4SLinus Torvalds if (skb_head != skb->data && skb_shared(skb)) { 16971da177e4SLinus Torvalds skb->data = skb_head; 16981da177e4SLinus Torvalds skb->len = skb_len; 16991da177e4SLinus Torvalds } 17001da177e4SLinus Torvalds drop: 1701ead2ceb0SNeil Horman consume_skb(skb); 17021da177e4SLinus Torvalds return 0; 17031da177e4SLinus Torvalds } 17041da177e4SLinus Torvalds 170540d4e3dfSEric Dumazet static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, 170640d4e3dfSEric Dumazet struct packet_type *pt, struct net_device *orig_dev) 17071da177e4SLinus Torvalds { 17081da177e4SLinus Torvalds struct sock *sk; 17091da177e4SLinus Torvalds struct packet_sock *po; 17101da177e4SLinus Torvalds struct sockaddr_ll *sll; 1711bbd6ef87SPatrick McHardy union { 1712bbd6ef87SPatrick McHardy struct tpacket_hdr *h1; 1713bbd6ef87SPatrick McHardy struct tpacket2_hdr *h2; 1714f6fb8f10Schetan loke struct tpacket3_hdr *h3; 1715bbd6ef87SPatrick McHardy void *raw; 1716bbd6ef87SPatrick McHardy } h; 17171da177e4SLinus Torvalds u8 *skb_head = skb->data; 17181da177e4SLinus Torvalds int skb_len = skb->len; 1719dbcb5855SDavid S. Miller unsigned int snaplen, res; 1720f6fb8f10Schetan loke unsigned long status = TP_STATUS_USER; 1721bbd6ef87SPatrick McHardy unsigned short macoff, netoff, hdrlen; 17221da177e4SLinus Torvalds struct sk_buff *copy_skb = NULL; 1723b7aa0bf7SEric Dumazet struct timeval tv; 1724bbd6ef87SPatrick McHardy struct timespec ts; 1725614f60faSScott McMillan struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); 17261da177e4SLinus Torvalds 17271da177e4SLinus Torvalds if (skb->pkt_type == PACKET_LOOPBACK) 17281da177e4SLinus Torvalds goto drop; 17291da177e4SLinus Torvalds 17301da177e4SLinus Torvalds sk = pt->af_packet_priv; 17311da177e4SLinus Torvalds po = pkt_sk(sk); 17321da177e4SLinus Torvalds 173309ad9bc7SOctavian Purdila if (!net_eq(dev_net(dev), sock_net(sk))) 1734d12d01d6SDenis V. Lunev goto drop; 1735d12d01d6SDenis V. 
Lunev 17363b04dddeSStephen Hemminger if (dev->header_ops) { 17371da177e4SLinus Torvalds if (sk->sk_type != SOCK_DGRAM) 173898e399f8SArnaldo Carvalho de Melo skb_push(skb, skb->data - skb_mac_header(skb)); 17391da177e4SLinus Torvalds else if (skb->pkt_type == PACKET_OUTGOING) { 17401da177e4SLinus Torvalds /* Special case: outgoing packets have ll header at head */ 1741bbe735e4SArnaldo Carvalho de Melo skb_pull(skb, skb_network_offset(skb)); 17428dc41944SHerbert Xu } 17438dc41944SHerbert Xu } 17448dc41944SHerbert Xu 174584fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) 17461da177e4SLinus Torvalds status |= TP_STATUS_CSUMNOTREADY; 17471da177e4SLinus Torvalds 17481da177e4SLinus Torvalds snaplen = skb->len; 17491da177e4SLinus Torvalds 1750dbcb5855SDavid S. Miller res = run_filter(skb, sk, snaplen); 1751dbcb5855SDavid S. Miller if (!res) 17521da177e4SLinus Torvalds goto drop_n_restore; 1753dbcb5855SDavid S. Miller if (snaplen > res) 1754dbcb5855SDavid S. Miller snaplen = res; 17551da177e4SLinus Torvalds 17561da177e4SLinus Torvalds if (sk->sk_type == SOCK_DGRAM) { 17578913336aSPatrick McHardy macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + 17588913336aSPatrick McHardy po->tp_reserve; 17591da177e4SLinus Torvalds } else { 176095c96174SEric Dumazet unsigned int maclen = skb_network_offset(skb); 1761bbd6ef87SPatrick McHardy netoff = TPACKET_ALIGN(po->tp_hdrlen + 17628913336aSPatrick McHardy (maclen < 16 ? 16 : maclen)) + 17638913336aSPatrick McHardy po->tp_reserve; 17641da177e4SLinus Torvalds macoff = netoff - maclen; 17651da177e4SLinus Torvalds } 1766f6fb8f10Schetan loke if (po->tp_version <= TPACKET_V2) { 176769e3c75fSJohann Baudy if (macoff + snaplen > po->rx_ring.frame_size) { 17681da177e4SLinus Torvalds if (po->copy_thresh && 17690fd7bac6SEric Dumazet atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { 17701da177e4SLinus Torvalds if (skb_shared(skb)) { 17711da177e4SLinus Torvalds copy_skb = skb_clone(skb, GFP_ATOMIC); 17721da177e4SLinus Torvalds } else { 17731da177e4SLinus Torvalds copy_skb = skb_get(skb); 17741da177e4SLinus Torvalds skb_head = skb->data; 17751da177e4SLinus Torvalds } 17761da177e4SLinus Torvalds if (copy_skb) 17771da177e4SLinus Torvalds skb_set_owner_r(copy_skb, sk); 17781da177e4SLinus Torvalds } 177969e3c75fSJohann Baudy snaplen = po->rx_ring.frame_size - macoff; 17801da177e4SLinus Torvalds if ((int)snaplen < 0) 17811da177e4SLinus Torvalds snaplen = 0; 17821da177e4SLinus Torvalds } 1783f6fb8f10Schetan loke } 17841da177e4SLinus Torvalds spin_lock(&sk->sk_receive_queue.lock); 1785f6fb8f10Schetan loke h.raw = packet_current_rx_frame(po, skb, 1786f6fb8f10Schetan loke TP_STATUS_KERNEL, (macoff+snaplen)); 1787bbd6ef87SPatrick McHardy if (!h.raw) 17881da177e4SLinus Torvalds goto ring_is_full; 1789f6fb8f10Schetan loke if (po->tp_version <= TPACKET_V2) { 1790f6fb8f10Schetan loke packet_increment_rx_head(po, &po->rx_ring); 1791f6fb8f10Schetan loke /* 1792f6fb8f10Schetan loke * LOSING will be reported till you read the stats, 1793f6fb8f10Schetan loke * because it's COR - Clear On Read. 1794f6fb8f10Schetan loke * Anyways, moving it for V1/V2 only as V3 doesn't need this 1795f6fb8f10Schetan loke * at packet level. 
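 *
 * The read that clears it is the PACKET_STATISTICS getsockopt.  A rough
 * sketch of the user-space side for a V1/V2 ring (error handling
 * omitted):
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 *
 *	st.tp_drops then holds the drops accumulated since the previous
 *	call; the call resets the counters, which is why TP_STATUS_LOSING
 *	stays set on frames until the statistics are actually read.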
1796f6fb8f10Schetan loke */ 1797f6fb8f10Schetan loke if (po->stats.tp_drops) 1798f6fb8f10Schetan loke status |= TP_STATUS_LOSING; 1799f6fb8f10Schetan loke } 18001da177e4SLinus Torvalds po->stats.tp_packets++; 18011da177e4SLinus Torvalds if (copy_skb) { 18021da177e4SLinus Torvalds status |= TP_STATUS_COPY; 18031da177e4SLinus Torvalds __skb_queue_tail(&sk->sk_receive_queue, copy_skb); 18041da177e4SLinus Torvalds } 18051da177e4SLinus Torvalds spin_unlock(&sk->sk_receive_queue.lock); 18061da177e4SLinus Torvalds 1807bbd6ef87SPatrick McHardy skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 18081da177e4SLinus Torvalds 1809bbd6ef87SPatrick McHardy switch (po->tp_version) { 1810bbd6ef87SPatrick McHardy case TPACKET_V1: 1811bbd6ef87SPatrick McHardy h.h1->tp_len = skb->len; 1812bbd6ef87SPatrick McHardy h.h1->tp_snaplen = snaplen; 1813bbd6ef87SPatrick McHardy h.h1->tp_mac = macoff; 1814bbd6ef87SPatrick McHardy h.h1->tp_net = netoff; 1815614f60faSScott McMillan if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) 1816614f60faSScott McMillan && shhwtstamps->syststamp.tv64) 1817614f60faSScott McMillan tv = ktime_to_timeval(shhwtstamps->syststamp); 1818614f60faSScott McMillan else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) 1819614f60faSScott McMillan && shhwtstamps->hwtstamp.tv64) 1820614f60faSScott McMillan tv = ktime_to_timeval(shhwtstamps->hwtstamp); 1821614f60faSScott McMillan else if (skb->tstamp.tv64) 1822b7aa0bf7SEric Dumazet tv = ktime_to_timeval(skb->tstamp); 182350f17787SStephen Hemminger else 182450f17787SStephen Hemminger do_gettimeofday(&tv); 1825bbd6ef87SPatrick McHardy h.h1->tp_sec = tv.tv_sec; 1826bbd6ef87SPatrick McHardy h.h1->tp_usec = tv.tv_usec; 1827bbd6ef87SPatrick McHardy hdrlen = sizeof(*h.h1); 1828bbd6ef87SPatrick McHardy break; 1829bbd6ef87SPatrick McHardy case TPACKET_V2: 1830bbd6ef87SPatrick McHardy h.h2->tp_len = skb->len; 1831bbd6ef87SPatrick McHardy h.h2->tp_snaplen = snaplen; 1832bbd6ef87SPatrick McHardy h.h2->tp_mac = macoff; 1833bbd6ef87SPatrick McHardy h.h2->tp_net = netoff; 1834614f60faSScott McMillan if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) 1835614f60faSScott McMillan && shhwtstamps->syststamp.tv64) 1836614f60faSScott McMillan ts = ktime_to_timespec(shhwtstamps->syststamp); 1837614f60faSScott McMillan else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) 1838614f60faSScott McMillan && shhwtstamps->hwtstamp.tv64) 1839614f60faSScott McMillan ts = ktime_to_timespec(shhwtstamps->hwtstamp); 1840614f60faSScott McMillan else if (skb->tstamp.tv64) 1841bbd6ef87SPatrick McHardy ts = ktime_to_timespec(skb->tstamp); 1842bbd6ef87SPatrick McHardy else 1843bbd6ef87SPatrick McHardy getnstimeofday(&ts); 1844bbd6ef87SPatrick McHardy h.h2->tp_sec = ts.tv_sec; 1845bbd6ef87SPatrick McHardy h.h2->tp_nsec = ts.tv_nsec; 1846a3bcc23eSBen Greear if (vlan_tx_tag_present(skb)) { 184705423b24SEric Dumazet h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); 1848a3bcc23eSBen Greear status |= TP_STATUS_VLAN_VALID; 1849a3bcc23eSBen Greear } else { 1850a3bcc23eSBen Greear h.h2->tp_vlan_tci = 0; 1851a3bcc23eSBen Greear } 185213fcb7bdSEric Dumazet h.h2->tp_padding = 0; 1853bbd6ef87SPatrick McHardy hdrlen = sizeof(*h.h2); 1854bbd6ef87SPatrick McHardy break; 1855f6fb8f10Schetan loke case TPACKET_V3: 1856f6fb8f10Schetan loke /* tp_nxt_offset,vlan are already populated above. 
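 * (they were filled by prb_fill_curr_block() and prb_run_all_ft_ops()
 * when this frame's slot was reserved in the block).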
1857f6fb8f10Schetan loke * So DONT clear those fields here 1858f6fb8f10Schetan loke */ 1859f6fb8f10Schetan loke h.h3->tp_status |= status; 1860f6fb8f10Schetan loke h.h3->tp_len = skb->len; 1861f6fb8f10Schetan loke h.h3->tp_snaplen = snaplen; 1862f6fb8f10Schetan loke h.h3->tp_mac = macoff; 1863f6fb8f10Schetan loke h.h3->tp_net = netoff; 1864f6fb8f10Schetan loke if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) 1865f6fb8f10Schetan loke && shhwtstamps->syststamp.tv64) 1866f6fb8f10Schetan loke ts = ktime_to_timespec(shhwtstamps->syststamp); 1867f6fb8f10Schetan loke else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) 1868f6fb8f10Schetan loke && shhwtstamps->hwtstamp.tv64) 1869f6fb8f10Schetan loke ts = ktime_to_timespec(shhwtstamps->hwtstamp); 1870f6fb8f10Schetan loke else if (skb->tstamp.tv64) 1871f6fb8f10Schetan loke ts = ktime_to_timespec(skb->tstamp); 1872f6fb8f10Schetan loke else 1873f6fb8f10Schetan loke getnstimeofday(&ts); 1874f6fb8f10Schetan loke h.h3->tp_sec = ts.tv_sec; 1875f6fb8f10Schetan loke h.h3->tp_nsec = ts.tv_nsec; 1876f6fb8f10Schetan loke hdrlen = sizeof(*h.h3); 1877f6fb8f10Schetan loke break; 1878bbd6ef87SPatrick McHardy default: 1879bbd6ef87SPatrick McHardy BUG(); 1880bbd6ef87SPatrick McHardy } 18811da177e4SLinus Torvalds 1882bbd6ef87SPatrick McHardy sll = h.raw + TPACKET_ALIGN(hdrlen); 1883b95cce35SStephen Hemminger sll->sll_halen = dev_parse_header(skb, sll->sll_addr); 18841da177e4SLinus Torvalds sll->sll_family = AF_PACKET; 18851da177e4SLinus Torvalds sll->sll_hatype = dev->type; 18861da177e4SLinus Torvalds sll->sll_protocol = skb->protocol; 18871da177e4SLinus Torvalds sll->sll_pkttype = skb->pkt_type; 18888032b464SPeter P Waskiewicz Jr if (unlikely(po->origdev)) 188980feaacbSPeter P. Waskiewicz Jr sll->sll_ifindex = orig_dev->ifindex; 189080feaacbSPeter P. 
Waskiewicz Jr else 18911da177e4SLinus Torvalds sll->sll_ifindex = dev->ifindex; 18921da177e4SLinus Torvalds 1893e16aa207SRalf Baechle smp_mb(); 1894f6dafa95SChangli Gao #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 18951da177e4SLinus Torvalds { 18960af55bb5SChangli Gao u8 *start, *end; 18971da177e4SLinus Torvalds 1898f6fb8f10Schetan loke if (po->tp_version <= TPACKET_V2) { 1899f6fb8f10Schetan loke end = (u8 *)PAGE_ALIGN((unsigned long)h.raw 1900f6fb8f10Schetan loke + macoff + snaplen); 19010af55bb5SChangli Gao for (start = h.raw; start < end; start += PAGE_SIZE) 19020af55bb5SChangli Gao flush_dcache_page(pgv_to_page(start)); 1903f6fb8f10Schetan loke } 1904cc9f01b2SChetan Loke smp_wmb(); 19051da177e4SLinus Torvalds } 1906f6dafa95SChangli Gao #endif 1907f6fb8f10Schetan loke if (po->tp_version <= TPACKET_V2) 1908cc9f01b2SChetan Loke __packet_set_status(po, h.raw, status); 1909f6fb8f10Schetan loke else 1910f6fb8f10Schetan loke prb_clear_blk_fill_status(&po->rx_ring); 19111da177e4SLinus Torvalds 19121da177e4SLinus Torvalds sk->sk_data_ready(sk, 0); 19131da177e4SLinus Torvalds 19141da177e4SLinus Torvalds drop_n_restore: 19151da177e4SLinus Torvalds if (skb_head != skb->data && skb_shared(skb)) { 19161da177e4SLinus Torvalds skb->data = skb_head; 19171da177e4SLinus Torvalds skb->len = skb_len; 19181da177e4SLinus Torvalds } 19191da177e4SLinus Torvalds drop: 19201da177e4SLinus Torvalds kfree_skb(skb); 19211da177e4SLinus Torvalds return 0; 19221da177e4SLinus Torvalds 19231da177e4SLinus Torvalds ring_is_full: 19241da177e4SLinus Torvalds po->stats.tp_drops++; 19251da177e4SLinus Torvalds spin_unlock(&sk->sk_receive_queue.lock); 19261da177e4SLinus Torvalds 19271da177e4SLinus Torvalds sk->sk_data_ready(sk, 0); 19281da177e4SLinus Torvalds kfree_skb(copy_skb); 19291da177e4SLinus Torvalds goto drop_n_restore; 19301da177e4SLinus Torvalds } 19311da177e4SLinus Torvalds 193269e3c75fSJohann Baudy static void tpacket_destruct_skb(struct sk_buff *skb) 193369e3c75fSJohann Baudy { 193469e3c75fSJohann Baudy struct packet_sock *po = pkt_sk(skb->sk); 193569e3c75fSJohann Baudy void *ph; 193669e3c75fSJohann Baudy 193769e3c75fSJohann Baudy if (likely(po->tx_ring.pg_vec)) { 193869e3c75fSJohann Baudy ph = skb_shinfo(skb)->destructor_arg; 193969e3c75fSJohann Baudy BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); 194069e3c75fSJohann Baudy BUG_ON(atomic_read(&po->tx_ring.pending) == 0); 194169e3c75fSJohann Baudy atomic_dec(&po->tx_ring.pending); 194269e3c75fSJohann Baudy __packet_set_status(po, ph, TP_STATUS_AVAILABLE); 194369e3c75fSJohann Baudy } 194469e3c75fSJohann Baudy 194569e3c75fSJohann Baudy sock_wfree(skb); 194669e3c75fSJohann Baudy } 194769e3c75fSJohann Baudy 194869e3c75fSJohann Baudy static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 194969e3c75fSJohann Baudy void *frame, struct net_device *dev, int size_max, 1950ae641949SHerbert Xu __be16 proto, unsigned char *addr, int hlen) 195169e3c75fSJohann Baudy { 195269e3c75fSJohann Baudy union { 195369e3c75fSJohann Baudy struct tpacket_hdr *h1; 195469e3c75fSJohann Baudy struct tpacket2_hdr *h2; 195569e3c75fSJohann Baudy void *raw; 195669e3c75fSJohann Baudy } ph; 195769e3c75fSJohann Baudy int to_write, offset, len, tp_len, nr_frags, len_max; 195869e3c75fSJohann Baudy struct socket *sock = po->sk.sk_socket; 195969e3c75fSJohann Baudy struct page *page; 196069e3c75fSJohann Baudy void *data; 196169e3c75fSJohann Baudy int err; 196269e3c75fSJohann Baudy 196369e3c75fSJohann Baudy ph.raw = frame; 196469e3c75fSJohann Baudy 196569e3c75fSJohann Baudy 
skb->protocol = proto; 196669e3c75fSJohann Baudy skb->dev = dev; 196769e3c75fSJohann Baudy skb->priority = po->sk.sk_priority; 19682d37a186SEric Dumazet skb->mark = po->sk.sk_mark; 196969e3c75fSJohann Baudy skb_shinfo(skb)->destructor_arg = ph.raw; 197069e3c75fSJohann Baudy 197169e3c75fSJohann Baudy switch (po->tp_version) { 197269e3c75fSJohann Baudy case TPACKET_V2: 197369e3c75fSJohann Baudy tp_len = ph.h2->tp_len; 197469e3c75fSJohann Baudy break; 197569e3c75fSJohann Baudy default: 197669e3c75fSJohann Baudy tp_len = ph.h1->tp_len; 197769e3c75fSJohann Baudy break; 197869e3c75fSJohann Baudy } 197969e3c75fSJohann Baudy if (unlikely(tp_len > size_max)) { 198040d4e3dfSEric Dumazet pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); 198169e3c75fSJohann Baudy return -EMSGSIZE; 198269e3c75fSJohann Baudy } 198369e3c75fSJohann Baudy 1984ae641949SHerbert Xu skb_reserve(skb, hlen); 198569e3c75fSJohann Baudy skb_reset_network_header(skb); 198669e3c75fSJohann Baudy 198769e3c75fSJohann Baudy data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); 198869e3c75fSJohann Baudy to_write = tp_len; 198969e3c75fSJohann Baudy 199069e3c75fSJohann Baudy if (sock->type == SOCK_DGRAM) { 199169e3c75fSJohann Baudy err = dev_hard_header(skb, dev, ntohs(proto), addr, 199269e3c75fSJohann Baudy NULL, tp_len); 199369e3c75fSJohann Baudy if (unlikely(err < 0)) 199469e3c75fSJohann Baudy return -EINVAL; 199569e3c75fSJohann Baudy } else if (dev->hard_header_len) { 199669e3c75fSJohann Baudy /* net device doesn't like empty head */ 199769e3c75fSJohann Baudy if (unlikely(tp_len <= dev->hard_header_len)) { 199840d4e3dfSEric Dumazet pr_err("packet size is too short (%d < %d)\n", 199940d4e3dfSEric Dumazet tp_len, dev->hard_header_len); 200069e3c75fSJohann Baudy return -EINVAL; 200169e3c75fSJohann Baudy } 200269e3c75fSJohann Baudy 200369e3c75fSJohann Baudy skb_push(skb, dev->hard_header_len); 200469e3c75fSJohann Baudy err = skb_store_bits(skb, 0, data, 200569e3c75fSJohann Baudy dev->hard_header_len); 200669e3c75fSJohann Baudy if (unlikely(err)) 200769e3c75fSJohann Baudy return err; 200869e3c75fSJohann Baudy 200969e3c75fSJohann Baudy data += dev->hard_header_len; 201069e3c75fSJohann Baudy to_write -= dev->hard_header_len; 201169e3c75fSJohann Baudy } 201269e3c75fSJohann Baudy 201369e3c75fSJohann Baudy err = -EFAULT; 201469e3c75fSJohann Baudy offset = offset_in_page(data); 201569e3c75fSJohann Baudy len_max = PAGE_SIZE - offset; 201669e3c75fSJohann Baudy len = ((to_write > len_max) ? 
len_max : to_write); 201769e3c75fSJohann Baudy 201869e3c75fSJohann Baudy skb->data_len = to_write; 201969e3c75fSJohann Baudy skb->len += to_write; 202069e3c75fSJohann Baudy skb->truesize += to_write; 202169e3c75fSJohann Baudy atomic_add(to_write, &po->sk.sk_wmem_alloc); 202269e3c75fSJohann Baudy 202369e3c75fSJohann Baudy while (likely(to_write)) { 202469e3c75fSJohann Baudy nr_frags = skb_shinfo(skb)->nr_frags; 202569e3c75fSJohann Baudy 202669e3c75fSJohann Baudy if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { 202740d4e3dfSEric Dumazet pr_err("Packet exceed the number of skb frags(%lu)\n", 202869e3c75fSJohann Baudy MAX_SKB_FRAGS); 202969e3c75fSJohann Baudy return -EFAULT; 203069e3c75fSJohann Baudy } 203169e3c75fSJohann Baudy 20320af55bb5SChangli Gao page = pgv_to_page(data); 20330af55bb5SChangli Gao data += len; 203469e3c75fSJohann Baudy flush_dcache_page(page); 203569e3c75fSJohann Baudy get_page(page); 20360af55bb5SChangli Gao skb_fill_page_desc(skb, nr_frags, page, offset, len); 203769e3c75fSJohann Baudy to_write -= len; 203869e3c75fSJohann Baudy offset = 0; 203969e3c75fSJohann Baudy len_max = PAGE_SIZE; 204069e3c75fSJohann Baudy len = ((to_write > len_max) ? len_max : to_write); 204169e3c75fSJohann Baudy } 204269e3c75fSJohann Baudy 204369e3c75fSJohann Baudy return tp_len; 204469e3c75fSJohann Baudy } 204569e3c75fSJohann Baudy 204669e3c75fSJohann Baudy static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) 204769e3c75fSJohann Baudy { 204869e3c75fSJohann Baudy struct sk_buff *skb; 204969e3c75fSJohann Baudy struct net_device *dev; 205069e3c75fSJohann Baudy __be16 proto; 2051827d9780SBen Greear bool need_rls_dev = false; 2052827d9780SBen Greear int err, reserve = 0; 205369e3c75fSJohann Baudy void *ph; 205469e3c75fSJohann Baudy struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; 205569e3c75fSJohann Baudy int tp_len, size_max; 205669e3c75fSJohann Baudy unsigned char *addr; 205769e3c75fSJohann Baudy int len_sum = 0; 205869e3c75fSJohann Baudy int status = 0; 2059ae641949SHerbert Xu int hlen, tlen; 206069e3c75fSJohann Baudy 206169e3c75fSJohann Baudy mutex_lock(&po->pg_vec_lock); 206269e3c75fSJohann Baudy 206369e3c75fSJohann Baudy err = -EBUSY; 206469e3c75fSJohann Baudy if (saddr == NULL) { 2065827d9780SBen Greear dev = po->prot_hook.dev; 206669e3c75fSJohann Baudy proto = po->num; 206769e3c75fSJohann Baudy addr = NULL; 206869e3c75fSJohann Baudy } else { 206969e3c75fSJohann Baudy err = -EINVAL; 207069e3c75fSJohann Baudy if (msg->msg_namelen < sizeof(struct sockaddr_ll)) 207169e3c75fSJohann Baudy goto out; 207269e3c75fSJohann Baudy if (msg->msg_namelen < (saddr->sll_halen 207369e3c75fSJohann Baudy + offsetof(struct sockaddr_ll, 207469e3c75fSJohann Baudy sll_addr))) 207569e3c75fSJohann Baudy goto out; 207669e3c75fSJohann Baudy proto = saddr->sll_protocol; 207769e3c75fSJohann Baudy addr = saddr->sll_addr; 2078827d9780SBen Greear dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); 2079827d9780SBen Greear need_rls_dev = true; 208069e3c75fSJohann Baudy } 208169e3c75fSJohann Baudy 208269e3c75fSJohann Baudy err = -ENXIO; 208369e3c75fSJohann Baudy if (unlikely(dev == NULL)) 208469e3c75fSJohann Baudy goto out; 208569e3c75fSJohann Baudy 208669e3c75fSJohann Baudy reserve = dev->hard_header_len; 208769e3c75fSJohann Baudy 208869e3c75fSJohann Baudy err = -ENETDOWN; 208969e3c75fSJohann Baudy if (unlikely(!(dev->flags & IFF_UP))) 209069e3c75fSJohann Baudy goto out_put; 209169e3c75fSJohann Baudy 209269e3c75fSJohann Baudy size_max = po->tx_ring.frame_size 2093b5dd884eSGabor Gombas - 
(po->tp_hdrlen - sizeof(struct sockaddr_ll)); 209469e3c75fSJohann Baudy 209569e3c75fSJohann Baudy if (size_max > dev->mtu + reserve) 209669e3c75fSJohann Baudy size_max = dev->mtu + reserve; 209769e3c75fSJohann Baudy 209869e3c75fSJohann Baudy do { 209969e3c75fSJohann Baudy ph = packet_current_frame(po, &po->tx_ring, 210069e3c75fSJohann Baudy TP_STATUS_SEND_REQUEST); 210169e3c75fSJohann Baudy 210269e3c75fSJohann Baudy if (unlikely(ph == NULL)) { 210369e3c75fSJohann Baudy schedule(); 210469e3c75fSJohann Baudy continue; 210569e3c75fSJohann Baudy } 210669e3c75fSJohann Baudy 210769e3c75fSJohann Baudy status = TP_STATUS_SEND_REQUEST; 2108ae641949SHerbert Xu hlen = LL_RESERVED_SPACE(dev); 2109ae641949SHerbert Xu tlen = dev->needed_tailroom; 211069e3c75fSJohann Baudy skb = sock_alloc_send_skb(&po->sk, 2111ae641949SHerbert Xu hlen + tlen + sizeof(struct sockaddr_ll), 211269e3c75fSJohann Baudy 0, &err); 211369e3c75fSJohann Baudy 211469e3c75fSJohann Baudy if (unlikely(skb == NULL)) 211569e3c75fSJohann Baudy goto out_status; 211669e3c75fSJohann Baudy 211769e3c75fSJohann Baudy tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, 2118ae641949SHerbert Xu addr, hlen); 211969e3c75fSJohann Baudy 212069e3c75fSJohann Baudy if (unlikely(tp_len < 0)) { 212169e3c75fSJohann Baudy if (po->tp_loss) { 212269e3c75fSJohann Baudy __packet_set_status(po, ph, 212369e3c75fSJohann Baudy TP_STATUS_AVAILABLE); 212469e3c75fSJohann Baudy packet_increment_head(&po->tx_ring); 212569e3c75fSJohann Baudy kfree_skb(skb); 212669e3c75fSJohann Baudy continue; 212769e3c75fSJohann Baudy } else { 212869e3c75fSJohann Baudy status = TP_STATUS_WRONG_FORMAT; 212969e3c75fSJohann Baudy err = tp_len; 213069e3c75fSJohann Baudy goto out_status; 213169e3c75fSJohann Baudy } 213269e3c75fSJohann Baudy } 213369e3c75fSJohann Baudy 213469e3c75fSJohann Baudy skb->destructor = tpacket_destruct_skb; 213569e3c75fSJohann Baudy __packet_set_status(po, ph, TP_STATUS_SENDING); 213669e3c75fSJohann Baudy atomic_inc(&po->tx_ring.pending); 213769e3c75fSJohann Baudy 213869e3c75fSJohann Baudy status = TP_STATUS_SEND_REQUEST; 213969e3c75fSJohann Baudy err = dev_queue_xmit(skb); 2140eb70df13SJarek Poplawski if (unlikely(err > 0)) { 2141eb70df13SJarek Poplawski err = net_xmit_errno(err); 2142eb70df13SJarek Poplawski if (err && __packet_get_status(po, ph) == 2143eb70df13SJarek Poplawski TP_STATUS_AVAILABLE) { 2144eb70df13SJarek Poplawski /* skb was destructed already */ 2145eb70df13SJarek Poplawski skb = NULL; 2146eb70df13SJarek Poplawski goto out_status; 2147eb70df13SJarek Poplawski } 2148eb70df13SJarek Poplawski /* 2149eb70df13SJarek Poplawski * skb was dropped but not destructed yet; 2150eb70df13SJarek Poplawski * let's treat it like congestion or err < 0 2151eb70df13SJarek Poplawski */ 2152eb70df13SJarek Poplawski err = 0; 2153eb70df13SJarek Poplawski } 215469e3c75fSJohann Baudy packet_increment_head(&po->tx_ring); 215569e3c75fSJohann Baudy len_sum += tp_len; 2156f64f9e71SJoe Perches } while (likely((ph != NULL) || 2157f64f9e71SJoe Perches ((!(msg->msg_flags & MSG_DONTWAIT)) && 2158f64f9e71SJoe Perches (atomic_read(&po->tx_ring.pending)))) 215969e3c75fSJohann Baudy ); 216069e3c75fSJohann Baudy 216169e3c75fSJohann Baudy err = len_sum; 216269e3c75fSJohann Baudy goto out_put; 216369e3c75fSJohann Baudy 216469e3c75fSJohann Baudy out_status: 216569e3c75fSJohann Baudy __packet_set_status(po, ph, status); 216669e3c75fSJohann Baudy kfree_skb(skb); 216769e3c75fSJohann Baudy out_put: 2168827d9780SBen Greear if (need_rls_dev) 216969e3c75fSJohann Baudy dev_put(dev); 
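/*
 * For reference, the user-space side of the TX ring that drives
 * tpacket_snd() above, as a rough sketch (ring geometry, frame and
 * frame_len are illustrative; binding the socket and error handling are
 * omitted):
 *
 *	struct tpacket_req req = { .tp_block_size = 4096, .tp_block_nr = 64,
 *				   .tp_frame_size = 2048, .tp_frame_nr = 128 };
 *	setsockopt(fd, SOL_PACKET, PACKET_TX_RING, &req, sizeof(req));
 *	char *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	struct tpacket_hdr *hdr =
 *		(struct tpacket_hdr *)(ring + i * req.tp_frame_size);
 *	if (hdr->tp_status == TP_STATUS_AVAILABLE) {
 *		memcpy((char *)hdr + TPACKET_HDRLEN -
 *		       sizeof(struct sockaddr_ll), frame, frame_len);
 *		hdr->tp_len = frame_len;
 *		hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *	}
 *	send(fd, NULL, 0, 0);
 *
 * The zero-length send() is what enters the do/while loop above and
 * pushes every frame marked TP_STATUS_SEND_REQUEST out onto the wire.
 */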
217069e3c75fSJohann Baudy out: 217169e3c75fSJohann Baudy mutex_unlock(&po->pg_vec_lock); 217269e3c75fSJohann Baudy return err; 217369e3c75fSJohann Baudy } 21741da177e4SLinus Torvalds 2175eea49cc9SOlof Johansson static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, 2176bfd5f4a3SSridhar Samudrala size_t reserve, size_t len, 2177bfd5f4a3SSridhar Samudrala size_t linear, int noblock, 2178bfd5f4a3SSridhar Samudrala int *err) 2179bfd5f4a3SSridhar Samudrala { 2180bfd5f4a3SSridhar Samudrala struct sk_buff *skb; 2181bfd5f4a3SSridhar Samudrala 2182bfd5f4a3SSridhar Samudrala /* Under a page? Don't bother with paged skb. */ 2183bfd5f4a3SSridhar Samudrala if (prepad + len < PAGE_SIZE || !linear) 2184bfd5f4a3SSridhar Samudrala linear = len; 2185bfd5f4a3SSridhar Samudrala 2186bfd5f4a3SSridhar Samudrala skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, 2187bfd5f4a3SSridhar Samudrala err); 2188bfd5f4a3SSridhar Samudrala if (!skb) 2189bfd5f4a3SSridhar Samudrala return NULL; 2190bfd5f4a3SSridhar Samudrala 2191bfd5f4a3SSridhar Samudrala skb_reserve(skb, reserve); 2192bfd5f4a3SSridhar Samudrala skb_put(skb, linear); 2193bfd5f4a3SSridhar Samudrala skb->data_len = len - linear; 2194bfd5f4a3SSridhar Samudrala skb->len += len - linear; 2195bfd5f4a3SSridhar Samudrala 2196bfd5f4a3SSridhar Samudrala return skb; 2197bfd5f4a3SSridhar Samudrala } 2198bfd5f4a3SSridhar Samudrala 219969e3c75fSJohann Baudy static int packet_snd(struct socket *sock, 22001da177e4SLinus Torvalds struct msghdr *msg, size_t len) 22011da177e4SLinus Torvalds { 22021da177e4SLinus Torvalds struct sock *sk = sock->sk; 22031da177e4SLinus Torvalds struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; 22041da177e4SLinus Torvalds struct sk_buff *skb; 22051da177e4SLinus Torvalds struct net_device *dev; 22060e11c91eSAl Viro __be16 proto; 2207827d9780SBen Greear bool need_rls_dev = false; 22081da177e4SLinus Torvalds unsigned char *addr; 2209827d9780SBen Greear int err, reserve = 0; 2210bfd5f4a3SSridhar Samudrala struct virtio_net_hdr vnet_hdr = { 0 }; 2211bfd5f4a3SSridhar Samudrala int offset = 0; 2212bfd5f4a3SSridhar Samudrala int vnet_hdr_len; 2213bfd5f4a3SSridhar Samudrala struct packet_sock *po = pkt_sk(sk); 2214bfd5f4a3SSridhar Samudrala unsigned short gso_type = 0; 2215ae641949SHerbert Xu int hlen, tlen; 22163bdc0ebaSBen Greear int extra_len = 0; 22171da177e4SLinus Torvalds 22181da177e4SLinus Torvalds /* 22191da177e4SLinus Torvalds * Get and verify the address. 22201da177e4SLinus Torvalds */ 22211da177e4SLinus Torvalds 22221da177e4SLinus Torvalds if (saddr == NULL) { 2223827d9780SBen Greear dev = po->prot_hook.dev; 22241da177e4SLinus Torvalds proto = po->num; 22251da177e4SLinus Torvalds addr = NULL; 22261da177e4SLinus Torvalds } else { 22271da177e4SLinus Torvalds err = -EINVAL; 22281da177e4SLinus Torvalds if (msg->msg_namelen < sizeof(struct sockaddr_ll)) 22291da177e4SLinus Torvalds goto out; 22300fb375fbSEric W. Biederman if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) 22310fb375fbSEric W. 
Biederman goto out; 22321da177e4SLinus Torvalds proto = saddr->sll_protocol; 22331da177e4SLinus Torvalds addr = saddr->sll_addr; 2234827d9780SBen Greear dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); 2235827d9780SBen Greear need_rls_dev = true; 22361da177e4SLinus Torvalds } 22371da177e4SLinus Torvalds 22381da177e4SLinus Torvalds err = -ENXIO; 22391da177e4SLinus Torvalds if (dev == NULL) 22401da177e4SLinus Torvalds goto out_unlock; 22411da177e4SLinus Torvalds if (sock->type == SOCK_RAW) 22421da177e4SLinus Torvalds reserve = dev->hard_header_len; 22431da177e4SLinus Torvalds 2244d5e76b0aSDavid S. Miller err = -ENETDOWN; 2245d5e76b0aSDavid S. Miller if (!(dev->flags & IFF_UP)) 2246d5e76b0aSDavid S. Miller goto out_unlock; 2247d5e76b0aSDavid S. Miller 2248bfd5f4a3SSridhar Samudrala if (po->has_vnet_hdr) { 2249bfd5f4a3SSridhar Samudrala vnet_hdr_len = sizeof(vnet_hdr); 2250bfd5f4a3SSridhar Samudrala 2251bfd5f4a3SSridhar Samudrala err = -EINVAL; 2252bfd5f4a3SSridhar Samudrala if (len < vnet_hdr_len) 22531da177e4SLinus Torvalds goto out_unlock; 22541da177e4SLinus Torvalds 2255bfd5f4a3SSridhar Samudrala len -= vnet_hdr_len; 2256bfd5f4a3SSridhar Samudrala 2257bfd5f4a3SSridhar Samudrala err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov, 2258bfd5f4a3SSridhar Samudrala vnet_hdr_len); 2259bfd5f4a3SSridhar Samudrala if (err < 0) 2260bfd5f4a3SSridhar Samudrala goto out_unlock; 2261bfd5f4a3SSridhar Samudrala 2262bfd5f4a3SSridhar Samudrala if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 2263bfd5f4a3SSridhar Samudrala (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > 2264bfd5f4a3SSridhar Samudrala vnet_hdr.hdr_len)) 2265bfd5f4a3SSridhar Samudrala vnet_hdr.hdr_len = vnet_hdr.csum_start + 2266bfd5f4a3SSridhar Samudrala vnet_hdr.csum_offset + 2; 2267bfd5f4a3SSridhar Samudrala 2268bfd5f4a3SSridhar Samudrala err = -EINVAL; 2269bfd5f4a3SSridhar Samudrala if (vnet_hdr.hdr_len > len) 2270bfd5f4a3SSridhar Samudrala goto out_unlock; 2271bfd5f4a3SSridhar Samudrala 2272bfd5f4a3SSridhar Samudrala if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2273bfd5f4a3SSridhar Samudrala switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2274bfd5f4a3SSridhar Samudrala case VIRTIO_NET_HDR_GSO_TCPV4: 2275bfd5f4a3SSridhar Samudrala gso_type = SKB_GSO_TCPV4; 2276bfd5f4a3SSridhar Samudrala break; 2277bfd5f4a3SSridhar Samudrala case VIRTIO_NET_HDR_GSO_TCPV6: 2278bfd5f4a3SSridhar Samudrala gso_type = SKB_GSO_TCPV6; 2279bfd5f4a3SSridhar Samudrala break; 2280bfd5f4a3SSridhar Samudrala case VIRTIO_NET_HDR_GSO_UDP: 2281bfd5f4a3SSridhar Samudrala gso_type = SKB_GSO_UDP; 2282bfd5f4a3SSridhar Samudrala break; 2283bfd5f4a3SSridhar Samudrala default: 2284bfd5f4a3SSridhar Samudrala goto out_unlock; 2285bfd5f4a3SSridhar Samudrala } 2286bfd5f4a3SSridhar Samudrala 2287bfd5f4a3SSridhar Samudrala if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) 2288bfd5f4a3SSridhar Samudrala gso_type |= SKB_GSO_TCP_ECN; 2289bfd5f4a3SSridhar Samudrala 2290bfd5f4a3SSridhar Samudrala if (vnet_hdr.gso_size == 0) 2291bfd5f4a3SSridhar Samudrala goto out_unlock; 2292bfd5f4a3SSridhar Samudrala 2293bfd5f4a3SSridhar Samudrala } 2294bfd5f4a3SSridhar Samudrala } 2295bfd5f4a3SSridhar Samudrala 22963bdc0ebaSBen Greear if (unlikely(sock_flag(sk, SOCK_NOFCS))) { 22973bdc0ebaSBen Greear if (!netif_supports_nofcs(dev)) { 22983bdc0ebaSBen Greear err = -EPROTONOSUPPORT; 22993bdc0ebaSBen Greear goto out_unlock; 23003bdc0ebaSBen Greear } 23013bdc0ebaSBen Greear extra_len = 4; /* We're doing our own CRC */ 23023bdc0ebaSBen Greear } 23033bdc0ebaSBen Greear 
2304bfd5f4a3SSridhar Samudrala err = -EMSGSIZE; 23053bdc0ebaSBen Greear if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) 2306bfd5f4a3SSridhar Samudrala goto out_unlock; 2307bfd5f4a3SSridhar Samudrala 2308bfd5f4a3SSridhar Samudrala err = -ENOBUFS; 2309ae641949SHerbert Xu hlen = LL_RESERVED_SPACE(dev); 2310ae641949SHerbert Xu tlen = dev->needed_tailroom; 2311ae641949SHerbert Xu skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len, 23121da177e4SLinus Torvalds msg->msg_flags & MSG_DONTWAIT, &err); 23131da177e4SLinus Torvalds if (skb == NULL) 23141da177e4SLinus Torvalds goto out_unlock; 23151da177e4SLinus Torvalds 2316bfd5f4a3SSridhar Samudrala skb_set_network_header(skb, reserve); 23171da177e4SLinus Torvalds 23181da177e4SLinus Torvalds err = -EINVAL; 23190c4e8581SStephen Hemminger if (sock->type == SOCK_DGRAM && 2320bfd5f4a3SSridhar Samudrala (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0) 23211da177e4SLinus Torvalds goto out_free; 23221da177e4SLinus Torvalds 23231da177e4SLinus Torvalds /* Returns -EFAULT on error */ 2324bfd5f4a3SSridhar Samudrala err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); 23251da177e4SLinus Torvalds if (err) 23261da177e4SLinus Torvalds goto out_free; 23272244d07bSOliver Hartkopp err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 2328ed85b565SRichard Cochran if (err < 0) 2329ed85b565SRichard Cochran goto out_free; 23301da177e4SLinus Torvalds 23313bdc0ebaSBen Greear if (!gso_type && (len > dev->mtu + reserve + extra_len)) { 233257f89bfaSBen Greear /* Earlier code assumed this would be a VLAN pkt, 233357f89bfaSBen Greear * double-check this now that we have the actual 233457f89bfaSBen Greear * packet in hand. 233557f89bfaSBen Greear */ 233657f89bfaSBen Greear struct ethhdr *ehdr; 233757f89bfaSBen Greear skb_reset_mac_header(skb); 233857f89bfaSBen Greear ehdr = eth_hdr(skb); 233957f89bfaSBen Greear if (ehdr->h_proto != htons(ETH_P_8021Q)) { 234057f89bfaSBen Greear err = -EMSGSIZE; 234157f89bfaSBen Greear goto out_free; 234257f89bfaSBen Greear } 234357f89bfaSBen Greear } 234457f89bfaSBen Greear 23451da177e4SLinus Torvalds skb->protocol = proto; 23461da177e4SLinus Torvalds skb->dev = dev; 23471da177e4SLinus Torvalds skb->priority = sk->sk_priority; 23482d37a186SEric Dumazet skb->mark = sk->sk_mark; 23491da177e4SLinus Torvalds 2350bfd5f4a3SSridhar Samudrala if (po->has_vnet_hdr) { 2351bfd5f4a3SSridhar Samudrala if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2352bfd5f4a3SSridhar Samudrala if (!skb_partial_csum_set(skb, vnet_hdr.csum_start, 2353bfd5f4a3SSridhar Samudrala vnet_hdr.csum_offset)) { 2354bfd5f4a3SSridhar Samudrala err = -EINVAL; 2355bfd5f4a3SSridhar Samudrala goto out_free; 2356bfd5f4a3SSridhar Samudrala } 2357bfd5f4a3SSridhar Samudrala } 2358bfd5f4a3SSridhar Samudrala 2359bfd5f4a3SSridhar Samudrala skb_shinfo(skb)->gso_size = vnet_hdr.gso_size; 2360bfd5f4a3SSridhar Samudrala skb_shinfo(skb)->gso_type = gso_type; 2361bfd5f4a3SSridhar Samudrala 2362bfd5f4a3SSridhar Samudrala /* Header must be checked, and gso_segs computed. 
*/ 2363bfd5f4a3SSridhar Samudrala skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 2364bfd5f4a3SSridhar Samudrala skb_shinfo(skb)->gso_segs = 0; 2365bfd5f4a3SSridhar Samudrala 2366bfd5f4a3SSridhar Samudrala len += vnet_hdr_len; 2367bfd5f4a3SSridhar Samudrala } 2368bfd5f4a3SSridhar Samudrala 23693bdc0ebaSBen Greear if (unlikely(extra_len == 4)) 23703bdc0ebaSBen Greear skb->no_fcs = 1; 23713bdc0ebaSBen Greear 23721da177e4SLinus Torvalds /* 23731da177e4SLinus Torvalds * Now send it 23741da177e4SLinus Torvalds */ 23751da177e4SLinus Torvalds 23761da177e4SLinus Torvalds err = dev_queue_xmit(skb); 23771da177e4SLinus Torvalds if (err > 0 && (err = net_xmit_errno(err)) != 0) 23781da177e4SLinus Torvalds goto out_unlock; 23791da177e4SLinus Torvalds 2380827d9780SBen Greear if (need_rls_dev) 23811da177e4SLinus Torvalds dev_put(dev); 23821da177e4SLinus Torvalds 238340d4e3dfSEric Dumazet return len; 23841da177e4SLinus Torvalds 23851da177e4SLinus Torvalds out_free: 23861da177e4SLinus Torvalds kfree_skb(skb); 23871da177e4SLinus Torvalds out_unlock: 2388827d9780SBen Greear if (dev && need_rls_dev) 23891da177e4SLinus Torvalds dev_put(dev); 23901da177e4SLinus Torvalds out: 23911da177e4SLinus Torvalds return err; 23921da177e4SLinus Torvalds } 23931da177e4SLinus Torvalds 239469e3c75fSJohann Baudy static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, 239569e3c75fSJohann Baudy struct msghdr *msg, size_t len) 239669e3c75fSJohann Baudy { 239769e3c75fSJohann Baudy struct sock *sk = sock->sk; 239869e3c75fSJohann Baudy struct packet_sock *po = pkt_sk(sk); 239969e3c75fSJohann Baudy if (po->tx_ring.pg_vec) 240069e3c75fSJohann Baudy return tpacket_snd(po, msg); 240169e3c75fSJohann Baudy else 240269e3c75fSJohann Baudy return packet_snd(sock, msg, len); 240369e3c75fSJohann Baudy } 240469e3c75fSJohann Baudy 24051da177e4SLinus Torvalds /* 24061da177e4SLinus Torvalds * Close a PACKET socket. This is fairly simple. We immediately go 24071da177e4SLinus Torvalds * to 'closed' state and remove our protocol entry in the device list. 24081da177e4SLinus Torvalds */ 24091da177e4SLinus Torvalds 24101da177e4SLinus Torvalds static int packet_release(struct socket *sock) 24111da177e4SLinus Torvalds { 24121da177e4SLinus Torvalds struct sock *sk = sock->sk; 24131da177e4SLinus Torvalds struct packet_sock *po; 2414d12d01d6SDenis V. Lunev struct net *net; 2415f6fb8f10Schetan loke union tpacket_req_u req_u; 24161da177e4SLinus Torvalds 24171da177e4SLinus Torvalds if (!sk) 24181da177e4SLinus Torvalds return 0; 24191da177e4SLinus Torvalds 24203b1e0a65SYOSHIFUJI Hideaki net = sock_net(sk); 24211da177e4SLinus Torvalds po = pkt_sk(sk); 24221da177e4SLinus Torvalds 2423808f5114Sstephen hemminger spin_lock_bh(&net->packet.sklist_lock); 2424808f5114Sstephen hemminger sk_del_node_init_rcu(sk); 2425920de804SEric Dumazet sock_prot_inuse_add(net, sk->sk_prot, -1); 2426808f5114Sstephen hemminger spin_unlock_bh(&net->packet.sklist_lock); 24271da177e4SLinus Torvalds 2428808f5114Sstephen hemminger spin_lock(&po->bind_lock); 2429ce06b03eSDavid S. 
Miller unregister_prot_hook(sk, false); 2430160ff18aSBen Greear if (po->prot_hook.dev) { 2431160ff18aSBen Greear dev_put(po->prot_hook.dev); 2432160ff18aSBen Greear po->prot_hook.dev = NULL; 2433160ff18aSBen Greear } 2434808f5114Sstephen hemminger spin_unlock(&po->bind_lock); 24351da177e4SLinus Torvalds 24361da177e4SLinus Torvalds packet_flush_mclist(sk); 24371da177e4SLinus Torvalds 2438f6fb8f10Schetan loke memset(&req_u, 0, sizeof(req_u)); 243969e3c75fSJohann Baudy 244069e3c75fSJohann Baudy if (po->rx_ring.pg_vec) 2441f6fb8f10Schetan loke packet_set_ring(sk, &req_u, 1, 0); 244269e3c75fSJohann Baudy 244369e3c75fSJohann Baudy if (po->tx_ring.pg_vec) 2444f6fb8f10Schetan loke packet_set_ring(sk, &req_u, 1, 1); 24451da177e4SLinus Torvalds 2446dc99f600SDavid S. Miller fanout_release(sk); 2447dc99f600SDavid S. Miller 2448808f5114Sstephen hemminger synchronize_net(); 24491da177e4SLinus Torvalds /* 24501da177e4SLinus Torvalds * Now the socket is dead. No more input will appear. 24511da177e4SLinus Torvalds */ 24521da177e4SLinus Torvalds sock_orphan(sk); 24531da177e4SLinus Torvalds sock->sk = NULL; 24541da177e4SLinus Torvalds 24551da177e4SLinus Torvalds /* Purge queues */ 24561da177e4SLinus Torvalds 24571da177e4SLinus Torvalds skb_queue_purge(&sk->sk_receive_queue); 245817ab56a2SPavel Emelyanov sk_refcnt_debug_release(sk); 24591da177e4SLinus Torvalds 24601da177e4SLinus Torvalds sock_put(sk); 24611da177e4SLinus Torvalds return 0; 24621da177e4SLinus Torvalds } 24631da177e4SLinus Torvalds 24641da177e4SLinus Torvalds /* 24651da177e4SLinus Torvalds * Attach a packet hook. 24661da177e4SLinus Torvalds */ 24671da177e4SLinus Torvalds 24680e11c91eSAl Viro static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) 24691da177e4SLinus Torvalds { 24701da177e4SLinus Torvalds struct packet_sock *po = pkt_sk(sk); 2471dc99f600SDavid S. Miller 2472aef950b4SWei Yongjun if (po->fanout) { 2473aef950b4SWei Yongjun if (dev) 2474aef950b4SWei Yongjun dev_put(dev); 2475aef950b4SWei Yongjun 2476dc99f600SDavid S. Miller return -EINVAL; 2477aef950b4SWei Yongjun } 24781da177e4SLinus Torvalds 24791da177e4SLinus Torvalds lock_sock(sk); 24801da177e4SLinus Torvalds 24811da177e4SLinus Torvalds spin_lock(&po->bind_lock); 2482ce06b03eSDavid S. Miller unregister_prot_hook(sk, true); 24831da177e4SLinus Torvalds po->num = protocol; 24841da177e4SLinus Torvalds po->prot_hook.type = protocol; 2485160ff18aSBen Greear if (po->prot_hook.dev) 2486160ff18aSBen Greear dev_put(po->prot_hook.dev); 24871da177e4SLinus Torvalds po->prot_hook.dev = dev; 24881da177e4SLinus Torvalds 24891da177e4SLinus Torvalds po->ifindex = dev ? dev->ifindex : 0; 24901da177e4SLinus Torvalds 24911da177e4SLinus Torvalds if (protocol == 0) 24921da177e4SLinus Torvalds goto out_unlock; 24931da177e4SLinus Torvalds 2494be85d4adSUrs Thuermann if (!dev || (dev->flags & IFF_UP)) { 2495ce06b03eSDavid S. 
Miller register_prot_hook(sk); 24961da177e4SLinus Torvalds } else { 24971da177e4SLinus Torvalds sk->sk_err = ENETDOWN; 24981da177e4SLinus Torvalds if (!sock_flag(sk, SOCK_DEAD)) 24991da177e4SLinus Torvalds sk->sk_error_report(sk); 25001da177e4SLinus Torvalds } 25011da177e4SLinus Torvalds 25021da177e4SLinus Torvalds out_unlock: 25031da177e4SLinus Torvalds spin_unlock(&po->bind_lock); 25041da177e4SLinus Torvalds release_sock(sk); 25051da177e4SLinus Torvalds return 0; 25061da177e4SLinus Torvalds } 25071da177e4SLinus Torvalds 25081da177e4SLinus Torvalds /* 25091da177e4SLinus Torvalds * Bind a packet socket to a device 25101da177e4SLinus Torvalds */ 25111da177e4SLinus Torvalds 251240d4e3dfSEric Dumazet static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, 251340d4e3dfSEric Dumazet int addr_len) 25141da177e4SLinus Torvalds { 25151da177e4SLinus Torvalds struct sock *sk = sock->sk; 25161da177e4SLinus Torvalds char name[15]; 25171da177e4SLinus Torvalds struct net_device *dev; 25181da177e4SLinus Torvalds int err = -ENODEV; 25191da177e4SLinus Torvalds 25201da177e4SLinus Torvalds /* 25211da177e4SLinus Torvalds * Check legality 25221da177e4SLinus Torvalds */ 25231da177e4SLinus Torvalds 25241da177e4SLinus Torvalds if (addr_len != sizeof(struct sockaddr)) 25251da177e4SLinus Torvalds return -EINVAL; 25261da177e4SLinus Torvalds strlcpy(name, uaddr->sa_data, sizeof(name)); 25271da177e4SLinus Torvalds 25283b1e0a65SYOSHIFUJI Hideaki dev = dev_get_by_name(sock_net(sk), name); 2529160ff18aSBen Greear if (dev) 25301da177e4SLinus Torvalds err = packet_do_bind(sk, dev, pkt_sk(sk)->num); 25311da177e4SLinus Torvalds return err; 25321da177e4SLinus Torvalds } 25331da177e4SLinus Torvalds 25341da177e4SLinus Torvalds static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 25351da177e4SLinus Torvalds { 25361da177e4SLinus Torvalds struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; 25371da177e4SLinus Torvalds struct sock *sk = sock->sk; 25381da177e4SLinus Torvalds struct net_device *dev = NULL; 25391da177e4SLinus Torvalds int err; 25401da177e4SLinus Torvalds 25411da177e4SLinus Torvalds 25421da177e4SLinus Torvalds /* 25431da177e4SLinus Torvalds * Check legality 25441da177e4SLinus Torvalds */ 25451da177e4SLinus Torvalds 25461da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_ll)) 25471da177e4SLinus Torvalds return -EINVAL; 25481da177e4SLinus Torvalds if (sll->sll_family != AF_PACKET) 25491da177e4SLinus Torvalds return -EINVAL; 25501da177e4SLinus Torvalds 25511da177e4SLinus Torvalds if (sll->sll_ifindex) { 25521da177e4SLinus Torvalds err = -ENODEV; 25533b1e0a65SYOSHIFUJI Hideaki dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); 25541da177e4SLinus Torvalds if (dev == NULL) 25551da177e4SLinus Torvalds goto out; 25561da177e4SLinus Torvalds } 25571da177e4SLinus Torvalds err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); 25581da177e4SLinus Torvalds 25591da177e4SLinus Torvalds out: 25601da177e4SLinus Torvalds return err; 25611da177e4SLinus Torvalds } 25621da177e4SLinus Torvalds 25631da177e4SLinus Torvalds static struct proto packet_proto = { 25641da177e4SLinus Torvalds .name = "PACKET", 25651da177e4SLinus Torvalds .owner = THIS_MODULE, 25661da177e4SLinus Torvalds .obj_size = sizeof(struct packet_sock), 25671da177e4SLinus Torvalds }; 25681da177e4SLinus Torvalds 25691da177e4SLinus Torvalds /* 25701da177e4SLinus Torvalds * Create a packet of type SOCK_PACKET. 
25711da177e4SLinus Torvalds */ 25721da177e4SLinus Torvalds 25733f378b68SEric Paris static int packet_create(struct net *net, struct socket *sock, int protocol, 25743f378b68SEric Paris int kern) 25751da177e4SLinus Torvalds { 25761da177e4SLinus Torvalds struct sock *sk; 25771da177e4SLinus Torvalds struct packet_sock *po; 25780e11c91eSAl Viro __be16 proto = (__force __be16)protocol; /* weird, but documented */ 25791da177e4SLinus Torvalds int err; 25801da177e4SLinus Torvalds 25811da177e4SLinus Torvalds if (!capable(CAP_NET_RAW)) 25821da177e4SLinus Torvalds return -EPERM; 2583be02097cSDavid S. Miller if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && 2584be02097cSDavid S. Miller sock->type != SOCK_PACKET) 25851da177e4SLinus Torvalds return -ESOCKTNOSUPPORT; 25861da177e4SLinus Torvalds 25871da177e4SLinus Torvalds sock->state = SS_UNCONNECTED; 25881da177e4SLinus Torvalds 25891da177e4SLinus Torvalds err = -ENOBUFS; 25906257ff21SPavel Emelyanov sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); 25911da177e4SLinus Torvalds if (sk == NULL) 25921da177e4SLinus Torvalds goto out; 25931da177e4SLinus Torvalds 25941da177e4SLinus Torvalds sock->ops = &packet_ops; 25951da177e4SLinus Torvalds if (sock->type == SOCK_PACKET) 25961da177e4SLinus Torvalds sock->ops = &packet_ops_spkt; 2597be02097cSDavid S. Miller 25981da177e4SLinus Torvalds sock_init_data(sock, sk); 25991da177e4SLinus Torvalds 26001da177e4SLinus Torvalds po = pkt_sk(sk); 26011da177e4SLinus Torvalds sk->sk_family = PF_PACKET; 26020e11c91eSAl Viro po->num = proto; 26031da177e4SLinus Torvalds 26041da177e4SLinus Torvalds sk->sk_destruct = packet_sock_destruct; 260517ab56a2SPavel Emelyanov sk_refcnt_debug_inc(sk); 26061da177e4SLinus Torvalds 26071da177e4SLinus Torvalds /* 26081da177e4SLinus Torvalds * Attach a protocol block 26091da177e4SLinus Torvalds */ 26101da177e4SLinus Torvalds 26111da177e4SLinus Torvalds spin_lock_init(&po->bind_lock); 2612905db440SHerbert Xu mutex_init(&po->pg_vec_lock); 26131da177e4SLinus Torvalds po->prot_hook.func = packet_rcv; 2614be02097cSDavid S. Miller 26151da177e4SLinus Torvalds if (sock->type == SOCK_PACKET) 26161da177e4SLinus Torvalds po->prot_hook.func = packet_rcv_spkt; 2617be02097cSDavid S. Miller 26181da177e4SLinus Torvalds po->prot_hook.af_packet_priv = sk; 26191da177e4SLinus Torvalds 26200e11c91eSAl Viro if (proto) { 26210e11c91eSAl Viro po->prot_hook.type = proto; 2622ce06b03eSDavid S. 
Miller register_prot_hook(sk); 26231da177e4SLinus Torvalds } 26241da177e4SLinus Torvalds 2625808f5114Sstephen hemminger spin_lock_bh(&net->packet.sklist_lock); 2626808f5114Sstephen hemminger sk_add_node_rcu(sk, &net->packet.sklist); 26273680453cSEric Dumazet sock_prot_inuse_add(net, &packet_proto, 1); 2628808f5114Sstephen hemminger spin_unlock_bh(&net->packet.sklist_lock); 2629808f5114Sstephen hemminger 263040d4e3dfSEric Dumazet return 0; 26311da177e4SLinus Torvalds out: 26321da177e4SLinus Torvalds return err; 26331da177e4SLinus Torvalds } 26341da177e4SLinus Torvalds 2635ed85b565SRichard Cochran static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len) 2636ed85b565SRichard Cochran { 2637ed85b565SRichard Cochran struct sock_exterr_skb *serr; 2638ed85b565SRichard Cochran struct sk_buff *skb, *skb2; 2639ed85b565SRichard Cochran int copied, err; 2640ed85b565SRichard Cochran 2641ed85b565SRichard Cochran err = -EAGAIN; 2642ed85b565SRichard Cochran skb = skb_dequeue(&sk->sk_error_queue); 2643ed85b565SRichard Cochran if (skb == NULL) 2644ed85b565SRichard Cochran goto out; 2645ed85b565SRichard Cochran 2646ed85b565SRichard Cochran copied = skb->len; 2647ed85b565SRichard Cochran if (copied > len) { 2648ed85b565SRichard Cochran msg->msg_flags |= MSG_TRUNC; 2649ed85b565SRichard Cochran copied = len; 2650ed85b565SRichard Cochran } 2651ed85b565SRichard Cochran err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 2652ed85b565SRichard Cochran if (err) 2653ed85b565SRichard Cochran goto out_free_skb; 2654ed85b565SRichard Cochran 2655ed85b565SRichard Cochran sock_recv_timestamp(msg, sk, skb); 2656ed85b565SRichard Cochran 2657ed85b565SRichard Cochran serr = SKB_EXT_ERR(skb); 2658ed85b565SRichard Cochran put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP, 2659ed85b565SRichard Cochran sizeof(serr->ee), &serr->ee); 2660ed85b565SRichard Cochran 2661ed85b565SRichard Cochran msg->msg_flags |= MSG_ERRQUEUE; 2662ed85b565SRichard Cochran err = copied; 2663ed85b565SRichard Cochran 2664ed85b565SRichard Cochran /* Reset and regenerate socket error */ 2665ed85b565SRichard Cochran spin_lock_bh(&sk->sk_error_queue.lock); 2666ed85b565SRichard Cochran sk->sk_err = 0; 2667ed85b565SRichard Cochran if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { 2668ed85b565SRichard Cochran sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; 2669ed85b565SRichard Cochran spin_unlock_bh(&sk->sk_error_queue.lock); 2670ed85b565SRichard Cochran sk->sk_error_report(sk); 2671ed85b565SRichard Cochran } else 2672ed85b565SRichard Cochran spin_unlock_bh(&sk->sk_error_queue.lock); 2673ed85b565SRichard Cochran 2674ed85b565SRichard Cochran out_free_skb: 2675ed85b565SRichard Cochran kfree_skb(skb); 2676ed85b565SRichard Cochran out: 2677ed85b565SRichard Cochran return err; 2678ed85b565SRichard Cochran } 2679ed85b565SRichard Cochran 26801da177e4SLinus Torvalds /* 26811da177e4SLinus Torvalds * Pull a packet from our receive queue and hand it to the user. 26821da177e4SLinus Torvalds * If necessary we block. 26831da177e4SLinus Torvalds */ 26841da177e4SLinus Torvalds 26851da177e4SLinus Torvalds static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, 26861da177e4SLinus Torvalds struct msghdr *msg, size_t len, int flags) 26871da177e4SLinus Torvalds { 26881da177e4SLinus Torvalds struct sock *sk = sock->sk; 26891da177e4SLinus Torvalds struct sk_buff *skb; 26901da177e4SLinus Torvalds int copied, err; 26910fb375fbSEric W. 
Biederman struct sockaddr_ll *sll; 2692bfd5f4a3SSridhar Samudrala int vnet_hdr_len = 0; 26931da177e4SLinus Torvalds 26941da177e4SLinus Torvalds err = -EINVAL; 2695ed85b565SRichard Cochran if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) 26961da177e4SLinus Torvalds goto out; 26971da177e4SLinus Torvalds 26981da177e4SLinus Torvalds #if 0 26991da177e4SLinus Torvalds /* What error should we return now? EUNATTACH? */ 27001da177e4SLinus Torvalds if (pkt_sk(sk)->ifindex < 0) 27011da177e4SLinus Torvalds return -ENODEV; 27021da177e4SLinus Torvalds #endif 27031da177e4SLinus Torvalds 2704ed85b565SRichard Cochran if (flags & MSG_ERRQUEUE) { 2705ed85b565SRichard Cochran err = packet_recv_error(sk, msg, len); 2706ed85b565SRichard Cochran goto out; 2707ed85b565SRichard Cochran } 2708ed85b565SRichard Cochran 27091da177e4SLinus Torvalds /* 27101da177e4SLinus Torvalds * Call the generic datagram receiver. This handles all sorts 27111da177e4SLinus Torvalds * of horrible races and re-entrancy so we can forget about it 27121da177e4SLinus Torvalds * in the protocol layers. 27131da177e4SLinus Torvalds * 27141da177e4SLinus Torvalds * Now it will return ENETDOWN, if device have just gone down, 27151da177e4SLinus Torvalds * but then it will block. 27161da177e4SLinus Torvalds */ 27171da177e4SLinus Torvalds 27181da177e4SLinus Torvalds skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); 27191da177e4SLinus Torvalds 27201da177e4SLinus Torvalds /* 27211da177e4SLinus Torvalds * An error occurred so return it. Because skb_recv_datagram() 27221da177e4SLinus Torvalds * handles the blocking we don't see and worry about blocking 27231da177e4SLinus Torvalds * retries. 27241da177e4SLinus Torvalds */ 27251da177e4SLinus Torvalds 27261da177e4SLinus Torvalds if (skb == NULL) 27271da177e4SLinus Torvalds goto out; 27281da177e4SLinus Torvalds 2729bfd5f4a3SSridhar Samudrala if (pkt_sk(sk)->has_vnet_hdr) { 2730bfd5f4a3SSridhar Samudrala struct virtio_net_hdr vnet_hdr = { 0 }; 2731bfd5f4a3SSridhar Samudrala 2732bfd5f4a3SSridhar Samudrala err = -EINVAL; 2733bfd5f4a3SSridhar Samudrala vnet_hdr_len = sizeof(vnet_hdr); 27341f18b717SMariusz Kozlowski if (len < vnet_hdr_len) 2735bfd5f4a3SSridhar Samudrala goto out_free; 2736bfd5f4a3SSridhar Samudrala 27371f18b717SMariusz Kozlowski len -= vnet_hdr_len; 27381f18b717SMariusz Kozlowski 2739bfd5f4a3SSridhar Samudrala if (skb_is_gso(skb)) { 2740bfd5f4a3SSridhar Samudrala struct skb_shared_info *sinfo = skb_shinfo(skb); 2741bfd5f4a3SSridhar Samudrala 2742bfd5f4a3SSridhar Samudrala /* This is a hint as to how much should be linear. 
*/ 2743bfd5f4a3SSridhar Samudrala vnet_hdr.hdr_len = skb_headlen(skb); 2744bfd5f4a3SSridhar Samudrala vnet_hdr.gso_size = sinfo->gso_size; 2745bfd5f4a3SSridhar Samudrala if (sinfo->gso_type & SKB_GSO_TCPV4) 2746bfd5f4a3SSridhar Samudrala vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2747bfd5f4a3SSridhar Samudrala else if (sinfo->gso_type & SKB_GSO_TCPV6) 2748bfd5f4a3SSridhar Samudrala vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2749bfd5f4a3SSridhar Samudrala else if (sinfo->gso_type & SKB_GSO_UDP) 2750bfd5f4a3SSridhar Samudrala vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; 2751bfd5f4a3SSridhar Samudrala else if (sinfo->gso_type & SKB_GSO_FCOE) 2752bfd5f4a3SSridhar Samudrala goto out_free; 2753bfd5f4a3SSridhar Samudrala else 2754bfd5f4a3SSridhar Samudrala BUG(); 2755bfd5f4a3SSridhar Samudrala if (sinfo->gso_type & SKB_GSO_TCP_ECN) 2756bfd5f4a3SSridhar Samudrala vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; 2757bfd5f4a3SSridhar Samudrala } else 2758bfd5f4a3SSridhar Samudrala vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; 2759bfd5f4a3SSridhar Samudrala 2760bfd5f4a3SSridhar Samudrala if (skb->ip_summed == CHECKSUM_PARTIAL) { 2761bfd5f4a3SSridhar Samudrala vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 276255508d60SMichał Mirosław vnet_hdr.csum_start = skb_checksum_start_offset(skb); 2763bfd5f4a3SSridhar Samudrala vnet_hdr.csum_offset = skb->csum_offset; 276410a8d94aSJason Wang } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { 276510a8d94aSJason Wang vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; 2766bfd5f4a3SSridhar Samudrala } /* else everything is zero */ 2767bfd5f4a3SSridhar Samudrala 2768bfd5f4a3SSridhar Samudrala err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, 2769bfd5f4a3SSridhar Samudrala vnet_hdr_len); 2770bfd5f4a3SSridhar Samudrala if (err < 0) 2771bfd5f4a3SSridhar Samudrala goto out_free; 2772bfd5f4a3SSridhar Samudrala } 2773bfd5f4a3SSridhar Samudrala 27741da177e4SLinus Torvalds /* 27750fb375fbSEric W. Biederman * If the address length field is there to be filled in, we fill 27760fb375fbSEric W. Biederman * it in now. 27770fb375fbSEric W. Biederman */ 27780fb375fbSEric W. Biederman 2779ffbc6111SHerbert Xu sll = &PACKET_SKB_CB(skb)->sa.ll; 27800fb375fbSEric W. Biederman if (sock->type == SOCK_PACKET) 27810fb375fbSEric W. Biederman msg->msg_namelen = sizeof(struct sockaddr_pkt); 27820fb375fbSEric W. Biederman else 27830fb375fbSEric W. Biederman msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); 27840fb375fbSEric W. Biederman 27850fb375fbSEric W. Biederman /* 27861da177e4SLinus Torvalds * You lose any data beyond the buffer you gave. If it worries a 27871da177e4SLinus Torvalds * user program they can ask the device for its MTU anyway. 
27881da177e4SLinus Torvalds */ 27891da177e4SLinus Torvalds 27901da177e4SLinus Torvalds copied = skb->len; 279140d4e3dfSEric Dumazet if (copied > len) { 27921da177e4SLinus Torvalds copied = len; 27931da177e4SLinus Torvalds msg->msg_flags |= MSG_TRUNC; 27941da177e4SLinus Torvalds } 27951da177e4SLinus Torvalds 27961da177e4SLinus Torvalds err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 27971da177e4SLinus Torvalds if (err) 27981da177e4SLinus Torvalds goto out_free; 27991da177e4SLinus Torvalds 28003b885787SNeil Horman sock_recv_ts_and_drops(msg, sk, skb); 28011da177e4SLinus Torvalds 28021da177e4SLinus Torvalds if (msg->msg_name) 2803ffbc6111SHerbert Xu memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, 2804ffbc6111SHerbert Xu msg->msg_namelen); 28051da177e4SLinus Torvalds 28068dc41944SHerbert Xu if (pkt_sk(sk)->auxdata) { 2807ffbc6111SHerbert Xu struct tpacket_auxdata aux; 2808ffbc6111SHerbert Xu 2809ffbc6111SHerbert Xu aux.tp_status = TP_STATUS_USER; 2810ffbc6111SHerbert Xu if (skb->ip_summed == CHECKSUM_PARTIAL) 2811ffbc6111SHerbert Xu aux.tp_status |= TP_STATUS_CSUMNOTREADY; 2812ffbc6111SHerbert Xu aux.tp_len = PACKET_SKB_CB(skb)->origlen; 2813ffbc6111SHerbert Xu aux.tp_snaplen = skb->len; 2814ffbc6111SHerbert Xu aux.tp_mac = 0; 2815bbe735e4SArnaldo Carvalho de Melo aux.tp_net = skb_network_offset(skb); 2816a3bcc23eSBen Greear if (vlan_tx_tag_present(skb)) { 281705423b24SEric Dumazet aux.tp_vlan_tci = vlan_tx_tag_get(skb); 2818a3bcc23eSBen Greear aux.tp_status |= TP_STATUS_VLAN_VALID; 2819a3bcc23eSBen Greear } else { 2820a3bcc23eSBen Greear aux.tp_vlan_tci = 0; 2821a3bcc23eSBen Greear } 282213fcb7bdSEric Dumazet aux.tp_padding = 0; 2823ffbc6111SHerbert Xu put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); 28248dc41944SHerbert Xu } 28258dc41944SHerbert Xu 28261da177e4SLinus Torvalds /* 28271da177e4SLinus Torvalds * Free or return the buffer as appropriate. Again this 28281da177e4SLinus Torvalds * hides all the races and re-entrancy issues from us. 28291da177e4SLinus Torvalds */ 2830bfd5f4a3SSridhar Samudrala err = vnet_hdr_len + ((flags&MSG_TRUNC) ? 
skb->len : copied); 28311da177e4SLinus Torvalds 28321da177e4SLinus Torvalds out_free: 28331da177e4SLinus Torvalds skb_free_datagram(sk, skb); 28341da177e4SLinus Torvalds out: 28351da177e4SLinus Torvalds return err; 28361da177e4SLinus Torvalds } 28371da177e4SLinus Torvalds 28381da177e4SLinus Torvalds static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, 28391da177e4SLinus Torvalds int *uaddr_len, int peer) 28401da177e4SLinus Torvalds { 28411da177e4SLinus Torvalds struct net_device *dev; 28421da177e4SLinus Torvalds struct sock *sk = sock->sk; 28431da177e4SLinus Torvalds 28441da177e4SLinus Torvalds if (peer) 28451da177e4SLinus Torvalds return -EOPNOTSUPP; 28461da177e4SLinus Torvalds 28471da177e4SLinus Torvalds uaddr->sa_family = AF_PACKET; 2848654d1f8aSEric Dumazet rcu_read_lock(); 2849654d1f8aSEric Dumazet dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); 2850654d1f8aSEric Dumazet if (dev) 285167286640SVasiliy Kulikov strncpy(uaddr->sa_data, dev->name, 14); 2852654d1f8aSEric Dumazet else 28531da177e4SLinus Torvalds memset(uaddr->sa_data, 0, 14); 2854654d1f8aSEric Dumazet rcu_read_unlock(); 28551da177e4SLinus Torvalds *uaddr_len = sizeof(*uaddr); 28561da177e4SLinus Torvalds 28571da177e4SLinus Torvalds return 0; 28581da177e4SLinus Torvalds } 28591da177e4SLinus Torvalds 28601da177e4SLinus Torvalds static int packet_getname(struct socket *sock, struct sockaddr *uaddr, 28611da177e4SLinus Torvalds int *uaddr_len, int peer) 28621da177e4SLinus Torvalds { 28631da177e4SLinus Torvalds struct net_device *dev; 28641da177e4SLinus Torvalds struct sock *sk = sock->sk; 28651da177e4SLinus Torvalds struct packet_sock *po = pkt_sk(sk); 286613cfa97bSCyrill Gorcunov DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); 28671da177e4SLinus Torvalds 28681da177e4SLinus Torvalds if (peer) 28691da177e4SLinus Torvalds return -EOPNOTSUPP; 28701da177e4SLinus Torvalds 28711da177e4SLinus Torvalds sll->sll_family = AF_PACKET; 28721da177e4SLinus Torvalds sll->sll_ifindex = po->ifindex; 28731da177e4SLinus Torvalds sll->sll_protocol = po->num; 287467286640SVasiliy Kulikov sll->sll_pkttype = 0; 2875654d1f8aSEric Dumazet rcu_read_lock(); 2876654d1f8aSEric Dumazet dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); 28771da177e4SLinus Torvalds if (dev) { 28781da177e4SLinus Torvalds sll->sll_hatype = dev->type; 28791da177e4SLinus Torvalds sll->sll_halen = dev->addr_len; 28801da177e4SLinus Torvalds memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); 28811da177e4SLinus Torvalds } else { 28821da177e4SLinus Torvalds sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ 28831da177e4SLinus Torvalds sll->sll_halen = 0; 28841da177e4SLinus Torvalds } 2885654d1f8aSEric Dumazet rcu_read_unlock(); 28860fb375fbSEric W. 
Biederman *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; 28871da177e4SLinus Torvalds 28881da177e4SLinus Torvalds return 0; 28891da177e4SLinus Torvalds } 28901da177e4SLinus Torvalds 28912aeb0b88SWang Chen static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, 28922aeb0b88SWang Chen int what) 28931da177e4SLinus Torvalds { 28941da177e4SLinus Torvalds switch (i->type) { 28951da177e4SLinus Torvalds case PACKET_MR_MULTICAST: 28961162563fSJiri Pirko if (i->alen != dev->addr_len) 28971162563fSJiri Pirko return -EINVAL; 28981da177e4SLinus Torvalds if (what > 0) 289922bedad3SJiri Pirko return dev_mc_add(dev, i->addr); 29001da177e4SLinus Torvalds else 290122bedad3SJiri Pirko return dev_mc_del(dev, i->addr); 29021da177e4SLinus Torvalds break; 29031da177e4SLinus Torvalds case PACKET_MR_PROMISC: 29042aeb0b88SWang Chen return dev_set_promiscuity(dev, what); 29051da177e4SLinus Torvalds break; 29061da177e4SLinus Torvalds case PACKET_MR_ALLMULTI: 29072aeb0b88SWang Chen return dev_set_allmulti(dev, what); 29081da177e4SLinus Torvalds break; 2909d95ed927SEric W. Biederman case PACKET_MR_UNICAST: 29101162563fSJiri Pirko if (i->alen != dev->addr_len) 29111162563fSJiri Pirko return -EINVAL; 2912d95ed927SEric W. Biederman if (what > 0) 2913a748ee24SJiri Pirko return dev_uc_add(dev, i->addr); 2914d95ed927SEric W. Biederman else 2915a748ee24SJiri Pirko return dev_uc_del(dev, i->addr); 2916d95ed927SEric W. Biederman break; 291740d4e3dfSEric Dumazet default: 291840d4e3dfSEric Dumazet break; 29191da177e4SLinus Torvalds } 29202aeb0b88SWang Chen return 0; 29211da177e4SLinus Torvalds } 29221da177e4SLinus Torvalds 29231da177e4SLinus Torvalds static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) 29241da177e4SLinus Torvalds { 29251da177e4SLinus Torvalds for ( ; i; i = i->next) { 29261da177e4SLinus Torvalds if (i->ifindex == dev->ifindex) 29271da177e4SLinus Torvalds packet_dev_mc(dev, i, what); 29281da177e4SLinus Torvalds } 29291da177e4SLinus Torvalds } 29301da177e4SLinus Torvalds 29310fb375fbSEric W. 
Biederman static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) 29321da177e4SLinus Torvalds { 29331da177e4SLinus Torvalds struct packet_sock *po = pkt_sk(sk); 29341da177e4SLinus Torvalds struct packet_mclist *ml, *i; 29351da177e4SLinus Torvalds struct net_device *dev; 29361da177e4SLinus Torvalds int err; 29371da177e4SLinus Torvalds 29381da177e4SLinus Torvalds rtnl_lock(); 29391da177e4SLinus Torvalds 29401da177e4SLinus Torvalds err = -ENODEV; 29413b1e0a65SYOSHIFUJI Hideaki dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); 29421da177e4SLinus Torvalds if (!dev) 29431da177e4SLinus Torvalds goto done; 29441da177e4SLinus Torvalds 29451da177e4SLinus Torvalds err = -EINVAL; 29461162563fSJiri Pirko if (mreq->mr_alen > dev->addr_len) 29471da177e4SLinus Torvalds goto done; 29481da177e4SLinus Torvalds 29491da177e4SLinus Torvalds err = -ENOBUFS; 29508b3a7005SKris Katterjohn i = kmalloc(sizeof(*i), GFP_KERNEL); 29511da177e4SLinus Torvalds if (i == NULL) 29521da177e4SLinus Torvalds goto done; 29531da177e4SLinus Torvalds 29541da177e4SLinus Torvalds err = 0; 29551da177e4SLinus Torvalds for (ml = po->mclist; ml; ml = ml->next) { 29561da177e4SLinus Torvalds if (ml->ifindex == mreq->mr_ifindex && 29571da177e4SLinus Torvalds ml->type == mreq->mr_type && 29581da177e4SLinus Torvalds ml->alen == mreq->mr_alen && 29591da177e4SLinus Torvalds memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { 29601da177e4SLinus Torvalds ml->count++; 29611da177e4SLinus Torvalds /* Free the new element ... */ 29621da177e4SLinus Torvalds kfree(i); 29631da177e4SLinus Torvalds goto done; 29641da177e4SLinus Torvalds } 29651da177e4SLinus Torvalds } 29661da177e4SLinus Torvalds 29671da177e4SLinus Torvalds i->type = mreq->mr_type; 29681da177e4SLinus Torvalds i->ifindex = mreq->mr_ifindex; 29691da177e4SLinus Torvalds i->alen = mreq->mr_alen; 29701da177e4SLinus Torvalds memcpy(i->addr, mreq->mr_address, i->alen); 29711da177e4SLinus Torvalds i->count = 1; 29721da177e4SLinus Torvalds i->next = po->mclist; 29731da177e4SLinus Torvalds po->mclist = i; 29742aeb0b88SWang Chen err = packet_dev_mc(dev, i, 1); 29752aeb0b88SWang Chen if (err) { 29762aeb0b88SWang Chen po->mclist = i->next; 29772aeb0b88SWang Chen kfree(i); 29782aeb0b88SWang Chen } 29791da177e4SLinus Torvalds 29801da177e4SLinus Torvalds done: 29811da177e4SLinus Torvalds rtnl_unlock(); 29821da177e4SLinus Torvalds return err; 29831da177e4SLinus Torvalds } 29841da177e4SLinus Torvalds 29850fb375fbSEric W. 
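/*
 * Illustrative sketch (not part of this file): the userspace side of the
 * membership handling implemented by packet_mc_add()/packet_mc_drop()
 * above.  This puts one interface into promiscuous mode through the
 * packet socket instead of SIOCSIFFLAGS, so the setting is reverted
 * automatically when the socket is closed (see packet_flush_mclist()).
 * Assumptions: fd is an open AF_PACKET socket and ifindex names a real
 * device; error handling omitted.
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = ifindex,
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 *
 * For PACKET_MR_MULTICAST and PACKET_MR_UNICAST, mr_alen and mr_address
 * must also be filled in, and mr_alen has to match the device address
 * length, as checked in packet_dev_mc().
 */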
Biederman static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) 29861da177e4SLinus Torvalds { 29871da177e4SLinus Torvalds struct packet_mclist *ml, **mlp; 29881da177e4SLinus Torvalds 29891da177e4SLinus Torvalds rtnl_lock(); 29901da177e4SLinus Torvalds 29911da177e4SLinus Torvalds for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { 29921da177e4SLinus Torvalds if (ml->ifindex == mreq->mr_ifindex && 29931da177e4SLinus Torvalds ml->type == mreq->mr_type && 29941da177e4SLinus Torvalds ml->alen == mreq->mr_alen && 29951da177e4SLinus Torvalds memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { 29961da177e4SLinus Torvalds if (--ml->count == 0) { 29971da177e4SLinus Torvalds struct net_device *dev; 29981da177e4SLinus Torvalds *mlp = ml->next; 2999ad959e76SEric Dumazet dev = __dev_get_by_index(sock_net(sk), ml->ifindex); 3000ad959e76SEric Dumazet if (dev) 30011da177e4SLinus Torvalds packet_dev_mc(dev, ml, -1); 30021da177e4SLinus Torvalds kfree(ml); 30031da177e4SLinus Torvalds } 30041da177e4SLinus Torvalds rtnl_unlock(); 30051da177e4SLinus Torvalds return 0; 30061da177e4SLinus Torvalds } 30071da177e4SLinus Torvalds } 30081da177e4SLinus Torvalds rtnl_unlock(); 30091da177e4SLinus Torvalds return -EADDRNOTAVAIL; 30101da177e4SLinus Torvalds } 30111da177e4SLinus Torvalds 30121da177e4SLinus Torvalds static void packet_flush_mclist(struct sock *sk) 30131da177e4SLinus Torvalds { 30141da177e4SLinus Torvalds struct packet_sock *po = pkt_sk(sk); 30151da177e4SLinus Torvalds struct packet_mclist *ml; 30161da177e4SLinus Torvalds 30171da177e4SLinus Torvalds if (!po->mclist) 30181da177e4SLinus Torvalds return; 30191da177e4SLinus Torvalds 30201da177e4SLinus Torvalds rtnl_lock(); 30211da177e4SLinus Torvalds while ((ml = po->mclist) != NULL) { 30221da177e4SLinus Torvalds struct net_device *dev; 30231da177e4SLinus Torvalds 30241da177e4SLinus Torvalds po->mclist = ml->next; 3025ad959e76SEric Dumazet dev = __dev_get_by_index(sock_net(sk), ml->ifindex); 3026ad959e76SEric Dumazet if (dev != NULL) 30271da177e4SLinus Torvalds packet_dev_mc(dev, ml, -1); 30281da177e4SLinus Torvalds kfree(ml); 30291da177e4SLinus Torvalds } 30301da177e4SLinus Torvalds rtnl_unlock(); 30311da177e4SLinus Torvalds } 30321da177e4SLinus Torvalds 30331da177e4SLinus Torvalds static int 3034b7058842SDavid S. Miller packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) 30351da177e4SLinus Torvalds { 30361da177e4SLinus Torvalds struct sock *sk = sock->sk; 30378dc41944SHerbert Xu struct packet_sock *po = pkt_sk(sk); 30381da177e4SLinus Torvalds int ret; 30391da177e4SLinus Torvalds 30401da177e4SLinus Torvalds if (level != SOL_PACKET) 30411da177e4SLinus Torvalds return -ENOPROTOOPT; 30421da177e4SLinus Torvalds 30431da177e4SLinus Torvalds switch (optname) { 30441da177e4SLinus Torvalds case PACKET_ADD_MEMBERSHIP: 30451da177e4SLinus Torvalds case PACKET_DROP_MEMBERSHIP: 30461da177e4SLinus Torvalds { 30470fb375fbSEric W. Biederman struct packet_mreq_max mreq; 30480fb375fbSEric W. Biederman int len = optlen; 30490fb375fbSEric W. Biederman memset(&mreq, 0, sizeof(mreq)); 30500fb375fbSEric W. Biederman if (len < sizeof(struct packet_mreq)) 30511da177e4SLinus Torvalds return -EINVAL; 30520fb375fbSEric W. Biederman if (len > sizeof(mreq)) 30530fb375fbSEric W. Biederman len = sizeof(mreq); 30540fb375fbSEric W. Biederman if (copy_from_user(&mreq, optval, len)) 30551da177e4SLinus Torvalds return -EFAULT; 30560fb375fbSEric W. 
Biederman if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) 30570fb375fbSEric W. Biederman return -EINVAL; 30581da177e4SLinus Torvalds if (optname == PACKET_ADD_MEMBERSHIP) 30591da177e4SLinus Torvalds ret = packet_mc_add(sk, &mreq); 30601da177e4SLinus Torvalds else 30611da177e4SLinus Torvalds ret = packet_mc_drop(sk, &mreq); 30621da177e4SLinus Torvalds return ret; 30631da177e4SLinus Torvalds } 3064a2efcfa0SDavid S. Miller 30651da177e4SLinus Torvalds case PACKET_RX_RING: 306669e3c75fSJohann Baudy case PACKET_TX_RING: 30671da177e4SLinus Torvalds { 3068f6fb8f10Schetan loke union tpacket_req_u req_u; 3069f6fb8f10Schetan loke int len; 30701da177e4SLinus Torvalds 3071f6fb8f10Schetan loke switch (po->tp_version) { 3072f6fb8f10Schetan loke case TPACKET_V1: 3073f6fb8f10Schetan loke case TPACKET_V2: 3074f6fb8f10Schetan loke len = sizeof(req_u.req); 3075f6fb8f10Schetan loke break; 3076f6fb8f10Schetan loke case TPACKET_V3: 3077f6fb8f10Schetan loke default: 3078f6fb8f10Schetan loke len = sizeof(req_u.req3); 3079f6fb8f10Schetan loke break; 3080f6fb8f10Schetan loke } 3081f6fb8f10Schetan loke if (optlen < len) 30821da177e4SLinus Torvalds return -EINVAL; 3083bfd5f4a3SSridhar Samudrala if (pkt_sk(sk)->has_vnet_hdr) 3084bfd5f4a3SSridhar Samudrala return -EINVAL; 3085f6fb8f10Schetan loke if (copy_from_user(&req_u.req, optval, len)) 30861da177e4SLinus Torvalds return -EFAULT; 3087f6fb8f10Schetan loke return packet_set_ring(sk, &req_u, 0, 3088f6fb8f10Schetan loke optname == PACKET_TX_RING); 30891da177e4SLinus Torvalds } 30901da177e4SLinus Torvalds case PACKET_COPY_THRESH: 30911da177e4SLinus Torvalds { 30921da177e4SLinus Torvalds int val; 30931da177e4SLinus Torvalds 30941da177e4SLinus Torvalds if (optlen != sizeof(val)) 30951da177e4SLinus Torvalds return -EINVAL; 30961da177e4SLinus Torvalds if (copy_from_user(&val, optval, sizeof(val))) 30971da177e4SLinus Torvalds return -EFAULT; 30981da177e4SLinus Torvalds 30991da177e4SLinus Torvalds pkt_sk(sk)->copy_thresh = val; 31001da177e4SLinus Torvalds return 0; 31011da177e4SLinus Torvalds } 3102bbd6ef87SPatrick McHardy case PACKET_VERSION: 3103bbd6ef87SPatrick McHardy { 3104bbd6ef87SPatrick McHardy int val; 3105bbd6ef87SPatrick McHardy 3106bbd6ef87SPatrick McHardy if (optlen != sizeof(val)) 3107bbd6ef87SPatrick McHardy return -EINVAL; 310869e3c75fSJohann Baudy if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) 3109bbd6ef87SPatrick McHardy return -EBUSY; 3110bbd6ef87SPatrick McHardy if (copy_from_user(&val, optval, sizeof(val))) 3111bbd6ef87SPatrick McHardy return -EFAULT; 3112bbd6ef87SPatrick McHardy switch (val) { 3113bbd6ef87SPatrick McHardy case TPACKET_V1: 3114bbd6ef87SPatrick McHardy case TPACKET_V2: 3115f6fb8f10Schetan loke case TPACKET_V3: 3116bbd6ef87SPatrick McHardy po->tp_version = val; 3117bbd6ef87SPatrick McHardy return 0; 3118bbd6ef87SPatrick McHardy default: 3119bbd6ef87SPatrick McHardy return -EINVAL; 3120bbd6ef87SPatrick McHardy } 3121bbd6ef87SPatrick McHardy } 31228913336aSPatrick McHardy case PACKET_RESERVE: 31238913336aSPatrick McHardy { 31248913336aSPatrick McHardy unsigned int val; 31258913336aSPatrick McHardy 31268913336aSPatrick McHardy if (optlen != sizeof(val)) 31278913336aSPatrick McHardy return -EINVAL; 312869e3c75fSJohann Baudy if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) 31298913336aSPatrick McHardy return -EBUSY; 31308913336aSPatrick McHardy if (copy_from_user(&val, optval, sizeof(val))) 31318913336aSPatrick McHardy return -EFAULT; 31328913336aSPatrick McHardy po->tp_reserve = val; 31338913336aSPatrick McHardy return 0; 
31348913336aSPatrick McHardy } 313569e3c75fSJohann Baudy case PACKET_LOSS: 313669e3c75fSJohann Baudy { 313769e3c75fSJohann Baudy unsigned int val; 313869e3c75fSJohann Baudy 313969e3c75fSJohann Baudy if (optlen != sizeof(val)) 314069e3c75fSJohann Baudy return -EINVAL; 314169e3c75fSJohann Baudy if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) 314269e3c75fSJohann Baudy return -EBUSY; 314369e3c75fSJohann Baudy if (copy_from_user(&val, optval, sizeof(val))) 314469e3c75fSJohann Baudy return -EFAULT; 314569e3c75fSJohann Baudy po->tp_loss = !!val; 314669e3c75fSJohann Baudy return 0; 314769e3c75fSJohann Baudy } 31488dc41944SHerbert Xu case PACKET_AUXDATA: 31498dc41944SHerbert Xu { 31508dc41944SHerbert Xu int val; 31518dc41944SHerbert Xu 31528dc41944SHerbert Xu if (optlen < sizeof(val)) 31538dc41944SHerbert Xu return -EINVAL; 31548dc41944SHerbert Xu if (copy_from_user(&val, optval, sizeof(val))) 31558dc41944SHerbert Xu return -EFAULT; 31568dc41944SHerbert Xu 31578dc41944SHerbert Xu po->auxdata = !!val; 31588dc41944SHerbert Xu return 0; 31598dc41944SHerbert Xu } 316080feaacbSPeter P. Waskiewicz Jr case PACKET_ORIGDEV: 316180feaacbSPeter P. Waskiewicz Jr { 316280feaacbSPeter P. Waskiewicz Jr int val; 316380feaacbSPeter P. Waskiewicz Jr 316480feaacbSPeter P. Waskiewicz Jr if (optlen < sizeof(val)) 316580feaacbSPeter P. Waskiewicz Jr return -EINVAL; 316680feaacbSPeter P. Waskiewicz Jr if (copy_from_user(&val, optval, sizeof(val))) 316780feaacbSPeter P. Waskiewicz Jr return -EFAULT; 316880feaacbSPeter P. Waskiewicz Jr 316980feaacbSPeter P. Waskiewicz Jr po->origdev = !!val; 317080feaacbSPeter P. Waskiewicz Jr return 0; 317180feaacbSPeter P. Waskiewicz Jr } 3172bfd5f4a3SSridhar Samudrala case PACKET_VNET_HDR: 3173bfd5f4a3SSridhar Samudrala { 3174bfd5f4a3SSridhar Samudrala int val; 3175bfd5f4a3SSridhar Samudrala 3176bfd5f4a3SSridhar Samudrala if (sock->type != SOCK_RAW) 3177bfd5f4a3SSridhar Samudrala return -EINVAL; 3178bfd5f4a3SSridhar Samudrala if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) 3179bfd5f4a3SSridhar Samudrala return -EBUSY; 3180bfd5f4a3SSridhar Samudrala if (optlen < sizeof(val)) 3181bfd5f4a3SSridhar Samudrala return -EINVAL; 3182bfd5f4a3SSridhar Samudrala if (copy_from_user(&val, optval, sizeof(val))) 3183bfd5f4a3SSridhar Samudrala return -EFAULT; 3184bfd5f4a3SSridhar Samudrala 3185bfd5f4a3SSridhar Samudrala po->has_vnet_hdr = !!val; 3186bfd5f4a3SSridhar Samudrala return 0; 3187bfd5f4a3SSridhar Samudrala } 3188614f60faSScott McMillan case PACKET_TIMESTAMP: 3189614f60faSScott McMillan { 3190614f60faSScott McMillan int val; 3191614f60faSScott McMillan 3192614f60faSScott McMillan if (optlen != sizeof(val)) 3193614f60faSScott McMillan return -EINVAL; 3194614f60faSScott McMillan if (copy_from_user(&val, optval, sizeof(val))) 3195614f60faSScott McMillan return -EFAULT; 3196614f60faSScott McMillan 3197614f60faSScott McMillan po->tp_tstamp = val; 3198614f60faSScott McMillan return 0; 3199614f60faSScott McMillan } 3200dc99f600SDavid S. Miller case PACKET_FANOUT: 3201dc99f600SDavid S. Miller { 3202dc99f600SDavid S. Miller int val; 3203dc99f600SDavid S. Miller 3204dc99f600SDavid S. Miller if (optlen != sizeof(val)) 3205dc99f600SDavid S. Miller return -EINVAL; 3206dc99f600SDavid S. Miller if (copy_from_user(&val, optval, sizeof(val))) 3207dc99f600SDavid S. Miller return -EFAULT; 3208dc99f600SDavid S. Miller 3209dc99f600SDavid S. Miller return fanout_add(sk, val & 0xffff, val >> 16); 3210dc99f600SDavid S. 
Miller } 32111da177e4SLinus Torvalds default: 32121da177e4SLinus Torvalds return -ENOPROTOOPT; 32131da177e4SLinus Torvalds } 32141da177e4SLinus Torvalds } 32151da177e4SLinus Torvalds 32161da177e4SLinus Torvalds static int packet_getsockopt(struct socket *sock, int level, int optname, 32171da177e4SLinus Torvalds char __user *optval, int __user *optlen) 32181da177e4SLinus Torvalds { 32191da177e4SLinus Torvalds int len; 3220c06fff6eSEric Dumazet int val, lv = sizeof(val); 32211da177e4SLinus Torvalds struct sock *sk = sock->sk; 32221da177e4SLinus Torvalds struct packet_sock *po = pkt_sk(sk); 3223c06fff6eSEric Dumazet void *data = &val; 32248dc41944SHerbert Xu struct tpacket_stats st; 3225f6fb8f10Schetan loke union tpacket_stats_u st_u; 32261da177e4SLinus Torvalds 32271da177e4SLinus Torvalds if (level != SOL_PACKET) 32281da177e4SLinus Torvalds return -ENOPROTOOPT; 32291da177e4SLinus Torvalds 32301da177e4SLinus Torvalds if (get_user(len, optlen)) 32311da177e4SLinus Torvalds return -EFAULT; 32321da177e4SLinus Torvalds 32331da177e4SLinus Torvalds if (len < 0) 32341da177e4SLinus Torvalds return -EINVAL; 32351da177e4SLinus Torvalds 32361da177e4SLinus Torvalds switch (optname) { 32371da177e4SLinus Torvalds case PACKET_STATISTICS: 32381da177e4SLinus Torvalds spin_lock_bh(&sk->sk_receive_queue.lock); 3239f6fb8f10Schetan loke if (po->tp_version == TPACKET_V3) { 3240c06fff6eSEric Dumazet lv = sizeof(struct tpacket_stats_v3); 3241f6fb8f10Schetan loke memcpy(&st_u.stats3, &po->stats, 3242f6fb8f10Schetan loke sizeof(struct tpacket_stats)); 3243f6fb8f10Schetan loke st_u.stats3.tp_freeze_q_cnt = 3244f6fb8f10Schetan loke po->stats_u.stats3.tp_freeze_q_cnt; 3245f6fb8f10Schetan loke st_u.stats3.tp_packets += po->stats.tp_drops; 3246f6fb8f10Schetan loke data = &st_u.stats3; 3247f6fb8f10Schetan loke } else { 3248c06fff6eSEric Dumazet lv = sizeof(struct tpacket_stats); 32491da177e4SLinus Torvalds st = po->stats; 3250f6fb8f10Schetan loke st.tp_packets += st.tp_drops; 3251f6fb8f10Schetan loke data = &st; 3252f6fb8f10Schetan loke } 32531da177e4SLinus Torvalds memset(&po->stats, 0, sizeof(st)); 32541da177e4SLinus Torvalds spin_unlock_bh(&sk->sk_receive_queue.lock); 32551da177e4SLinus Torvalds break; 32568dc41944SHerbert Xu case PACKET_AUXDATA: 32578dc41944SHerbert Xu val = po->auxdata; 32588dc41944SHerbert Xu break; 325980feaacbSPeter P. Waskiewicz Jr case PACKET_ORIGDEV: 326080feaacbSPeter P. Waskiewicz Jr val = po->origdev; 326180feaacbSPeter P. 
Waskiewicz Jr break; 3262bfd5f4a3SSridhar Samudrala case PACKET_VNET_HDR: 3263bfd5f4a3SSridhar Samudrala val = po->has_vnet_hdr; 3264bfd5f4a3SSridhar Samudrala break; 3265bbd6ef87SPatrick McHardy case PACKET_VERSION: 3266bbd6ef87SPatrick McHardy val = po->tp_version; 3267bbd6ef87SPatrick McHardy break; 3268bbd6ef87SPatrick McHardy case PACKET_HDRLEN: 3269bbd6ef87SPatrick McHardy if (len > sizeof(int)) 3270bbd6ef87SPatrick McHardy len = sizeof(int); 3271bbd6ef87SPatrick McHardy if (copy_from_user(&val, optval, len)) 3272bbd6ef87SPatrick McHardy return -EFAULT; 3273bbd6ef87SPatrick McHardy switch (val) { 3274bbd6ef87SPatrick McHardy case TPACKET_V1: 3275bbd6ef87SPatrick McHardy val = sizeof(struct tpacket_hdr); 3276bbd6ef87SPatrick McHardy break; 3277bbd6ef87SPatrick McHardy case TPACKET_V2: 3278bbd6ef87SPatrick McHardy val = sizeof(struct tpacket2_hdr); 3279bbd6ef87SPatrick McHardy break; 3280f6fb8f10Schetan loke case TPACKET_V3: 3281f6fb8f10Schetan loke val = sizeof(struct tpacket3_hdr); 3282f6fb8f10Schetan loke break; 3283bbd6ef87SPatrick McHardy default: 3284bbd6ef87SPatrick McHardy return -EINVAL; 3285bbd6ef87SPatrick McHardy } 3286bbd6ef87SPatrick McHardy break; 32878913336aSPatrick McHardy case PACKET_RESERVE: 32888913336aSPatrick McHardy val = po->tp_reserve; 32898913336aSPatrick McHardy break; 329069e3c75fSJohann Baudy case PACKET_LOSS: 329169e3c75fSJohann Baudy val = po->tp_loss; 329269e3c75fSJohann Baudy break; 3293614f60faSScott McMillan case PACKET_TIMESTAMP: 3294614f60faSScott McMillan val = po->tp_tstamp; 3295614f60faSScott McMillan break; 3296dc99f600SDavid S. Miller case PACKET_FANOUT: 3297dc99f600SDavid S. Miller val = (po->fanout ? 3298dc99f600SDavid S. Miller ((u32)po->fanout->id | 3299dc99f600SDavid S. Miller ((u32)po->fanout->type << 16)) : 3300dc99f600SDavid S. Miller 0); 3301dc99f600SDavid S. Miller break; 33021da177e4SLinus Torvalds default: 33031da177e4SLinus Torvalds return -ENOPROTOOPT; 33041da177e4SLinus Torvalds } 33051da177e4SLinus Torvalds 3306c06fff6eSEric Dumazet if (len > lv) 3307c06fff6eSEric Dumazet len = lv; 33081da177e4SLinus Torvalds if (put_user(len, optlen)) 33091da177e4SLinus Torvalds return -EFAULT; 33108dc41944SHerbert Xu if (copy_to_user(optval, data, len)) 33118dc41944SHerbert Xu return -EFAULT; 33121da177e4SLinus Torvalds return 0; 33131da177e4SLinus Torvalds } 33141da177e4SLinus Torvalds 33151da177e4SLinus Torvalds 33161da177e4SLinus Torvalds static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) 33171da177e4SLinus Torvalds { 33181da177e4SLinus Torvalds struct sock *sk; 33191da177e4SLinus Torvalds struct hlist_node *node; 3320ad930650SJason Lunz struct net_device *dev = data; 3321c346dca1SYOSHIFUJI Hideaki struct net *net = dev_net(dev); 33221da177e4SLinus Torvalds 3323808f5114Sstephen hemminger rcu_read_lock(); 3324808f5114Sstephen hemminger sk_for_each_rcu(sk, node, &net->packet.sklist) { 33251da177e4SLinus Torvalds struct packet_sock *po = pkt_sk(sk); 33261da177e4SLinus Torvalds 33271da177e4SLinus Torvalds switch (msg) { 33281da177e4SLinus Torvalds case NETDEV_UNREGISTER: 33291da177e4SLinus Torvalds if (po->mclist) 33301da177e4SLinus Torvalds packet_dev_mclist(dev, po->mclist, -1); 3331a2efcfa0SDavid S. Miller /* fallthrough */ 3332a2efcfa0SDavid S. Miller 33331da177e4SLinus Torvalds case NETDEV_DOWN: 33341da177e4SLinus Torvalds if (dev->ifindex == po->ifindex) { 33351da177e4SLinus Torvalds spin_lock(&po->bind_lock); 33361da177e4SLinus Torvalds if (po->running) { 3337ce06b03eSDavid S. 
Miller __unregister_prot_hook(sk, false); 33381da177e4SLinus Torvalds sk->sk_err = ENETDOWN; 33391da177e4SLinus Torvalds if (!sock_flag(sk, SOCK_DEAD)) 33401da177e4SLinus Torvalds sk->sk_error_report(sk); 33411da177e4SLinus Torvalds } 33421da177e4SLinus Torvalds if (msg == NETDEV_UNREGISTER) { 33431da177e4SLinus Torvalds po->ifindex = -1; 3344160ff18aSBen Greear if (po->prot_hook.dev) 3345160ff18aSBen Greear dev_put(po->prot_hook.dev); 33461da177e4SLinus Torvalds po->prot_hook.dev = NULL; 33471da177e4SLinus Torvalds } 33481da177e4SLinus Torvalds spin_unlock(&po->bind_lock); 33491da177e4SLinus Torvalds } 33501da177e4SLinus Torvalds break; 33511da177e4SLinus Torvalds case NETDEV_UP: 3352808f5114Sstephen hemminger if (dev->ifindex == po->ifindex) { 33531da177e4SLinus Torvalds spin_lock(&po->bind_lock); 3354ce06b03eSDavid S. Miller if (po->num) 3355ce06b03eSDavid S. Miller register_prot_hook(sk); 33561da177e4SLinus Torvalds spin_unlock(&po->bind_lock); 3357808f5114Sstephen hemminger } 33581da177e4SLinus Torvalds break; 33591da177e4SLinus Torvalds } 33601da177e4SLinus Torvalds } 3361808f5114Sstephen hemminger rcu_read_unlock(); 33621da177e4SLinus Torvalds return NOTIFY_DONE; 33631da177e4SLinus Torvalds } 33641da177e4SLinus Torvalds 33651da177e4SLinus Torvalds 33661da177e4SLinus Torvalds static int packet_ioctl(struct socket *sock, unsigned int cmd, 33671da177e4SLinus Torvalds unsigned long arg) 33681da177e4SLinus Torvalds { 33691da177e4SLinus Torvalds struct sock *sk = sock->sk; 33701da177e4SLinus Torvalds 33711da177e4SLinus Torvalds switch (cmd) { 33721da177e4SLinus Torvalds case SIOCOUTQ: 33731da177e4SLinus Torvalds { 337431e6d363SEric Dumazet int amount = sk_wmem_alloc_get(sk); 337531e6d363SEric Dumazet 33761da177e4SLinus Torvalds return put_user(amount, (int __user *)arg); 33771da177e4SLinus Torvalds } 33781da177e4SLinus Torvalds case SIOCINQ: 33791da177e4SLinus Torvalds { 33801da177e4SLinus Torvalds struct sk_buff *skb; 33811da177e4SLinus Torvalds int amount = 0; 33821da177e4SLinus Torvalds 33831da177e4SLinus Torvalds spin_lock_bh(&sk->sk_receive_queue.lock); 33841da177e4SLinus Torvalds skb = skb_peek(&sk->sk_receive_queue); 33851da177e4SLinus Torvalds if (skb) 33861da177e4SLinus Torvalds amount = skb->len; 33871da177e4SLinus Torvalds spin_unlock_bh(&sk->sk_receive_queue.lock); 33881da177e4SLinus Torvalds return put_user(amount, (int __user *)arg); 33891da177e4SLinus Torvalds } 33901da177e4SLinus Torvalds case SIOCGSTAMP: 33911da177e4SLinus Torvalds return sock_get_timestamp(sk, (struct timeval __user *)arg); 3392ae40eb1eSEric Dumazet case SIOCGSTAMPNS: 3393ae40eb1eSEric Dumazet return sock_get_timestampns(sk, (struct timespec __user *)arg); 33941da177e4SLinus Torvalds 33951da177e4SLinus Torvalds #ifdef CONFIG_INET 33961da177e4SLinus Torvalds case SIOCADDRT: 33971da177e4SLinus Torvalds case SIOCDELRT: 33981da177e4SLinus Torvalds case SIOCDARP: 33991da177e4SLinus Torvalds case SIOCGARP: 34001da177e4SLinus Torvalds case SIOCSARP: 34011da177e4SLinus Torvalds case SIOCGIFADDR: 34021da177e4SLinus Torvalds case SIOCSIFADDR: 34031da177e4SLinus Torvalds case SIOCGIFBRDADDR: 34041da177e4SLinus Torvalds case SIOCSIFBRDADDR: 34051da177e4SLinus Torvalds case SIOCGIFNETMASK: 34061da177e4SLinus Torvalds case SIOCSIFNETMASK: 34071da177e4SLinus Torvalds case SIOCGIFDSTADDR: 34081da177e4SLinus Torvalds case SIOCSIFDSTADDR: 34091da177e4SLinus Torvalds case SIOCSIFFLAGS: 34101da177e4SLinus Torvalds return inet_dgram_ops.ioctl(sock, cmd, arg); 34111da177e4SLinus Torvalds #endif 34121da177e4SLinus Torvalds 
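/*
 * Illustrative note (not part of this file): on a packet socket SIOCINQ
 * reports the length of the next queued frame rather than the total
 * receive-queue size, and SIOCOUTQ reports how much transmit memory is
 * still outstanding (sk_wmem_alloc).  A minimal sketch, assuming fd is an
 * open packet socket and SIOCINQ comes from <linux/sockios.h>:
 *
 *	int next_len = 0;
 *	ioctl(fd, SIOCINQ, &next_len);
 */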
34131da177e4SLinus Torvalds default: 3414b5e5fa5eSChristoph Hellwig return -ENOIOCTLCMD; 34151da177e4SLinus Torvalds } 34161da177e4SLinus Torvalds return 0; 34171da177e4SLinus Torvalds } 34181da177e4SLinus Torvalds 34191da177e4SLinus Torvalds static unsigned int packet_poll(struct file *file, struct socket *sock, 34201da177e4SLinus Torvalds poll_table *wait) 34211da177e4SLinus Torvalds { 34221da177e4SLinus Torvalds struct sock *sk = sock->sk; 34231da177e4SLinus Torvalds struct packet_sock *po = pkt_sk(sk); 34241da177e4SLinus Torvalds unsigned int mask = datagram_poll(file, sock, wait); 34251da177e4SLinus Torvalds 34261da177e4SLinus Torvalds spin_lock_bh(&sk->sk_receive_queue.lock); 342769e3c75fSJohann Baudy if (po->rx_ring.pg_vec) { 3428f6fb8f10Schetan loke if (!packet_previous_rx_frame(po, &po->rx_ring, 3429f6fb8f10Schetan loke TP_STATUS_KERNEL)) 34301da177e4SLinus Torvalds mask |= POLLIN | POLLRDNORM; 34311da177e4SLinus Torvalds } 34321da177e4SLinus Torvalds spin_unlock_bh(&sk->sk_receive_queue.lock); 343369e3c75fSJohann Baudy spin_lock_bh(&sk->sk_write_queue.lock); 343469e3c75fSJohann Baudy if (po->tx_ring.pg_vec) { 343569e3c75fSJohann Baudy if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE)) 343669e3c75fSJohann Baudy mask |= POLLOUT | POLLWRNORM; 343769e3c75fSJohann Baudy } 343869e3c75fSJohann Baudy spin_unlock_bh(&sk->sk_write_queue.lock); 34391da177e4SLinus Torvalds return mask; 34401da177e4SLinus Torvalds } 34411da177e4SLinus Torvalds 34421da177e4SLinus Torvalds 34431da177e4SLinus Torvalds /* Dirty? Well, I still did not learn better way to account 34441da177e4SLinus Torvalds * for user mmaps. 34451da177e4SLinus Torvalds */ 34461da177e4SLinus Torvalds 34471da177e4SLinus Torvalds static void packet_mm_open(struct vm_area_struct *vma) 34481da177e4SLinus Torvalds { 34491da177e4SLinus Torvalds struct file *file = vma->vm_file; 3450b69aee04SEric Dumazet struct socket *sock = file->private_data; 34511da177e4SLinus Torvalds struct sock *sk = sock->sk; 34521da177e4SLinus Torvalds 34531da177e4SLinus Torvalds if (sk) 34541da177e4SLinus Torvalds atomic_inc(&pkt_sk(sk)->mapped); 34551da177e4SLinus Torvalds } 34561da177e4SLinus Torvalds 34571da177e4SLinus Torvalds static void packet_mm_close(struct vm_area_struct *vma) 34581da177e4SLinus Torvalds { 34591da177e4SLinus Torvalds struct file *file = vma->vm_file; 3460b69aee04SEric Dumazet struct socket *sock = file->private_data; 34611da177e4SLinus Torvalds struct sock *sk = sock->sk; 34621da177e4SLinus Torvalds 34631da177e4SLinus Torvalds if (sk) 34641da177e4SLinus Torvalds atomic_dec(&pkt_sk(sk)->mapped); 34651da177e4SLinus Torvalds } 34661da177e4SLinus Torvalds 3467f0f37e2fSAlexey Dobriyan static const struct vm_operations_struct packet_mmap_ops = { 34681da177e4SLinus Torvalds .open = packet_mm_open, 34691da177e4SLinus Torvalds .close = packet_mm_close, 34701da177e4SLinus Torvalds }; 34711da177e4SLinus Torvalds 34720e3125c7SNeil Horman static void free_pg_vec(struct pgv *pg_vec, unsigned int order, 34730e3125c7SNeil Horman unsigned int len) 34741da177e4SLinus Torvalds { 34751da177e4SLinus Torvalds int i; 34761da177e4SLinus Torvalds 34771da177e4SLinus Torvalds for (i = 0; i < len; i++) { 34780e3125c7SNeil Horman if (likely(pg_vec[i].buffer)) { 3479c56b4d90SChangli Gao if (is_vmalloc_addr(pg_vec[i].buffer)) 34800e3125c7SNeil Horman vfree(pg_vec[i].buffer); 34810e3125c7SNeil Horman else 34820e3125c7SNeil Horman free_pages((unsigned long)pg_vec[i].buffer, 34830e3125c7SNeil Horman order); 34840e3125c7SNeil Horman pg_vec[i].buffer = NULL; 
34850e3125c7SNeil Horman } 34861da177e4SLinus Torvalds } 34871da177e4SLinus Torvalds kfree(pg_vec); 34881da177e4SLinus Torvalds } 34891da177e4SLinus Torvalds 3490eea49cc9SOlof Johansson static char *alloc_one_pg_vec_page(unsigned long order) 34914ebf0ae2SDavid S. Miller { 34920e3125c7SNeil Horman char *buffer = NULL; 34930e3125c7SNeil Horman gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | 34940e3125c7SNeil Horman __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; 3495719bfeaaSEric Dumazet 34960e3125c7SNeil Horman buffer = (char *) __get_free_pages(gfp_flags, order); 34970e3125c7SNeil Horman 34980e3125c7SNeil Horman if (buffer) 34990e3125c7SNeil Horman return buffer; 35000e3125c7SNeil Horman 35010e3125c7SNeil Horman /* 35020e3125c7SNeil Horman * __get_free_pages failed, fall back to vmalloc 35030e3125c7SNeil Horman */ 3504bbce5a59SEric Dumazet buffer = vzalloc((1 << order) * PAGE_SIZE); 35050e3125c7SNeil Horman 35060e3125c7SNeil Horman if (buffer) 35070e3125c7SNeil Horman return buffer; 35080e3125c7SNeil Horman 35090e3125c7SNeil Horman /* 35100e3125c7SNeil Horman * vmalloc failed, lets dig into swap here 35110e3125c7SNeil Horman */ 35120e3125c7SNeil Horman gfp_flags &= ~__GFP_NORETRY; 35130e3125c7SNeil Horman buffer = (char *)__get_free_pages(gfp_flags, order); 35140e3125c7SNeil Horman if (buffer) 35150e3125c7SNeil Horman return buffer; 35160e3125c7SNeil Horman 35170e3125c7SNeil Horman /* 35180e3125c7SNeil Horman * complete and utter failure 35190e3125c7SNeil Horman */ 35200e3125c7SNeil Horman return NULL; 35214ebf0ae2SDavid S. Miller } 35224ebf0ae2SDavid S. Miller 35230e3125c7SNeil Horman static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) 35244ebf0ae2SDavid S. Miller { 35254ebf0ae2SDavid S. Miller unsigned int block_nr = req->tp_block_nr; 35260e3125c7SNeil Horman struct pgv *pg_vec; 35274ebf0ae2SDavid S. Miller int i; 35284ebf0ae2SDavid S. Miller 35290e3125c7SNeil Horman pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); 35304ebf0ae2SDavid S. Miller if (unlikely(!pg_vec)) 35314ebf0ae2SDavid S. Miller goto out; 35324ebf0ae2SDavid S. Miller 35334ebf0ae2SDavid S. Miller for (i = 0; i < block_nr; i++) { 3534c56b4d90SChangli Gao pg_vec[i].buffer = alloc_one_pg_vec_page(order); 35350e3125c7SNeil Horman if (unlikely(!pg_vec[i].buffer)) 35364ebf0ae2SDavid S. Miller goto out_free_pgvec; 35374ebf0ae2SDavid S. Miller } 35384ebf0ae2SDavid S. Miller 35394ebf0ae2SDavid S. Miller out: 35404ebf0ae2SDavid S. Miller return pg_vec; 35414ebf0ae2SDavid S. Miller 35424ebf0ae2SDavid S. Miller out_free_pgvec: 35434ebf0ae2SDavid S. Miller free_pg_vec(pg_vec, order, block_nr); 35444ebf0ae2SDavid S. Miller pg_vec = NULL; 35454ebf0ae2SDavid S. Miller goto out; 35464ebf0ae2SDavid S. 
}

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err = -EINVAL;
	/* Added to minimize code churn */
	struct tpacket_req *req = &req_u->req;

	/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
	if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
		WARN(1, "Tx-ring is not supported.\n");
		goto out;
	}

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		case TPACKET_V3:
			po->tp_hdrlen = TPACKET3_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
		switch (po->tp_version) {
		case TPACKET_V3:
		/* Transmit path is not supported. We checked
		 * it above, but just being paranoid.
		 */
			if (!tx_ring)
				init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
			break;
		default:
			break;
		}
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		po->num = 0;
		__unregister_prot_hook(sk, false);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running) {
		po->num = num;
		register_prot_hook(sk);
	}
	spin_unlock(&po->bind_lock);
	if (closing && (po->tp_version > TPACKET_V2)) {
		/* Because we don't support block-based V3 on tx-ring */
		if (!tx_ring)
			prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
	}
	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}

static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call = packet_notifier,
};

#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static int __net_init packet_net_init(struct net *net)
{
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};


static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);
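
/*
 * Editor's usage sketch (not part of the kernel sources): a minimal
 * user-space program showing how packet_set_ring(), packet_mmap() and
 * packet_poll() above are typically driven.  It assumes a 4 KiB PAGE_SIZE
 * and the default TPACKET_V1 header format; the buffer sizes are
 * illustrative, chosen only to satisfy the sanity checks in
 * packet_set_ring().  Kept under "#if 0" so the listing still builds as
 * kernel code.
 */
#if 0
#include <poll.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>

int main(void)
{
	struct tpacket_req req;
	struct pollfd pfd;
	size_t ring_len;
	void *ring;
	int fd;

	/* Needs CAP_NET_RAW; ETH_P_ALL captures every protocol. */
	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0)
		return 1;

	/* packet_set_ring() demands: tp_block_size a multiple of PAGE_SIZE,
	 * tp_frame_size TPACKET_ALIGNMENT-aligned and at least
	 * tp_hdrlen + tp_reserve, and
	 * tp_frame_nr == (tp_block_size / tp_frame_size) * tp_block_nr.
	 */
	memset(&req, 0, sizeof(req));
	req.tp_block_size = 4096;
	req.tp_frame_size = 2048;
	req.tp_block_nr = 64;
	req.tp_frame_nr = (req.tp_block_size / req.tp_frame_size) *
			  req.tp_block_nr;

	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0)
		return 1;

	/* packet_mmap() rejects a nonzero offset or a length that does not
	 * exactly match the configured ring(s). */
	ring_len = (size_t)req.tp_block_size * req.tp_block_nr;
	ring = mmap(NULL, ring_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED)
		return 1;

	/* packet_poll() reports POLLIN once the most recently produced frame
	 * has left TP_STATUS_KERNEL, i.e. the kernel has handed at least one
	 * frame to user space. */
	pfd.fd = fd;
	pfd.events = POLLIN;
	poll(&pfd, 1, -1);

	munmap(ring, ring_len);
	close(fd);
	return 0;
}
#endif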
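
/*
 * Editor's sizing note (an added illustration, not kernel code): with
 * 4 KiB pages, a tp_block_size of 64 KiB gives get_order(65536) == 4, so
 * each ring block in alloc_pg_vec() is a single order-4 allocation.
 * alloc_one_pg_vec_page() first tries __get_free_pages() with
 * __GFP_NORETRY set, falls back to vzalloc(), and only then retries the
 * page allocator without __GFP_NORETRY before giving up entirely.
 */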
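
/*
 * Editor's note: with the format strings used in packet_seq_show() above,
 * a /proc/net/packet dump looks roughly like this (values are illustrative,
 * not taken from a real system):
 *
 *   sk       RefCnt Type Proto  Iface R Rmem   User   Inode
 *   ffff8800b6f0c000 3      3    0003   2     1 0      0      16396
 *
 * "Proto" is the bound protocol number printed in hex (0003 == ETH_P_ALL),
 * "R" is po->running, and "sk" is the socket pointer as rendered by %pK,
 * which may appear as zeroes depending on kptr_restrict.
 */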