xref: /openbmc/linux/net/packet/af_packet.c (revision 99aa3473e672ca610905838997fa018b95cd643f)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		PACKET - implements raw packet sockets.
71da177e4SLinus Torvalds  *
802c30a84SJesper Juhl  * Authors:	Ross Biro
91da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
101da177e4SLinus Torvalds  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Fixes:
131da177e4SLinus Torvalds  *		Alan Cox	:	verify_area() now used correctly
141da177e4SLinus Torvalds  *		Alan Cox	:	new skbuff lists, look ma no backlogs!
151da177e4SLinus Torvalds  *		Alan Cox	:	tidied skbuff lists.
161da177e4SLinus Torvalds  *		Alan Cox	:	Now uses generic datagram routines I
171da177e4SLinus Torvalds  *					added. Also fixed the peek/read crash
181da177e4SLinus Torvalds  *					from all old Linux datagram code.
191da177e4SLinus Torvalds  *		Alan Cox	:	Uses the improved datagram code.
201da177e4SLinus Torvalds  *		Alan Cox	:	Added NULL's for socket options.
211da177e4SLinus Torvalds  *		Alan Cox	:	Re-commented the code.
221da177e4SLinus Torvalds  *		Alan Cox	:	Use new kernel side addressing
231da177e4SLinus Torvalds  *		Rob Janssen	:	Correct MTU usage.
241da177e4SLinus Torvalds  *		Dave Platt	:	Counter leaks caused by incorrect
251da177e4SLinus Torvalds  *					interrupt locking and some slightly
261da177e4SLinus Torvalds  *					dubious gcc output. Can you read
271da177e4SLinus Torvalds  *					compiler: it said _VOLATILE_
281da177e4SLinus Torvalds  *	Richard Kooijman	:	Timestamp fixes.
291da177e4SLinus Torvalds  *		Alan Cox	:	New buffers. Use sk->mac.raw.
301da177e4SLinus Torvalds  *		Alan Cox	:	sendmsg/recvmsg support.
311da177e4SLinus Torvalds  *		Alan Cox	:	Protocol setting support
321da177e4SLinus Torvalds  *	Alexey Kuznetsov	:	Untied from IPv4 stack.
331da177e4SLinus Torvalds  *	Cyrus Durgin		:	Fixed kerneld for kmod.
341da177e4SLinus Torvalds  *	Michal Ostrowski        :       Module initialization cleanup.
351da177e4SLinus Torvalds  *         Ulises Alonso        :       Frame number limit removal and
361da177e4SLinus Torvalds  *                                      packet_set_ring memory leak.
370fb375fbSEric W. Biederman  *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
380fb375fbSEric W. Biederman  *					The convention is that longer addresses
390fb375fbSEric W. Biederman  *					will simply extend the hardware address
400fb375fbSEric W. Biederman  *					byte arrays at the end of sockaddr_ll
410fb375fbSEric W. Biederman  *					and packet_mreq.
4269e3c75fSJohann Baudy  *		Johann Baudy	:	Added TX RING.
43f6fb8f10Schetan loke  *		Chetan Loke	:	Implemented TPACKET_V3 block abstraction
44f6fb8f10Schetan loke  *					layer.
45f6fb8f10Schetan loke  *					Copyright (C) 2011, <lokec@ccs.neu.edu>
46f6fb8f10Schetan loke  *
471da177e4SLinus Torvalds  *
481da177e4SLinus Torvalds  *		This program is free software; you can redistribute it and/or
491da177e4SLinus Torvalds  *		modify it under the terms of the GNU General Public License
501da177e4SLinus Torvalds  *		as published by the Free Software Foundation; either version
511da177e4SLinus Torvalds  *		2 of the License, or (at your option) any later version.
521da177e4SLinus Torvalds  *
531da177e4SLinus Torvalds  */
541da177e4SLinus Torvalds 
551da177e4SLinus Torvalds #include <linux/types.h>
561da177e4SLinus Torvalds #include <linux/mm.h>
574fc268d2SRandy Dunlap #include <linux/capability.h>
581da177e4SLinus Torvalds #include <linux/fcntl.h>
591da177e4SLinus Torvalds #include <linux/socket.h>
601da177e4SLinus Torvalds #include <linux/in.h>
611da177e4SLinus Torvalds #include <linux/inet.h>
621da177e4SLinus Torvalds #include <linux/netdevice.h>
631da177e4SLinus Torvalds #include <linux/if_packet.h>
641da177e4SLinus Torvalds #include <linux/wireless.h>
65ffbc6111SHerbert Xu #include <linux/kernel.h>
661da177e4SLinus Torvalds #include <linux/kmod.h>
675a0e3ad6STejun Heo #include <linux/slab.h>
680e3125c7SNeil Horman #include <linux/vmalloc.h>
69457c4cbcSEric W. Biederman #include <net/net_namespace.h>
701da177e4SLinus Torvalds #include <net/ip.h>
711da177e4SLinus Torvalds #include <net/protocol.h>
721da177e4SLinus Torvalds #include <linux/skbuff.h>
731da177e4SLinus Torvalds #include <net/sock.h>
741da177e4SLinus Torvalds #include <linux/errno.h>
751da177e4SLinus Torvalds #include <linux/timer.h>
761da177e4SLinus Torvalds #include <asm/uaccess.h>
771da177e4SLinus Torvalds #include <asm/ioctls.h>
781da177e4SLinus Torvalds #include <asm/page.h>
79a1f8e7f7SAl Viro #include <asm/cacheflush.h>
801da177e4SLinus Torvalds #include <asm/io.h>
811da177e4SLinus Torvalds #include <linux/proc_fs.h>
821da177e4SLinus Torvalds #include <linux/seq_file.h>
831da177e4SLinus Torvalds #include <linux/poll.h>
841da177e4SLinus Torvalds #include <linux/module.h>
851da177e4SLinus Torvalds #include <linux/init.h>
86905db440SHerbert Xu #include <linux/mutex.h>
8705423b24SEric Dumazet #include <linux/if_vlan.h>
88bfd5f4a3SSridhar Samudrala #include <linux/virtio_net.h>
89ed85b565SRichard Cochran #include <linux/errqueue.h>
90614f60faSScott McMillan #include <linux/net_tstamp.h>
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds #ifdef CONFIG_INET
931da177e4SLinus Torvalds #include <net/inet_common.h>
941da177e4SLinus Torvalds #endif
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds /*
971da177e4SLinus Torvalds    Assumptions:
981da177e4SLinus Torvalds    - if device has no dev->hard_header routine, it adds and removes ll header
991da177e4SLinus Torvalds      inside itself. In this case ll header is invisible outside of device,
1001da177e4SLinus Torvalds      but higher levels still should reserve dev->hard_header_len.
1011da177e4SLinus Torvalds      Some devices are enough clever to reallocate skb, when header
1021da177e4SLinus Torvalds      will not fit to reserved space (tunnel), another ones are silly
1031da177e4SLinus Torvalds      (PPP).
1041da177e4SLinus Torvalds    - packet socket receives packets with pulled ll header,
1051da177e4SLinus Torvalds      so that SOCK_RAW should push it back.
1061da177e4SLinus Torvalds 
1071da177e4SLinus Torvalds On receive:
1081da177e4SLinus Torvalds -----------
1091da177e4SLinus Torvalds 
1101da177e4SLinus Torvalds Incoming, dev->hard_header!=NULL
111b0e380b1SArnaldo Carvalho de Melo    mac_header -> ll header
1121da177e4SLinus Torvalds    data       -> data
1131da177e4SLinus Torvalds 
1141da177e4SLinus Torvalds Outgoing, dev->hard_header!=NULL
115b0e380b1SArnaldo Carvalho de Melo    mac_header -> ll header
1161da177e4SLinus Torvalds    data       -> ll header
1171da177e4SLinus Torvalds 
1181da177e4SLinus Torvalds Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
		 header.  PPP does this, which is wrong, because it introduces
		 asymmetry between rx and tx paths.
1221da177e4SLinus Torvalds    data       -> data
1231da177e4SLinus Torvalds 
1241da177e4SLinus Torvalds Outgoing, dev->hard_header==NULL
125b0e380b1SArnaldo Carvalho de Melo    mac_header -> data. ll header is still not built!
1261da177e4SLinus Torvalds    data       -> data
1271da177e4SLinus Torvalds 
1281da177e4SLinus Torvalds Resume
1291da177e4SLinus Torvalds   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
1301da177e4SLinus Torvalds 
1311da177e4SLinus Torvalds 
1321da177e4SLinus Torvalds On transmit:
1331da177e4SLinus Torvalds ------------
1341da177e4SLinus Torvalds 
1351da177e4SLinus Torvalds dev->hard_header != NULL
136b0e380b1SArnaldo Carvalho de Melo    mac_header -> ll header
1371da177e4SLinus Torvalds    data       -> ll header
1381da177e4SLinus Torvalds 
1391da177e4SLinus Torvalds dev->hard_header == NULL (ll header is added by device, we cannot control it)
140b0e380b1SArnaldo Carvalho de Melo    mac_header -> data
1411da177e4SLinus Torvalds    data       -> data
1421da177e4SLinus Torvalds 
   We should set nh.raw on output to the correct position,
   packet classifier depends on it.
1451da177e4SLinus Torvalds  */
1461da177e4SLinus Torvalds 
1471da177e4SLinus Torvalds /* Private packet socket structures. */
1481da177e4SLinus Torvalds 
14940d4e3dfSEric Dumazet struct packet_mclist {
1501da177e4SLinus Torvalds 	struct packet_mclist	*next;
1511da177e4SLinus Torvalds 	int			ifindex;
1521da177e4SLinus Torvalds 	int			count;
1531da177e4SLinus Torvalds 	unsigned short		type;
1541da177e4SLinus Torvalds 	unsigned short		alen;
1550fb375fbSEric W. Biederman 	unsigned char		addr[MAX_ADDR_LEN];
1560fb375fbSEric W. Biederman };
1570fb375fbSEric W. Biederman /* identical to struct packet_mreq except it has
1580fb375fbSEric W. Biederman  * a longer address field.
1590fb375fbSEric W. Biederman  */
16040d4e3dfSEric Dumazet struct packet_mreq_max {
1610fb375fbSEric W. Biederman 	int		mr_ifindex;
1620fb375fbSEric W. Biederman 	unsigned short	mr_type;
1630fb375fbSEric W. Biederman 	unsigned short	mr_alen;
1640fb375fbSEric W. Biederman 	unsigned char	mr_address[MAX_ADDR_LEN];
1651da177e4SLinus Torvalds };
166a2efcfa0SDavid S. Miller 
167f6fb8f10Schetan loke static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
16869e3c75fSJohann Baudy 		int closing, int tx_ring);
16969e3c75fSJohann Baudy 
170f6fb8f10Schetan loke 
/* Alignment, in bytes, applied to TPACKET_V3 block headers and private areas. */
#define V3_ALIGNMENT	(8)

/* Size of the block descriptor, rounded up to V3_ALIGNMENT. */
#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

/*
 * Aligned block header followed by a private area of @sz_of_priv bytes,
 * the latter also rounded up to V3_ALIGNMENT.
 */
#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
177f6fb8f10Schetan loke 
178f6fb8f10Schetan loke /* kbdq - kernel block descriptor queue */
179bc59ba39Schetan loke struct tpacket_kbdq_core {
180f6fb8f10Schetan loke 	struct pgv	*pkbdq;
181f6fb8f10Schetan loke 	unsigned int	feature_req_word;
182f6fb8f10Schetan loke 	unsigned int	hdrlen;
183f6fb8f10Schetan loke 	unsigned char	reset_pending_on_curr_blk;
184f6fb8f10Schetan loke 	unsigned char   delete_blk_timer;
185f6fb8f10Schetan loke 	unsigned short	kactive_blk_num;
186f6fb8f10Schetan loke 	unsigned short	blk_sizeof_priv;
187f6fb8f10Schetan loke 
188f6fb8f10Schetan loke 	/* last_kactive_blk_num:
189f6fb8f10Schetan loke 	 * trick to see if user-space has caught up
190f6fb8f10Schetan loke 	 * in order to avoid refreshing timer when every single pkt arrives.
191f6fb8f10Schetan loke 	 */
192f6fb8f10Schetan loke 	unsigned short	last_kactive_blk_num;
193f6fb8f10Schetan loke 
194f6fb8f10Schetan loke 	char		*pkblk_start;
195f6fb8f10Schetan loke 	char		*pkblk_end;
196f6fb8f10Schetan loke 	int		kblk_size;
197f6fb8f10Schetan loke 	unsigned int	knum_blocks;
198f6fb8f10Schetan loke 	uint64_t	knxt_seq_num;
199f6fb8f10Schetan loke 	char		*prev;
200f6fb8f10Schetan loke 	char		*nxt_offset;
201f6fb8f10Schetan loke 	struct sk_buff	*skb;
202f6fb8f10Schetan loke 
203f6fb8f10Schetan loke 	atomic_t	blk_fill_in_prog;
204f6fb8f10Schetan loke 
205f6fb8f10Schetan loke 	/* Default is set to 8ms */
206f6fb8f10Schetan loke #define DEFAULT_PRB_RETIRE_TOV	(8)
207f6fb8f10Schetan loke 
208f6fb8f10Schetan loke 	unsigned short  retire_blk_tov;
209f6fb8f10Schetan loke 	unsigned short  version;
210f6fb8f10Schetan loke 	unsigned long	tov_in_jiffies;
211f6fb8f10Schetan loke 
212f6fb8f10Schetan loke 	/* timer to retire an outstanding block */
213f6fb8f10Schetan loke 	struct timer_list retire_blk_timer;
214f6fb8f10Schetan loke };
215f6fb8f10Schetan loke 
#define PGV_FROM_VMALLOC 1

/* One entry of a ring's block vector: the start address of that block. */
struct pgv {
	char *buffer;
};
2200e3125c7SNeil Horman 
22169e3c75fSJohann Baudy struct packet_ring_buffer {
2220e3125c7SNeil Horman 	struct pgv		*pg_vec;
22369e3c75fSJohann Baudy 	unsigned int		head;
22469e3c75fSJohann Baudy 	unsigned int		frames_per_block;
22569e3c75fSJohann Baudy 	unsigned int		frame_size;
22669e3c75fSJohann Baudy 	unsigned int		frame_max;
22769e3c75fSJohann Baudy 
22869e3c75fSJohann Baudy 	unsigned int		pg_vec_order;
22969e3c75fSJohann Baudy 	unsigned int		pg_vec_pages;
23069e3c75fSJohann Baudy 	unsigned int		pg_vec_len;
23169e3c75fSJohann Baudy 
232bc59ba39Schetan loke 	struct tpacket_kbdq_core	prb_bdqc;
23369e3c75fSJohann Baudy 	atomic_t		pending;
23469e3c75fSJohann Baudy };
23569e3c75fSJohann Baudy 
/*
 * Field accessors for a block descriptor pointer @x.  Each expands to an
 * lvalue, apart from BLOCK_PRIV() which yields the address located
 * BLOCK_O2PRIV(x) bytes past the start of the descriptor.
 */
#define BLOCK_STATUS(x)	((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x)	((x)->offset_to_priv)
#define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))
243f6fb8f10Schetan loke 
24469e3c75fSJohann Baudy struct packet_sock;
24569e3c75fSJohann Baudy static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
2461da177e4SLinus Torvalds 
247f6fb8f10Schetan loke static void *packet_previous_frame(struct packet_sock *po,
248f6fb8f10Schetan loke 		struct packet_ring_buffer *rb,
249f6fb8f10Schetan loke 		int status);
250f6fb8f10Schetan loke static void packet_increment_head(struct packet_ring_buffer *buff);
251bc59ba39Schetan loke static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
252bc59ba39Schetan loke 			struct tpacket_block_desc *);
253bc59ba39Schetan loke static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
254f6fb8f10Schetan loke 			struct packet_sock *);
255bc59ba39Schetan loke static void prb_retire_current_block(struct tpacket_kbdq_core *,
256f6fb8f10Schetan loke 		struct packet_sock *, unsigned int status);
257bc59ba39Schetan loke static int prb_queue_frozen(struct tpacket_kbdq_core *);
258bc59ba39Schetan loke static void prb_open_block(struct tpacket_kbdq_core *,
259bc59ba39Schetan loke 		struct tpacket_block_desc *);
260f6fb8f10Schetan loke static void prb_retire_rx_blk_timer_expired(unsigned long);
261bc59ba39Schetan loke static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
262bc59ba39Schetan loke static void prb_init_blk_timer(struct packet_sock *,
263bc59ba39Schetan loke 		struct tpacket_kbdq_core *,
264f6fb8f10Schetan loke 		void (*func) (unsigned long));
265bc59ba39Schetan loke static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
266bc59ba39Schetan loke static void prb_clear_rxhash(struct tpacket_kbdq_core *,
267bc59ba39Schetan loke 		struct tpacket3_hdr *);
268bc59ba39Schetan loke static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
269bc59ba39Schetan loke 		struct tpacket3_hdr *);
2701da177e4SLinus Torvalds static void packet_flush_mclist(struct sock *sk);
2711da177e4SLinus Torvalds 
272dc99f600SDavid S. Miller struct packet_fanout;
2731da177e4SLinus Torvalds struct packet_sock {
2741da177e4SLinus Torvalds 	/* struct sock has to be the first member of packet_sock */
2751da177e4SLinus Torvalds 	struct sock		sk;
276dc99f600SDavid S. Miller 	struct packet_fanout	*fanout;
2771da177e4SLinus Torvalds 	struct tpacket_stats	stats;
278f6fb8f10Schetan loke 	union  tpacket_stats_u	stats_u;
27969e3c75fSJohann Baudy 	struct packet_ring_buffer	rx_ring;
28069e3c75fSJohann Baudy 	struct packet_ring_buffer	tx_ring;
2811da177e4SLinus Torvalds 	int			copy_thresh;
2821da177e4SLinus Torvalds 	spinlock_t		bind_lock;
283905db440SHerbert Xu 	struct mutex		pg_vec_lock;
2848dc41944SHerbert Xu 	unsigned int		running:1,	/* prot_hook is attached*/
28580feaacbSPeter P. Waskiewicz Jr 				auxdata:1,
286bfd5f4a3SSridhar Samudrala 				origdev:1,
287bfd5f4a3SSridhar Samudrala 				has_vnet_hdr:1;
2881da177e4SLinus Torvalds 	int			ifindex;	/* bound device		*/
2890e11c91eSAl Viro 	__be16			num;
2901da177e4SLinus Torvalds 	struct packet_mclist	*mclist;
2911da177e4SLinus Torvalds 	atomic_t		mapped;
292bbd6ef87SPatrick McHardy 	enum tpacket_versions	tp_version;
293bbd6ef87SPatrick McHardy 	unsigned int		tp_hdrlen;
2948913336aSPatrick McHardy 	unsigned int		tp_reserve;
29569e3c75fSJohann Baudy 	unsigned int		tp_loss:1;
296614f60faSScott McMillan 	unsigned int		tp_tstamp;
29794b05952SEric Dumazet 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
2981da177e4SLinus Torvalds };
2991da177e4SLinus Torvalds 
300dc99f600SDavid S. Miller #define PACKET_FANOUT_MAX	256
301dc99f600SDavid S. Miller 
302dc99f600SDavid S. Miller struct packet_fanout {
303dc99f600SDavid S. Miller #ifdef CONFIG_NET_NS
304dc99f600SDavid S. Miller 	struct net		*net;
305dc99f600SDavid S. Miller #endif
306dc99f600SDavid S. Miller 	unsigned int		num_members;
307dc99f600SDavid S. Miller 	u16			id;
308dc99f600SDavid S. Miller 	u8			type;
3097736d33fSDavid S. Miller 	u8			defrag;
310dc99f600SDavid S. Miller 	atomic_t		rr_cur;
311dc99f600SDavid S. Miller 	struct list_head	list;
312dc99f600SDavid S. Miller 	struct sock		*arr[PACKET_FANOUT_MAX];
313dc99f600SDavid S. Miller 	spinlock_t		lock;
314dc99f600SDavid S. Miller 	atomic_t		sk_ref;
315dc99f600SDavid S. Miller 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
316dc99f600SDavid S. Miller };
317dc99f600SDavid S. Miller 
318ffbc6111SHerbert Xu struct packet_skb_cb {
319ffbc6111SHerbert Xu 	unsigned int origlen;
320ffbc6111SHerbert Xu 	union {
321ffbc6111SHerbert Xu 		struct sockaddr_pkt pkt;
322ffbc6111SHerbert Xu 		struct sockaddr_ll ll;
323ffbc6111SHerbert Xu 	} sa;
324ffbc6111SHerbert Xu };
325ffbc6111SHerbert Xu 
326ffbc6111SHerbert Xu #define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
3278dc41944SHerbert Xu 
/* Block-queue core embedded in ring buffer @x. */
#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))

/* Descriptor at the start of block number @bid of queue @x. */
#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))

/* Descriptor of the block that queue @x currently has active. */
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))

/*
 * Number of the block following the active one, wrapping to 0 once the
 * active block is the last of the knum_blocks blocks.
 */
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
	((x)->kactive_blk_num+1) : 0)
336f6fb8f10Schetan loke 
/*
 * Convert a generic socket pointer to the packet socket that contains it.
 * This is a plain cast: struct sock is the first member of packet_sock.
 */
static struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
341ce06b03eSDavid S. Miller 
342dc99f600SDavid S. Miller static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
343dc99f600SDavid S. Miller static void __fanout_link(struct sock *sk, struct packet_sock *po);
344dc99f600SDavid S. Miller 
345ce06b03eSDavid S. Miller /* register_prot_hook must be invoked with the po->bind_lock held,
346ce06b03eSDavid S. Miller  * or from a context in which asynchronous accesses to the packet
347ce06b03eSDavid S. Miller  * socket is not possible (packet_create()).
348ce06b03eSDavid S. Miller  */
349ce06b03eSDavid S. Miller static void register_prot_hook(struct sock *sk)
350ce06b03eSDavid S. Miller {
351ce06b03eSDavid S. Miller 	struct packet_sock *po = pkt_sk(sk);
352ce06b03eSDavid S. Miller 	if (!po->running) {
353dc99f600SDavid S. Miller 		if (po->fanout)
354dc99f600SDavid S. Miller 			__fanout_link(sk, po);
355dc99f600SDavid S. Miller 		else
356ce06b03eSDavid S. Miller 			dev_add_pack(&po->prot_hook);
357ce06b03eSDavid S. Miller 		sock_hold(sk);
358ce06b03eSDavid S. Miller 		po->running = 1;
359ce06b03eSDavid S. Miller 	}
360ce06b03eSDavid S. Miller }
361ce06b03eSDavid S. Miller 
362ce06b03eSDavid S. Miller /* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
363ce06b03eSDavid S. Miller  * held.   If the sync parameter is true, we will temporarily drop
364ce06b03eSDavid S. Miller  * the po->bind_lock and do a synchronize_net to make sure no
365ce06b03eSDavid S. Miller  * asynchronous packet processing paths still refer to the elements
366ce06b03eSDavid S. Miller  * of po->prot_hook.  If the sync parameter is false, it is the
367ce06b03eSDavid S. Miller  * callers responsibility to take care of this.
368ce06b03eSDavid S. Miller  */
369ce06b03eSDavid S. Miller static void __unregister_prot_hook(struct sock *sk, bool sync)
370ce06b03eSDavid S. Miller {
371ce06b03eSDavid S. Miller 	struct packet_sock *po = pkt_sk(sk);
372ce06b03eSDavid S. Miller 
373ce06b03eSDavid S. Miller 	po->running = 0;
374dc99f600SDavid S. Miller 	if (po->fanout)
375dc99f600SDavid S. Miller 		__fanout_unlink(sk, po);
376dc99f600SDavid S. Miller 	else
377ce06b03eSDavid S. Miller 		__dev_remove_pack(&po->prot_hook);
378ce06b03eSDavid S. Miller 	__sock_put(sk);
379ce06b03eSDavid S. Miller 
380ce06b03eSDavid S. Miller 	if (sync) {
381ce06b03eSDavid S. Miller 		spin_unlock(&po->bind_lock);
382ce06b03eSDavid S. Miller 		synchronize_net();
383ce06b03eSDavid S. Miller 		spin_lock(&po->bind_lock);
384ce06b03eSDavid S. Miller 	}
385ce06b03eSDavid S. Miller }
386ce06b03eSDavid S. Miller 
387ce06b03eSDavid S. Miller static void unregister_prot_hook(struct sock *sk, bool sync)
388ce06b03eSDavid S. Miller {
389ce06b03eSDavid S. Miller 	struct packet_sock *po = pkt_sk(sk);
390ce06b03eSDavid S. Miller 
391ce06b03eSDavid S. Miller 	if (po->running)
392ce06b03eSDavid S. Miller 		__unregister_prot_hook(sk, sync);
393ce06b03eSDavid S. Miller }
394ce06b03eSDavid S. Miller 
395f6dafa95SChangli Gao static inline __pure struct page *pgv_to_page(void *addr)
3960af55bb5SChangli Gao {
3970af55bb5SChangli Gao 	if (is_vmalloc_addr(addr))
3980af55bb5SChangli Gao 		return vmalloc_to_page(addr);
3990af55bb5SChangli Gao 	return virt_to_page(addr);
4000af55bb5SChangli Gao }
4010af55bb5SChangli Gao 
402bbd6ef87SPatrick McHardy static void __packet_set_status(struct packet_sock *po, void *frame, int status)
403bbd6ef87SPatrick McHardy {
404bbd6ef87SPatrick McHardy 	union {
405bbd6ef87SPatrick McHardy 		struct tpacket_hdr *h1;
406bbd6ef87SPatrick McHardy 		struct tpacket2_hdr *h2;
407bbd6ef87SPatrick McHardy 		void *raw;
408bbd6ef87SPatrick McHardy 	} h;
409bbd6ef87SPatrick McHardy 
410bbd6ef87SPatrick McHardy 	h.raw = frame;
411bbd6ef87SPatrick McHardy 	switch (po->tp_version) {
412bbd6ef87SPatrick McHardy 	case TPACKET_V1:
413bbd6ef87SPatrick McHardy 		h.h1->tp_status = status;
4140af55bb5SChangli Gao 		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
415bbd6ef87SPatrick McHardy 		break;
416bbd6ef87SPatrick McHardy 	case TPACKET_V2:
417bbd6ef87SPatrick McHardy 		h.h2->tp_status = status;
4180af55bb5SChangli Gao 		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
419bbd6ef87SPatrick McHardy 		break;
420f6fb8f10Schetan loke 	case TPACKET_V3:
42169e3c75fSJohann Baudy 	default:
422f6fb8f10Schetan loke 		WARN(1, "TPACKET version not supported.\n");
42369e3c75fSJohann Baudy 		BUG();
42469e3c75fSJohann Baudy 	}
42569e3c75fSJohann Baudy 
42669e3c75fSJohann Baudy 	smp_wmb();
42769e3c75fSJohann Baudy }
42869e3c75fSJohann Baudy 
42969e3c75fSJohann Baudy static int __packet_get_status(struct packet_sock *po, void *frame)
43069e3c75fSJohann Baudy {
43169e3c75fSJohann Baudy 	union {
43269e3c75fSJohann Baudy 		struct tpacket_hdr *h1;
43369e3c75fSJohann Baudy 		struct tpacket2_hdr *h2;
43469e3c75fSJohann Baudy 		void *raw;
43569e3c75fSJohann Baudy 	} h;
43669e3c75fSJohann Baudy 
43769e3c75fSJohann Baudy 	smp_rmb();
43869e3c75fSJohann Baudy 
43969e3c75fSJohann Baudy 	h.raw = frame;
44069e3c75fSJohann Baudy 	switch (po->tp_version) {
44169e3c75fSJohann Baudy 	case TPACKET_V1:
4420af55bb5SChangli Gao 		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
44369e3c75fSJohann Baudy 		return h.h1->tp_status;
44469e3c75fSJohann Baudy 	case TPACKET_V2:
4450af55bb5SChangli Gao 		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
44669e3c75fSJohann Baudy 		return h.h2->tp_status;
447f6fb8f10Schetan loke 	case TPACKET_V3:
44869e3c75fSJohann Baudy 	default:
449f6fb8f10Schetan loke 		WARN(1, "TPACKET version not supported.\n");
45069e3c75fSJohann Baudy 		BUG();
45169e3c75fSJohann Baudy 		return 0;
452bbd6ef87SPatrick McHardy 	}
4531da177e4SLinus Torvalds }
45469e3c75fSJohann Baudy 
45569e3c75fSJohann Baudy static void *packet_lookup_frame(struct packet_sock *po,
45669e3c75fSJohann Baudy 		struct packet_ring_buffer *rb,
45769e3c75fSJohann Baudy 		unsigned int position,
45869e3c75fSJohann Baudy 		int status)
45969e3c75fSJohann Baudy {
46069e3c75fSJohann Baudy 	unsigned int pg_vec_pos, frame_offset;
46169e3c75fSJohann Baudy 	union {
46269e3c75fSJohann Baudy 		struct tpacket_hdr *h1;
46369e3c75fSJohann Baudy 		struct tpacket2_hdr *h2;
46469e3c75fSJohann Baudy 		void *raw;
46569e3c75fSJohann Baudy 	} h;
46669e3c75fSJohann Baudy 
46769e3c75fSJohann Baudy 	pg_vec_pos = position / rb->frames_per_block;
46869e3c75fSJohann Baudy 	frame_offset = position % rb->frames_per_block;
46969e3c75fSJohann Baudy 
4700e3125c7SNeil Horman 	h.raw = rb->pg_vec[pg_vec_pos].buffer +
4710e3125c7SNeil Horman 		(frame_offset * rb->frame_size);
47269e3c75fSJohann Baudy 
47369e3c75fSJohann Baudy 	if (status != __packet_get_status(po, h.raw))
47469e3c75fSJohann Baudy 		return NULL;
47569e3c75fSJohann Baudy 
47669e3c75fSJohann Baudy 	return h.raw;
47769e3c75fSJohann Baudy }
47869e3c75fSJohann Baudy 
479eea49cc9SOlof Johansson static void *packet_current_frame(struct packet_sock *po,
48069e3c75fSJohann Baudy 		struct packet_ring_buffer *rb,
48169e3c75fSJohann Baudy 		int status)
48269e3c75fSJohann Baudy {
48369e3c75fSJohann Baudy 	return packet_lookup_frame(po, rb, rb->head, status);
48469e3c75fSJohann Baudy }
48569e3c75fSJohann Baudy 
486bc59ba39Schetan loke static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
487f6fb8f10Schetan loke {
488f6fb8f10Schetan loke 	del_timer_sync(&pkc->retire_blk_timer);
489f6fb8f10Schetan loke }
490f6fb8f10Schetan loke 
491f6fb8f10Schetan loke static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
492f6fb8f10Schetan loke 		int tx_ring,
493f6fb8f10Schetan loke 		struct sk_buff_head *rb_queue)
494f6fb8f10Schetan loke {
495bc59ba39Schetan loke 	struct tpacket_kbdq_core *pkc;
496f6fb8f10Schetan loke 
497f6fb8f10Schetan loke 	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
498f6fb8f10Schetan loke 
499f6fb8f10Schetan loke 	spin_lock(&rb_queue->lock);
500f6fb8f10Schetan loke 	pkc->delete_blk_timer = 1;
501f6fb8f10Schetan loke 	spin_unlock(&rb_queue->lock);
502f6fb8f10Schetan loke 
503f6fb8f10Schetan loke 	prb_del_retire_blk_timer(pkc);
504f6fb8f10Schetan loke }
505f6fb8f10Schetan loke 
506f6fb8f10Schetan loke static void prb_init_blk_timer(struct packet_sock *po,
507bc59ba39Schetan loke 		struct tpacket_kbdq_core *pkc,
508f6fb8f10Schetan loke 		void (*func) (unsigned long))
509f6fb8f10Schetan loke {
510f6fb8f10Schetan loke 	init_timer(&pkc->retire_blk_timer);
511f6fb8f10Schetan loke 	pkc->retire_blk_timer.data = (long)po;
512f6fb8f10Schetan loke 	pkc->retire_blk_timer.function = func;
513f6fb8f10Schetan loke 	pkc->retire_blk_timer.expires = jiffies;
514f6fb8f10Schetan loke }
515f6fb8f10Schetan loke 
516f6fb8f10Schetan loke static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
517f6fb8f10Schetan loke {
518bc59ba39Schetan loke 	struct tpacket_kbdq_core *pkc;
519f6fb8f10Schetan loke 
520f6fb8f10Schetan loke 	if (tx_ring)
521f6fb8f10Schetan loke 		BUG();
522f6fb8f10Schetan loke 
523f6fb8f10Schetan loke 	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
524f6fb8f10Schetan loke 	prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
525f6fb8f10Schetan loke }
526f6fb8f10Schetan loke 
527f6fb8f10Schetan loke static int prb_calc_retire_blk_tmo(struct packet_sock *po,
528f6fb8f10Schetan loke 				int blk_size_in_bytes)
529f6fb8f10Schetan loke {
530f6fb8f10Schetan loke 	struct net_device *dev;
531f6fb8f10Schetan loke 	unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
5324bc71cb9SJiri Pirko 	struct ethtool_cmd ecmd;
5334bc71cb9SJiri Pirko 	int err;
534e440cf2cSparav.pandit@emulex.com 	u32 speed;
535f6fb8f10Schetan loke 
5364bc71cb9SJiri Pirko 	rtnl_lock();
5374bc71cb9SJiri Pirko 	dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
5384bc71cb9SJiri Pirko 	if (unlikely(!dev)) {
5394bc71cb9SJiri Pirko 		rtnl_unlock();
540f6fb8f10Schetan loke 		return DEFAULT_PRB_RETIRE_TOV;
5414bc71cb9SJiri Pirko 	}
5424bc71cb9SJiri Pirko 	err = __ethtool_get_settings(dev, &ecmd);
543e440cf2cSparav.pandit@emulex.com 	speed = ethtool_cmd_speed(&ecmd);
5444bc71cb9SJiri Pirko 	rtnl_unlock();
5454bc71cb9SJiri Pirko 	if (!err) {
546f6fb8f10Schetan loke 		/*
547f6fb8f10Schetan loke 		 * If the link speed is so slow you don't really
548f6fb8f10Schetan loke 		 * need to worry about perf anyways
549f6fb8f10Schetan loke 		 */
550e440cf2cSparav.pandit@emulex.com 		if (speed < SPEED_1000 || speed == SPEED_UNKNOWN) {
551f6fb8f10Schetan loke 			return DEFAULT_PRB_RETIRE_TOV;
552e440cf2cSparav.pandit@emulex.com 		} else {
553e440cf2cSparav.pandit@emulex.com 			msec = 1;
554e440cf2cSparav.pandit@emulex.com 			div = speed / 1000;
555f6fb8f10Schetan loke 		}
556f6fb8f10Schetan loke 	}
557f6fb8f10Schetan loke 
558f6fb8f10Schetan loke 	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
559f6fb8f10Schetan loke 
560f6fb8f10Schetan loke 	if (div)
561f6fb8f10Schetan loke 		mbits /= div;
562f6fb8f10Schetan loke 
563f6fb8f10Schetan loke 	tmo = mbits * msec;
564f6fb8f10Schetan loke 
565f6fb8f10Schetan loke 	if (div)
566f6fb8f10Schetan loke 		return tmo+1;
567f6fb8f10Schetan loke 	return tmo;
568f6fb8f10Schetan loke }
569f6fb8f10Schetan loke 
570bc59ba39Schetan loke static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
571f6fb8f10Schetan loke 			union tpacket_req_u *req_u)
572f6fb8f10Schetan loke {
573f6fb8f10Schetan loke 	p1->feature_req_word = req_u->req3.tp_feature_req_word;
574f6fb8f10Schetan loke }
575f6fb8f10Schetan loke 
576f6fb8f10Schetan loke static void init_prb_bdqc(struct packet_sock *po,
577f6fb8f10Schetan loke 			struct packet_ring_buffer *rb,
578f6fb8f10Schetan loke 			struct pgv *pg_vec,
579f6fb8f10Schetan loke 			union tpacket_req_u *req_u, int tx_ring)
580f6fb8f10Schetan loke {
581bc59ba39Schetan loke 	struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
582bc59ba39Schetan loke 	struct tpacket_block_desc *pbd;
583f6fb8f10Schetan loke 
584f6fb8f10Schetan loke 	memset(p1, 0x0, sizeof(*p1));
585f6fb8f10Schetan loke 
586f6fb8f10Schetan loke 	p1->knxt_seq_num = 1;
587f6fb8f10Schetan loke 	p1->pkbdq = pg_vec;
588bc59ba39Schetan loke 	pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
589e3192690SJoe Perches 	p1->pkblk_start	= pg_vec[0].buffer;
590f6fb8f10Schetan loke 	p1->kblk_size = req_u->req3.tp_block_size;
591f6fb8f10Schetan loke 	p1->knum_blocks	= req_u->req3.tp_block_nr;
592f6fb8f10Schetan loke 	p1->hdrlen = po->tp_hdrlen;
593f6fb8f10Schetan loke 	p1->version = po->tp_version;
594f6fb8f10Schetan loke 	p1->last_kactive_blk_num = 0;
595f6fb8f10Schetan loke 	po->stats_u.stats3.tp_freeze_q_cnt = 0;
596f6fb8f10Schetan loke 	if (req_u->req3.tp_retire_blk_tov)
597f6fb8f10Schetan loke 		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
598f6fb8f10Schetan loke 	else
599f6fb8f10Schetan loke 		p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
600f6fb8f10Schetan loke 						req_u->req3.tp_block_size);
601f6fb8f10Schetan loke 	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
602f6fb8f10Schetan loke 	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
603f6fb8f10Schetan loke 
604f6fb8f10Schetan loke 	prb_init_ft_ops(p1, req_u);
605f6fb8f10Schetan loke 	prb_setup_retire_blk_timer(po, tx_ring);
606f6fb8f10Schetan loke 	prb_open_block(p1, pbd);
607f6fb8f10Schetan loke }
608f6fb8f10Schetan loke 
609f6fb8f10Schetan loke /*  Do NOT update the last_blk_num first.
610f6fb8f10Schetan loke  *  Assumes sk_buff_head lock is held.
611f6fb8f10Schetan loke  */
612bc59ba39Schetan loke static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
613f6fb8f10Schetan loke {
614f6fb8f10Schetan loke 	mod_timer(&pkc->retire_blk_timer,
615f6fb8f10Schetan loke 			jiffies + pkc->tov_in_jiffies);
616f6fb8f10Schetan loke 	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
617f6fb8f10Schetan loke }
618f6fb8f10Schetan loke 
619f6fb8f10Schetan loke /*
620f6fb8f10Schetan loke  * Timer logic:
621f6fb8f10Schetan loke  * 1) We refresh the timer only when we open a block.
622f6fb8f10Schetan loke  *    By doing this we don't waste cycles refreshing the timer
623f6fb8f10Schetan loke  *	  on packet-by-packet basis.
624f6fb8f10Schetan loke  *
625f6fb8f10Schetan loke  * With a 1MB block-size, on a 1Gbps line, it will take
626f6fb8f10Schetan loke  * i) ~8 ms to fill a block + ii) memcpy etc.
627f6fb8f10Schetan loke  * In this cut we are not accounting for the memcpy time.
628f6fb8f10Schetan loke  *
629f6fb8f10Schetan loke  * So, if the user sets the 'tmo' to 10ms then the timer
630f6fb8f10Schetan loke  * will never fire while the block is still getting filled
631f6fb8f10Schetan loke  * (which is what we want). However, the user could choose
632f6fb8f10Schetan loke  * to close a block early and that's fine.
633f6fb8f10Schetan loke  *
634f6fb8f10Schetan loke  * But when the timer does fire, we check whether or not to refresh it.
635f6fb8f10Schetan loke  * Since the tmo granularity is in msecs, it is not too expensive
636f6fb8f10Schetan loke  * to refresh the timer, lets say every '8' msecs.
637f6fb8f10Schetan loke  * Either the user can set the 'tmo' or we can derive it based on
638f6fb8f10Schetan loke  * a) line-speed and b) block-size.
639f6fb8f10Schetan loke  * prb_calc_retire_blk_tmo() calculates the tmo.
640f6fb8f10Schetan loke  *
641f6fb8f10Schetan loke  */
642f6fb8f10Schetan loke static void prb_retire_rx_blk_timer_expired(unsigned long data)
643f6fb8f10Schetan loke {
644f6fb8f10Schetan loke 	struct packet_sock *po = (struct packet_sock *)data;
645bc59ba39Schetan loke 	struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
646f6fb8f10Schetan loke 	unsigned int frozen;
647bc59ba39Schetan loke 	struct tpacket_block_desc *pbd;
648f6fb8f10Schetan loke 
649f6fb8f10Schetan loke 	spin_lock(&po->sk.sk_receive_queue.lock);
650f6fb8f10Schetan loke 
651f6fb8f10Schetan loke 	frozen = prb_queue_frozen(pkc);
652f6fb8f10Schetan loke 	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
653f6fb8f10Schetan loke 
654f6fb8f10Schetan loke 	if (unlikely(pkc->delete_blk_timer))
655f6fb8f10Schetan loke 		goto out;
656f6fb8f10Schetan loke 
657f6fb8f10Schetan loke 	/* We only need to plug the race when the block is partially filled.
658f6fb8f10Schetan loke 	 * tpacket_rcv:
659f6fb8f10Schetan loke 	 *		lock(); increment BLOCK_NUM_PKTS; unlock()
660f6fb8f10Schetan loke 	 *		copy_bits() is in progress ...
661f6fb8f10Schetan loke 	 *		timer fires on other cpu:
662f6fb8f10Schetan loke 	 *		we can't retire the current block because copy_bits
663f6fb8f10Schetan loke 	 *		is in progress.
664f6fb8f10Schetan loke 	 *
665f6fb8f10Schetan loke 	 */
666f6fb8f10Schetan loke 	if (BLOCK_NUM_PKTS(pbd)) {
667f6fb8f10Schetan loke 		while (atomic_read(&pkc->blk_fill_in_prog)) {
668f6fb8f10Schetan loke 			/* Waiting for skb_copy_bits to finish... */
669f6fb8f10Schetan loke 			cpu_relax();
670f6fb8f10Schetan loke 		}
671f6fb8f10Schetan loke 	}
672f6fb8f10Schetan loke 
673f6fb8f10Schetan loke 	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
674f6fb8f10Schetan loke 		if (!frozen) {
675f6fb8f10Schetan loke 			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
676f6fb8f10Schetan loke 			if (!prb_dispatch_next_block(pkc, po))
677f6fb8f10Schetan loke 				goto refresh_timer;
678f6fb8f10Schetan loke 			else
679f6fb8f10Schetan loke 				goto out;
680f6fb8f10Schetan loke 		} else {
681f6fb8f10Schetan loke 			/* Case 1. Queue was frozen because user-space was
682f6fb8f10Schetan loke 			 *	   lagging behind.
683f6fb8f10Schetan loke 			 */
684f6fb8f10Schetan loke 			if (prb_curr_blk_in_use(pkc, pbd)) {
685f6fb8f10Schetan loke 				/*
686f6fb8f10Schetan loke 				 * Ok, user-space is still behind.
687f6fb8f10Schetan loke 				 * So just refresh the timer.
688f6fb8f10Schetan loke 				 */
689f6fb8f10Schetan loke 				goto refresh_timer;
690f6fb8f10Schetan loke 			} else {
691f6fb8f10Schetan loke 			       /* Case 2. queue was frozen,user-space caught up,
692f6fb8f10Schetan loke 				* now the link went idle && the timer fired.
693f6fb8f10Schetan loke 				* We don't have a block to close.So we open this
694f6fb8f10Schetan loke 				* block and restart the timer.
695f6fb8f10Schetan loke 				* opening a block thaws the queue,restarts timer
696f6fb8f10Schetan loke 				* Thawing/timer-refresh is a side effect.
697f6fb8f10Schetan loke 				*/
698f6fb8f10Schetan loke 				prb_open_block(pkc, pbd);
699f6fb8f10Schetan loke 				goto out;
700f6fb8f10Schetan loke 			}
701f6fb8f10Schetan loke 		}
702f6fb8f10Schetan loke 	}
703f6fb8f10Schetan loke 
704f6fb8f10Schetan loke refresh_timer:
705f6fb8f10Schetan loke 	_prb_refresh_rx_retire_blk_timer(pkc);
706f6fb8f10Schetan loke 
707f6fb8f10Schetan loke out:
708f6fb8f10Schetan loke 	spin_unlock(&po->sk.sk_receive_queue.lock);
709f6fb8f10Schetan loke }
710f6fb8f10Schetan loke 
711eea49cc9SOlof Johansson static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
712bc59ba39Schetan loke 		struct tpacket_block_desc *pbd1, __u32 status)
713f6fb8f10Schetan loke {
714f6fb8f10Schetan loke 	/* Flush everything minus the block header */
715f6fb8f10Schetan loke 
716f6fb8f10Schetan loke #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
717f6fb8f10Schetan loke 	u8 *start, *end;
718f6fb8f10Schetan loke 
719f6fb8f10Schetan loke 	start = (u8 *)pbd1;
720f6fb8f10Schetan loke 
721f6fb8f10Schetan loke 	/* Skip the block header(we know header WILL fit in 4K) */
722f6fb8f10Schetan loke 	start += PAGE_SIZE;
723f6fb8f10Schetan loke 
724f6fb8f10Schetan loke 	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
725f6fb8f10Schetan loke 	for (; start < end; start += PAGE_SIZE)
726f6fb8f10Schetan loke 		flush_dcache_page(pgv_to_page(start));
727f6fb8f10Schetan loke 
728f6fb8f10Schetan loke 	smp_wmb();
729f6fb8f10Schetan loke #endif
730f6fb8f10Schetan loke 
731f6fb8f10Schetan loke 	/* Now update the block status. */
732f6fb8f10Schetan loke 
733f6fb8f10Schetan loke 	BLOCK_STATUS(pbd1) = status;
734f6fb8f10Schetan loke 
735f6fb8f10Schetan loke 	/* Flush the block header */
736f6fb8f10Schetan loke 
737f6fb8f10Schetan loke #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
738f6fb8f10Schetan loke 	start = (u8 *)pbd1;
739f6fb8f10Schetan loke 	flush_dcache_page(pgv_to_page(start));
740f6fb8f10Schetan loke 
741f6fb8f10Schetan loke 	smp_wmb();
742f6fb8f10Schetan loke #endif
743f6fb8f10Schetan loke }
744f6fb8f10Schetan loke 
745f6fb8f10Schetan loke /*
746f6fb8f10Schetan loke  * Side effect:
747f6fb8f10Schetan loke  *
748f6fb8f10Schetan loke  * 1) flush the block
749f6fb8f10Schetan loke  * 2) Increment active_blk_num
750f6fb8f10Schetan loke  *
751f6fb8f10Schetan loke  * Note:We DONT refresh the timer on purpose.
752f6fb8f10Schetan loke  *	Because almost always the next block will be opened.
753f6fb8f10Schetan loke  */
754bc59ba39Schetan loke static void prb_close_block(struct tpacket_kbdq_core *pkc1,
755bc59ba39Schetan loke 		struct tpacket_block_desc *pbd1,
756f6fb8f10Schetan loke 		struct packet_sock *po, unsigned int stat)
757f6fb8f10Schetan loke {
758f6fb8f10Schetan loke 	__u32 status = TP_STATUS_USER | stat;
759f6fb8f10Schetan loke 
760f6fb8f10Schetan loke 	struct tpacket3_hdr *last_pkt;
761bc59ba39Schetan loke 	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
762f6fb8f10Schetan loke 
763f6fb8f10Schetan loke 	if (po->stats.tp_drops)
764f6fb8f10Schetan loke 		status |= TP_STATUS_LOSING;
765f6fb8f10Schetan loke 
766f6fb8f10Schetan loke 	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
767f6fb8f10Schetan loke 	last_pkt->tp_next_offset = 0;
768f6fb8f10Schetan loke 
769f6fb8f10Schetan loke 	/* Get the ts of the last pkt */
770f6fb8f10Schetan loke 	if (BLOCK_NUM_PKTS(pbd1)) {
771f6fb8f10Schetan loke 		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
772f6fb8f10Schetan loke 		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
773f6fb8f10Schetan loke 	} else {
774f6fb8f10Schetan loke 		/* Ok, we tmo'd - so get the current time */
775f6fb8f10Schetan loke 		struct timespec ts;
776f6fb8f10Schetan loke 		getnstimeofday(&ts);
777f6fb8f10Schetan loke 		h1->ts_last_pkt.ts_sec = ts.tv_sec;
778f6fb8f10Schetan loke 		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
779f6fb8f10Schetan loke 	}
780f6fb8f10Schetan loke 
781f6fb8f10Schetan loke 	smp_wmb();
782f6fb8f10Schetan loke 
783f6fb8f10Schetan loke 	/* Flush the block */
784f6fb8f10Schetan loke 	prb_flush_block(pkc1, pbd1, status);
785f6fb8f10Schetan loke 
786f6fb8f10Schetan loke 	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
787f6fb8f10Schetan loke }
788f6fb8f10Schetan loke 
789eea49cc9SOlof Johansson static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
790f6fb8f10Schetan loke {
791f6fb8f10Schetan loke 	pkc->reset_pending_on_curr_blk = 0;
792f6fb8f10Schetan loke }
793f6fb8f10Schetan loke 
794f6fb8f10Schetan loke /*
795f6fb8f10Schetan loke  * Side effect of opening a block:
796f6fb8f10Schetan loke  *
797f6fb8f10Schetan loke  * 1) prb_queue is thawed.
798f6fb8f10Schetan loke  * 2) retire_blk_timer is refreshed.
799f6fb8f10Schetan loke  *
800f6fb8f10Schetan loke  */
801bc59ba39Schetan loke static void prb_open_block(struct tpacket_kbdq_core *pkc1,
802bc59ba39Schetan loke 	struct tpacket_block_desc *pbd1)
803f6fb8f10Schetan loke {
804f6fb8f10Schetan loke 	struct timespec ts;
805bc59ba39Schetan loke 	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
806f6fb8f10Schetan loke 
807f6fb8f10Schetan loke 	smp_rmb();
808f6fb8f10Schetan loke 
809f6fb8f10Schetan loke 	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {
810f6fb8f10Schetan loke 
811f6fb8f10Schetan loke 		/* We could have just memset this but we will lose the
812f6fb8f10Schetan loke 		 * flexibility of making the priv area sticky
813f6fb8f10Schetan loke 		 */
814f6fb8f10Schetan loke 		BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
815f6fb8f10Schetan loke 		BLOCK_NUM_PKTS(pbd1) = 0;
816f6fb8f10Schetan loke 		BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
817f6fb8f10Schetan loke 		getnstimeofday(&ts);
818f6fb8f10Schetan loke 		h1->ts_first_pkt.ts_sec = ts.tv_sec;
819f6fb8f10Schetan loke 		h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
820f6fb8f10Schetan loke 		pkc1->pkblk_start = (char *)pbd1;
821e3192690SJoe Perches 		pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
822f6fb8f10Schetan loke 		BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
823f6fb8f10Schetan loke 		BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
824f6fb8f10Schetan loke 		pbd1->version = pkc1->version;
825f6fb8f10Schetan loke 		pkc1->prev = pkc1->nxt_offset;
826f6fb8f10Schetan loke 		pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
827f6fb8f10Schetan loke 		prb_thaw_queue(pkc1);
828f6fb8f10Schetan loke 		_prb_refresh_rx_retire_blk_timer(pkc1);
829f6fb8f10Schetan loke 
830f6fb8f10Schetan loke 		smp_wmb();
831f6fb8f10Schetan loke 
832f6fb8f10Schetan loke 		return;
833f6fb8f10Schetan loke 	}
834f6fb8f10Schetan loke 
835f6fb8f10Schetan loke 	WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
836f6fb8f10Schetan loke 		pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
837f6fb8f10Schetan loke 	dump_stack();
838f6fb8f10Schetan loke 	BUG();
839f6fb8f10Schetan loke }
840f6fb8f10Schetan loke 
841f6fb8f10Schetan loke /*
842f6fb8f10Schetan loke  * Queue freeze logic:
843f6fb8f10Schetan loke  * 1) Assume tp_block_nr = 8 blocks.
844f6fb8f10Schetan loke  * 2) At time 't0', user opens Rx ring.
845f6fb8f10Schetan loke  * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
846f6fb8f10Schetan loke  * 4) user-space is either sleeping or processing block '0'.
847f6fb8f10Schetan loke  * 5) tpacket_rcv is currently filling block '7', since there is no space left,
848f6fb8f10Schetan loke  *    it will close block-7,loop around and try to fill block '0'.
849f6fb8f10Schetan loke  *    call-flow:
850f6fb8f10Schetan loke  *    __packet_lookup_frame_in_block
851f6fb8f10Schetan loke  *      prb_retire_current_block()
852f6fb8f10Schetan loke  *      prb_dispatch_next_block()
853f6fb8f10Schetan loke  *        |->(BLOCK_STATUS == USER) evaluates to true
854f6fb8f10Schetan loke  *    5.1) Since block-0 is currently in-use, we just freeze the queue.
855f6fb8f10Schetan loke  * 6) Now there are two cases:
856f6fb8f10Schetan loke  *    6.1) Link goes idle right after the queue is frozen.
857f6fb8f10Schetan loke  *         But remember, the last open_block() refreshed the timer.
858f6fb8f10Schetan loke  *         When this timer expires,it will refresh itself so that we can
859f6fb8f10Schetan loke  *         re-open block-0 in near future.
860f6fb8f10Schetan loke  *    6.2) Link is busy and keeps on receiving packets. This is a simple
861f6fb8f10Schetan loke  *         case and __packet_lookup_frame_in_block will check if block-0
862f6fb8f10Schetan loke  *         is free and can now be re-used.
863f6fb8f10Schetan loke  */
864eea49cc9SOlof Johansson static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
865f6fb8f10Schetan loke 				  struct packet_sock *po)
866f6fb8f10Schetan loke {
867f6fb8f10Schetan loke 	pkc->reset_pending_on_curr_blk = 1;
868f6fb8f10Schetan loke 	po->stats_u.stats3.tp_freeze_q_cnt++;
869f6fb8f10Schetan loke }
870f6fb8f10Schetan loke 
/* Packet length rounded up with ALIGN() to V3_ALIGNMENT. */
#define TOTAL_PKT_LEN_INCL_ALIGN(len)	(ALIGN((len), V3_ALIGNMENT))
872f6fb8f10Schetan loke 
873f6fb8f10Schetan loke /*
874f6fb8f10Schetan loke  * If the next block is free then we will dispatch it
875f6fb8f10Schetan loke  * and return a good offset.
876f6fb8f10Schetan loke  * Else, we will freeze the queue.
877f6fb8f10Schetan loke  * So, caller must check the return value.
878f6fb8f10Schetan loke  */
879bc59ba39Schetan loke static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
880f6fb8f10Schetan loke 		struct packet_sock *po)
881f6fb8f10Schetan loke {
882bc59ba39Schetan loke 	struct tpacket_block_desc *pbd;
883f6fb8f10Schetan loke 
884f6fb8f10Schetan loke 	smp_rmb();
885f6fb8f10Schetan loke 
886f6fb8f10Schetan loke 	/* 1. Get current block num */
887f6fb8f10Schetan loke 	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
888f6fb8f10Schetan loke 
889f6fb8f10Schetan loke 	/* 2. If this block is currently in_use then freeze the queue */
890f6fb8f10Schetan loke 	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
891f6fb8f10Schetan loke 		prb_freeze_queue(pkc, po);
892f6fb8f10Schetan loke 		return NULL;
893f6fb8f10Schetan loke 	}
894f6fb8f10Schetan loke 
895f6fb8f10Schetan loke 	/*
896f6fb8f10Schetan loke 	 * 3.
897f6fb8f10Schetan loke 	 * open this block and return the offset where the first packet
898f6fb8f10Schetan loke 	 * needs to get stored.
899f6fb8f10Schetan loke 	 */
900f6fb8f10Schetan loke 	prb_open_block(pkc, pbd);
901f6fb8f10Schetan loke 	return (void *)pkc->nxt_offset;
902f6fb8f10Schetan loke }
903f6fb8f10Schetan loke 
904bc59ba39Schetan loke static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
905f6fb8f10Schetan loke 		struct packet_sock *po, unsigned int status)
906f6fb8f10Schetan loke {
907bc59ba39Schetan loke 	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
908f6fb8f10Schetan loke 
909f6fb8f10Schetan loke 	/* retire/close the current block */
910f6fb8f10Schetan loke 	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
911f6fb8f10Schetan loke 		/*
912f6fb8f10Schetan loke 		 * Plug the case where copy_bits() is in progress on
913f6fb8f10Schetan loke 		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
914f6fb8f10Schetan loke 		 * have space to copy the pkt in the current block and
915f6fb8f10Schetan loke 		 * called prb_retire_current_block()
916f6fb8f10Schetan loke 		 *
917f6fb8f10Schetan loke 		 * We don't need to worry about the TMO case because
918f6fb8f10Schetan loke 		 * the timer-handler already handled this case.
919f6fb8f10Schetan loke 		 */
920f6fb8f10Schetan loke 		if (!(status & TP_STATUS_BLK_TMO)) {
921f6fb8f10Schetan loke 			while (atomic_read(&pkc->blk_fill_in_prog)) {
922f6fb8f10Schetan loke 				/* Waiting for skb_copy_bits to finish... */
923f6fb8f10Schetan loke 				cpu_relax();
924f6fb8f10Schetan loke 			}
925f6fb8f10Schetan loke 		}
926f6fb8f10Schetan loke 		prb_close_block(pkc, pbd, po, status);
927f6fb8f10Schetan loke 		return;
928f6fb8f10Schetan loke 	}
929f6fb8f10Schetan loke 
930f6fb8f10Schetan loke 	WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
931f6fb8f10Schetan loke 	dump_stack();
932f6fb8f10Schetan loke 	BUG();
933f6fb8f10Schetan loke }
934f6fb8f10Schetan loke 
935eea49cc9SOlof Johansson static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
936bc59ba39Schetan loke 				      struct tpacket_block_desc *pbd)
937f6fb8f10Schetan loke {
938f6fb8f10Schetan loke 	return TP_STATUS_USER & BLOCK_STATUS(pbd);
939f6fb8f10Schetan loke }
940f6fb8f10Schetan loke 
941eea49cc9SOlof Johansson static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
942f6fb8f10Schetan loke {
943f6fb8f10Schetan loke 	return pkc->reset_pending_on_curr_blk;
944f6fb8f10Schetan loke }
945f6fb8f10Schetan loke 
946eea49cc9SOlof Johansson static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
947f6fb8f10Schetan loke {
948bc59ba39Schetan loke 	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
949f6fb8f10Schetan loke 	atomic_dec(&pkc->blk_fill_in_prog);
950f6fb8f10Schetan loke }
951f6fb8f10Schetan loke 
952eea49cc9SOlof Johansson static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
953f6fb8f10Schetan loke 			struct tpacket3_hdr *ppd)
954f6fb8f10Schetan loke {
955f6fb8f10Schetan loke 	ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
956f6fb8f10Schetan loke }
957f6fb8f10Schetan loke 
958eea49cc9SOlof Johansson static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
959f6fb8f10Schetan loke 			struct tpacket3_hdr *ppd)
960f6fb8f10Schetan loke {
961f6fb8f10Schetan loke 	ppd->hv1.tp_rxhash = 0;
962f6fb8f10Schetan loke }
963f6fb8f10Schetan loke 
964eea49cc9SOlof Johansson static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
965f6fb8f10Schetan loke 			struct tpacket3_hdr *ppd)
966f6fb8f10Schetan loke {
967f6fb8f10Schetan loke 	if (vlan_tx_tag_present(pkc->skb)) {
968f6fb8f10Schetan loke 		ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
969f6fb8f10Schetan loke 		ppd->tp_status = TP_STATUS_VLAN_VALID;
970f6fb8f10Schetan loke 	} else {
971f6fb8f10Schetan loke 		ppd->hv1.tp_vlan_tci = ppd->tp_status = 0;
972f6fb8f10Schetan loke 	}
973f6fb8f10Schetan loke }
974f6fb8f10Schetan loke 
975bc59ba39Schetan loke static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
976f6fb8f10Schetan loke 			struct tpacket3_hdr *ppd)
977f6fb8f10Schetan loke {
978f6fb8f10Schetan loke 	prb_fill_vlan_info(pkc, ppd);
979f6fb8f10Schetan loke 
980f6fb8f10Schetan loke 	if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
981f6fb8f10Schetan loke 		prb_fill_rxhash(pkc, ppd);
982f6fb8f10Schetan loke 	else
983f6fb8f10Schetan loke 		prb_clear_rxhash(pkc, ppd);
984f6fb8f10Schetan loke }
985f6fb8f10Schetan loke 
986eea49cc9SOlof Johansson static void prb_fill_curr_block(char *curr,
987bc59ba39Schetan loke 				struct tpacket_kbdq_core *pkc,
988bc59ba39Schetan loke 				struct tpacket_block_desc *pbd,
989f6fb8f10Schetan loke 				unsigned int len)
990f6fb8f10Schetan loke {
991f6fb8f10Schetan loke 	struct tpacket3_hdr *ppd;
992f6fb8f10Schetan loke 
993f6fb8f10Schetan loke 	ppd  = (struct tpacket3_hdr *)curr;
994f6fb8f10Schetan loke 	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
995f6fb8f10Schetan loke 	pkc->prev = curr;
996f6fb8f10Schetan loke 	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
997f6fb8f10Schetan loke 	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
998f6fb8f10Schetan loke 	BLOCK_NUM_PKTS(pbd) += 1;
999f6fb8f10Schetan loke 	atomic_inc(&pkc->blk_fill_in_prog);
1000f6fb8f10Schetan loke 	prb_run_all_ft_ops(pkc, ppd);
1001f6fb8f10Schetan loke }
1002f6fb8f10Schetan loke 
1003f6fb8f10Schetan loke /* Assumes caller has the sk->rx_queue.lock */
1004f6fb8f10Schetan loke static void *__packet_lookup_frame_in_block(struct packet_sock *po,
1005f6fb8f10Schetan loke 					    struct sk_buff *skb,
1006f6fb8f10Schetan loke 						int status,
1007f6fb8f10Schetan loke 					    unsigned int len
1008f6fb8f10Schetan loke 					    )
1009f6fb8f10Schetan loke {
1010bc59ba39Schetan loke 	struct tpacket_kbdq_core *pkc;
1011bc59ba39Schetan loke 	struct tpacket_block_desc *pbd;
1012f6fb8f10Schetan loke 	char *curr, *end;
1013f6fb8f10Schetan loke 
1014e3192690SJoe Perches 	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
1015f6fb8f10Schetan loke 	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1016f6fb8f10Schetan loke 
1017f6fb8f10Schetan loke 	/* Queue is frozen when user space is lagging behind */
1018f6fb8f10Schetan loke 	if (prb_queue_frozen(pkc)) {
1019f6fb8f10Schetan loke 		/*
1020f6fb8f10Schetan loke 		 * Check if that last block which caused the queue to freeze,
1021f6fb8f10Schetan loke 		 * is still in_use by user-space.
1022f6fb8f10Schetan loke 		 */
1023f6fb8f10Schetan loke 		if (prb_curr_blk_in_use(pkc, pbd)) {
1024f6fb8f10Schetan loke 			/* Can't record this packet */
1025f6fb8f10Schetan loke 			return NULL;
1026f6fb8f10Schetan loke 		} else {
1027f6fb8f10Schetan loke 			/*
1028f6fb8f10Schetan loke 			 * Ok, the block was released by user-space.
1029f6fb8f10Schetan loke 			 * Now let's open that block.
1030f6fb8f10Schetan loke 			 * opening a block also thaws the queue.
1031f6fb8f10Schetan loke 			 * Thawing is a side effect.
1032f6fb8f10Schetan loke 			 */
1033f6fb8f10Schetan loke 			prb_open_block(pkc, pbd);
1034f6fb8f10Schetan loke 		}
1035f6fb8f10Schetan loke 	}
1036f6fb8f10Schetan loke 
1037f6fb8f10Schetan loke 	smp_mb();
1038f6fb8f10Schetan loke 	curr = pkc->nxt_offset;
1039f6fb8f10Schetan loke 	pkc->skb = skb;
1040e3192690SJoe Perches 	end = (char *)pbd + pkc->kblk_size;
1041f6fb8f10Schetan loke 
1042f6fb8f10Schetan loke 	/* first try the current block */
1043f6fb8f10Schetan loke 	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
1044f6fb8f10Schetan loke 		prb_fill_curr_block(curr, pkc, pbd, len);
1045f6fb8f10Schetan loke 		return (void *)curr;
1046f6fb8f10Schetan loke 	}
1047f6fb8f10Schetan loke 
1048f6fb8f10Schetan loke 	/* Ok, close the current block */
1049f6fb8f10Schetan loke 	prb_retire_current_block(pkc, po, 0);
1050f6fb8f10Schetan loke 
1051f6fb8f10Schetan loke 	/* Now, try to dispatch the next block */
1052f6fb8f10Schetan loke 	curr = (char *)prb_dispatch_next_block(pkc, po);
1053f6fb8f10Schetan loke 	if (curr) {
1054f6fb8f10Schetan loke 		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1055f6fb8f10Schetan loke 		prb_fill_curr_block(curr, pkc, pbd, len);
1056f6fb8f10Schetan loke 		return (void *)curr;
1057f6fb8f10Schetan loke 	}
1058f6fb8f10Schetan loke 
1059f6fb8f10Schetan loke 	/*
1060f6fb8f10Schetan loke 	 * No free blocks are available.user_space hasn't caught up yet.
1061f6fb8f10Schetan loke 	 * Queue was just frozen and now this packet will get dropped.
1062f6fb8f10Schetan loke 	 */
1063f6fb8f10Schetan loke 	return NULL;
1064f6fb8f10Schetan loke }
1065f6fb8f10Schetan loke 
1066eea49cc9SOlof Johansson static void *packet_current_rx_frame(struct packet_sock *po,
1067f6fb8f10Schetan loke 					    struct sk_buff *skb,
1068f6fb8f10Schetan loke 					    int status, unsigned int len)
1069f6fb8f10Schetan loke {
1070f6fb8f10Schetan loke 	char *curr = NULL;
1071f6fb8f10Schetan loke 	switch (po->tp_version) {
1072f6fb8f10Schetan loke 	case TPACKET_V1:
1073f6fb8f10Schetan loke 	case TPACKET_V2:
1074f6fb8f10Schetan loke 		curr = packet_lookup_frame(po, &po->rx_ring,
1075f6fb8f10Schetan loke 					po->rx_ring.head, status);
1076f6fb8f10Schetan loke 		return curr;
1077f6fb8f10Schetan loke 	case TPACKET_V3:
1078f6fb8f10Schetan loke 		return __packet_lookup_frame_in_block(po, skb, status, len);
1079f6fb8f10Schetan loke 	default:
1080f6fb8f10Schetan loke 		WARN(1, "TPACKET version not supported\n");
1081f6fb8f10Schetan loke 		BUG();
1082*99aa3473SYing Xue 		return NULL;
1083f6fb8f10Schetan loke 	}
1084f6fb8f10Schetan loke }
1085f6fb8f10Schetan loke 
1086eea49cc9SOlof Johansson static void *prb_lookup_block(struct packet_sock *po,
1087f6fb8f10Schetan loke 				     struct packet_ring_buffer *rb,
1088f6fb8f10Schetan loke 				     unsigned int previous,
1089f6fb8f10Schetan loke 				     int status)
1090f6fb8f10Schetan loke {
1091bc59ba39Schetan loke 	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
1092bc59ba39Schetan loke 	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);
1093f6fb8f10Schetan loke 
1094f6fb8f10Schetan loke 	if (status != BLOCK_STATUS(pbd))
1095f6fb8f10Schetan loke 		return NULL;
1096f6fb8f10Schetan loke 	return pbd;
1097f6fb8f10Schetan loke }
1098f6fb8f10Schetan loke 
1099eea49cc9SOlof Johansson static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1100f6fb8f10Schetan loke {
1101f6fb8f10Schetan loke 	unsigned int prev;
1102f6fb8f10Schetan loke 	if (rb->prb_bdqc.kactive_blk_num)
1103f6fb8f10Schetan loke 		prev = rb->prb_bdqc.kactive_blk_num-1;
1104f6fb8f10Schetan loke 	else
1105f6fb8f10Schetan loke 		prev = rb->prb_bdqc.knum_blocks-1;
1106f6fb8f10Schetan loke 	return prev;
1107f6fb8f10Schetan loke }
1108f6fb8f10Schetan loke 
/*
 * Look up the block preceding the active one; it is returned only if
 * its status equals @status.
 *
 * Assumes the caller holds the rx_queue.lock.
 */
static void *__prb_previous_block(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	return prb_lookup_block(po, rb, prb_previous_blk_num(rb), status);
}
1117f6fb8f10Schetan loke 
1118eea49cc9SOlof Johansson static void *packet_previous_rx_frame(struct packet_sock *po,
1119f6fb8f10Schetan loke 					     struct packet_ring_buffer *rb,
1120f6fb8f10Schetan loke 					     int status)
1121f6fb8f10Schetan loke {
1122f6fb8f10Schetan loke 	if (po->tp_version <= TPACKET_V2)
1123f6fb8f10Schetan loke 		return packet_previous_frame(po, rb, status);
1124f6fb8f10Schetan loke 
1125f6fb8f10Schetan loke 	return __prb_previous_block(po, rb, status);
1126f6fb8f10Schetan loke }
1127f6fb8f10Schetan loke 
1128eea49cc9SOlof Johansson static void packet_increment_rx_head(struct packet_sock *po,
1129f6fb8f10Schetan loke 					    struct packet_ring_buffer *rb)
1130f6fb8f10Schetan loke {
1131f6fb8f10Schetan loke 	switch (po->tp_version) {
1132f6fb8f10Schetan loke 	case TPACKET_V1:
1133f6fb8f10Schetan loke 	case TPACKET_V2:
1134f6fb8f10Schetan loke 		return packet_increment_head(rb);
1135f6fb8f10Schetan loke 	case TPACKET_V3:
1136f6fb8f10Schetan loke 	default:
1137f6fb8f10Schetan loke 		WARN(1, "TPACKET version not supported.\n");
1138f6fb8f10Schetan loke 		BUG();
1139f6fb8f10Schetan loke 		return;
1140f6fb8f10Schetan loke 	}
1141f6fb8f10Schetan loke }
1142f6fb8f10Schetan loke 
1143eea49cc9SOlof Johansson static void *packet_previous_frame(struct packet_sock *po,
114469e3c75fSJohann Baudy 		struct packet_ring_buffer *rb,
114569e3c75fSJohann Baudy 		int status)
114669e3c75fSJohann Baudy {
114769e3c75fSJohann Baudy 	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
114869e3c75fSJohann Baudy 	return packet_lookup_frame(po, rb, previous, status);
114969e3c75fSJohann Baudy }
115069e3c75fSJohann Baudy 
1151eea49cc9SOlof Johansson static void packet_increment_head(struct packet_ring_buffer *buff)
115269e3c75fSJohann Baudy {
115369e3c75fSJohann Baudy 	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
115469e3c75fSJohann Baudy }
115569e3c75fSJohann Baudy 
11561da177e4SLinus Torvalds static void packet_sock_destruct(struct sock *sk)
11571da177e4SLinus Torvalds {
1158ed85b565SRichard Cochran 	skb_queue_purge(&sk->sk_error_queue);
1159ed85b565SRichard Cochran 
1160547b792cSIlpo Järvinen 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1161547b792cSIlpo Järvinen 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
11621da177e4SLinus Torvalds 
11631da177e4SLinus Torvalds 	if (!sock_flag(sk, SOCK_DEAD)) {
116440d4e3dfSEric Dumazet 		pr_err("Attempt to release alive packet socket: %p\n", sk);
11651da177e4SLinus Torvalds 		return;
11661da177e4SLinus Torvalds 	}
11671da177e4SLinus Torvalds 
116817ab56a2SPavel Emelyanov 	sk_refcnt_debug_dec(sk);
11691da177e4SLinus Torvalds }
11701da177e4SLinus Torvalds 
1171dc99f600SDavid S. Miller static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
1172dc99f600SDavid S. Miller {
1173dc99f600SDavid S. Miller 	int x = atomic_read(&f->rr_cur) + 1;
1174dc99f600SDavid S. Miller 
1175dc99f600SDavid S. Miller 	if (x >= num)
1176dc99f600SDavid S. Miller 		x = 0;
1177dc99f600SDavid S. Miller 
1178dc99f600SDavid S. Miller 	return x;
1179dc99f600SDavid S. Miller }
1180dc99f600SDavid S. Miller 
1181dc99f600SDavid S. Miller static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
1182dc99f600SDavid S. Miller {
1183dc99f600SDavid S. Miller 	u32 idx, hash = skb->rxhash;
1184dc99f600SDavid S. Miller 
1185dc99f600SDavid S. Miller 	idx = ((u64)hash * num) >> 32;
1186dc99f600SDavid S. Miller 
1187dc99f600SDavid S. Miller 	return f->arr[idx];
1188dc99f600SDavid S. Miller }
1189dc99f600SDavid S. Miller 
1190dc99f600SDavid S. Miller static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
1191dc99f600SDavid S. Miller {
1192dc99f600SDavid S. Miller 	int cur, old;
1193dc99f600SDavid S. Miller 
1194dc99f600SDavid S. Miller 	cur = atomic_read(&f->rr_cur);
1195dc99f600SDavid S. Miller 	while ((old = atomic_cmpxchg(&f->rr_cur, cur,
1196dc99f600SDavid S. Miller 				     fanout_rr_next(f, num))) != cur)
1197dc99f600SDavid S. Miller 		cur = old;
1198dc99f600SDavid S. Miller 	return f->arr[cur];
1199dc99f600SDavid S. Miller }
1200dc99f600SDavid S. Miller 
120195ec3eb4SDavid S. Miller static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
120295ec3eb4SDavid S. Miller {
120395ec3eb4SDavid S. Miller 	unsigned int cpu = smp_processor_id();
120495ec3eb4SDavid S. Miller 
120595ec3eb4SDavid S. Miller 	return f->arr[cpu % num];
120695ec3eb4SDavid S. Miller }
120795ec3eb4SDavid S. Miller 
120895ec3eb4SDavid S. Miller static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1209dc99f600SDavid S. Miller 			     struct packet_type *pt, struct net_device *orig_dev)
1210dc99f600SDavid S. Miller {
1211dc99f600SDavid S. Miller 	struct packet_fanout *f = pt->af_packet_priv;
1212dc99f600SDavid S. Miller 	unsigned int num = f->num_members;
1213dc99f600SDavid S. Miller 	struct packet_sock *po;
1214dc99f600SDavid S. Miller 	struct sock *sk;
1215dc99f600SDavid S. Miller 
1216dc99f600SDavid S. Miller 	if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
1217dc99f600SDavid S. Miller 	    !num) {
1218dc99f600SDavid S. Miller 		kfree_skb(skb);
1219dc99f600SDavid S. Miller 		return 0;
1220dc99f600SDavid S. Miller 	}
1221dc99f600SDavid S. Miller 
122295ec3eb4SDavid S. Miller 	switch (f->type) {
122395ec3eb4SDavid S. Miller 	case PACKET_FANOUT_HASH:
122495ec3eb4SDavid S. Miller 	default:
12257736d33fSDavid S. Miller 		if (f->defrag) {
1226bc416d97SEric Dumazet 			skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
12277736d33fSDavid S. Miller 			if (!skb)
12287736d33fSDavid S. Miller 				return 0;
12297736d33fSDavid S. Miller 		}
1230dc99f600SDavid S. Miller 		skb_get_rxhash(skb);
1231dc99f600SDavid S. Miller 		sk = fanout_demux_hash(f, skb, num);
123295ec3eb4SDavid S. Miller 		break;
123395ec3eb4SDavid S. Miller 	case PACKET_FANOUT_LB:
1234dc99f600SDavid S. Miller 		sk = fanout_demux_lb(f, skb, num);
123595ec3eb4SDavid S. Miller 		break;
123695ec3eb4SDavid S. Miller 	case PACKET_FANOUT_CPU:
123795ec3eb4SDavid S. Miller 		sk = fanout_demux_cpu(f, skb, num);
123895ec3eb4SDavid S. Miller 		break;
123995ec3eb4SDavid S. Miller 	}
124095ec3eb4SDavid S. Miller 
1241dc99f600SDavid S. Miller 	po = pkt_sk(sk);
1242dc99f600SDavid S. Miller 
1243dc99f600SDavid S. Miller 	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1244dc99f600SDavid S. Miller }
1245dc99f600SDavid S. Miller 
/* Held by fanout_add() and fanout_release() around every fanout_list access. */
static DEFINE_MUTEX(fanout_mutex);
/* List of existing fanout groups; searched, extended and pruned under fanout_mutex. */
static LIST_HEAD(fanout_list);
1248dc99f600SDavid S. Miller 
1249dc99f600SDavid S. Miller static void __fanout_link(struct sock *sk, struct packet_sock *po)
1250dc99f600SDavid S. Miller {
1251dc99f600SDavid S. Miller 	struct packet_fanout *f = po->fanout;
1252dc99f600SDavid S. Miller 
1253dc99f600SDavid S. Miller 	spin_lock(&f->lock);
1254dc99f600SDavid S. Miller 	f->arr[f->num_members] = sk;
1255dc99f600SDavid S. Miller 	smp_wmb();
1256dc99f600SDavid S. Miller 	f->num_members++;
1257dc99f600SDavid S. Miller 	spin_unlock(&f->lock);
1258dc99f600SDavid S. Miller }
1259dc99f600SDavid S. Miller 
1260dc99f600SDavid S. Miller static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1261dc99f600SDavid S. Miller {
1262dc99f600SDavid S. Miller 	struct packet_fanout *f = po->fanout;
1263dc99f600SDavid S. Miller 	int i;
1264dc99f600SDavid S. Miller 
1265dc99f600SDavid S. Miller 	spin_lock(&f->lock);
1266dc99f600SDavid S. Miller 	for (i = 0; i < f->num_members; i++) {
1267dc99f600SDavid S. Miller 		if (f->arr[i] == sk)
1268dc99f600SDavid S. Miller 			break;
1269dc99f600SDavid S. Miller 	}
1270dc99f600SDavid S. Miller 	BUG_ON(i >= f->num_members);
1271dc99f600SDavid S. Miller 	f->arr[i] = f->arr[f->num_members - 1];
1272dc99f600SDavid S. Miller 	f->num_members--;
1273dc99f600SDavid S. Miller 	spin_unlock(&f->lock);
1274dc99f600SDavid S. Miller }
1275dc99f600SDavid S. Miller 
12767736d33fSDavid S. Miller static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1277dc99f600SDavid S. Miller {
1278dc99f600SDavid S. Miller 	struct packet_sock *po = pkt_sk(sk);
1279dc99f600SDavid S. Miller 	struct packet_fanout *f, *match;
12807736d33fSDavid S. Miller 	u8 type = type_flags & 0xff;
12817736d33fSDavid S. Miller 	u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0;
1282dc99f600SDavid S. Miller 	int err;
1283dc99f600SDavid S. Miller 
1284dc99f600SDavid S. Miller 	switch (type) {
1285dc99f600SDavid S. Miller 	case PACKET_FANOUT_HASH:
1286dc99f600SDavid S. Miller 	case PACKET_FANOUT_LB:
128795ec3eb4SDavid S. Miller 	case PACKET_FANOUT_CPU:
1288dc99f600SDavid S. Miller 		break;
1289dc99f600SDavid S. Miller 	default:
1290dc99f600SDavid S. Miller 		return -EINVAL;
1291dc99f600SDavid S. Miller 	}
1292dc99f600SDavid S. Miller 
1293dc99f600SDavid S. Miller 	if (!po->running)
1294dc99f600SDavid S. Miller 		return -EINVAL;
1295dc99f600SDavid S. Miller 
1296dc99f600SDavid S. Miller 	if (po->fanout)
1297dc99f600SDavid S. Miller 		return -EALREADY;
1298dc99f600SDavid S. Miller 
1299dc99f600SDavid S. Miller 	mutex_lock(&fanout_mutex);
1300dc99f600SDavid S. Miller 	match = NULL;
1301dc99f600SDavid S. Miller 	list_for_each_entry(f, &fanout_list, list) {
1302dc99f600SDavid S. Miller 		if (f->id == id &&
1303dc99f600SDavid S. Miller 		    read_pnet(&f->net) == sock_net(sk)) {
1304dc99f600SDavid S. Miller 			match = f;
1305dc99f600SDavid S. Miller 			break;
1306dc99f600SDavid S. Miller 		}
1307dc99f600SDavid S. Miller 	}
1308afe62c68SEric Dumazet 	err = -EINVAL;
13097736d33fSDavid S. Miller 	if (match && match->defrag != defrag)
1310afe62c68SEric Dumazet 		goto out;
1311dc99f600SDavid S. Miller 	if (!match) {
1312afe62c68SEric Dumazet 		err = -ENOMEM;
1313dc99f600SDavid S. Miller 		match = kzalloc(sizeof(*match), GFP_KERNEL);
1314afe62c68SEric Dumazet 		if (!match)
1315afe62c68SEric Dumazet 			goto out;
1316dc99f600SDavid S. Miller 		write_pnet(&match->net, sock_net(sk));
1317dc99f600SDavid S. Miller 		match->id = id;
1318dc99f600SDavid S. Miller 		match->type = type;
13197736d33fSDavid S. Miller 		match->defrag = defrag;
1320dc99f600SDavid S. Miller 		atomic_set(&match->rr_cur, 0);
1321dc99f600SDavid S. Miller 		INIT_LIST_HEAD(&match->list);
1322dc99f600SDavid S. Miller 		spin_lock_init(&match->lock);
1323dc99f600SDavid S. Miller 		atomic_set(&match->sk_ref, 0);
1324dc99f600SDavid S. Miller 		match->prot_hook.type = po->prot_hook.type;
1325dc99f600SDavid S. Miller 		match->prot_hook.dev = po->prot_hook.dev;
132695ec3eb4SDavid S. Miller 		match->prot_hook.func = packet_rcv_fanout;
1327dc99f600SDavid S. Miller 		match->prot_hook.af_packet_priv = match;
1328dc99f600SDavid S. Miller 		dev_add_pack(&match->prot_hook);
1329dc99f600SDavid S. Miller 		list_add(&match->list, &fanout_list);
1330dc99f600SDavid S. Miller 	}
1331dc99f600SDavid S. Miller 	err = -EINVAL;
1332dc99f600SDavid S. Miller 	if (match->type == type &&
1333dc99f600SDavid S. Miller 	    match->prot_hook.type == po->prot_hook.type &&
1334dc99f600SDavid S. Miller 	    match->prot_hook.dev == po->prot_hook.dev) {
1335dc99f600SDavid S. Miller 		err = -ENOSPC;
1336dc99f600SDavid S. Miller 		if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1337dc99f600SDavid S. Miller 			__dev_remove_pack(&po->prot_hook);
1338dc99f600SDavid S. Miller 			po->fanout = match;
1339dc99f600SDavid S. Miller 			atomic_inc(&match->sk_ref);
1340dc99f600SDavid S. Miller 			__fanout_link(sk, po);
1341dc99f600SDavid S. Miller 			err = 0;
1342dc99f600SDavid S. Miller 		}
1343dc99f600SDavid S. Miller 	}
1344afe62c68SEric Dumazet out:
1345dc99f600SDavid S. Miller 	mutex_unlock(&fanout_mutex);
1346dc99f600SDavid S. Miller 	return err;
1347dc99f600SDavid S. Miller }
1348dc99f600SDavid S. Miller 
1349dc99f600SDavid S. Miller static void fanout_release(struct sock *sk)
1350dc99f600SDavid S. Miller {
1351dc99f600SDavid S. Miller 	struct packet_sock *po = pkt_sk(sk);
1352dc99f600SDavid S. Miller 	struct packet_fanout *f;
1353dc99f600SDavid S. Miller 
1354dc99f600SDavid S. Miller 	f = po->fanout;
1355dc99f600SDavid S. Miller 	if (!f)
1356dc99f600SDavid S. Miller 		return;
1357dc99f600SDavid S. Miller 
1358dc99f600SDavid S. Miller 	po->fanout = NULL;
1359dc99f600SDavid S. Miller 
1360dc99f600SDavid S. Miller 	mutex_lock(&fanout_mutex);
1361dc99f600SDavid S. Miller 	if (atomic_dec_and_test(&f->sk_ref)) {
1362dc99f600SDavid S. Miller 		list_del(&f->list);
1363dc99f600SDavid S. Miller 		dev_remove_pack(&f->prot_hook);
1364dc99f600SDavid S. Miller 		kfree(f);
1365dc99f600SDavid S. Miller 	}
1366dc99f600SDavid S. Miller 	mutex_unlock(&fanout_mutex);
1367dc99f600SDavid S. Miller }
13681da177e4SLinus Torvalds 
/* Forward declaration of a proto_ops table; its definition is not in this part of the file. */
static const struct proto_ops packet_ops;

/* Forward declaration; presumably the proto_ops table for SOCK_PACKET sockets (confirm at the definition). */
static const struct proto_ops packet_ops_spkt;
13721da177e4SLinus Torvalds 
137340d4e3dfSEric Dumazet static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
137440d4e3dfSEric Dumazet 			   struct packet_type *pt, struct net_device *orig_dev)
13751da177e4SLinus Torvalds {
13761da177e4SLinus Torvalds 	struct sock *sk;
13771da177e4SLinus Torvalds 	struct sockaddr_pkt *spkt;
13781da177e4SLinus Torvalds 
13791da177e4SLinus Torvalds 	/*
13801da177e4SLinus Torvalds 	 *	When we registered the protocol we saved the socket in the data
13811da177e4SLinus Torvalds 	 *	field for just this event.
13821da177e4SLinus Torvalds 	 */
13831da177e4SLinus Torvalds 
13841da177e4SLinus Torvalds 	sk = pt->af_packet_priv;
13851da177e4SLinus Torvalds 
13861da177e4SLinus Torvalds 	/*
13871da177e4SLinus Torvalds 	 *	Yank back the headers [hope the device set this
13881da177e4SLinus Torvalds 	 *	right or kerboom...]
13891da177e4SLinus Torvalds 	 *
13901da177e4SLinus Torvalds 	 *	Incoming packets have ll header pulled,
13911da177e4SLinus Torvalds 	 *	push it back.
13921da177e4SLinus Torvalds 	 *
139398e399f8SArnaldo Carvalho de Melo 	 *	For outgoing ones skb->data == skb_mac_header(skb)
13941da177e4SLinus Torvalds 	 *	so that this procedure is noop.
13951da177e4SLinus Torvalds 	 */
13961da177e4SLinus Torvalds 
13971da177e4SLinus Torvalds 	if (skb->pkt_type == PACKET_LOOPBACK)
13981da177e4SLinus Torvalds 		goto out;
13991da177e4SLinus Torvalds 
140009ad9bc7SOctavian Purdila 	if (!net_eq(dev_net(dev), sock_net(sk)))
1401d12d01d6SDenis V. Lunev 		goto out;
1402d12d01d6SDenis V. Lunev 
140340d4e3dfSEric Dumazet 	skb = skb_share_check(skb, GFP_ATOMIC);
140440d4e3dfSEric Dumazet 	if (skb == NULL)
14051da177e4SLinus Torvalds 		goto oom;
14061da177e4SLinus Torvalds 
14071da177e4SLinus Torvalds 	/* drop any routing info */
1408adf30907SEric Dumazet 	skb_dst_drop(skb);
14091da177e4SLinus Torvalds 
141084531c24SPhil Oester 	/* drop conntrack reference */
141184531c24SPhil Oester 	nf_reset(skb);
141284531c24SPhil Oester 
1413ffbc6111SHerbert Xu 	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
14141da177e4SLinus Torvalds 
141598e399f8SArnaldo Carvalho de Melo 	skb_push(skb, skb->data - skb_mac_header(skb));
14161da177e4SLinus Torvalds 
14171da177e4SLinus Torvalds 	/*
14181da177e4SLinus Torvalds 	 *	The SOCK_PACKET socket receives _all_ frames.
14191da177e4SLinus Torvalds 	 */
14201da177e4SLinus Torvalds 
14211da177e4SLinus Torvalds 	spkt->spkt_family = dev->type;
14221da177e4SLinus Torvalds 	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
14231da177e4SLinus Torvalds 	spkt->spkt_protocol = skb->protocol;
14241da177e4SLinus Torvalds 
14251da177e4SLinus Torvalds 	/*
14261da177e4SLinus Torvalds 	 *	Charge the memory to the socket. This is done specifically
14271da177e4SLinus Torvalds 	 *	to prevent sockets using all the memory up.
14281da177e4SLinus Torvalds 	 */
14291da177e4SLinus Torvalds 
14301da177e4SLinus Torvalds 	if (sock_queue_rcv_skb(sk, skb) == 0)
14311da177e4SLinus Torvalds 		return 0;
14321da177e4SLinus Torvalds 
14331da177e4SLinus Torvalds out:
14341da177e4SLinus Torvalds 	kfree_skb(skb);
14351da177e4SLinus Torvalds oom:
14361da177e4SLinus Torvalds 	return 0;
14371da177e4SLinus Torvalds }
14381da177e4SLinus Torvalds 
14391da177e4SLinus Torvalds 
14401da177e4SLinus Torvalds /*
14411da177e4SLinus Torvalds  *	Output a raw packet to a device layer. This bypasses all the other
14421da177e4SLinus Torvalds  *	protocol layers and you must therefore supply it with a complete frame
14431da177e4SLinus Torvalds  */
14441da177e4SLinus Torvalds 
14451da177e4SLinus Torvalds static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
14461da177e4SLinus Torvalds 			       struct msghdr *msg, size_t len)
14471da177e4SLinus Torvalds {
14481da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
14491da177e4SLinus Torvalds 	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
14501a35ca80SEric Dumazet 	struct sk_buff *skb = NULL;
14511da177e4SLinus Torvalds 	struct net_device *dev;
14520e11c91eSAl Viro 	__be16 proto = 0;
14531da177e4SLinus Torvalds 	int err;
14543bdc0ebaSBen Greear 	int extra_len = 0;
14551da177e4SLinus Torvalds 
14561da177e4SLinus Torvalds 	/*
14571da177e4SLinus Torvalds 	 *	Get and verify the address.
14581da177e4SLinus Torvalds 	 */
14591da177e4SLinus Torvalds 
146040d4e3dfSEric Dumazet 	if (saddr) {
14611da177e4SLinus Torvalds 		if (msg->msg_namelen < sizeof(struct sockaddr))
146240d4e3dfSEric Dumazet 			return -EINVAL;
14631da177e4SLinus Torvalds 		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
14641da177e4SLinus Torvalds 			proto = saddr->spkt_protocol;
146540d4e3dfSEric Dumazet 	} else
146640d4e3dfSEric Dumazet 		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */
14671da177e4SLinus Torvalds 
14681da177e4SLinus Torvalds 	/*
14691da177e4SLinus Torvalds 	 *	Find the device first to size check it
14701da177e4SLinus Torvalds 	 */
14711da177e4SLinus Torvalds 
1472de74e92aSdanborkmann@iogearbox.net 	saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
14731a35ca80SEric Dumazet retry:
1474654d1f8aSEric Dumazet 	rcu_read_lock();
1475654d1f8aSEric Dumazet 	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
14761da177e4SLinus Torvalds 	err = -ENODEV;
14771da177e4SLinus Torvalds 	if (dev == NULL)
14781da177e4SLinus Torvalds 		goto out_unlock;
14791da177e4SLinus Torvalds 
1480d5e76b0aSDavid S. Miller 	err = -ENETDOWN;
1481d5e76b0aSDavid S. Miller 	if (!(dev->flags & IFF_UP))
1482d5e76b0aSDavid S. Miller 		goto out_unlock;
1483d5e76b0aSDavid S. Miller 
14841da177e4SLinus Torvalds 	/*
14851da177e4SLinus Torvalds 	 * You may not queue a frame bigger than the mtu. This is the lowest level
14861da177e4SLinus Torvalds 	 * raw protocol and you must do your own fragmentation at this level.
14871da177e4SLinus Torvalds 	 */
14881da177e4SLinus Torvalds 
14893bdc0ebaSBen Greear 	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
14903bdc0ebaSBen Greear 		if (!netif_supports_nofcs(dev)) {
14913bdc0ebaSBen Greear 			err = -EPROTONOSUPPORT;
14923bdc0ebaSBen Greear 			goto out_unlock;
14933bdc0ebaSBen Greear 		}
14943bdc0ebaSBen Greear 		extra_len = 4; /* We're doing our own CRC */
14953bdc0ebaSBen Greear 	}
14963bdc0ebaSBen Greear 
14971da177e4SLinus Torvalds 	err = -EMSGSIZE;
14983bdc0ebaSBen Greear 	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
14991da177e4SLinus Torvalds 		goto out_unlock;
15001da177e4SLinus Torvalds 
15011a35ca80SEric Dumazet 	if (!skb) {
15021a35ca80SEric Dumazet 		size_t reserved = LL_RESERVED_SPACE(dev);
15034ce40912SHerbert Xu 		int tlen = dev->needed_tailroom;
15041a35ca80SEric Dumazet 		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
15051da177e4SLinus Torvalds 
15061a35ca80SEric Dumazet 		rcu_read_unlock();
15074ce40912SHerbert Xu 		skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
15081da177e4SLinus Torvalds 		if (skb == NULL)
15091a35ca80SEric Dumazet 			return -ENOBUFS;
15101a35ca80SEric Dumazet 		/* FIXME: Save some space for broken drivers that write a hard
15111a35ca80SEric Dumazet 		 * header at transmission time by themselves. PPP is the notable
15121a35ca80SEric Dumazet 		 * one here. This should really be fixed at the driver level.
15131da177e4SLinus Torvalds 		 */
15141a35ca80SEric Dumazet 		skb_reserve(skb, reserved);
1515c1d2bbe1SArnaldo Carvalho de Melo 		skb_reset_network_header(skb);
15161da177e4SLinus Torvalds 
15171da177e4SLinus Torvalds 		/* Try to align data part correctly */
15181a35ca80SEric Dumazet 		if (hhlen) {
15191a35ca80SEric Dumazet 			skb->data -= hhlen;
15201a35ca80SEric Dumazet 			skb->tail -= hhlen;
15211a35ca80SEric Dumazet 			if (len < hhlen)
1522c1d2bbe1SArnaldo Carvalho de Melo 				skb_reset_network_header(skb);
15231da177e4SLinus Torvalds 		}
15241da177e4SLinus Torvalds 		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
15251a35ca80SEric Dumazet 		if (err)
15261a35ca80SEric Dumazet 			goto out_free;
15271a35ca80SEric Dumazet 		goto retry;
15281a35ca80SEric Dumazet 	}
15291a35ca80SEric Dumazet 
15303bdc0ebaSBen Greear 	if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
153157f89bfaSBen Greear 		/* Earlier code assumed this would be a VLAN pkt,
153257f89bfaSBen Greear 		 * double-check this now that we have the actual
153357f89bfaSBen Greear 		 * packet in hand.
153457f89bfaSBen Greear 		 */
153557f89bfaSBen Greear 		struct ethhdr *ehdr;
153657f89bfaSBen Greear 		skb_reset_mac_header(skb);
153757f89bfaSBen Greear 		ehdr = eth_hdr(skb);
153857f89bfaSBen Greear 		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
153957f89bfaSBen Greear 			err = -EMSGSIZE;
154057f89bfaSBen Greear 			goto out_unlock;
154157f89bfaSBen Greear 		}
154257f89bfaSBen Greear 	}
15431a35ca80SEric Dumazet 
15441da177e4SLinus Torvalds 	skb->protocol = proto;
15451da177e4SLinus Torvalds 	skb->dev = dev;
15461da177e4SLinus Torvalds 	skb->priority = sk->sk_priority;
15472d37a186SEric Dumazet 	skb->mark = sk->sk_mark;
15482244d07bSOliver Hartkopp 	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1549ed85b565SRichard Cochran 	if (err < 0)
1550ed85b565SRichard Cochran 		goto out_unlock;
15511da177e4SLinus Torvalds 
15523bdc0ebaSBen Greear 	if (unlikely(extra_len == 4))
15533bdc0ebaSBen Greear 		skb->no_fcs = 1;
15543bdc0ebaSBen Greear 
15551da177e4SLinus Torvalds 	dev_queue_xmit(skb);
1556654d1f8aSEric Dumazet 	rcu_read_unlock();
155740d4e3dfSEric Dumazet 	return len;
15581da177e4SLinus Torvalds 
15591da177e4SLinus Torvalds out_unlock:
1560654d1f8aSEric Dumazet 	rcu_read_unlock();
15611a35ca80SEric Dumazet out_free:
15621a35ca80SEric Dumazet 	kfree_skb(skb);
15631da177e4SLinus Torvalds 	return err;
15641da177e4SLinus Torvalds }
15651da177e4SLinus Torvalds 
1566eea49cc9SOlof Johansson static unsigned int run_filter(const struct sk_buff *skb,
156762ab0812SEric Dumazet 				      const struct sock *sk,
1568dbcb5855SDavid S. Miller 				      unsigned int res)
15691da177e4SLinus Torvalds {
15701da177e4SLinus Torvalds 	struct sk_filter *filter;
15711da177e4SLinus Torvalds 
157280f8f102SEric Dumazet 	rcu_read_lock();
157380f8f102SEric Dumazet 	filter = rcu_dereference(sk->sk_filter);
1574dbcb5855SDavid S. Miller 	if (filter != NULL)
15750a14842fSEric Dumazet 		res = SK_RUN_FILTER(filter, skb);
157680f8f102SEric Dumazet 	rcu_read_unlock();
15771da177e4SLinus Torvalds 
1578dbcb5855SDavid S. Miller 	return res;
15791da177e4SLinus Torvalds }
15801da177e4SLinus Torvalds 
15811da177e4SLinus Torvalds /*
158262ab0812SEric Dumazet  * This function makes lazy skb cloning in hope that most of packets
158362ab0812SEric Dumazet  * are discarded by BPF.
158462ab0812SEric Dumazet  *
158562ab0812SEric Dumazet  * Note tricky part: we DO mangle shared skb! skb->data, skb->len
158662ab0812SEric Dumazet  * and skb->cb are mangled. It works because (and until) packets
158762ab0812SEric Dumazet  * falling here are owned by current CPU. Output packets are cloned
158862ab0812SEric Dumazet  * by dev_queue_xmit_nit(), input packets are processed by net_bh
158962ab0812SEric Dumazet  * sequencially, so that if we return skb to original state on exit,
159062ab0812SEric Dumazet  * we will not harm anyone.
15911da177e4SLinus Torvalds  */
15921da177e4SLinus Torvalds 
159340d4e3dfSEric Dumazet static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
159440d4e3dfSEric Dumazet 		      struct packet_type *pt, struct net_device *orig_dev)
15951da177e4SLinus Torvalds {
15961da177e4SLinus Torvalds 	struct sock *sk;
15971da177e4SLinus Torvalds 	struct sockaddr_ll *sll;
15981da177e4SLinus Torvalds 	struct packet_sock *po;
15991da177e4SLinus Torvalds 	u8 *skb_head = skb->data;
16001da177e4SLinus Torvalds 	int skb_len = skb->len;
1601dbcb5855SDavid S. Miller 	unsigned int snaplen, res;
16021da177e4SLinus Torvalds 
16031da177e4SLinus Torvalds 	if (skb->pkt_type == PACKET_LOOPBACK)
16041da177e4SLinus Torvalds 		goto drop;
16051da177e4SLinus Torvalds 
16061da177e4SLinus Torvalds 	sk = pt->af_packet_priv;
16071da177e4SLinus Torvalds 	po = pkt_sk(sk);
16081da177e4SLinus Torvalds 
160909ad9bc7SOctavian Purdila 	if (!net_eq(dev_net(dev), sock_net(sk)))
1610d12d01d6SDenis V. Lunev 		goto drop;
1611d12d01d6SDenis V. Lunev 
16121da177e4SLinus Torvalds 	skb->dev = dev;
16131da177e4SLinus Torvalds 
16143b04dddeSStephen Hemminger 	if (dev->header_ops) {
16151da177e4SLinus Torvalds 		/* The device has an explicit notion of ll header,
161662ab0812SEric Dumazet 		 * exported to higher levels.
161762ab0812SEric Dumazet 		 *
161862ab0812SEric Dumazet 		 * Otherwise, the device hides details of its frame
161962ab0812SEric Dumazet 		 * structure, so that corresponding packet head is
162062ab0812SEric Dumazet 		 * never delivered to user.
16211da177e4SLinus Torvalds 		 */
16221da177e4SLinus Torvalds 		if (sk->sk_type != SOCK_DGRAM)
162398e399f8SArnaldo Carvalho de Melo 			skb_push(skb, skb->data - skb_mac_header(skb));
16241da177e4SLinus Torvalds 		else if (skb->pkt_type == PACKET_OUTGOING) {
16251da177e4SLinus Torvalds 			/* Special case: outgoing packets have ll header at head */
1626bbe735e4SArnaldo Carvalho de Melo 			skb_pull(skb, skb_network_offset(skb));
16271da177e4SLinus Torvalds 		}
16281da177e4SLinus Torvalds 	}
16291da177e4SLinus Torvalds 
16301da177e4SLinus Torvalds 	snaplen = skb->len;
16311da177e4SLinus Torvalds 
1632dbcb5855SDavid S. Miller 	res = run_filter(skb, sk, snaplen);
1633dbcb5855SDavid S. Miller 	if (!res)
16341da177e4SLinus Torvalds 		goto drop_n_restore;
1635dbcb5855SDavid S. Miller 	if (snaplen > res)
1636dbcb5855SDavid S. Miller 		snaplen = res;
16371da177e4SLinus Torvalds 
16380fd7bac6SEric Dumazet 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
16391da177e4SLinus Torvalds 		goto drop_n_acct;
16401da177e4SLinus Torvalds 
16411da177e4SLinus Torvalds 	if (skb_shared(skb)) {
16421da177e4SLinus Torvalds 		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
16431da177e4SLinus Torvalds 		if (nskb == NULL)
16441da177e4SLinus Torvalds 			goto drop_n_acct;
16451da177e4SLinus Torvalds 
16461da177e4SLinus Torvalds 		if (skb_head != skb->data) {
16471da177e4SLinus Torvalds 			skb->data = skb_head;
16481da177e4SLinus Torvalds 			skb->len = skb_len;
16491da177e4SLinus Torvalds 		}
1650abc4e4faSEric Dumazet 		consume_skb(skb);
16511da177e4SLinus Torvalds 		skb = nskb;
16521da177e4SLinus Torvalds 	}
16531da177e4SLinus Torvalds 
1654ffbc6111SHerbert Xu 	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
1655ffbc6111SHerbert Xu 		     sizeof(skb->cb));
1656ffbc6111SHerbert Xu 
1657ffbc6111SHerbert Xu 	sll = &PACKET_SKB_CB(skb)->sa.ll;
16581da177e4SLinus Torvalds 	sll->sll_family = AF_PACKET;
16591da177e4SLinus Torvalds 	sll->sll_hatype = dev->type;
16601da177e4SLinus Torvalds 	sll->sll_protocol = skb->protocol;
16611da177e4SLinus Torvalds 	sll->sll_pkttype = skb->pkt_type;
16628032b464SPeter P Waskiewicz Jr 	if (unlikely(po->origdev))
166380feaacbSPeter P. Waskiewicz Jr 		sll->sll_ifindex = orig_dev->ifindex;
166480feaacbSPeter P. Waskiewicz Jr 	else
16651da177e4SLinus Torvalds 		sll->sll_ifindex = dev->ifindex;
16661da177e4SLinus Torvalds 
1667b95cce35SStephen Hemminger 	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
16681da177e4SLinus Torvalds 
1669ffbc6111SHerbert Xu 	PACKET_SKB_CB(skb)->origlen = skb->len;
16708dc41944SHerbert Xu 
16711da177e4SLinus Torvalds 	if (pskb_trim(skb, snaplen))
16721da177e4SLinus Torvalds 		goto drop_n_acct;
16731da177e4SLinus Torvalds 
16741da177e4SLinus Torvalds 	skb_set_owner_r(skb, sk);
16751da177e4SLinus Torvalds 	skb->dev = NULL;
1676adf30907SEric Dumazet 	skb_dst_drop(skb);
16771da177e4SLinus Torvalds 
167884531c24SPhil Oester 	/* drop conntrack reference */
167984531c24SPhil Oester 	nf_reset(skb);
168084531c24SPhil Oester 
16811da177e4SLinus Torvalds 	spin_lock(&sk->sk_receive_queue.lock);
16821da177e4SLinus Torvalds 	po->stats.tp_packets++;
16833b885787SNeil Horman 	skb->dropcount = atomic_read(&sk->sk_drops);
16841da177e4SLinus Torvalds 	__skb_queue_tail(&sk->sk_receive_queue, skb);
16851da177e4SLinus Torvalds 	spin_unlock(&sk->sk_receive_queue.lock);
16861da177e4SLinus Torvalds 	sk->sk_data_ready(sk, skb->len);
16871da177e4SLinus Torvalds 	return 0;
16881da177e4SLinus Torvalds 
16891da177e4SLinus Torvalds drop_n_acct:
16907091fbd8SWillem de Bruijn 	spin_lock(&sk->sk_receive_queue.lock);
16917091fbd8SWillem de Bruijn 	po->stats.tp_drops++;
16927091fbd8SWillem de Bruijn 	atomic_inc(&sk->sk_drops);
16937091fbd8SWillem de Bruijn 	spin_unlock(&sk->sk_receive_queue.lock);
16941da177e4SLinus Torvalds 
16951da177e4SLinus Torvalds drop_n_restore:
16961da177e4SLinus Torvalds 	if (skb_head != skb->data && skb_shared(skb)) {
16971da177e4SLinus Torvalds 		skb->data = skb_head;
16981da177e4SLinus Torvalds 		skb->len = skb_len;
16991da177e4SLinus Torvalds 	}
17001da177e4SLinus Torvalds drop:
1701ead2ceb0SNeil Horman 	consume_skb(skb);
17021da177e4SLinus Torvalds 	return 0;
17031da177e4SLinus Torvalds }
17041da177e4SLinus Torvalds 
170540d4e3dfSEric Dumazet static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
170640d4e3dfSEric Dumazet 		       struct packet_type *pt, struct net_device *orig_dev)
17071da177e4SLinus Torvalds {
17081da177e4SLinus Torvalds 	struct sock *sk;
17091da177e4SLinus Torvalds 	struct packet_sock *po;
17101da177e4SLinus Torvalds 	struct sockaddr_ll *sll;
1711bbd6ef87SPatrick McHardy 	union {
1712bbd6ef87SPatrick McHardy 		struct tpacket_hdr *h1;
1713bbd6ef87SPatrick McHardy 		struct tpacket2_hdr *h2;
1714f6fb8f10Schetan loke 		struct tpacket3_hdr *h3;
1715bbd6ef87SPatrick McHardy 		void *raw;
1716bbd6ef87SPatrick McHardy 	} h;
17171da177e4SLinus Torvalds 	u8 *skb_head = skb->data;
17181da177e4SLinus Torvalds 	int skb_len = skb->len;
1719dbcb5855SDavid S. Miller 	unsigned int snaplen, res;
1720f6fb8f10Schetan loke 	unsigned long status = TP_STATUS_USER;
1721bbd6ef87SPatrick McHardy 	unsigned short macoff, netoff, hdrlen;
17221da177e4SLinus Torvalds 	struct sk_buff *copy_skb = NULL;
1723b7aa0bf7SEric Dumazet 	struct timeval tv;
1724bbd6ef87SPatrick McHardy 	struct timespec ts;
1725614f60faSScott McMillan 	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
17261da177e4SLinus Torvalds 
17271da177e4SLinus Torvalds 	if (skb->pkt_type == PACKET_LOOPBACK)
17281da177e4SLinus Torvalds 		goto drop;
17291da177e4SLinus Torvalds 
17301da177e4SLinus Torvalds 	sk = pt->af_packet_priv;
17311da177e4SLinus Torvalds 	po = pkt_sk(sk);
17321da177e4SLinus Torvalds 
173309ad9bc7SOctavian Purdila 	if (!net_eq(dev_net(dev), sock_net(sk)))
1734d12d01d6SDenis V. Lunev 		goto drop;
1735d12d01d6SDenis V. Lunev 
17363b04dddeSStephen Hemminger 	if (dev->header_ops) {
17371da177e4SLinus Torvalds 		if (sk->sk_type != SOCK_DGRAM)
173898e399f8SArnaldo Carvalho de Melo 			skb_push(skb, skb->data - skb_mac_header(skb));
17391da177e4SLinus Torvalds 		else if (skb->pkt_type == PACKET_OUTGOING) {
17401da177e4SLinus Torvalds 			/* Special case: outgoing packets have ll header at head */
1741bbe735e4SArnaldo Carvalho de Melo 			skb_pull(skb, skb_network_offset(skb));
17428dc41944SHerbert Xu 		}
17438dc41944SHerbert Xu 	}
17448dc41944SHerbert Xu 
174584fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL)
17461da177e4SLinus Torvalds 		status |= TP_STATUS_CSUMNOTREADY;
17471da177e4SLinus Torvalds 
17481da177e4SLinus Torvalds 	snaplen = skb->len;
17491da177e4SLinus Torvalds 
1750dbcb5855SDavid S. Miller 	res = run_filter(skb, sk, snaplen);
1751dbcb5855SDavid S. Miller 	if (!res)
17521da177e4SLinus Torvalds 		goto drop_n_restore;
1753dbcb5855SDavid S. Miller 	if (snaplen > res)
1754dbcb5855SDavid S. Miller 		snaplen = res;
17551da177e4SLinus Torvalds 
17561da177e4SLinus Torvalds 	if (sk->sk_type == SOCK_DGRAM) {
17578913336aSPatrick McHardy 		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
17588913336aSPatrick McHardy 				  po->tp_reserve;
17591da177e4SLinus Torvalds 	} else {
176095c96174SEric Dumazet 		unsigned int maclen = skb_network_offset(skb);
1761bbd6ef87SPatrick McHardy 		netoff = TPACKET_ALIGN(po->tp_hdrlen +
17628913336aSPatrick McHardy 				       (maclen < 16 ? 16 : maclen)) +
17638913336aSPatrick McHardy 			po->tp_reserve;
17641da177e4SLinus Torvalds 		macoff = netoff - maclen;
17651da177e4SLinus Torvalds 	}
1766f6fb8f10Schetan loke 	if (po->tp_version <= TPACKET_V2) {
176769e3c75fSJohann Baudy 		if (macoff + snaplen > po->rx_ring.frame_size) {
17681da177e4SLinus Torvalds 			if (po->copy_thresh &&
17690fd7bac6SEric Dumazet 			    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
17701da177e4SLinus Torvalds 				if (skb_shared(skb)) {
17711da177e4SLinus Torvalds 					copy_skb = skb_clone(skb, GFP_ATOMIC);
17721da177e4SLinus Torvalds 				} else {
17731da177e4SLinus Torvalds 					copy_skb = skb_get(skb);
17741da177e4SLinus Torvalds 					skb_head = skb->data;
17751da177e4SLinus Torvalds 				}
17761da177e4SLinus Torvalds 				if (copy_skb)
17771da177e4SLinus Torvalds 					skb_set_owner_r(copy_skb, sk);
17781da177e4SLinus Torvalds 			}
177969e3c75fSJohann Baudy 			snaplen = po->rx_ring.frame_size - macoff;
17801da177e4SLinus Torvalds 			if ((int)snaplen < 0)
17811da177e4SLinus Torvalds 				snaplen = 0;
17821da177e4SLinus Torvalds 		}
1783f6fb8f10Schetan loke 	}
17841da177e4SLinus Torvalds 	spin_lock(&sk->sk_receive_queue.lock);
1785f6fb8f10Schetan loke 	h.raw = packet_current_rx_frame(po, skb,
1786f6fb8f10Schetan loke 					TP_STATUS_KERNEL, (macoff+snaplen));
1787bbd6ef87SPatrick McHardy 	if (!h.raw)
17881da177e4SLinus Torvalds 		goto ring_is_full;
1789f6fb8f10Schetan loke 	if (po->tp_version <= TPACKET_V2) {
1790f6fb8f10Schetan loke 		packet_increment_rx_head(po, &po->rx_ring);
1791f6fb8f10Schetan loke 	/*
1792f6fb8f10Schetan loke 	 * LOSING will be reported till you read the stats,
1793f6fb8f10Schetan loke 	 * because it's COR - Clear On Read.
1794f6fb8f10Schetan loke 	 * Anyways, moving it for V1/V2 only as V3 doesn't need this
1795f6fb8f10Schetan loke 	 * at packet level.
1796f6fb8f10Schetan loke 	 */
1797f6fb8f10Schetan loke 		if (po->stats.tp_drops)
1798f6fb8f10Schetan loke 			status |= TP_STATUS_LOSING;
1799f6fb8f10Schetan loke 	}
18001da177e4SLinus Torvalds 	po->stats.tp_packets++;
18011da177e4SLinus Torvalds 	if (copy_skb) {
18021da177e4SLinus Torvalds 		status |= TP_STATUS_COPY;
18031da177e4SLinus Torvalds 		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
18041da177e4SLinus Torvalds 	}
18051da177e4SLinus Torvalds 	spin_unlock(&sk->sk_receive_queue.lock);
18061da177e4SLinus Torvalds 
1807bbd6ef87SPatrick McHardy 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
18081da177e4SLinus Torvalds 
1809bbd6ef87SPatrick McHardy 	switch (po->tp_version) {
1810bbd6ef87SPatrick McHardy 	case TPACKET_V1:
1811bbd6ef87SPatrick McHardy 		h.h1->tp_len = skb->len;
1812bbd6ef87SPatrick McHardy 		h.h1->tp_snaplen = snaplen;
1813bbd6ef87SPatrick McHardy 		h.h1->tp_mac = macoff;
1814bbd6ef87SPatrick McHardy 		h.h1->tp_net = netoff;
1815614f60faSScott McMillan 		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1816614f60faSScott McMillan 				&& shhwtstamps->syststamp.tv64)
1817614f60faSScott McMillan 			tv = ktime_to_timeval(shhwtstamps->syststamp);
1818614f60faSScott McMillan 		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1819614f60faSScott McMillan 				&& shhwtstamps->hwtstamp.tv64)
1820614f60faSScott McMillan 			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
1821614f60faSScott McMillan 		else if (skb->tstamp.tv64)
1822b7aa0bf7SEric Dumazet 			tv = ktime_to_timeval(skb->tstamp);
182350f17787SStephen Hemminger 		else
182450f17787SStephen Hemminger 			do_gettimeofday(&tv);
1825bbd6ef87SPatrick McHardy 		h.h1->tp_sec = tv.tv_sec;
1826bbd6ef87SPatrick McHardy 		h.h1->tp_usec = tv.tv_usec;
1827bbd6ef87SPatrick McHardy 		hdrlen = sizeof(*h.h1);
1828bbd6ef87SPatrick McHardy 		break;
1829bbd6ef87SPatrick McHardy 	case TPACKET_V2:
1830bbd6ef87SPatrick McHardy 		h.h2->tp_len = skb->len;
1831bbd6ef87SPatrick McHardy 		h.h2->tp_snaplen = snaplen;
1832bbd6ef87SPatrick McHardy 		h.h2->tp_mac = macoff;
1833bbd6ef87SPatrick McHardy 		h.h2->tp_net = netoff;
1834614f60faSScott McMillan 		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1835614f60faSScott McMillan 				&& shhwtstamps->syststamp.tv64)
1836614f60faSScott McMillan 			ts = ktime_to_timespec(shhwtstamps->syststamp);
1837614f60faSScott McMillan 		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1838614f60faSScott McMillan 				&& shhwtstamps->hwtstamp.tv64)
1839614f60faSScott McMillan 			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1840614f60faSScott McMillan 		else if (skb->tstamp.tv64)
1841bbd6ef87SPatrick McHardy 			ts = ktime_to_timespec(skb->tstamp);
1842bbd6ef87SPatrick McHardy 		else
1843bbd6ef87SPatrick McHardy 			getnstimeofday(&ts);
1844bbd6ef87SPatrick McHardy 		h.h2->tp_sec = ts.tv_sec;
1845bbd6ef87SPatrick McHardy 		h.h2->tp_nsec = ts.tv_nsec;
1846a3bcc23eSBen Greear 		if (vlan_tx_tag_present(skb)) {
184705423b24SEric Dumazet 			h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
1848a3bcc23eSBen Greear 			status |= TP_STATUS_VLAN_VALID;
1849a3bcc23eSBen Greear 		} else {
1850a3bcc23eSBen Greear 			h.h2->tp_vlan_tci = 0;
1851a3bcc23eSBen Greear 		}
185213fcb7bdSEric Dumazet 		h.h2->tp_padding = 0;
1853bbd6ef87SPatrick McHardy 		hdrlen = sizeof(*h.h2);
1854bbd6ef87SPatrick McHardy 		break;
1855f6fb8f10Schetan loke 	case TPACKET_V3:
1856f6fb8f10Schetan loke 		/* tp_nxt_offset,vlan are already populated above.
1857f6fb8f10Schetan loke 		 * So DONT clear those fields here
1858f6fb8f10Schetan loke 		 */
1859f6fb8f10Schetan loke 		h.h3->tp_status |= status;
1860f6fb8f10Schetan loke 		h.h3->tp_len = skb->len;
1861f6fb8f10Schetan loke 		h.h3->tp_snaplen = snaplen;
1862f6fb8f10Schetan loke 		h.h3->tp_mac = macoff;
1863f6fb8f10Schetan loke 		h.h3->tp_net = netoff;
1864f6fb8f10Schetan loke 		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1865f6fb8f10Schetan loke 				&& shhwtstamps->syststamp.tv64)
1866f6fb8f10Schetan loke 			ts = ktime_to_timespec(shhwtstamps->syststamp);
1867f6fb8f10Schetan loke 		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1868f6fb8f10Schetan loke 				&& shhwtstamps->hwtstamp.tv64)
1869f6fb8f10Schetan loke 			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1870f6fb8f10Schetan loke 		else if (skb->tstamp.tv64)
1871f6fb8f10Schetan loke 			ts = ktime_to_timespec(skb->tstamp);
1872f6fb8f10Schetan loke 		else
1873f6fb8f10Schetan loke 			getnstimeofday(&ts);
1874f6fb8f10Schetan loke 		h.h3->tp_sec  = ts.tv_sec;
1875f6fb8f10Schetan loke 		h.h3->tp_nsec = ts.tv_nsec;
1876f6fb8f10Schetan loke 		hdrlen = sizeof(*h.h3);
1877f6fb8f10Schetan loke 		break;
1878bbd6ef87SPatrick McHardy 	default:
1879bbd6ef87SPatrick McHardy 		BUG();
1880bbd6ef87SPatrick McHardy 	}
18811da177e4SLinus Torvalds 
1882bbd6ef87SPatrick McHardy 	sll = h.raw + TPACKET_ALIGN(hdrlen);
1883b95cce35SStephen Hemminger 	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
18841da177e4SLinus Torvalds 	sll->sll_family = AF_PACKET;
18851da177e4SLinus Torvalds 	sll->sll_hatype = dev->type;
18861da177e4SLinus Torvalds 	sll->sll_protocol = skb->protocol;
18871da177e4SLinus Torvalds 	sll->sll_pkttype = skb->pkt_type;
18888032b464SPeter P Waskiewicz Jr 	if (unlikely(po->origdev))
188980feaacbSPeter P. Waskiewicz Jr 		sll->sll_ifindex = orig_dev->ifindex;
189080feaacbSPeter P. Waskiewicz Jr 	else
18911da177e4SLinus Torvalds 		sll->sll_ifindex = dev->ifindex;
18921da177e4SLinus Torvalds 
1893e16aa207SRalf Baechle 	smp_mb();
1894f6dafa95SChangli Gao #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
18951da177e4SLinus Torvalds 	{
18960af55bb5SChangli Gao 		u8 *start, *end;
18971da177e4SLinus Torvalds 
1898f6fb8f10Schetan loke 		if (po->tp_version <= TPACKET_V2) {
1899f6fb8f10Schetan loke 			end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
1900f6fb8f10Schetan loke 				+ macoff + snaplen);
19010af55bb5SChangli Gao 			for (start = h.raw; start < end; start += PAGE_SIZE)
19020af55bb5SChangli Gao 				flush_dcache_page(pgv_to_page(start));
1903f6fb8f10Schetan loke 		}
1904cc9f01b2SChetan Loke 		smp_wmb();
19051da177e4SLinus Torvalds 	}
1906f6dafa95SChangli Gao #endif
1907f6fb8f10Schetan loke 	if (po->tp_version <= TPACKET_V2)
1908cc9f01b2SChetan Loke 		__packet_set_status(po, h.raw, status);
1909f6fb8f10Schetan loke 	else
1910f6fb8f10Schetan loke 		prb_clear_blk_fill_status(&po->rx_ring);
19111da177e4SLinus Torvalds 
19121da177e4SLinus Torvalds 	sk->sk_data_ready(sk, 0);
19131da177e4SLinus Torvalds 
19141da177e4SLinus Torvalds drop_n_restore:
19151da177e4SLinus Torvalds 	if (skb_head != skb->data && skb_shared(skb)) {
19161da177e4SLinus Torvalds 		skb->data = skb_head;
19171da177e4SLinus Torvalds 		skb->len = skb_len;
19181da177e4SLinus Torvalds 	}
19191da177e4SLinus Torvalds drop:
19201da177e4SLinus Torvalds 	kfree_skb(skb);
19211da177e4SLinus Torvalds 	return 0;
19221da177e4SLinus Torvalds 
19231da177e4SLinus Torvalds ring_is_full:
19241da177e4SLinus Torvalds 	po->stats.tp_drops++;
19251da177e4SLinus Torvalds 	spin_unlock(&sk->sk_receive_queue.lock);
19261da177e4SLinus Torvalds 
19271da177e4SLinus Torvalds 	sk->sk_data_ready(sk, 0);
19281da177e4SLinus Torvalds 	kfree_skb(copy_skb);
19291da177e4SLinus Torvalds 	goto drop_n_restore;
19301da177e4SLinus Torvalds }
19311da177e4SLinus Torvalds 
193269e3c75fSJohann Baudy static void tpacket_destruct_skb(struct sk_buff *skb)
193369e3c75fSJohann Baudy {
193469e3c75fSJohann Baudy 	struct packet_sock *po = pkt_sk(skb->sk);
193569e3c75fSJohann Baudy 	void *ph;
193669e3c75fSJohann Baudy 
193769e3c75fSJohann Baudy 	if (likely(po->tx_ring.pg_vec)) {
193869e3c75fSJohann Baudy 		ph = skb_shinfo(skb)->destructor_arg;
193969e3c75fSJohann Baudy 		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
194069e3c75fSJohann Baudy 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
194169e3c75fSJohann Baudy 		atomic_dec(&po->tx_ring.pending);
194269e3c75fSJohann Baudy 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
194369e3c75fSJohann Baudy 	}
194469e3c75fSJohann Baudy 
194569e3c75fSJohann Baudy 	sock_wfree(skb);
194669e3c75fSJohann Baudy }
194769e3c75fSJohann Baudy 
194869e3c75fSJohann Baudy static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
194969e3c75fSJohann Baudy 		void *frame, struct net_device *dev, int size_max,
1950ae641949SHerbert Xu 		__be16 proto, unsigned char *addr, int hlen)
195169e3c75fSJohann Baudy {
195269e3c75fSJohann Baudy 	union {
195369e3c75fSJohann Baudy 		struct tpacket_hdr *h1;
195469e3c75fSJohann Baudy 		struct tpacket2_hdr *h2;
195569e3c75fSJohann Baudy 		void *raw;
195669e3c75fSJohann Baudy 	} ph;
195769e3c75fSJohann Baudy 	int to_write, offset, len, tp_len, nr_frags, len_max;
195869e3c75fSJohann Baudy 	struct socket *sock = po->sk.sk_socket;
195969e3c75fSJohann Baudy 	struct page *page;
196069e3c75fSJohann Baudy 	void *data;
196169e3c75fSJohann Baudy 	int err;
196269e3c75fSJohann Baudy 
196369e3c75fSJohann Baudy 	ph.raw = frame;
196469e3c75fSJohann Baudy 
196569e3c75fSJohann Baudy 	skb->protocol = proto;
196669e3c75fSJohann Baudy 	skb->dev = dev;
196769e3c75fSJohann Baudy 	skb->priority = po->sk.sk_priority;
19682d37a186SEric Dumazet 	skb->mark = po->sk.sk_mark;
196969e3c75fSJohann Baudy 	skb_shinfo(skb)->destructor_arg = ph.raw;
197069e3c75fSJohann Baudy 
197169e3c75fSJohann Baudy 	switch (po->tp_version) {
197269e3c75fSJohann Baudy 	case TPACKET_V2:
197369e3c75fSJohann Baudy 		tp_len = ph.h2->tp_len;
197469e3c75fSJohann Baudy 		break;
197569e3c75fSJohann Baudy 	default:
197669e3c75fSJohann Baudy 		tp_len = ph.h1->tp_len;
197769e3c75fSJohann Baudy 		break;
197869e3c75fSJohann Baudy 	}
197969e3c75fSJohann Baudy 	if (unlikely(tp_len > size_max)) {
198040d4e3dfSEric Dumazet 		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
198169e3c75fSJohann Baudy 		return -EMSGSIZE;
198269e3c75fSJohann Baudy 	}
198369e3c75fSJohann Baudy 
1984ae641949SHerbert Xu 	skb_reserve(skb, hlen);
198569e3c75fSJohann Baudy 	skb_reset_network_header(skb);
198669e3c75fSJohann Baudy 
198769e3c75fSJohann Baudy 	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
198869e3c75fSJohann Baudy 	to_write = tp_len;
198969e3c75fSJohann Baudy 
199069e3c75fSJohann Baudy 	if (sock->type == SOCK_DGRAM) {
199169e3c75fSJohann Baudy 		err = dev_hard_header(skb, dev, ntohs(proto), addr,
199269e3c75fSJohann Baudy 				NULL, tp_len);
199369e3c75fSJohann Baudy 		if (unlikely(err < 0))
199469e3c75fSJohann Baudy 			return -EINVAL;
199569e3c75fSJohann Baudy 	} else if (dev->hard_header_len) {
199669e3c75fSJohann Baudy 		/* net device doesn't like empty head */
199769e3c75fSJohann Baudy 		if (unlikely(tp_len <= dev->hard_header_len)) {
199840d4e3dfSEric Dumazet 			pr_err("packet size is too short (%d < %d)\n",
199940d4e3dfSEric Dumazet 			       tp_len, dev->hard_header_len);
200069e3c75fSJohann Baudy 			return -EINVAL;
200169e3c75fSJohann Baudy 		}
200269e3c75fSJohann Baudy 
200369e3c75fSJohann Baudy 		skb_push(skb, dev->hard_header_len);
200469e3c75fSJohann Baudy 		err = skb_store_bits(skb, 0, data,
200569e3c75fSJohann Baudy 				dev->hard_header_len);
200669e3c75fSJohann Baudy 		if (unlikely(err))
200769e3c75fSJohann Baudy 			return err;
200869e3c75fSJohann Baudy 
200969e3c75fSJohann Baudy 		data += dev->hard_header_len;
201069e3c75fSJohann Baudy 		to_write -= dev->hard_header_len;
201169e3c75fSJohann Baudy 	}
201269e3c75fSJohann Baudy 
201369e3c75fSJohann Baudy 	err = -EFAULT;
201469e3c75fSJohann Baudy 	offset = offset_in_page(data);
201569e3c75fSJohann Baudy 	len_max = PAGE_SIZE - offset;
201669e3c75fSJohann Baudy 	len = ((to_write > len_max) ? len_max : to_write);
201769e3c75fSJohann Baudy 
201869e3c75fSJohann Baudy 	skb->data_len = to_write;
201969e3c75fSJohann Baudy 	skb->len += to_write;
202069e3c75fSJohann Baudy 	skb->truesize += to_write;
202169e3c75fSJohann Baudy 	atomic_add(to_write, &po->sk.sk_wmem_alloc);
202269e3c75fSJohann Baudy 
202369e3c75fSJohann Baudy 	while (likely(to_write)) {
202469e3c75fSJohann Baudy 		nr_frags = skb_shinfo(skb)->nr_frags;
202569e3c75fSJohann Baudy 
202669e3c75fSJohann Baudy 		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
202740d4e3dfSEric Dumazet 			pr_err("Packet exceed the number of skb frags(%lu)\n",
202869e3c75fSJohann Baudy 			       MAX_SKB_FRAGS);
202969e3c75fSJohann Baudy 			return -EFAULT;
203069e3c75fSJohann Baudy 		}
203169e3c75fSJohann Baudy 
20320af55bb5SChangli Gao 		page = pgv_to_page(data);
20330af55bb5SChangli Gao 		data += len;
203469e3c75fSJohann Baudy 		flush_dcache_page(page);
203569e3c75fSJohann Baudy 		get_page(page);
20360af55bb5SChangli Gao 		skb_fill_page_desc(skb, nr_frags, page, offset, len);
203769e3c75fSJohann Baudy 		to_write -= len;
203869e3c75fSJohann Baudy 		offset = 0;
203969e3c75fSJohann Baudy 		len_max = PAGE_SIZE;
204069e3c75fSJohann Baudy 		len = ((to_write > len_max) ? len_max : to_write);
204169e3c75fSJohann Baudy 	}
204269e3c75fSJohann Baudy 
204369e3c75fSJohann Baudy 	return tp_len;
204469e3c75fSJohann Baudy }
204569e3c75fSJohann Baudy 
204669e3c75fSJohann Baudy static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
204769e3c75fSJohann Baudy {
204869e3c75fSJohann Baudy 	struct sk_buff *skb;
204969e3c75fSJohann Baudy 	struct net_device *dev;
205069e3c75fSJohann Baudy 	__be16 proto;
2051827d9780SBen Greear 	bool need_rls_dev = false;
2052827d9780SBen Greear 	int err, reserve = 0;
205369e3c75fSJohann Baudy 	void *ph;
205469e3c75fSJohann Baudy 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
205569e3c75fSJohann Baudy 	int tp_len, size_max;
205669e3c75fSJohann Baudy 	unsigned char *addr;
205769e3c75fSJohann Baudy 	int len_sum = 0;
205869e3c75fSJohann Baudy 	int status = 0;
2059ae641949SHerbert Xu 	int hlen, tlen;
206069e3c75fSJohann Baudy 
206169e3c75fSJohann Baudy 	mutex_lock(&po->pg_vec_lock);
206269e3c75fSJohann Baudy 
206369e3c75fSJohann Baudy 	err = -EBUSY;
206469e3c75fSJohann Baudy 	if (saddr == NULL) {
2065827d9780SBen Greear 		dev = po->prot_hook.dev;
206669e3c75fSJohann Baudy 		proto	= po->num;
206769e3c75fSJohann Baudy 		addr	= NULL;
206869e3c75fSJohann Baudy 	} else {
206969e3c75fSJohann Baudy 		err = -EINVAL;
207069e3c75fSJohann Baudy 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
207169e3c75fSJohann Baudy 			goto out;
207269e3c75fSJohann Baudy 		if (msg->msg_namelen < (saddr->sll_halen
207369e3c75fSJohann Baudy 					+ offsetof(struct sockaddr_ll,
207469e3c75fSJohann Baudy 						sll_addr)))
207569e3c75fSJohann Baudy 			goto out;
207669e3c75fSJohann Baudy 		proto	= saddr->sll_protocol;
207769e3c75fSJohann Baudy 		addr	= saddr->sll_addr;
2078827d9780SBen Greear 		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2079827d9780SBen Greear 		need_rls_dev = true;
208069e3c75fSJohann Baudy 	}
208169e3c75fSJohann Baudy 
208269e3c75fSJohann Baudy 	err = -ENXIO;
208369e3c75fSJohann Baudy 	if (unlikely(dev == NULL))
208469e3c75fSJohann Baudy 		goto out;
208569e3c75fSJohann Baudy 
208669e3c75fSJohann Baudy 	reserve = dev->hard_header_len;
208769e3c75fSJohann Baudy 
208869e3c75fSJohann Baudy 	err = -ENETDOWN;
208969e3c75fSJohann Baudy 	if (unlikely(!(dev->flags & IFF_UP)))
209069e3c75fSJohann Baudy 		goto out_put;
209169e3c75fSJohann Baudy 
209269e3c75fSJohann Baudy 	size_max = po->tx_ring.frame_size
2093b5dd884eSGabor Gombas 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
209469e3c75fSJohann Baudy 
209569e3c75fSJohann Baudy 	if (size_max > dev->mtu + reserve)
209669e3c75fSJohann Baudy 		size_max = dev->mtu + reserve;
209769e3c75fSJohann Baudy 
209869e3c75fSJohann Baudy 	do {
209969e3c75fSJohann Baudy 		ph = packet_current_frame(po, &po->tx_ring,
210069e3c75fSJohann Baudy 				TP_STATUS_SEND_REQUEST);
210169e3c75fSJohann Baudy 
210269e3c75fSJohann Baudy 		if (unlikely(ph == NULL)) {
210369e3c75fSJohann Baudy 			schedule();
210469e3c75fSJohann Baudy 			continue;
210569e3c75fSJohann Baudy 		}
210669e3c75fSJohann Baudy 
210769e3c75fSJohann Baudy 		status = TP_STATUS_SEND_REQUEST;
2108ae641949SHerbert Xu 		hlen = LL_RESERVED_SPACE(dev);
2109ae641949SHerbert Xu 		tlen = dev->needed_tailroom;
211069e3c75fSJohann Baudy 		skb = sock_alloc_send_skb(&po->sk,
2111ae641949SHerbert Xu 				hlen + tlen + sizeof(struct sockaddr_ll),
211269e3c75fSJohann Baudy 				0, &err);
211369e3c75fSJohann Baudy 
211469e3c75fSJohann Baudy 		if (unlikely(skb == NULL))
211569e3c75fSJohann Baudy 			goto out_status;
211669e3c75fSJohann Baudy 
211769e3c75fSJohann Baudy 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
2118ae641949SHerbert Xu 				addr, hlen);
211969e3c75fSJohann Baudy 
212069e3c75fSJohann Baudy 		if (unlikely(tp_len < 0)) {
212169e3c75fSJohann Baudy 			if (po->tp_loss) {
212269e3c75fSJohann Baudy 				__packet_set_status(po, ph,
212369e3c75fSJohann Baudy 						TP_STATUS_AVAILABLE);
212469e3c75fSJohann Baudy 				packet_increment_head(&po->tx_ring);
212569e3c75fSJohann Baudy 				kfree_skb(skb);
212669e3c75fSJohann Baudy 				continue;
212769e3c75fSJohann Baudy 			} else {
212869e3c75fSJohann Baudy 				status = TP_STATUS_WRONG_FORMAT;
212969e3c75fSJohann Baudy 				err = tp_len;
213069e3c75fSJohann Baudy 				goto out_status;
213169e3c75fSJohann Baudy 			}
213269e3c75fSJohann Baudy 		}
213369e3c75fSJohann Baudy 
213469e3c75fSJohann Baudy 		skb->destructor = tpacket_destruct_skb;
213569e3c75fSJohann Baudy 		__packet_set_status(po, ph, TP_STATUS_SENDING);
213669e3c75fSJohann Baudy 		atomic_inc(&po->tx_ring.pending);
213769e3c75fSJohann Baudy 
213869e3c75fSJohann Baudy 		status = TP_STATUS_SEND_REQUEST;
213969e3c75fSJohann Baudy 		err = dev_queue_xmit(skb);
2140eb70df13SJarek Poplawski 		if (unlikely(err > 0)) {
2141eb70df13SJarek Poplawski 			err = net_xmit_errno(err);
2142eb70df13SJarek Poplawski 			if (err && __packet_get_status(po, ph) ==
2143eb70df13SJarek Poplawski 				   TP_STATUS_AVAILABLE) {
2144eb70df13SJarek Poplawski 				/* skb was destructed already */
2145eb70df13SJarek Poplawski 				skb = NULL;
2146eb70df13SJarek Poplawski 				goto out_status;
2147eb70df13SJarek Poplawski 			}
2148eb70df13SJarek Poplawski 			/*
2149eb70df13SJarek Poplawski 			 * skb was dropped but not destructed yet;
2150eb70df13SJarek Poplawski 			 * let's treat it like congestion or err < 0
2151eb70df13SJarek Poplawski 			 */
2152eb70df13SJarek Poplawski 			err = 0;
2153eb70df13SJarek Poplawski 		}
215469e3c75fSJohann Baudy 		packet_increment_head(&po->tx_ring);
215569e3c75fSJohann Baudy 		len_sum += tp_len;
2156f64f9e71SJoe Perches 	} while (likely((ph != NULL) ||
2157f64f9e71SJoe Perches 			((!(msg->msg_flags & MSG_DONTWAIT)) &&
2158f64f9e71SJoe Perches 			 (atomic_read(&po->tx_ring.pending))))
215969e3c75fSJohann Baudy 		);
216069e3c75fSJohann Baudy 
216169e3c75fSJohann Baudy 	err = len_sum;
216269e3c75fSJohann Baudy 	goto out_put;
216369e3c75fSJohann Baudy 
216469e3c75fSJohann Baudy out_status:
216569e3c75fSJohann Baudy 	__packet_set_status(po, ph, status);
216669e3c75fSJohann Baudy 	kfree_skb(skb);
216769e3c75fSJohann Baudy out_put:
2168827d9780SBen Greear 	if (need_rls_dev)
216969e3c75fSJohann Baudy 		dev_put(dev);
217069e3c75fSJohann Baudy out:
217169e3c75fSJohann Baudy 	mutex_unlock(&po->pg_vec_lock);
217269e3c75fSJohann Baudy 	return err;
217369e3c75fSJohann Baudy }
21741da177e4SLinus Torvalds 
2175eea49cc9SOlof Johansson static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2176bfd5f4a3SSridhar Samudrala 				        size_t reserve, size_t len,
2177bfd5f4a3SSridhar Samudrala 				        size_t linear, int noblock,
2178bfd5f4a3SSridhar Samudrala 				        int *err)
2179bfd5f4a3SSridhar Samudrala {
2180bfd5f4a3SSridhar Samudrala 	struct sk_buff *skb;
2181bfd5f4a3SSridhar Samudrala 
2182bfd5f4a3SSridhar Samudrala 	/* Under a page?  Don't bother with paged skb. */
2183bfd5f4a3SSridhar Samudrala 	if (prepad + len < PAGE_SIZE || !linear)
2184bfd5f4a3SSridhar Samudrala 		linear = len;
2185bfd5f4a3SSridhar Samudrala 
2186bfd5f4a3SSridhar Samudrala 	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2187bfd5f4a3SSridhar Samudrala 				   err);
2188bfd5f4a3SSridhar Samudrala 	if (!skb)
2189bfd5f4a3SSridhar Samudrala 		return NULL;
2190bfd5f4a3SSridhar Samudrala 
2191bfd5f4a3SSridhar Samudrala 	skb_reserve(skb, reserve);
2192bfd5f4a3SSridhar Samudrala 	skb_put(skb, linear);
2193bfd5f4a3SSridhar Samudrala 	skb->data_len = len - linear;
2194bfd5f4a3SSridhar Samudrala 	skb->len += len - linear;
2195bfd5f4a3SSridhar Samudrala 
2196bfd5f4a3SSridhar Samudrala 	return skb;
2197bfd5f4a3SSridhar Samudrala }
2198bfd5f4a3SSridhar Samudrala 
219969e3c75fSJohann Baudy static int packet_snd(struct socket *sock,
22001da177e4SLinus Torvalds 			  struct msghdr *msg, size_t len)
22011da177e4SLinus Torvalds {
22021da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
22031da177e4SLinus Torvalds 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
22041da177e4SLinus Torvalds 	struct sk_buff *skb;
22051da177e4SLinus Torvalds 	struct net_device *dev;
22060e11c91eSAl Viro 	__be16 proto;
2207827d9780SBen Greear 	bool need_rls_dev = false;
22081da177e4SLinus Torvalds 	unsigned char *addr;
2209827d9780SBen Greear 	int err, reserve = 0;
2210bfd5f4a3SSridhar Samudrala 	struct virtio_net_hdr vnet_hdr = { 0 };
2211bfd5f4a3SSridhar Samudrala 	int offset = 0;
2212bfd5f4a3SSridhar Samudrala 	int vnet_hdr_len;
2213bfd5f4a3SSridhar Samudrala 	struct packet_sock *po = pkt_sk(sk);
2214bfd5f4a3SSridhar Samudrala 	unsigned short gso_type = 0;
2215ae641949SHerbert Xu 	int hlen, tlen;
22163bdc0ebaSBen Greear 	int extra_len = 0;
22171da177e4SLinus Torvalds 
22181da177e4SLinus Torvalds 	/*
22191da177e4SLinus Torvalds 	 *	Get and verify the address.
22201da177e4SLinus Torvalds 	 */
22211da177e4SLinus Torvalds 
22221da177e4SLinus Torvalds 	if (saddr == NULL) {
2223827d9780SBen Greear 		dev = po->prot_hook.dev;
22241da177e4SLinus Torvalds 		proto	= po->num;
22251da177e4SLinus Torvalds 		addr	= NULL;
22261da177e4SLinus Torvalds 	} else {
22271da177e4SLinus Torvalds 		err = -EINVAL;
22281da177e4SLinus Torvalds 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
22291da177e4SLinus Torvalds 			goto out;
22300fb375fbSEric W. Biederman 		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
22310fb375fbSEric W. Biederman 			goto out;
22321da177e4SLinus Torvalds 		proto	= saddr->sll_protocol;
22331da177e4SLinus Torvalds 		addr	= saddr->sll_addr;
2234827d9780SBen Greear 		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2235827d9780SBen Greear 		need_rls_dev = true;
22361da177e4SLinus Torvalds 	}
22371da177e4SLinus Torvalds 
22381da177e4SLinus Torvalds 	err = -ENXIO;
22391da177e4SLinus Torvalds 	if (dev == NULL)
22401da177e4SLinus Torvalds 		goto out_unlock;
22411da177e4SLinus Torvalds 	if (sock->type == SOCK_RAW)
22421da177e4SLinus Torvalds 		reserve = dev->hard_header_len;
22431da177e4SLinus Torvalds 
2244d5e76b0aSDavid S. Miller 	err = -ENETDOWN;
2245d5e76b0aSDavid S. Miller 	if (!(dev->flags & IFF_UP))
2246d5e76b0aSDavid S. Miller 		goto out_unlock;
2247d5e76b0aSDavid S. Miller 
2248bfd5f4a3SSridhar Samudrala 	if (po->has_vnet_hdr) {
2249bfd5f4a3SSridhar Samudrala 		vnet_hdr_len = sizeof(vnet_hdr);
2250bfd5f4a3SSridhar Samudrala 
2251bfd5f4a3SSridhar Samudrala 		err = -EINVAL;
2252bfd5f4a3SSridhar Samudrala 		if (len < vnet_hdr_len)
22531da177e4SLinus Torvalds 			goto out_unlock;
22541da177e4SLinus Torvalds 
2255bfd5f4a3SSridhar Samudrala 		len -= vnet_hdr_len;
2256bfd5f4a3SSridhar Samudrala 
2257bfd5f4a3SSridhar Samudrala 		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
2258bfd5f4a3SSridhar Samudrala 				       vnet_hdr_len);
2259bfd5f4a3SSridhar Samudrala 		if (err < 0)
2260bfd5f4a3SSridhar Samudrala 			goto out_unlock;
2261bfd5f4a3SSridhar Samudrala 
2262bfd5f4a3SSridhar Samudrala 		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2263bfd5f4a3SSridhar Samudrala 		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
2264bfd5f4a3SSridhar Samudrala 		      vnet_hdr.hdr_len))
2265bfd5f4a3SSridhar Samudrala 			vnet_hdr.hdr_len = vnet_hdr.csum_start +
2266bfd5f4a3SSridhar Samudrala 						 vnet_hdr.csum_offset + 2;
2267bfd5f4a3SSridhar Samudrala 
2268bfd5f4a3SSridhar Samudrala 		err = -EINVAL;
2269bfd5f4a3SSridhar Samudrala 		if (vnet_hdr.hdr_len > len)
2270bfd5f4a3SSridhar Samudrala 			goto out_unlock;
2271bfd5f4a3SSridhar Samudrala 
2272bfd5f4a3SSridhar Samudrala 		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2273bfd5f4a3SSridhar Samudrala 			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2274bfd5f4a3SSridhar Samudrala 			case VIRTIO_NET_HDR_GSO_TCPV4:
2275bfd5f4a3SSridhar Samudrala 				gso_type = SKB_GSO_TCPV4;
2276bfd5f4a3SSridhar Samudrala 				break;
2277bfd5f4a3SSridhar Samudrala 			case VIRTIO_NET_HDR_GSO_TCPV6:
2278bfd5f4a3SSridhar Samudrala 				gso_type = SKB_GSO_TCPV6;
2279bfd5f4a3SSridhar Samudrala 				break;
2280bfd5f4a3SSridhar Samudrala 			case VIRTIO_NET_HDR_GSO_UDP:
2281bfd5f4a3SSridhar Samudrala 				gso_type = SKB_GSO_UDP;
2282bfd5f4a3SSridhar Samudrala 				break;
2283bfd5f4a3SSridhar Samudrala 			default:
2284bfd5f4a3SSridhar Samudrala 				goto out_unlock;
2285bfd5f4a3SSridhar Samudrala 			}
2286bfd5f4a3SSridhar Samudrala 
2287bfd5f4a3SSridhar Samudrala 			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
2288bfd5f4a3SSridhar Samudrala 				gso_type |= SKB_GSO_TCP_ECN;
2289bfd5f4a3SSridhar Samudrala 
2290bfd5f4a3SSridhar Samudrala 			if (vnet_hdr.gso_size == 0)
2291bfd5f4a3SSridhar Samudrala 				goto out_unlock;
2292bfd5f4a3SSridhar Samudrala 
2293bfd5f4a3SSridhar Samudrala 		}
2294bfd5f4a3SSridhar Samudrala 	}
2295bfd5f4a3SSridhar Samudrala 
22963bdc0ebaSBen Greear 	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
22973bdc0ebaSBen Greear 		if (!netif_supports_nofcs(dev)) {
22983bdc0ebaSBen Greear 			err = -EPROTONOSUPPORT;
22993bdc0ebaSBen Greear 			goto out_unlock;
23003bdc0ebaSBen Greear 		}
23013bdc0ebaSBen Greear 		extra_len = 4; /* We're doing our own CRC */
23023bdc0ebaSBen Greear 	}
23033bdc0ebaSBen Greear 
2304bfd5f4a3SSridhar Samudrala 	err = -EMSGSIZE;
23053bdc0ebaSBen Greear 	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2306bfd5f4a3SSridhar Samudrala 		goto out_unlock;
2307bfd5f4a3SSridhar Samudrala 
2308bfd5f4a3SSridhar Samudrala 	err = -ENOBUFS;
2309ae641949SHerbert Xu 	hlen = LL_RESERVED_SPACE(dev);
2310ae641949SHerbert Xu 	tlen = dev->needed_tailroom;
2311ae641949SHerbert Xu 	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len,
23121da177e4SLinus Torvalds 			       msg->msg_flags & MSG_DONTWAIT, &err);
23131da177e4SLinus Torvalds 	if (skb == NULL)
23141da177e4SLinus Torvalds 		goto out_unlock;
23151da177e4SLinus Torvalds 
2316bfd5f4a3SSridhar Samudrala 	skb_set_network_header(skb, reserve);
23171da177e4SLinus Torvalds 
23181da177e4SLinus Torvalds 	err = -EINVAL;
23190c4e8581SStephen Hemminger 	if (sock->type == SOCK_DGRAM &&
2320bfd5f4a3SSridhar Samudrala 	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
23211da177e4SLinus Torvalds 		goto out_free;
23221da177e4SLinus Torvalds 
23231da177e4SLinus Torvalds 	/* Returns -EFAULT on error */
2324bfd5f4a3SSridhar Samudrala 	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
23251da177e4SLinus Torvalds 	if (err)
23261da177e4SLinus Torvalds 		goto out_free;
23272244d07bSOliver Hartkopp 	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2328ed85b565SRichard Cochran 	if (err < 0)
2329ed85b565SRichard Cochran 		goto out_free;
23301da177e4SLinus Torvalds 
23313bdc0ebaSBen Greear 	if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
233257f89bfaSBen Greear 		/* Earlier code assumed this would be a VLAN pkt,
233357f89bfaSBen Greear 		 * double-check this now that we have the actual
233457f89bfaSBen Greear 		 * packet in hand.
233557f89bfaSBen Greear 		 */
233657f89bfaSBen Greear 		struct ethhdr *ehdr;
233757f89bfaSBen Greear 		skb_reset_mac_header(skb);
233857f89bfaSBen Greear 		ehdr = eth_hdr(skb);
233957f89bfaSBen Greear 		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
234057f89bfaSBen Greear 			err = -EMSGSIZE;
234157f89bfaSBen Greear 			goto out_free;
234257f89bfaSBen Greear 		}
234357f89bfaSBen Greear 	}
234457f89bfaSBen Greear 
23451da177e4SLinus Torvalds 	skb->protocol = proto;
23461da177e4SLinus Torvalds 	skb->dev = dev;
23471da177e4SLinus Torvalds 	skb->priority = sk->sk_priority;
23482d37a186SEric Dumazet 	skb->mark = sk->sk_mark;
23491da177e4SLinus Torvalds 
2350bfd5f4a3SSridhar Samudrala 	if (po->has_vnet_hdr) {
2351bfd5f4a3SSridhar Samudrala 		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2352bfd5f4a3SSridhar Samudrala 			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
2353bfd5f4a3SSridhar Samudrala 						  vnet_hdr.csum_offset)) {
2354bfd5f4a3SSridhar Samudrala 				err = -EINVAL;
2355bfd5f4a3SSridhar Samudrala 				goto out_free;
2356bfd5f4a3SSridhar Samudrala 			}
2357bfd5f4a3SSridhar Samudrala 		}
2358bfd5f4a3SSridhar Samudrala 
2359bfd5f4a3SSridhar Samudrala 		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
2360bfd5f4a3SSridhar Samudrala 		skb_shinfo(skb)->gso_type = gso_type;
2361bfd5f4a3SSridhar Samudrala 
2362bfd5f4a3SSridhar Samudrala 		/* Header must be checked, and gso_segs computed. */
2363bfd5f4a3SSridhar Samudrala 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2364bfd5f4a3SSridhar Samudrala 		skb_shinfo(skb)->gso_segs = 0;
2365bfd5f4a3SSridhar Samudrala 
2366bfd5f4a3SSridhar Samudrala 		len += vnet_hdr_len;
2367bfd5f4a3SSridhar Samudrala 	}
2368bfd5f4a3SSridhar Samudrala 
23693bdc0ebaSBen Greear 	if (unlikely(extra_len == 4))
23703bdc0ebaSBen Greear 		skb->no_fcs = 1;
23713bdc0ebaSBen Greear 
23721da177e4SLinus Torvalds 	/*
23731da177e4SLinus Torvalds 	 *	Now send it
23741da177e4SLinus Torvalds 	 */
23751da177e4SLinus Torvalds 
23761da177e4SLinus Torvalds 	err = dev_queue_xmit(skb);
23771da177e4SLinus Torvalds 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
23781da177e4SLinus Torvalds 		goto out_unlock;
23791da177e4SLinus Torvalds 
2380827d9780SBen Greear 	if (need_rls_dev)
23811da177e4SLinus Torvalds 		dev_put(dev);
23821da177e4SLinus Torvalds 
238340d4e3dfSEric Dumazet 	return len;
23841da177e4SLinus Torvalds 
23851da177e4SLinus Torvalds out_free:
23861da177e4SLinus Torvalds 	kfree_skb(skb);
23871da177e4SLinus Torvalds out_unlock:
2388827d9780SBen Greear 	if (dev && need_rls_dev)
23891da177e4SLinus Torvalds 		dev_put(dev);
23901da177e4SLinus Torvalds out:
23911da177e4SLinus Torvalds 	return err;
23921da177e4SLinus Torvalds }
23931da177e4SLinus Torvalds 
239469e3c75fSJohann Baudy static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
239569e3c75fSJohann Baudy 		struct msghdr *msg, size_t len)
239669e3c75fSJohann Baudy {
239769e3c75fSJohann Baudy 	struct sock *sk = sock->sk;
239869e3c75fSJohann Baudy 	struct packet_sock *po = pkt_sk(sk);
239969e3c75fSJohann Baudy 	if (po->tx_ring.pg_vec)
240069e3c75fSJohann Baudy 		return tpacket_snd(po, msg);
240169e3c75fSJohann Baudy 	else
240269e3c75fSJohann Baudy 		return packet_snd(sock, msg, len);
240369e3c75fSJohann Baudy }
240469e3c75fSJohann Baudy 
24051da177e4SLinus Torvalds /*
24061da177e4SLinus Torvalds  *	Close a PACKET socket. This is fairly simple. We immediately go
24071da177e4SLinus Torvalds  *	to 'closed' state and remove our protocol entry in the device list.
24081da177e4SLinus Torvalds  */
24091da177e4SLinus Torvalds 
24101da177e4SLinus Torvalds static int packet_release(struct socket *sock)
24111da177e4SLinus Torvalds {
24121da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
24131da177e4SLinus Torvalds 	struct packet_sock *po;
2414d12d01d6SDenis V. Lunev 	struct net *net;
2415f6fb8f10Schetan loke 	union tpacket_req_u req_u;
24161da177e4SLinus Torvalds 
24171da177e4SLinus Torvalds 	if (!sk)
24181da177e4SLinus Torvalds 		return 0;
24191da177e4SLinus Torvalds 
24203b1e0a65SYOSHIFUJI Hideaki 	net = sock_net(sk);
24211da177e4SLinus Torvalds 	po = pkt_sk(sk);
24221da177e4SLinus Torvalds 
2423808f5114Sstephen hemminger 	spin_lock_bh(&net->packet.sklist_lock);
2424808f5114Sstephen hemminger 	sk_del_node_init_rcu(sk);
2425920de804SEric Dumazet 	sock_prot_inuse_add(net, sk->sk_prot, -1);
2426808f5114Sstephen hemminger 	spin_unlock_bh(&net->packet.sklist_lock);
24271da177e4SLinus Torvalds 
2428808f5114Sstephen hemminger 	spin_lock(&po->bind_lock);
2429ce06b03eSDavid S. Miller 	unregister_prot_hook(sk, false);
2430160ff18aSBen Greear 	if (po->prot_hook.dev) {
2431160ff18aSBen Greear 		dev_put(po->prot_hook.dev);
2432160ff18aSBen Greear 		po->prot_hook.dev = NULL;
2433160ff18aSBen Greear 	}
2434808f5114Sstephen hemminger 	spin_unlock(&po->bind_lock);
24351da177e4SLinus Torvalds 
24361da177e4SLinus Torvalds 	packet_flush_mclist(sk);
24371da177e4SLinus Torvalds 
2438f6fb8f10Schetan loke 	memset(&req_u, 0, sizeof(req_u));
243969e3c75fSJohann Baudy 
244069e3c75fSJohann Baudy 	if (po->rx_ring.pg_vec)
2441f6fb8f10Schetan loke 		packet_set_ring(sk, &req_u, 1, 0);
244269e3c75fSJohann Baudy 
244369e3c75fSJohann Baudy 	if (po->tx_ring.pg_vec)
2444f6fb8f10Schetan loke 		packet_set_ring(sk, &req_u, 1, 1);
24451da177e4SLinus Torvalds 
2446dc99f600SDavid S. Miller 	fanout_release(sk);
2447dc99f600SDavid S. Miller 
2448808f5114Sstephen hemminger 	synchronize_net();
24491da177e4SLinus Torvalds 	/*
24501da177e4SLinus Torvalds 	 *	Now the socket is dead. No more input will appear.
24511da177e4SLinus Torvalds 	 */
24521da177e4SLinus Torvalds 	sock_orphan(sk);
24531da177e4SLinus Torvalds 	sock->sk = NULL;
24541da177e4SLinus Torvalds 
24551da177e4SLinus Torvalds 	/* Purge queues */
24561da177e4SLinus Torvalds 
24571da177e4SLinus Torvalds 	skb_queue_purge(&sk->sk_receive_queue);
245817ab56a2SPavel Emelyanov 	sk_refcnt_debug_release(sk);
24591da177e4SLinus Torvalds 
24601da177e4SLinus Torvalds 	sock_put(sk);
24611da177e4SLinus Torvalds 	return 0;
24621da177e4SLinus Torvalds }
24631da177e4SLinus Torvalds 
24641da177e4SLinus Torvalds /*
24651da177e4SLinus Torvalds  *	Attach a packet hook.
24661da177e4SLinus Torvalds  */
24671da177e4SLinus Torvalds 
24680e11c91eSAl Viro static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
24691da177e4SLinus Torvalds {
24701da177e4SLinus Torvalds 	struct packet_sock *po = pkt_sk(sk);
2471dc99f600SDavid S. Miller 
2472aef950b4SWei Yongjun 	if (po->fanout) {
2473aef950b4SWei Yongjun 		if (dev)
2474aef950b4SWei Yongjun 			dev_put(dev);
2475aef950b4SWei Yongjun 
2476dc99f600SDavid S. Miller 		return -EINVAL;
2477aef950b4SWei Yongjun 	}
24781da177e4SLinus Torvalds 
24791da177e4SLinus Torvalds 	lock_sock(sk);
24801da177e4SLinus Torvalds 
24811da177e4SLinus Torvalds 	spin_lock(&po->bind_lock);
2482ce06b03eSDavid S. Miller 	unregister_prot_hook(sk, true);
24831da177e4SLinus Torvalds 	po->num = protocol;
24841da177e4SLinus Torvalds 	po->prot_hook.type = protocol;
2485160ff18aSBen Greear 	if (po->prot_hook.dev)
2486160ff18aSBen Greear 		dev_put(po->prot_hook.dev);
24871da177e4SLinus Torvalds 	po->prot_hook.dev = dev;
24881da177e4SLinus Torvalds 
24891da177e4SLinus Torvalds 	po->ifindex = dev ? dev->ifindex : 0;
24901da177e4SLinus Torvalds 
24911da177e4SLinus Torvalds 	if (protocol == 0)
24921da177e4SLinus Torvalds 		goto out_unlock;
24931da177e4SLinus Torvalds 
2494be85d4adSUrs Thuermann 	if (!dev || (dev->flags & IFF_UP)) {
2495ce06b03eSDavid S. Miller 		register_prot_hook(sk);
24961da177e4SLinus Torvalds 	} else {
24971da177e4SLinus Torvalds 		sk->sk_err = ENETDOWN;
24981da177e4SLinus Torvalds 		if (!sock_flag(sk, SOCK_DEAD))
24991da177e4SLinus Torvalds 			sk->sk_error_report(sk);
25001da177e4SLinus Torvalds 	}
25011da177e4SLinus Torvalds 
25021da177e4SLinus Torvalds out_unlock:
25031da177e4SLinus Torvalds 	spin_unlock(&po->bind_lock);
25041da177e4SLinus Torvalds 	release_sock(sk);
25051da177e4SLinus Torvalds 	return 0;
25061da177e4SLinus Torvalds }
25071da177e4SLinus Torvalds 
25081da177e4SLinus Torvalds /*
25091da177e4SLinus Torvalds  *	Bind a packet socket to a device
25101da177e4SLinus Torvalds  */
25111da177e4SLinus Torvalds 
251240d4e3dfSEric Dumazet static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
251340d4e3dfSEric Dumazet 			    int addr_len)
25141da177e4SLinus Torvalds {
25151da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
25161da177e4SLinus Torvalds 	char name[15];
25171da177e4SLinus Torvalds 	struct net_device *dev;
25181da177e4SLinus Torvalds 	int err = -ENODEV;
25191da177e4SLinus Torvalds 
25201da177e4SLinus Torvalds 	/*
25211da177e4SLinus Torvalds 	 *	Check legality
25221da177e4SLinus Torvalds 	 */
25231da177e4SLinus Torvalds 
25241da177e4SLinus Torvalds 	if (addr_len != sizeof(struct sockaddr))
25251da177e4SLinus Torvalds 		return -EINVAL;
25261da177e4SLinus Torvalds 	strlcpy(name, uaddr->sa_data, sizeof(name));
25271da177e4SLinus Torvalds 
25283b1e0a65SYOSHIFUJI Hideaki 	dev = dev_get_by_name(sock_net(sk), name);
2529160ff18aSBen Greear 	if (dev)
25301da177e4SLinus Torvalds 		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
25311da177e4SLinus Torvalds 	return err;
25321da177e4SLinus Torvalds }
25331da177e4SLinus Torvalds 
25341da177e4SLinus Torvalds static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
25351da177e4SLinus Torvalds {
25361da177e4SLinus Torvalds 	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
25371da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
25381da177e4SLinus Torvalds 	struct net_device *dev = NULL;
25391da177e4SLinus Torvalds 	int err;
25401da177e4SLinus Torvalds 
25411da177e4SLinus Torvalds 
25421da177e4SLinus Torvalds 	/*
25431da177e4SLinus Torvalds 	 *	Check legality
25441da177e4SLinus Torvalds 	 */
25451da177e4SLinus Torvalds 
25461da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_ll))
25471da177e4SLinus Torvalds 		return -EINVAL;
25481da177e4SLinus Torvalds 	if (sll->sll_family != AF_PACKET)
25491da177e4SLinus Torvalds 		return -EINVAL;
25501da177e4SLinus Torvalds 
25511da177e4SLinus Torvalds 	if (sll->sll_ifindex) {
25521da177e4SLinus Torvalds 		err = -ENODEV;
25533b1e0a65SYOSHIFUJI Hideaki 		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
25541da177e4SLinus Torvalds 		if (dev == NULL)
25551da177e4SLinus Torvalds 			goto out;
25561da177e4SLinus Torvalds 	}
25571da177e4SLinus Torvalds 	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
25581da177e4SLinus Torvalds 
25591da177e4SLinus Torvalds out:
25601da177e4SLinus Torvalds 	return err;
25611da177e4SLinus Torvalds }
25621da177e4SLinus Torvalds 
25631da177e4SLinus Torvalds static struct proto packet_proto = {
25641da177e4SLinus Torvalds 	.name	  = "PACKET",
25651da177e4SLinus Torvalds 	.owner	  = THIS_MODULE,
25661da177e4SLinus Torvalds 	.obj_size = sizeof(struct packet_sock),
25671da177e4SLinus Torvalds };
25681da177e4SLinus Torvalds 
25691da177e4SLinus Torvalds /*
25701da177e4SLinus Torvalds  *	Create a packet of type SOCK_PACKET.
25711da177e4SLinus Torvalds  */
25721da177e4SLinus Torvalds 
25733f378b68SEric Paris static int packet_create(struct net *net, struct socket *sock, int protocol,
25743f378b68SEric Paris 			 int kern)
25751da177e4SLinus Torvalds {
25761da177e4SLinus Torvalds 	struct sock *sk;
25771da177e4SLinus Torvalds 	struct packet_sock *po;
25780e11c91eSAl Viro 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
25791da177e4SLinus Torvalds 	int err;
25801da177e4SLinus Torvalds 
25811da177e4SLinus Torvalds 	if (!capable(CAP_NET_RAW))
25821da177e4SLinus Torvalds 		return -EPERM;
2583be02097cSDavid S. Miller 	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
2584be02097cSDavid S. Miller 	    sock->type != SOCK_PACKET)
25851da177e4SLinus Torvalds 		return -ESOCKTNOSUPPORT;
25861da177e4SLinus Torvalds 
25871da177e4SLinus Torvalds 	sock->state = SS_UNCONNECTED;
25881da177e4SLinus Torvalds 
25891da177e4SLinus Torvalds 	err = -ENOBUFS;
25906257ff21SPavel Emelyanov 	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
25911da177e4SLinus Torvalds 	if (sk == NULL)
25921da177e4SLinus Torvalds 		goto out;
25931da177e4SLinus Torvalds 
25941da177e4SLinus Torvalds 	sock->ops = &packet_ops;
25951da177e4SLinus Torvalds 	if (sock->type == SOCK_PACKET)
25961da177e4SLinus Torvalds 		sock->ops = &packet_ops_spkt;
2597be02097cSDavid S. Miller 
25981da177e4SLinus Torvalds 	sock_init_data(sock, sk);
25991da177e4SLinus Torvalds 
26001da177e4SLinus Torvalds 	po = pkt_sk(sk);
26011da177e4SLinus Torvalds 	sk->sk_family = PF_PACKET;
26020e11c91eSAl Viro 	po->num = proto;
26031da177e4SLinus Torvalds 
26041da177e4SLinus Torvalds 	sk->sk_destruct = packet_sock_destruct;
260517ab56a2SPavel Emelyanov 	sk_refcnt_debug_inc(sk);
26061da177e4SLinus Torvalds 
26071da177e4SLinus Torvalds 	/*
26081da177e4SLinus Torvalds 	 *	Attach a protocol block
26091da177e4SLinus Torvalds 	 */
26101da177e4SLinus Torvalds 
26111da177e4SLinus Torvalds 	spin_lock_init(&po->bind_lock);
2612905db440SHerbert Xu 	mutex_init(&po->pg_vec_lock);
26131da177e4SLinus Torvalds 	po->prot_hook.func = packet_rcv;
2614be02097cSDavid S. Miller 
26151da177e4SLinus Torvalds 	if (sock->type == SOCK_PACKET)
26161da177e4SLinus Torvalds 		po->prot_hook.func = packet_rcv_spkt;
2617be02097cSDavid S. Miller 
26181da177e4SLinus Torvalds 	po->prot_hook.af_packet_priv = sk;
26191da177e4SLinus Torvalds 
26200e11c91eSAl Viro 	if (proto) {
26210e11c91eSAl Viro 		po->prot_hook.type = proto;
2622ce06b03eSDavid S. Miller 		register_prot_hook(sk);
26231da177e4SLinus Torvalds 	}
26241da177e4SLinus Torvalds 
2625808f5114Sstephen hemminger 	spin_lock_bh(&net->packet.sklist_lock);
2626808f5114Sstephen hemminger 	sk_add_node_rcu(sk, &net->packet.sklist);
26273680453cSEric Dumazet 	sock_prot_inuse_add(net, &packet_proto, 1);
2628808f5114Sstephen hemminger 	spin_unlock_bh(&net->packet.sklist_lock);
2629808f5114Sstephen hemminger 
263040d4e3dfSEric Dumazet 	return 0;
26311da177e4SLinus Torvalds out:
26321da177e4SLinus Torvalds 	return err;
26331da177e4SLinus Torvalds }
26341da177e4SLinus Torvalds 
2635ed85b565SRichard Cochran static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
2636ed85b565SRichard Cochran {
2637ed85b565SRichard Cochran 	struct sock_exterr_skb *serr;
2638ed85b565SRichard Cochran 	struct sk_buff *skb, *skb2;
2639ed85b565SRichard Cochran 	int copied, err;
2640ed85b565SRichard Cochran 
2641ed85b565SRichard Cochran 	err = -EAGAIN;
2642ed85b565SRichard Cochran 	skb = skb_dequeue(&sk->sk_error_queue);
2643ed85b565SRichard Cochran 	if (skb == NULL)
2644ed85b565SRichard Cochran 		goto out;
2645ed85b565SRichard Cochran 
2646ed85b565SRichard Cochran 	copied = skb->len;
2647ed85b565SRichard Cochran 	if (copied > len) {
2648ed85b565SRichard Cochran 		msg->msg_flags |= MSG_TRUNC;
2649ed85b565SRichard Cochran 		copied = len;
2650ed85b565SRichard Cochran 	}
2651ed85b565SRichard Cochran 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2652ed85b565SRichard Cochran 	if (err)
2653ed85b565SRichard Cochran 		goto out_free_skb;
2654ed85b565SRichard Cochran 
2655ed85b565SRichard Cochran 	sock_recv_timestamp(msg, sk, skb);
2656ed85b565SRichard Cochran 
2657ed85b565SRichard Cochran 	serr = SKB_EXT_ERR(skb);
2658ed85b565SRichard Cochran 	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
2659ed85b565SRichard Cochran 		 sizeof(serr->ee), &serr->ee);
2660ed85b565SRichard Cochran 
2661ed85b565SRichard Cochran 	msg->msg_flags |= MSG_ERRQUEUE;
2662ed85b565SRichard Cochran 	err = copied;
2663ed85b565SRichard Cochran 
2664ed85b565SRichard Cochran 	/* Reset and regenerate socket error */
2665ed85b565SRichard Cochran 	spin_lock_bh(&sk->sk_error_queue.lock);
2666ed85b565SRichard Cochran 	sk->sk_err = 0;
2667ed85b565SRichard Cochran 	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2668ed85b565SRichard Cochran 		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2669ed85b565SRichard Cochran 		spin_unlock_bh(&sk->sk_error_queue.lock);
2670ed85b565SRichard Cochran 		sk->sk_error_report(sk);
2671ed85b565SRichard Cochran 	} else
2672ed85b565SRichard Cochran 		spin_unlock_bh(&sk->sk_error_queue.lock);
2673ed85b565SRichard Cochran 
2674ed85b565SRichard Cochran out_free_skb:
2675ed85b565SRichard Cochran 	kfree_skb(skb);
2676ed85b565SRichard Cochran out:
2677ed85b565SRichard Cochran 	return err;
2678ed85b565SRichard Cochran }
2679ed85b565SRichard Cochran 
26801da177e4SLinus Torvalds /*
26811da177e4SLinus Torvalds  *	Pull a packet from our receive queue and hand it to the user.
26821da177e4SLinus Torvalds  *	If necessary we block.
26831da177e4SLinus Torvalds  */
26841da177e4SLinus Torvalds 
26851da177e4SLinus Torvalds static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
26861da177e4SLinus Torvalds 			  struct msghdr *msg, size_t len, int flags)
26871da177e4SLinus Torvalds {
26881da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
26891da177e4SLinus Torvalds 	struct sk_buff *skb;
26901da177e4SLinus Torvalds 	int copied, err;
26910fb375fbSEric W. Biederman 	struct sockaddr_ll *sll;
2692bfd5f4a3SSridhar Samudrala 	int vnet_hdr_len = 0;
26931da177e4SLinus Torvalds 
26941da177e4SLinus Torvalds 	err = -EINVAL;
2695ed85b565SRichard Cochran 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
26961da177e4SLinus Torvalds 		goto out;
26971da177e4SLinus Torvalds 
26981da177e4SLinus Torvalds #if 0
26991da177e4SLinus Torvalds 	/* What error should we return now? EUNATTACH? */
27001da177e4SLinus Torvalds 	if (pkt_sk(sk)->ifindex < 0)
27011da177e4SLinus Torvalds 		return -ENODEV;
27021da177e4SLinus Torvalds #endif
27031da177e4SLinus Torvalds 
2704ed85b565SRichard Cochran 	if (flags & MSG_ERRQUEUE) {
2705ed85b565SRichard Cochran 		err = packet_recv_error(sk, msg, len);
2706ed85b565SRichard Cochran 		goto out;
2707ed85b565SRichard Cochran 	}
2708ed85b565SRichard Cochran 
27091da177e4SLinus Torvalds 	/*
27101da177e4SLinus Torvalds 	 *	Call the generic datagram receiver. This handles all sorts
27111da177e4SLinus Torvalds 	 *	of horrible races and re-entrancy so we can forget about it
27121da177e4SLinus Torvalds 	 *	in the protocol layers.
27131da177e4SLinus Torvalds 	 *
27141da177e4SLinus Torvalds 	 *	Now it will return ENETDOWN, if device have just gone down,
27151da177e4SLinus Torvalds 	 *	but then it will block.
27161da177e4SLinus Torvalds 	 */
27171da177e4SLinus Torvalds 
27181da177e4SLinus Torvalds 	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
27191da177e4SLinus Torvalds 
27201da177e4SLinus Torvalds 	/*
27211da177e4SLinus Torvalds 	 *	An error occurred so return it. Because skb_recv_datagram()
27221da177e4SLinus Torvalds 	 *	handles the blocking we don't see and worry about blocking
27231da177e4SLinus Torvalds 	 *	retries.
27241da177e4SLinus Torvalds 	 */
27251da177e4SLinus Torvalds 
27261da177e4SLinus Torvalds 	if (skb == NULL)
27271da177e4SLinus Torvalds 		goto out;
27281da177e4SLinus Torvalds 
2729bfd5f4a3SSridhar Samudrala 	if (pkt_sk(sk)->has_vnet_hdr) {
2730bfd5f4a3SSridhar Samudrala 		struct virtio_net_hdr vnet_hdr = { 0 };
2731bfd5f4a3SSridhar Samudrala 
2732bfd5f4a3SSridhar Samudrala 		err = -EINVAL;
2733bfd5f4a3SSridhar Samudrala 		vnet_hdr_len = sizeof(vnet_hdr);
27341f18b717SMariusz Kozlowski 		if (len < vnet_hdr_len)
2735bfd5f4a3SSridhar Samudrala 			goto out_free;
2736bfd5f4a3SSridhar Samudrala 
27371f18b717SMariusz Kozlowski 		len -= vnet_hdr_len;
27381f18b717SMariusz Kozlowski 
2739bfd5f4a3SSridhar Samudrala 		if (skb_is_gso(skb)) {
2740bfd5f4a3SSridhar Samudrala 			struct skb_shared_info *sinfo = skb_shinfo(skb);
2741bfd5f4a3SSridhar Samudrala 
2742bfd5f4a3SSridhar Samudrala 			/* This is a hint as to how much should be linear. */
2743bfd5f4a3SSridhar Samudrala 			vnet_hdr.hdr_len = skb_headlen(skb);
2744bfd5f4a3SSridhar Samudrala 			vnet_hdr.gso_size = sinfo->gso_size;
2745bfd5f4a3SSridhar Samudrala 			if (sinfo->gso_type & SKB_GSO_TCPV4)
2746bfd5f4a3SSridhar Samudrala 				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2747bfd5f4a3SSridhar Samudrala 			else if (sinfo->gso_type & SKB_GSO_TCPV6)
2748bfd5f4a3SSridhar Samudrala 				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2749bfd5f4a3SSridhar Samudrala 			else if (sinfo->gso_type & SKB_GSO_UDP)
2750bfd5f4a3SSridhar Samudrala 				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
2751bfd5f4a3SSridhar Samudrala 			else if (sinfo->gso_type & SKB_GSO_FCOE)
2752bfd5f4a3SSridhar Samudrala 				goto out_free;
2753bfd5f4a3SSridhar Samudrala 			else
2754bfd5f4a3SSridhar Samudrala 				BUG();
2755bfd5f4a3SSridhar Samudrala 			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
2756bfd5f4a3SSridhar Samudrala 				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2757bfd5f4a3SSridhar Samudrala 		} else
2758bfd5f4a3SSridhar Samudrala 			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
2759bfd5f4a3SSridhar Samudrala 
2760bfd5f4a3SSridhar Samudrala 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
2761bfd5f4a3SSridhar Samudrala 			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
276255508d60SMichał Mirosław 			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
2763bfd5f4a3SSridhar Samudrala 			vnet_hdr.csum_offset = skb->csum_offset;
276410a8d94aSJason Wang 		} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
276510a8d94aSJason Wang 			vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
2766bfd5f4a3SSridhar Samudrala 		} /* else everything is zero */
2767bfd5f4a3SSridhar Samudrala 
2768bfd5f4a3SSridhar Samudrala 		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
2769bfd5f4a3SSridhar Samudrala 				     vnet_hdr_len);
2770bfd5f4a3SSridhar Samudrala 		if (err < 0)
2771bfd5f4a3SSridhar Samudrala 			goto out_free;
2772bfd5f4a3SSridhar Samudrala 	}
2773bfd5f4a3SSridhar Samudrala 
27741da177e4SLinus Torvalds 	/*
27750fb375fbSEric W. Biederman 	 *	If the address length field is there to be filled in, we fill
27760fb375fbSEric W. Biederman 	 *	it in now.
27770fb375fbSEric W. Biederman 	 */
27780fb375fbSEric W. Biederman 
2779ffbc6111SHerbert Xu 	sll = &PACKET_SKB_CB(skb)->sa.ll;
27800fb375fbSEric W. Biederman 	if (sock->type == SOCK_PACKET)
27810fb375fbSEric W. Biederman 		msg->msg_namelen = sizeof(struct sockaddr_pkt);
27820fb375fbSEric W. Biederman 	else
27830fb375fbSEric W. Biederman 		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
27840fb375fbSEric W. Biederman 
27850fb375fbSEric W. Biederman 	/*
27861da177e4SLinus Torvalds 	 *	You lose any data beyond the buffer you gave. If it worries a
27871da177e4SLinus Torvalds 	 *	user program they can ask the device for its MTU anyway.
27881da177e4SLinus Torvalds 	 */
27891da177e4SLinus Torvalds 
27901da177e4SLinus Torvalds 	copied = skb->len;
279140d4e3dfSEric Dumazet 	if (copied > len) {
27921da177e4SLinus Torvalds 		copied = len;
27931da177e4SLinus Torvalds 		msg->msg_flags |= MSG_TRUNC;
27941da177e4SLinus Torvalds 	}
27951da177e4SLinus Torvalds 
27961da177e4SLinus Torvalds 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
27971da177e4SLinus Torvalds 	if (err)
27981da177e4SLinus Torvalds 		goto out_free;
27991da177e4SLinus Torvalds 
28003b885787SNeil Horman 	sock_recv_ts_and_drops(msg, sk, skb);
28011da177e4SLinus Torvalds 
28021da177e4SLinus Torvalds 	if (msg->msg_name)
2803ffbc6111SHerbert Xu 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
2804ffbc6111SHerbert Xu 		       msg->msg_namelen);
28051da177e4SLinus Torvalds 
28068dc41944SHerbert Xu 	if (pkt_sk(sk)->auxdata) {
2807ffbc6111SHerbert Xu 		struct tpacket_auxdata aux;
2808ffbc6111SHerbert Xu 
2809ffbc6111SHerbert Xu 		aux.tp_status = TP_STATUS_USER;
2810ffbc6111SHerbert Xu 		if (skb->ip_summed == CHECKSUM_PARTIAL)
2811ffbc6111SHerbert Xu 			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
2812ffbc6111SHerbert Xu 		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
2813ffbc6111SHerbert Xu 		aux.tp_snaplen = skb->len;
2814ffbc6111SHerbert Xu 		aux.tp_mac = 0;
2815bbe735e4SArnaldo Carvalho de Melo 		aux.tp_net = skb_network_offset(skb);
2816a3bcc23eSBen Greear 		if (vlan_tx_tag_present(skb)) {
281705423b24SEric Dumazet 			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
2818a3bcc23eSBen Greear 			aux.tp_status |= TP_STATUS_VLAN_VALID;
2819a3bcc23eSBen Greear 		} else {
2820a3bcc23eSBen Greear 			aux.tp_vlan_tci = 0;
2821a3bcc23eSBen Greear 		}
282213fcb7bdSEric Dumazet 		aux.tp_padding = 0;
2823ffbc6111SHerbert Xu 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
28248dc41944SHerbert Xu 	}
28258dc41944SHerbert Xu 
28261da177e4SLinus Torvalds 	/*
28271da177e4SLinus Torvalds 	 *	Free or return the buffer as appropriate. Again this
28281da177e4SLinus Torvalds 	 *	hides all the races and re-entrancy issues from us.
28291da177e4SLinus Torvalds 	 */
2830bfd5f4a3SSridhar Samudrala 	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
28311da177e4SLinus Torvalds 
28321da177e4SLinus Torvalds out_free:
28331da177e4SLinus Torvalds 	skb_free_datagram(sk, skb);
28341da177e4SLinus Torvalds out:
28351da177e4SLinus Torvalds 	return err;
28361da177e4SLinus Torvalds }
28371da177e4SLinus Torvalds 
28381da177e4SLinus Torvalds static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
28391da177e4SLinus Torvalds 			       int *uaddr_len, int peer)
28401da177e4SLinus Torvalds {
28411da177e4SLinus Torvalds 	struct net_device *dev;
28421da177e4SLinus Torvalds 	struct sock *sk	= sock->sk;
28431da177e4SLinus Torvalds 
28441da177e4SLinus Torvalds 	if (peer)
28451da177e4SLinus Torvalds 		return -EOPNOTSUPP;
28461da177e4SLinus Torvalds 
28471da177e4SLinus Torvalds 	uaddr->sa_family = AF_PACKET;
2848654d1f8aSEric Dumazet 	rcu_read_lock();
2849654d1f8aSEric Dumazet 	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
2850654d1f8aSEric Dumazet 	if (dev)
285167286640SVasiliy Kulikov 		strncpy(uaddr->sa_data, dev->name, 14);
2852654d1f8aSEric Dumazet 	else
28531da177e4SLinus Torvalds 		memset(uaddr->sa_data, 0, 14);
2854654d1f8aSEric Dumazet 	rcu_read_unlock();
28551da177e4SLinus Torvalds 	*uaddr_len = sizeof(*uaddr);
28561da177e4SLinus Torvalds 
28571da177e4SLinus Torvalds 	return 0;
28581da177e4SLinus Torvalds }
28591da177e4SLinus Torvalds 
28601da177e4SLinus Torvalds static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
28611da177e4SLinus Torvalds 			  int *uaddr_len, int peer)
28621da177e4SLinus Torvalds {
28631da177e4SLinus Torvalds 	struct net_device *dev;
28641da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
28651da177e4SLinus Torvalds 	struct packet_sock *po = pkt_sk(sk);
286613cfa97bSCyrill Gorcunov 	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
28671da177e4SLinus Torvalds 
28681da177e4SLinus Torvalds 	if (peer)
28691da177e4SLinus Torvalds 		return -EOPNOTSUPP;
28701da177e4SLinus Torvalds 
28711da177e4SLinus Torvalds 	sll->sll_family = AF_PACKET;
28721da177e4SLinus Torvalds 	sll->sll_ifindex = po->ifindex;
28731da177e4SLinus Torvalds 	sll->sll_protocol = po->num;
287467286640SVasiliy Kulikov 	sll->sll_pkttype = 0;
2875654d1f8aSEric Dumazet 	rcu_read_lock();
2876654d1f8aSEric Dumazet 	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
28771da177e4SLinus Torvalds 	if (dev) {
28781da177e4SLinus Torvalds 		sll->sll_hatype = dev->type;
28791da177e4SLinus Torvalds 		sll->sll_halen = dev->addr_len;
28801da177e4SLinus Torvalds 		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
28811da177e4SLinus Torvalds 	} else {
28821da177e4SLinus Torvalds 		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
28831da177e4SLinus Torvalds 		sll->sll_halen = 0;
28841da177e4SLinus Torvalds 	}
2885654d1f8aSEric Dumazet 	rcu_read_unlock();
28860fb375fbSEric W. Biederman 	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
28871da177e4SLinus Torvalds 
28881da177e4SLinus Torvalds 	return 0;
28891da177e4SLinus Torvalds }
28901da177e4SLinus Torvalds 
28912aeb0b88SWang Chen static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
28922aeb0b88SWang Chen 			 int what)
28931da177e4SLinus Torvalds {
28941da177e4SLinus Torvalds 	switch (i->type) {
28951da177e4SLinus Torvalds 	case PACKET_MR_MULTICAST:
28961162563fSJiri Pirko 		if (i->alen != dev->addr_len)
28971162563fSJiri Pirko 			return -EINVAL;
28981da177e4SLinus Torvalds 		if (what > 0)
289922bedad3SJiri Pirko 			return dev_mc_add(dev, i->addr);
29001da177e4SLinus Torvalds 		else
290122bedad3SJiri Pirko 			return dev_mc_del(dev, i->addr);
29021da177e4SLinus Torvalds 		break;
29031da177e4SLinus Torvalds 	case PACKET_MR_PROMISC:
29042aeb0b88SWang Chen 		return dev_set_promiscuity(dev, what);
29051da177e4SLinus Torvalds 		break;
29061da177e4SLinus Torvalds 	case PACKET_MR_ALLMULTI:
29072aeb0b88SWang Chen 		return dev_set_allmulti(dev, what);
29081da177e4SLinus Torvalds 		break;
2909d95ed927SEric W. Biederman 	case PACKET_MR_UNICAST:
29101162563fSJiri Pirko 		if (i->alen != dev->addr_len)
29111162563fSJiri Pirko 			return -EINVAL;
2912d95ed927SEric W. Biederman 		if (what > 0)
2913a748ee24SJiri Pirko 			return dev_uc_add(dev, i->addr);
2914d95ed927SEric W. Biederman 		else
2915a748ee24SJiri Pirko 			return dev_uc_del(dev, i->addr);
2916d95ed927SEric W. Biederman 		break;
291740d4e3dfSEric Dumazet 	default:
291840d4e3dfSEric Dumazet 		break;
29191da177e4SLinus Torvalds 	}
29202aeb0b88SWang Chen 	return 0;
29211da177e4SLinus Torvalds }
29221da177e4SLinus Torvalds 
29231da177e4SLinus Torvalds static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
29241da177e4SLinus Torvalds {
29251da177e4SLinus Torvalds 	for ( ; i; i = i->next) {
29261da177e4SLinus Torvalds 		if (i->ifindex == dev->ifindex)
29271da177e4SLinus Torvalds 			packet_dev_mc(dev, i, what);
29281da177e4SLinus Torvalds 	}
29291da177e4SLinus Torvalds }
29301da177e4SLinus Torvalds 
29310fb375fbSEric W. Biederman static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
29321da177e4SLinus Torvalds {
29331da177e4SLinus Torvalds 	struct packet_sock *po = pkt_sk(sk);
29341da177e4SLinus Torvalds 	struct packet_mclist *ml, *i;
29351da177e4SLinus Torvalds 	struct net_device *dev;
29361da177e4SLinus Torvalds 	int err;
29371da177e4SLinus Torvalds 
29381da177e4SLinus Torvalds 	rtnl_lock();
29391da177e4SLinus Torvalds 
29401da177e4SLinus Torvalds 	err = -ENODEV;
29413b1e0a65SYOSHIFUJI Hideaki 	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
29421da177e4SLinus Torvalds 	if (!dev)
29431da177e4SLinus Torvalds 		goto done;
29441da177e4SLinus Torvalds 
29451da177e4SLinus Torvalds 	err = -EINVAL;
29461162563fSJiri Pirko 	if (mreq->mr_alen > dev->addr_len)
29471da177e4SLinus Torvalds 		goto done;
29481da177e4SLinus Torvalds 
29491da177e4SLinus Torvalds 	err = -ENOBUFS;
29508b3a7005SKris Katterjohn 	i = kmalloc(sizeof(*i), GFP_KERNEL);
29511da177e4SLinus Torvalds 	if (i == NULL)
29521da177e4SLinus Torvalds 		goto done;
29531da177e4SLinus Torvalds 
29541da177e4SLinus Torvalds 	err = 0;
29551da177e4SLinus Torvalds 	for (ml = po->mclist; ml; ml = ml->next) {
29561da177e4SLinus Torvalds 		if (ml->ifindex == mreq->mr_ifindex &&
29571da177e4SLinus Torvalds 		    ml->type == mreq->mr_type &&
29581da177e4SLinus Torvalds 		    ml->alen == mreq->mr_alen &&
29591da177e4SLinus Torvalds 		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
29601da177e4SLinus Torvalds 			ml->count++;
29611da177e4SLinus Torvalds 			/* Free the new element ... */
29621da177e4SLinus Torvalds 			kfree(i);
29631da177e4SLinus Torvalds 			goto done;
29641da177e4SLinus Torvalds 		}
29651da177e4SLinus Torvalds 	}
29661da177e4SLinus Torvalds 
29671da177e4SLinus Torvalds 	i->type = mreq->mr_type;
29681da177e4SLinus Torvalds 	i->ifindex = mreq->mr_ifindex;
29691da177e4SLinus Torvalds 	i->alen = mreq->mr_alen;
29701da177e4SLinus Torvalds 	memcpy(i->addr, mreq->mr_address, i->alen);
29711da177e4SLinus Torvalds 	i->count = 1;
29721da177e4SLinus Torvalds 	i->next = po->mclist;
29731da177e4SLinus Torvalds 	po->mclist = i;
29742aeb0b88SWang Chen 	err = packet_dev_mc(dev, i, 1);
29752aeb0b88SWang Chen 	if (err) {
29762aeb0b88SWang Chen 		po->mclist = i->next;
29772aeb0b88SWang Chen 		kfree(i);
29782aeb0b88SWang Chen 	}
29791da177e4SLinus Torvalds 
29801da177e4SLinus Torvalds done:
29811da177e4SLinus Torvalds 	rtnl_unlock();
29821da177e4SLinus Torvalds 	return err;
29831da177e4SLinus Torvalds }
29841da177e4SLinus Torvalds 
29850fb375fbSEric W. Biederman static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
29861da177e4SLinus Torvalds {
29871da177e4SLinus Torvalds 	struct packet_mclist *ml, **mlp;
29881da177e4SLinus Torvalds 
29891da177e4SLinus Torvalds 	rtnl_lock();
29901da177e4SLinus Torvalds 
29911da177e4SLinus Torvalds 	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
29921da177e4SLinus Torvalds 		if (ml->ifindex == mreq->mr_ifindex &&
29931da177e4SLinus Torvalds 		    ml->type == mreq->mr_type &&
29941da177e4SLinus Torvalds 		    ml->alen == mreq->mr_alen &&
29951da177e4SLinus Torvalds 		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
29961da177e4SLinus Torvalds 			if (--ml->count == 0) {
29971da177e4SLinus Torvalds 				struct net_device *dev;
29981da177e4SLinus Torvalds 				*mlp = ml->next;
2999ad959e76SEric Dumazet 				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3000ad959e76SEric Dumazet 				if (dev)
30011da177e4SLinus Torvalds 					packet_dev_mc(dev, ml, -1);
30021da177e4SLinus Torvalds 				kfree(ml);
30031da177e4SLinus Torvalds 			}
30041da177e4SLinus Torvalds 			rtnl_unlock();
30051da177e4SLinus Torvalds 			return 0;
30061da177e4SLinus Torvalds 		}
30071da177e4SLinus Torvalds 	}
30081da177e4SLinus Torvalds 	rtnl_unlock();
30091da177e4SLinus Torvalds 	return -EADDRNOTAVAIL;
30101da177e4SLinus Torvalds }
30111da177e4SLinus Torvalds 
30121da177e4SLinus Torvalds static void packet_flush_mclist(struct sock *sk)
30131da177e4SLinus Torvalds {
30141da177e4SLinus Torvalds 	struct packet_sock *po = pkt_sk(sk);
30151da177e4SLinus Torvalds 	struct packet_mclist *ml;
30161da177e4SLinus Torvalds 
30171da177e4SLinus Torvalds 	if (!po->mclist)
30181da177e4SLinus Torvalds 		return;
30191da177e4SLinus Torvalds 
30201da177e4SLinus Torvalds 	rtnl_lock();
30211da177e4SLinus Torvalds 	while ((ml = po->mclist) != NULL) {
30221da177e4SLinus Torvalds 		struct net_device *dev;
30231da177e4SLinus Torvalds 
30241da177e4SLinus Torvalds 		po->mclist = ml->next;
3025ad959e76SEric Dumazet 		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3026ad959e76SEric Dumazet 		if (dev != NULL)
30271da177e4SLinus Torvalds 			packet_dev_mc(dev, ml, -1);
30281da177e4SLinus Torvalds 		kfree(ml);
30291da177e4SLinus Torvalds 	}
30301da177e4SLinus Torvalds 	rtnl_unlock();
30311da177e4SLinus Torvalds }
30321da177e4SLinus Torvalds 
30331da177e4SLinus Torvalds static int
3034b7058842SDavid S. Miller packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
30351da177e4SLinus Torvalds {
30361da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
30378dc41944SHerbert Xu 	struct packet_sock *po = pkt_sk(sk);
30381da177e4SLinus Torvalds 	int ret;
30391da177e4SLinus Torvalds 
30401da177e4SLinus Torvalds 	if (level != SOL_PACKET)
30411da177e4SLinus Torvalds 		return -ENOPROTOOPT;
30421da177e4SLinus Torvalds 
30431da177e4SLinus Torvalds 	switch (optname) {
30441da177e4SLinus Torvalds 	case PACKET_ADD_MEMBERSHIP:
30451da177e4SLinus Torvalds 	case PACKET_DROP_MEMBERSHIP:
30461da177e4SLinus Torvalds 	{
30470fb375fbSEric W. Biederman 		struct packet_mreq_max mreq;
30480fb375fbSEric W. Biederman 		int len = optlen;
30490fb375fbSEric W. Biederman 		memset(&mreq, 0, sizeof(mreq));
30500fb375fbSEric W. Biederman 		if (len < sizeof(struct packet_mreq))
30511da177e4SLinus Torvalds 			return -EINVAL;
30520fb375fbSEric W. Biederman 		if (len > sizeof(mreq))
30530fb375fbSEric W. Biederman 			len = sizeof(mreq);
30540fb375fbSEric W. Biederman 		if (copy_from_user(&mreq, optval, len))
30551da177e4SLinus Torvalds 			return -EFAULT;
30560fb375fbSEric W. Biederman 		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
30570fb375fbSEric W. Biederman 			return -EINVAL;
30581da177e4SLinus Torvalds 		if (optname == PACKET_ADD_MEMBERSHIP)
30591da177e4SLinus Torvalds 			ret = packet_mc_add(sk, &mreq);
30601da177e4SLinus Torvalds 		else
30611da177e4SLinus Torvalds 			ret = packet_mc_drop(sk, &mreq);
30621da177e4SLinus Torvalds 		return ret;
30631da177e4SLinus Torvalds 	}
3064a2efcfa0SDavid S. Miller 
30651da177e4SLinus Torvalds 	case PACKET_RX_RING:
306669e3c75fSJohann Baudy 	case PACKET_TX_RING:
30671da177e4SLinus Torvalds 	{
3068f6fb8f10Schetan loke 		union tpacket_req_u req_u;
3069f6fb8f10Schetan loke 		int len;
30701da177e4SLinus Torvalds 
3071f6fb8f10Schetan loke 		switch (po->tp_version) {
3072f6fb8f10Schetan loke 		case TPACKET_V1:
3073f6fb8f10Schetan loke 		case TPACKET_V2:
3074f6fb8f10Schetan loke 			len = sizeof(req_u.req);
3075f6fb8f10Schetan loke 			break;
3076f6fb8f10Schetan loke 		case TPACKET_V3:
3077f6fb8f10Schetan loke 		default:
3078f6fb8f10Schetan loke 			len = sizeof(req_u.req3);
3079f6fb8f10Schetan loke 			break;
3080f6fb8f10Schetan loke 		}
3081f6fb8f10Schetan loke 		if (optlen < len)
30821da177e4SLinus Torvalds 			return -EINVAL;
3083bfd5f4a3SSridhar Samudrala 		if (pkt_sk(sk)->has_vnet_hdr)
3084bfd5f4a3SSridhar Samudrala 			return -EINVAL;
3085f6fb8f10Schetan loke 		if (copy_from_user(&req_u.req, optval, len))
30861da177e4SLinus Torvalds 			return -EFAULT;
3087f6fb8f10Schetan loke 		return packet_set_ring(sk, &req_u, 0,
3088f6fb8f10Schetan loke 			optname == PACKET_TX_RING);
30891da177e4SLinus Torvalds 	}
30901da177e4SLinus Torvalds 	case PACKET_COPY_THRESH:
30911da177e4SLinus Torvalds 	{
30921da177e4SLinus Torvalds 		int val;
30931da177e4SLinus Torvalds 
30941da177e4SLinus Torvalds 		if (optlen != sizeof(val))
30951da177e4SLinus Torvalds 			return -EINVAL;
30961da177e4SLinus Torvalds 		if (copy_from_user(&val, optval, sizeof(val)))
30971da177e4SLinus Torvalds 			return -EFAULT;
30981da177e4SLinus Torvalds 
30991da177e4SLinus Torvalds 		pkt_sk(sk)->copy_thresh = val;
31001da177e4SLinus Torvalds 		return 0;
31011da177e4SLinus Torvalds 	}
3102bbd6ef87SPatrick McHardy 	case PACKET_VERSION:
3103bbd6ef87SPatrick McHardy 	{
3104bbd6ef87SPatrick McHardy 		int val;
3105bbd6ef87SPatrick McHardy 
3106bbd6ef87SPatrick McHardy 		if (optlen != sizeof(val))
3107bbd6ef87SPatrick McHardy 			return -EINVAL;
310869e3c75fSJohann Baudy 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3109bbd6ef87SPatrick McHardy 			return -EBUSY;
3110bbd6ef87SPatrick McHardy 		if (copy_from_user(&val, optval, sizeof(val)))
3111bbd6ef87SPatrick McHardy 			return -EFAULT;
3112bbd6ef87SPatrick McHardy 		switch (val) {
3113bbd6ef87SPatrick McHardy 		case TPACKET_V1:
3114bbd6ef87SPatrick McHardy 		case TPACKET_V2:
3115f6fb8f10Schetan loke 		case TPACKET_V3:
3116bbd6ef87SPatrick McHardy 			po->tp_version = val;
3117bbd6ef87SPatrick McHardy 			return 0;
3118bbd6ef87SPatrick McHardy 		default:
3119bbd6ef87SPatrick McHardy 			return -EINVAL;
3120bbd6ef87SPatrick McHardy 		}
3121bbd6ef87SPatrick McHardy 	}
31228913336aSPatrick McHardy 	case PACKET_RESERVE:
31238913336aSPatrick McHardy 	{
31248913336aSPatrick McHardy 		unsigned int val;
31258913336aSPatrick McHardy 
31268913336aSPatrick McHardy 		if (optlen != sizeof(val))
31278913336aSPatrick McHardy 			return -EINVAL;
312869e3c75fSJohann Baudy 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
31298913336aSPatrick McHardy 			return -EBUSY;
31308913336aSPatrick McHardy 		if (copy_from_user(&val, optval, sizeof(val)))
31318913336aSPatrick McHardy 			return -EFAULT;
31328913336aSPatrick McHardy 		po->tp_reserve = val;
31338913336aSPatrick McHardy 		return 0;
31348913336aSPatrick McHardy 	}
313569e3c75fSJohann Baudy 	case PACKET_LOSS:
313669e3c75fSJohann Baudy 	{
313769e3c75fSJohann Baudy 		unsigned int val;
313869e3c75fSJohann Baudy 
313969e3c75fSJohann Baudy 		if (optlen != sizeof(val))
314069e3c75fSJohann Baudy 			return -EINVAL;
314169e3c75fSJohann Baudy 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
314269e3c75fSJohann Baudy 			return -EBUSY;
314369e3c75fSJohann Baudy 		if (copy_from_user(&val, optval, sizeof(val)))
314469e3c75fSJohann Baudy 			return -EFAULT;
314569e3c75fSJohann Baudy 		po->tp_loss = !!val;
314669e3c75fSJohann Baudy 		return 0;
314769e3c75fSJohann Baudy 	}
31488dc41944SHerbert Xu 	case PACKET_AUXDATA:
31498dc41944SHerbert Xu 	{
31508dc41944SHerbert Xu 		int val;
31518dc41944SHerbert Xu 
31528dc41944SHerbert Xu 		if (optlen < sizeof(val))
31538dc41944SHerbert Xu 			return -EINVAL;
31548dc41944SHerbert Xu 		if (copy_from_user(&val, optval, sizeof(val)))
31558dc41944SHerbert Xu 			return -EFAULT;
31568dc41944SHerbert Xu 
31578dc41944SHerbert Xu 		po->auxdata = !!val;
31588dc41944SHerbert Xu 		return 0;
31598dc41944SHerbert Xu 	}
316080feaacbSPeter P. Waskiewicz Jr 	case PACKET_ORIGDEV:
316180feaacbSPeter P. Waskiewicz Jr 	{
316280feaacbSPeter P. Waskiewicz Jr 		int val;
316380feaacbSPeter P. Waskiewicz Jr 
316480feaacbSPeter P. Waskiewicz Jr 		if (optlen < sizeof(val))
316580feaacbSPeter P. Waskiewicz Jr 			return -EINVAL;
316680feaacbSPeter P. Waskiewicz Jr 		if (copy_from_user(&val, optval, sizeof(val)))
316780feaacbSPeter P. Waskiewicz Jr 			return -EFAULT;
316880feaacbSPeter P. Waskiewicz Jr 
316980feaacbSPeter P. Waskiewicz Jr 		po->origdev = !!val;
317080feaacbSPeter P. Waskiewicz Jr 		return 0;
317180feaacbSPeter P. Waskiewicz Jr 	}
3172bfd5f4a3SSridhar Samudrala 	case PACKET_VNET_HDR:
3173bfd5f4a3SSridhar Samudrala 	{
3174bfd5f4a3SSridhar Samudrala 		int val;
3175bfd5f4a3SSridhar Samudrala 
3176bfd5f4a3SSridhar Samudrala 		if (sock->type != SOCK_RAW)
3177bfd5f4a3SSridhar Samudrala 			return -EINVAL;
3178bfd5f4a3SSridhar Samudrala 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3179bfd5f4a3SSridhar Samudrala 			return -EBUSY;
3180bfd5f4a3SSridhar Samudrala 		if (optlen < sizeof(val))
3181bfd5f4a3SSridhar Samudrala 			return -EINVAL;
3182bfd5f4a3SSridhar Samudrala 		if (copy_from_user(&val, optval, sizeof(val)))
3183bfd5f4a3SSridhar Samudrala 			return -EFAULT;
3184bfd5f4a3SSridhar Samudrala 
3185bfd5f4a3SSridhar Samudrala 		po->has_vnet_hdr = !!val;
3186bfd5f4a3SSridhar Samudrala 		return 0;
3187bfd5f4a3SSridhar Samudrala 	}
3188614f60faSScott McMillan 	case PACKET_TIMESTAMP:
3189614f60faSScott McMillan 	{
3190614f60faSScott McMillan 		int val;
3191614f60faSScott McMillan 
3192614f60faSScott McMillan 		if (optlen != sizeof(val))
3193614f60faSScott McMillan 			return -EINVAL;
3194614f60faSScott McMillan 		if (copy_from_user(&val, optval, sizeof(val)))
3195614f60faSScott McMillan 			return -EFAULT;
3196614f60faSScott McMillan 
3197614f60faSScott McMillan 		po->tp_tstamp = val;
3198614f60faSScott McMillan 		return 0;
3199614f60faSScott McMillan 	}
3200dc99f600SDavid S. Miller 	case PACKET_FANOUT:
3201dc99f600SDavid S. Miller 	{
3202dc99f600SDavid S. Miller 		int val;
3203dc99f600SDavid S. Miller 
3204dc99f600SDavid S. Miller 		if (optlen != sizeof(val))
3205dc99f600SDavid S. Miller 			return -EINVAL;
3206dc99f600SDavid S. Miller 		if (copy_from_user(&val, optval, sizeof(val)))
3207dc99f600SDavid S. Miller 			return -EFAULT;
3208dc99f600SDavid S. Miller 
3209dc99f600SDavid S. Miller 		return fanout_add(sk, val & 0xffff, val >> 16);
3210dc99f600SDavid S. Miller 	}
32111da177e4SLinus Torvalds 	default:
32121da177e4SLinus Torvalds 		return -ENOPROTOOPT;
32131da177e4SLinus Torvalds 	}
32141da177e4SLinus Torvalds }
32151da177e4SLinus Torvalds 
32161da177e4SLinus Torvalds static int packet_getsockopt(struct socket *sock, int level, int optname,
32171da177e4SLinus Torvalds 			     char __user *optval, int __user *optlen)
32181da177e4SLinus Torvalds {
32191da177e4SLinus Torvalds 	int len;
3220c06fff6eSEric Dumazet 	int val, lv = sizeof(val);
32211da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
32221da177e4SLinus Torvalds 	struct packet_sock *po = pkt_sk(sk);
3223c06fff6eSEric Dumazet 	void *data = &val;
32248dc41944SHerbert Xu 	struct tpacket_stats st;
3225f6fb8f10Schetan loke 	union tpacket_stats_u st_u;
32261da177e4SLinus Torvalds 
32271da177e4SLinus Torvalds 	if (level != SOL_PACKET)
32281da177e4SLinus Torvalds 		return -ENOPROTOOPT;
32291da177e4SLinus Torvalds 
32301da177e4SLinus Torvalds 	if (get_user(len, optlen))
32311da177e4SLinus Torvalds 		return -EFAULT;
32321da177e4SLinus Torvalds 
32331da177e4SLinus Torvalds 	if (len < 0)
32341da177e4SLinus Torvalds 		return -EINVAL;
32351da177e4SLinus Torvalds 
32361da177e4SLinus Torvalds 	switch (optname) {
32371da177e4SLinus Torvalds 	case PACKET_STATISTICS:
32381da177e4SLinus Torvalds 		spin_lock_bh(&sk->sk_receive_queue.lock);
3239f6fb8f10Schetan loke 		if (po->tp_version == TPACKET_V3) {
3240c06fff6eSEric Dumazet 			lv = sizeof(struct tpacket_stats_v3);
3241f6fb8f10Schetan loke 			memcpy(&st_u.stats3, &po->stats,
3242f6fb8f10Schetan loke 			       sizeof(struct tpacket_stats));
3243f6fb8f10Schetan loke 			st_u.stats3.tp_freeze_q_cnt =
3244f6fb8f10Schetan loke 					po->stats_u.stats3.tp_freeze_q_cnt;
3245f6fb8f10Schetan loke 			st_u.stats3.tp_packets += po->stats.tp_drops;
3246f6fb8f10Schetan loke 			data = &st_u.stats3;
3247f6fb8f10Schetan loke 		} else {
3248c06fff6eSEric Dumazet 			lv = sizeof(struct tpacket_stats);
32491da177e4SLinus Torvalds 			st = po->stats;
3250f6fb8f10Schetan loke 			st.tp_packets += st.tp_drops;
3251f6fb8f10Schetan loke 			data = &st;
3252f6fb8f10Schetan loke 		}
32531da177e4SLinus Torvalds 		memset(&po->stats, 0, sizeof(st));
32541da177e4SLinus Torvalds 		spin_unlock_bh(&sk->sk_receive_queue.lock);
32551da177e4SLinus Torvalds 		break;
32568dc41944SHerbert Xu 	case PACKET_AUXDATA:
32578dc41944SHerbert Xu 		val = po->auxdata;
32588dc41944SHerbert Xu 		break;
325980feaacbSPeter P. Waskiewicz Jr 	case PACKET_ORIGDEV:
326080feaacbSPeter P. Waskiewicz Jr 		val = po->origdev;
326180feaacbSPeter P. Waskiewicz Jr 		break;
3262bfd5f4a3SSridhar Samudrala 	case PACKET_VNET_HDR:
3263bfd5f4a3SSridhar Samudrala 		val = po->has_vnet_hdr;
3264bfd5f4a3SSridhar Samudrala 		break;
3265bbd6ef87SPatrick McHardy 	case PACKET_VERSION:
3266bbd6ef87SPatrick McHardy 		val = po->tp_version;
3267bbd6ef87SPatrick McHardy 		break;
3268bbd6ef87SPatrick McHardy 	case PACKET_HDRLEN:
3269bbd6ef87SPatrick McHardy 		if (len > sizeof(int))
3270bbd6ef87SPatrick McHardy 			len = sizeof(int);
3271bbd6ef87SPatrick McHardy 		if (copy_from_user(&val, optval, len))
3272bbd6ef87SPatrick McHardy 			return -EFAULT;
3273bbd6ef87SPatrick McHardy 		switch (val) {
3274bbd6ef87SPatrick McHardy 		case TPACKET_V1:
3275bbd6ef87SPatrick McHardy 			val = sizeof(struct tpacket_hdr);
3276bbd6ef87SPatrick McHardy 			break;
3277bbd6ef87SPatrick McHardy 		case TPACKET_V2:
3278bbd6ef87SPatrick McHardy 			val = sizeof(struct tpacket2_hdr);
3279bbd6ef87SPatrick McHardy 			break;
3280f6fb8f10Schetan loke 		case TPACKET_V3:
3281f6fb8f10Schetan loke 			val = sizeof(struct tpacket3_hdr);
3282f6fb8f10Schetan loke 			break;
3283bbd6ef87SPatrick McHardy 		default:
3284bbd6ef87SPatrick McHardy 			return -EINVAL;
3285bbd6ef87SPatrick McHardy 		}
3286bbd6ef87SPatrick McHardy 		break;
32878913336aSPatrick McHardy 	case PACKET_RESERVE:
32888913336aSPatrick McHardy 		val = po->tp_reserve;
32898913336aSPatrick McHardy 		break;
329069e3c75fSJohann Baudy 	case PACKET_LOSS:
329169e3c75fSJohann Baudy 		val = po->tp_loss;
329269e3c75fSJohann Baudy 		break;
3293614f60faSScott McMillan 	case PACKET_TIMESTAMP:
3294614f60faSScott McMillan 		val = po->tp_tstamp;
3295614f60faSScott McMillan 		break;
3296dc99f600SDavid S. Miller 	case PACKET_FANOUT:
3297dc99f600SDavid S. Miller 		val = (po->fanout ?
3298dc99f600SDavid S. Miller 		       ((u32)po->fanout->id |
3299dc99f600SDavid S. Miller 			((u32)po->fanout->type << 16)) :
3300dc99f600SDavid S. Miller 		       0);
3301dc99f600SDavid S. Miller 		break;
33021da177e4SLinus Torvalds 	default:
33031da177e4SLinus Torvalds 		return -ENOPROTOOPT;
33041da177e4SLinus Torvalds 	}
33051da177e4SLinus Torvalds 
3306c06fff6eSEric Dumazet 	if (len > lv)
3307c06fff6eSEric Dumazet 		len = lv;
33081da177e4SLinus Torvalds 	if (put_user(len, optlen))
33091da177e4SLinus Torvalds 		return -EFAULT;
33108dc41944SHerbert Xu 	if (copy_to_user(optval, data, len))
33118dc41944SHerbert Xu 		return -EFAULT;
33121da177e4SLinus Torvalds 	return 0;
33131da177e4SLinus Torvalds }
33141da177e4SLinus Torvalds 
33151da177e4SLinus Torvalds 
33161da177e4SLinus Torvalds static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
33171da177e4SLinus Torvalds {
33181da177e4SLinus Torvalds 	struct sock *sk;
33191da177e4SLinus Torvalds 	struct hlist_node *node;
3320ad930650SJason Lunz 	struct net_device *dev = data;
3321c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
33221da177e4SLinus Torvalds 
3323808f5114Sstephen hemminger 	rcu_read_lock();
3324808f5114Sstephen hemminger 	sk_for_each_rcu(sk, node, &net->packet.sklist) {
33251da177e4SLinus Torvalds 		struct packet_sock *po = pkt_sk(sk);
33261da177e4SLinus Torvalds 
33271da177e4SLinus Torvalds 		switch (msg) {
33281da177e4SLinus Torvalds 		case NETDEV_UNREGISTER:
33291da177e4SLinus Torvalds 			if (po->mclist)
33301da177e4SLinus Torvalds 				packet_dev_mclist(dev, po->mclist, -1);
3331a2efcfa0SDavid S. Miller 			/* fallthrough */
3332a2efcfa0SDavid S. Miller 
33331da177e4SLinus Torvalds 		case NETDEV_DOWN:
33341da177e4SLinus Torvalds 			if (dev->ifindex == po->ifindex) {
33351da177e4SLinus Torvalds 				spin_lock(&po->bind_lock);
33361da177e4SLinus Torvalds 				if (po->running) {
3337ce06b03eSDavid S. Miller 					__unregister_prot_hook(sk, false);
33381da177e4SLinus Torvalds 					sk->sk_err = ENETDOWN;
33391da177e4SLinus Torvalds 					if (!sock_flag(sk, SOCK_DEAD))
33401da177e4SLinus Torvalds 						sk->sk_error_report(sk);
33411da177e4SLinus Torvalds 				}
33421da177e4SLinus Torvalds 				if (msg == NETDEV_UNREGISTER) {
33431da177e4SLinus Torvalds 					po->ifindex = -1;
3344160ff18aSBen Greear 					if (po->prot_hook.dev)
3345160ff18aSBen Greear 						dev_put(po->prot_hook.dev);
33461da177e4SLinus Torvalds 					po->prot_hook.dev = NULL;
33471da177e4SLinus Torvalds 				}
33481da177e4SLinus Torvalds 				spin_unlock(&po->bind_lock);
33491da177e4SLinus Torvalds 			}
33501da177e4SLinus Torvalds 			break;
33511da177e4SLinus Torvalds 		case NETDEV_UP:
3352808f5114Sstephen hemminger 			if (dev->ifindex == po->ifindex) {
33531da177e4SLinus Torvalds 				spin_lock(&po->bind_lock);
3354ce06b03eSDavid S. Miller 				if (po->num)
3355ce06b03eSDavid S. Miller 					register_prot_hook(sk);
33561da177e4SLinus Torvalds 				spin_unlock(&po->bind_lock);
3357808f5114Sstephen hemminger 			}
33581da177e4SLinus Torvalds 			break;
33591da177e4SLinus Torvalds 		}
33601da177e4SLinus Torvalds 	}
3361808f5114Sstephen hemminger 	rcu_read_unlock();
33621da177e4SLinus Torvalds 	return NOTIFY_DONE;
33631da177e4SLinus Torvalds }
33641da177e4SLinus Torvalds 
33651da177e4SLinus Torvalds 
33661da177e4SLinus Torvalds static int packet_ioctl(struct socket *sock, unsigned int cmd,
33671da177e4SLinus Torvalds 			unsigned long arg)
33681da177e4SLinus Torvalds {
33691da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
33701da177e4SLinus Torvalds 
33711da177e4SLinus Torvalds 	switch (cmd) {
33721da177e4SLinus Torvalds 	case SIOCOUTQ:
33731da177e4SLinus Torvalds 	{
337431e6d363SEric Dumazet 		int amount = sk_wmem_alloc_get(sk);
337531e6d363SEric Dumazet 
33761da177e4SLinus Torvalds 		return put_user(amount, (int __user *)arg);
33771da177e4SLinus Torvalds 	}
33781da177e4SLinus Torvalds 	case SIOCINQ:
33791da177e4SLinus Torvalds 	{
33801da177e4SLinus Torvalds 		struct sk_buff *skb;
33811da177e4SLinus Torvalds 		int amount = 0;
33821da177e4SLinus Torvalds 
33831da177e4SLinus Torvalds 		spin_lock_bh(&sk->sk_receive_queue.lock);
33841da177e4SLinus Torvalds 		skb = skb_peek(&sk->sk_receive_queue);
33851da177e4SLinus Torvalds 		if (skb)
33861da177e4SLinus Torvalds 			amount = skb->len;
33871da177e4SLinus Torvalds 		spin_unlock_bh(&sk->sk_receive_queue.lock);
33881da177e4SLinus Torvalds 		return put_user(amount, (int __user *)arg);
33891da177e4SLinus Torvalds 	}
33901da177e4SLinus Torvalds 	case SIOCGSTAMP:
33911da177e4SLinus Torvalds 		return sock_get_timestamp(sk, (struct timeval __user *)arg);
3392ae40eb1eSEric Dumazet 	case SIOCGSTAMPNS:
3393ae40eb1eSEric Dumazet 		return sock_get_timestampns(sk, (struct timespec __user *)arg);
33941da177e4SLinus Torvalds 
33951da177e4SLinus Torvalds #ifdef CONFIG_INET
33961da177e4SLinus Torvalds 	case SIOCADDRT:
33971da177e4SLinus Torvalds 	case SIOCDELRT:
33981da177e4SLinus Torvalds 	case SIOCDARP:
33991da177e4SLinus Torvalds 	case SIOCGARP:
34001da177e4SLinus Torvalds 	case SIOCSARP:
34011da177e4SLinus Torvalds 	case SIOCGIFADDR:
34021da177e4SLinus Torvalds 	case SIOCSIFADDR:
34031da177e4SLinus Torvalds 	case SIOCGIFBRDADDR:
34041da177e4SLinus Torvalds 	case SIOCSIFBRDADDR:
34051da177e4SLinus Torvalds 	case SIOCGIFNETMASK:
34061da177e4SLinus Torvalds 	case SIOCSIFNETMASK:
34071da177e4SLinus Torvalds 	case SIOCGIFDSTADDR:
34081da177e4SLinus Torvalds 	case SIOCSIFDSTADDR:
34091da177e4SLinus Torvalds 	case SIOCSIFFLAGS:
34101da177e4SLinus Torvalds 		return inet_dgram_ops.ioctl(sock, cmd, arg);
34111da177e4SLinus Torvalds #endif
34121da177e4SLinus Torvalds 
34131da177e4SLinus Torvalds 	default:
3414b5e5fa5eSChristoph Hellwig 		return -ENOIOCTLCMD;
34151da177e4SLinus Torvalds 	}
34161da177e4SLinus Torvalds 	return 0;
34171da177e4SLinus Torvalds }
34181da177e4SLinus Torvalds 
34191da177e4SLinus Torvalds static unsigned int packet_poll(struct file *file, struct socket *sock,
34201da177e4SLinus Torvalds 				poll_table *wait)
34211da177e4SLinus Torvalds {
34221da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
34231da177e4SLinus Torvalds 	struct packet_sock *po = pkt_sk(sk);
34241da177e4SLinus Torvalds 	unsigned int mask = datagram_poll(file, sock, wait);
34251da177e4SLinus Torvalds 
34261da177e4SLinus Torvalds 	spin_lock_bh(&sk->sk_receive_queue.lock);
342769e3c75fSJohann Baudy 	if (po->rx_ring.pg_vec) {
3428f6fb8f10Schetan loke 		if (!packet_previous_rx_frame(po, &po->rx_ring,
3429f6fb8f10Schetan loke 			TP_STATUS_KERNEL))
34301da177e4SLinus Torvalds 			mask |= POLLIN | POLLRDNORM;
34311da177e4SLinus Torvalds 	}
34321da177e4SLinus Torvalds 	spin_unlock_bh(&sk->sk_receive_queue.lock);
343369e3c75fSJohann Baudy 	spin_lock_bh(&sk->sk_write_queue.lock);
343469e3c75fSJohann Baudy 	if (po->tx_ring.pg_vec) {
343569e3c75fSJohann Baudy 		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
343669e3c75fSJohann Baudy 			mask |= POLLOUT | POLLWRNORM;
343769e3c75fSJohann Baudy 	}
343869e3c75fSJohann Baudy 	spin_unlock_bh(&sk->sk_write_queue.lock);
34391da177e4SLinus Torvalds 	return mask;
34401da177e4SLinus Torvalds }
34411da177e4SLinus Torvalds 
34421da177e4SLinus Torvalds 
/* Dirty? Well, I have not yet found a better way to account
 * for user mmaps.
 */
34461da177e4SLinus Torvalds 
34471da177e4SLinus Torvalds static void packet_mm_open(struct vm_area_struct *vma)
34481da177e4SLinus Torvalds {
34491da177e4SLinus Torvalds 	struct file *file = vma->vm_file;
3450b69aee04SEric Dumazet 	struct socket *sock = file->private_data;
34511da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
34521da177e4SLinus Torvalds 
34531da177e4SLinus Torvalds 	if (sk)
34541da177e4SLinus Torvalds 		atomic_inc(&pkt_sk(sk)->mapped);
34551da177e4SLinus Torvalds }
34561da177e4SLinus Torvalds 
34571da177e4SLinus Torvalds static void packet_mm_close(struct vm_area_struct *vma)
34581da177e4SLinus Torvalds {
34591da177e4SLinus Torvalds 	struct file *file = vma->vm_file;
3460b69aee04SEric Dumazet 	struct socket *sock = file->private_data;
34611da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
34621da177e4SLinus Torvalds 
34631da177e4SLinus Torvalds 	if (sk)
34641da177e4SLinus Torvalds 		atomic_dec(&pkt_sk(sk)->mapped);
34651da177e4SLinus Torvalds }
34661da177e4SLinus Torvalds 
3467f0f37e2fSAlexey Dobriyan static const struct vm_operations_struct packet_mmap_ops = {
34681da177e4SLinus Torvalds 	.open	=	packet_mm_open,
34691da177e4SLinus Torvalds 	.close	=	packet_mm_close,
34701da177e4SLinus Torvalds };
34711da177e4SLinus Torvalds 
34720e3125c7SNeil Horman static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
34730e3125c7SNeil Horman 			unsigned int len)
34741da177e4SLinus Torvalds {
34751da177e4SLinus Torvalds 	int i;
34761da177e4SLinus Torvalds 
34771da177e4SLinus Torvalds 	for (i = 0; i < len; i++) {
34780e3125c7SNeil Horman 		if (likely(pg_vec[i].buffer)) {
3479c56b4d90SChangli Gao 			if (is_vmalloc_addr(pg_vec[i].buffer))
34800e3125c7SNeil Horman 				vfree(pg_vec[i].buffer);
34810e3125c7SNeil Horman 			else
34820e3125c7SNeil Horman 				free_pages((unsigned long)pg_vec[i].buffer,
34830e3125c7SNeil Horman 					   order);
34840e3125c7SNeil Horman 			pg_vec[i].buffer = NULL;
34850e3125c7SNeil Horman 		}
34861da177e4SLinus Torvalds 	}
34871da177e4SLinus Torvalds 	kfree(pg_vec);
34881da177e4SLinus Torvalds }
34891da177e4SLinus Torvalds 
3490eea49cc9SOlof Johansson static char *alloc_one_pg_vec_page(unsigned long order)
34914ebf0ae2SDavid S. Miller {
34920e3125c7SNeil Horman 	char *buffer = NULL;
34930e3125c7SNeil Horman 	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
34940e3125c7SNeil Horman 			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
3495719bfeaaSEric Dumazet 
34960e3125c7SNeil Horman 	buffer = (char *) __get_free_pages(gfp_flags, order);
34970e3125c7SNeil Horman 
34980e3125c7SNeil Horman 	if (buffer)
34990e3125c7SNeil Horman 		return buffer;
35000e3125c7SNeil Horman 
35010e3125c7SNeil Horman 	/*
35020e3125c7SNeil Horman 	 * __get_free_pages failed, fall back to vmalloc
35030e3125c7SNeil Horman 	 */
3504bbce5a59SEric Dumazet 	buffer = vzalloc((1 << order) * PAGE_SIZE);
35050e3125c7SNeil Horman 
35060e3125c7SNeil Horman 	if (buffer)
35070e3125c7SNeil Horman 		return buffer;
35080e3125c7SNeil Horman 
35090e3125c7SNeil Horman 	/*
35100e3125c7SNeil Horman 	 * vmalloc failed, lets dig into swap here
35110e3125c7SNeil Horman 	 */
35120e3125c7SNeil Horman 	gfp_flags &= ~__GFP_NORETRY;
35130e3125c7SNeil Horman 	buffer = (char *)__get_free_pages(gfp_flags, order);
35140e3125c7SNeil Horman 	if (buffer)
35150e3125c7SNeil Horman 		return buffer;
35160e3125c7SNeil Horman 
35170e3125c7SNeil Horman 	/*
35180e3125c7SNeil Horman 	 * complete and utter failure
35190e3125c7SNeil Horman 	 */
35200e3125c7SNeil Horman 	return NULL;
35214ebf0ae2SDavid S. Miller }
35224ebf0ae2SDavid S. Miller 
35230e3125c7SNeil Horman static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
35244ebf0ae2SDavid S. Miller {
35254ebf0ae2SDavid S. Miller 	unsigned int block_nr = req->tp_block_nr;
35260e3125c7SNeil Horman 	struct pgv *pg_vec;
35274ebf0ae2SDavid S. Miller 	int i;
35284ebf0ae2SDavid S. Miller 
35290e3125c7SNeil Horman 	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
35304ebf0ae2SDavid S. Miller 	if (unlikely(!pg_vec))
35314ebf0ae2SDavid S. Miller 		goto out;
35324ebf0ae2SDavid S. Miller 
35334ebf0ae2SDavid S. Miller 	for (i = 0; i < block_nr; i++) {
3534c56b4d90SChangli Gao 		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
35350e3125c7SNeil Horman 		if (unlikely(!pg_vec[i].buffer))
35364ebf0ae2SDavid S. Miller 			goto out_free_pgvec;
35374ebf0ae2SDavid S. Miller 	}
35384ebf0ae2SDavid S. Miller 
35394ebf0ae2SDavid S. Miller out:
35404ebf0ae2SDavid S. Miller 	return pg_vec;
35414ebf0ae2SDavid S. Miller 
35424ebf0ae2SDavid S. Miller out_free_pgvec:
35434ebf0ae2SDavid S. Miller 	free_pg_vec(pg_vec, order, block_nr);
35444ebf0ae2SDavid S. Miller 	pg_vec = NULL;
35454ebf0ae2SDavid S. Miller 	goto out;
35464ebf0ae2SDavid S. Miller }
35471da177e4SLinus Torvalds 
3548f6fb8f10Schetan loke static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
354969e3c75fSJohann Baudy 		int closing, int tx_ring)
35501da177e4SLinus Torvalds {
35510e3125c7SNeil Horman 	struct pgv *pg_vec = NULL;
35521da177e4SLinus Torvalds 	struct packet_sock *po = pkt_sk(sk);
35530e11c91eSAl Viro 	int was_running, order = 0;
355469e3c75fSJohann Baudy 	struct packet_ring_buffer *rb;
355569e3c75fSJohann Baudy 	struct sk_buff_head *rb_queue;
35560e11c91eSAl Viro 	__be16 num;
3557f6fb8f10Schetan loke 	int err = -EINVAL;
3558f6fb8f10Schetan loke 	/* Added to avoid minimal code churn */
3559f6fb8f10Schetan loke 	struct tpacket_req *req = &req_u->req;
3560f6fb8f10Schetan loke 
3561f6fb8f10Schetan loke 	/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
3562f6fb8f10Schetan loke 	if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
3563f6fb8f10Schetan loke 		WARN(1, "Tx-ring is not supported.\n");
3564f6fb8f10Schetan loke 		goto out;
3565f6fb8f10Schetan loke 	}
356669e3c75fSJohann Baudy 
356769e3c75fSJohann Baudy 	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
356869e3c75fSJohann Baudy 	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
356969e3c75fSJohann Baudy 
357069e3c75fSJohann Baudy 	err = -EBUSY;
357169e3c75fSJohann Baudy 	if (!closing) {
357269e3c75fSJohann Baudy 		if (atomic_read(&po->mapped))
357369e3c75fSJohann Baudy 			goto out;
357469e3c75fSJohann Baudy 		if (atomic_read(&rb->pending))
357569e3c75fSJohann Baudy 			goto out;
357669e3c75fSJohann Baudy 	}
35771da177e4SLinus Torvalds 
35781da177e4SLinus Torvalds 	if (req->tp_block_nr) {
35791da177e4SLinus Torvalds 		/* Sanity tests and some calculations */
358069e3c75fSJohann Baudy 		err = -EBUSY;
358169e3c75fSJohann Baudy 		if (unlikely(rb->pg_vec))
358269e3c75fSJohann Baudy 			goto out;
35831da177e4SLinus Torvalds 
3584bbd6ef87SPatrick McHardy 		switch (po->tp_version) {
3585bbd6ef87SPatrick McHardy 		case TPACKET_V1:
3586bbd6ef87SPatrick McHardy 			po->tp_hdrlen = TPACKET_HDRLEN;
3587bbd6ef87SPatrick McHardy 			break;
3588bbd6ef87SPatrick McHardy 		case TPACKET_V2:
3589bbd6ef87SPatrick McHardy 			po->tp_hdrlen = TPACKET2_HDRLEN;
3590bbd6ef87SPatrick McHardy 			break;
3591f6fb8f10Schetan loke 		case TPACKET_V3:
3592f6fb8f10Schetan loke 			po->tp_hdrlen = TPACKET3_HDRLEN;
3593f6fb8f10Schetan loke 			break;
3594bbd6ef87SPatrick McHardy 		}
3595bbd6ef87SPatrick McHardy 
359669e3c75fSJohann Baudy 		err = -EINVAL;
35974ebf0ae2SDavid S. Miller 		if (unlikely((int)req->tp_block_size <= 0))
359869e3c75fSJohann Baudy 			goto out;
35994ebf0ae2SDavid S. Miller 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
360069e3c75fSJohann Baudy 			goto out;
36018913336aSPatrick McHardy 		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
36028913336aSPatrick McHardy 					po->tp_reserve))
360369e3c75fSJohann Baudy 			goto out;
36044ebf0ae2SDavid S. Miller 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
360569e3c75fSJohann Baudy 			goto out;
36061da177e4SLinus Torvalds 
360769e3c75fSJohann Baudy 		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
360869e3c75fSJohann Baudy 		if (unlikely(rb->frames_per_block <= 0))
360969e3c75fSJohann Baudy 			goto out;
361069e3c75fSJohann Baudy 		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
36114ebf0ae2SDavid S. Miller 					req->tp_frame_nr))
361269e3c75fSJohann Baudy 			goto out;
36131da177e4SLinus Torvalds 
36141da177e4SLinus Torvalds 		err = -ENOMEM;
36154ebf0ae2SDavid S. Miller 		order = get_order(req->tp_block_size);
36164ebf0ae2SDavid S. Miller 		pg_vec = alloc_pg_vec(req, order);
36174ebf0ae2SDavid S. Miller 		if (unlikely(!pg_vec))
36181da177e4SLinus Torvalds 			goto out;
3619f6fb8f10Schetan loke 		switch (po->tp_version) {
3620f6fb8f10Schetan loke 		case TPACKET_V3:
3621f6fb8f10Schetan loke 		/* Transmit path is not supported. We checked
3622f6fb8f10Schetan loke 		 * it above but just being paranoid
3623f6fb8f10Schetan loke 		 */
3624f6fb8f10Schetan loke 			if (!tx_ring)
3625f6fb8f10Schetan loke 				init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
3626f6fb8f10Schetan loke 				break;
3627f6fb8f10Schetan loke 		default:
3628f6fb8f10Schetan loke 			break;
3629f6fb8f10Schetan loke 		}
36301da177e4SLinus Torvalds 	}
36311da177e4SLinus Torvalds 	/* Done */
363269e3c75fSJohann Baudy 	else {
363369e3c75fSJohann Baudy 		err = -EINVAL;
36344ebf0ae2SDavid S. Miller 		if (unlikely(req->tp_frame_nr))
363569e3c75fSJohann Baudy 			goto out;
36361da177e4SLinus Torvalds 	}
36371da177e4SLinus Torvalds 
36381da177e4SLinus Torvalds 	lock_sock(sk);
36391da177e4SLinus Torvalds 
36401da177e4SLinus Torvalds 	/* Detach socket from network */
36411da177e4SLinus Torvalds 	spin_lock(&po->bind_lock);
36421da177e4SLinus Torvalds 	was_running = po->running;
36431da177e4SLinus Torvalds 	num = po->num;
36441da177e4SLinus Torvalds 	if (was_running) {
36451da177e4SLinus Torvalds 		po->num = 0;
3646ce06b03eSDavid S. Miller 		__unregister_prot_hook(sk, false);
36471da177e4SLinus Torvalds 	}
36481da177e4SLinus Torvalds 	spin_unlock(&po->bind_lock);
36491da177e4SLinus Torvalds 
36501da177e4SLinus Torvalds 	synchronize_net();
36511da177e4SLinus Torvalds 
36521da177e4SLinus Torvalds 	err = -EBUSY;
3653905db440SHerbert Xu 	mutex_lock(&po->pg_vec_lock);
36541da177e4SLinus Torvalds 	if (closing || atomic_read(&po->mapped) == 0) {
36551da177e4SLinus Torvalds 		err = 0;
365669e3c75fSJohann Baudy 		spin_lock_bh(&rb_queue->lock);
3657c053fd96SChangli Gao 		swap(rb->pg_vec, pg_vec);
365869e3c75fSJohann Baudy 		rb->frame_max = (req->tp_frame_nr - 1);
365969e3c75fSJohann Baudy 		rb->head = 0;
366069e3c75fSJohann Baudy 		rb->frame_size = req->tp_frame_size;
366169e3c75fSJohann Baudy 		spin_unlock_bh(&rb_queue->lock);
36621da177e4SLinus Torvalds 
3663c053fd96SChangli Gao 		swap(rb->pg_vec_order, order);
3664c053fd96SChangli Gao 		swap(rb->pg_vec_len, req->tp_block_nr);
36651da177e4SLinus Torvalds 
366669e3c75fSJohann Baudy 		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
366769e3c75fSJohann Baudy 		po->prot_hook.func = (po->rx_ring.pg_vec) ?
366869e3c75fSJohann Baudy 						tpacket_rcv : packet_rcv;
366969e3c75fSJohann Baudy 		skb_queue_purge(rb_queue);
36701da177e4SLinus Torvalds 		if (atomic_read(&po->mapped))
367140d4e3dfSEric Dumazet 			pr_err("packet_mmap: vma is busy: %d\n",
367269e3c75fSJohann Baudy 			       atomic_read(&po->mapped));
36731da177e4SLinus Torvalds 	}
3674905db440SHerbert Xu 	mutex_unlock(&po->pg_vec_lock);
36751da177e4SLinus Torvalds 
36761da177e4SLinus Torvalds 	spin_lock(&po->bind_lock);
3677ce06b03eSDavid S. Miller 	if (was_running) {
36781da177e4SLinus Torvalds 		po->num = num;
3679ce06b03eSDavid S. Miller 		register_prot_hook(sk);
36801da177e4SLinus Torvalds 	}
36811da177e4SLinus Torvalds 	spin_unlock(&po->bind_lock);
3682f6fb8f10Schetan loke 	if (closing && (po->tp_version > TPACKET_V2)) {
3683f6fb8f10Schetan loke 		/* Because we don't support block-based V3 on tx-ring */
3684f6fb8f10Schetan loke 		if (!tx_ring)
3685f6fb8f10Schetan loke 			prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
3686f6fb8f10Schetan loke 	}
36871da177e4SLinus Torvalds 	release_sock(sk);
36881da177e4SLinus Torvalds 
36891da177e4SLinus Torvalds 	if (pg_vec)
36901da177e4SLinus Torvalds 		free_pg_vec(pg_vec, order, req->tp_block_nr);
36911da177e4SLinus Torvalds out:
36921da177e4SLinus Torvalds 	return err;
36931da177e4SLinus Torvalds }
36941da177e4SLinus Torvalds 
369569e3c75fSJohann Baudy static int packet_mmap(struct file *file, struct socket *sock,
369669e3c75fSJohann Baudy 		struct vm_area_struct *vma)
36971da177e4SLinus Torvalds {
36981da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
36991da177e4SLinus Torvalds 	struct packet_sock *po = pkt_sk(sk);
370069e3c75fSJohann Baudy 	unsigned long size, expected_size;
370169e3c75fSJohann Baudy 	struct packet_ring_buffer *rb;
37021da177e4SLinus Torvalds 	unsigned long start;
37031da177e4SLinus Torvalds 	int err = -EINVAL;
37041da177e4SLinus Torvalds 	int i;
37051da177e4SLinus Torvalds 
37061da177e4SLinus Torvalds 	if (vma->vm_pgoff)
37071da177e4SLinus Torvalds 		return -EINVAL;
37081da177e4SLinus Torvalds 
3709905db440SHerbert Xu 	mutex_lock(&po->pg_vec_lock);
371069e3c75fSJohann Baudy 
371169e3c75fSJohann Baudy 	expected_size = 0;
371269e3c75fSJohann Baudy 	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
371369e3c75fSJohann Baudy 		if (rb->pg_vec) {
371469e3c75fSJohann Baudy 			expected_size += rb->pg_vec_len
371569e3c75fSJohann Baudy 						* rb->pg_vec_pages
371669e3c75fSJohann Baudy 						* PAGE_SIZE;
371769e3c75fSJohann Baudy 		}
371869e3c75fSJohann Baudy 	}
371969e3c75fSJohann Baudy 
372069e3c75fSJohann Baudy 	if (expected_size == 0)
37211da177e4SLinus Torvalds 		goto out;
372269e3c75fSJohann Baudy 
372369e3c75fSJohann Baudy 	size = vma->vm_end - vma->vm_start;
372469e3c75fSJohann Baudy 	if (size != expected_size)
37251da177e4SLinus Torvalds 		goto out;
37261da177e4SLinus Torvalds 
37271da177e4SLinus Torvalds 	start = vma->vm_start;
372869e3c75fSJohann Baudy 	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
372969e3c75fSJohann Baudy 		if (rb->pg_vec == NULL)
373069e3c75fSJohann Baudy 			continue;
373169e3c75fSJohann Baudy 
373269e3c75fSJohann Baudy 		for (i = 0; i < rb->pg_vec_len; i++) {
37330e3125c7SNeil Horman 			struct page *page;
37340e3125c7SNeil Horman 			void *kaddr = rb->pg_vec[i].buffer;
37354ebf0ae2SDavid S. Miller 			int pg_num;
37364ebf0ae2SDavid S. Miller 
3737c56b4d90SChangli Gao 			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
3738c56b4d90SChangli Gao 				page = pgv_to_page(kaddr);
37394ebf0ae2SDavid S. Miller 				err = vm_insert_page(vma, start, page);
37404ebf0ae2SDavid S. Miller 				if (unlikely(err))
37411da177e4SLinus Torvalds 					goto out;
37424ebf0ae2SDavid S. Miller 				start += PAGE_SIZE;
37430e3125c7SNeil Horman 				kaddr += PAGE_SIZE;
37441da177e4SLinus Torvalds 			}
37454ebf0ae2SDavid S. Miller 		}
374669e3c75fSJohann Baudy 	}
374769e3c75fSJohann Baudy 
37484ebf0ae2SDavid S. Miller 	atomic_inc(&po->mapped);
37491da177e4SLinus Torvalds 	vma->vm_ops = &packet_mmap_ops;
37501da177e4SLinus Torvalds 	err = 0;
37511da177e4SLinus Torvalds 
37521da177e4SLinus Torvalds out:
3753905db440SHerbert Xu 	mutex_unlock(&po->pg_vec_lock);
37541da177e4SLinus Torvalds 	return err;
37551da177e4SLinus Torvalds }
37561da177e4SLinus Torvalds 
375790ddc4f0SEric Dumazet static const struct proto_ops packet_ops_spkt = {
37581da177e4SLinus Torvalds 	.family =	PF_PACKET,
37591da177e4SLinus Torvalds 	.owner =	THIS_MODULE,
37601da177e4SLinus Torvalds 	.release =	packet_release,
37611da177e4SLinus Torvalds 	.bind =		packet_bind_spkt,
37621da177e4SLinus Torvalds 	.connect =	sock_no_connect,
37631da177e4SLinus Torvalds 	.socketpair =	sock_no_socketpair,
37641da177e4SLinus Torvalds 	.accept =	sock_no_accept,
37651da177e4SLinus Torvalds 	.getname =	packet_getname_spkt,
37661da177e4SLinus Torvalds 	.poll =		datagram_poll,
37671da177e4SLinus Torvalds 	.ioctl =	packet_ioctl,
37681da177e4SLinus Torvalds 	.listen =	sock_no_listen,
37691da177e4SLinus Torvalds 	.shutdown =	sock_no_shutdown,
37701da177e4SLinus Torvalds 	.setsockopt =	sock_no_setsockopt,
37711da177e4SLinus Torvalds 	.getsockopt =	sock_no_getsockopt,
37721da177e4SLinus Torvalds 	.sendmsg =	packet_sendmsg_spkt,
37731da177e4SLinus Torvalds 	.recvmsg =	packet_recvmsg,
37741da177e4SLinus Torvalds 	.mmap =		sock_no_mmap,
37751da177e4SLinus Torvalds 	.sendpage =	sock_no_sendpage,
37761da177e4SLinus Torvalds };
37771da177e4SLinus Torvalds 
377890ddc4f0SEric Dumazet static const struct proto_ops packet_ops = {
37791da177e4SLinus Torvalds 	.family =	PF_PACKET,
37801da177e4SLinus Torvalds 	.owner =	THIS_MODULE,
37811da177e4SLinus Torvalds 	.release =	packet_release,
37821da177e4SLinus Torvalds 	.bind =		packet_bind,
37831da177e4SLinus Torvalds 	.connect =	sock_no_connect,
37841da177e4SLinus Torvalds 	.socketpair =	sock_no_socketpair,
37851da177e4SLinus Torvalds 	.accept =	sock_no_accept,
37861da177e4SLinus Torvalds 	.getname =	packet_getname,
37871da177e4SLinus Torvalds 	.poll =		packet_poll,
37881da177e4SLinus Torvalds 	.ioctl =	packet_ioctl,
37891da177e4SLinus Torvalds 	.listen =	sock_no_listen,
37901da177e4SLinus Torvalds 	.shutdown =	sock_no_shutdown,
37911da177e4SLinus Torvalds 	.setsockopt =	packet_setsockopt,
37921da177e4SLinus Torvalds 	.getsockopt =	packet_getsockopt,
37931da177e4SLinus Torvalds 	.sendmsg =	packet_sendmsg,
37941da177e4SLinus Torvalds 	.recvmsg =	packet_recvmsg,
37951da177e4SLinus Torvalds 	.mmap =		packet_mmap,
37961da177e4SLinus Torvalds 	.sendpage =	sock_no_sendpage,
37971da177e4SLinus Torvalds };
37981da177e4SLinus Torvalds 
3799ec1b4cf7SStephen Hemminger static const struct net_proto_family packet_family_ops = {
38001da177e4SLinus Torvalds 	.family =	PF_PACKET,
38011da177e4SLinus Torvalds 	.create =	packet_create,
38021da177e4SLinus Torvalds 	.owner	=	THIS_MODULE,
38031da177e4SLinus Torvalds };
38041da177e4SLinus Torvalds 
38051da177e4SLinus Torvalds static struct notifier_block packet_netdev_notifier = {
38061da177e4SLinus Torvalds 	.notifier_call =	packet_notifier,
38071da177e4SLinus Torvalds };
38081da177e4SLinus Torvalds 
38091da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
38101da177e4SLinus Torvalds 
38111da177e4SLinus Torvalds static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
3812808f5114Sstephen hemminger 	__acquires(RCU)
38131da177e4SLinus Torvalds {
3814e372c414SDenis V. Lunev 	struct net *net = seq_file_net(seq);
3815808f5114Sstephen hemminger 
3816808f5114Sstephen hemminger 	rcu_read_lock();
3817808f5114Sstephen hemminger 	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
38181da177e4SLinus Torvalds }
38191da177e4SLinus Torvalds 
38201da177e4SLinus Torvalds static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
38211da177e4SLinus Torvalds {
38221bf40954SHerbert Xu 	struct net *net = seq_file_net(seq);
3823808f5114Sstephen hemminger 	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
38241da177e4SLinus Torvalds }
38251da177e4SLinus Torvalds 
38261da177e4SLinus Torvalds static void packet_seq_stop(struct seq_file *seq, void *v)
3827808f5114Sstephen hemminger 	__releases(RCU)
38281da177e4SLinus Torvalds {
3829808f5114Sstephen hemminger 	rcu_read_unlock();
38301da177e4SLinus Torvalds }
38311da177e4SLinus Torvalds 
38321da177e4SLinus Torvalds static int packet_seq_show(struct seq_file *seq, void *v)
38331da177e4SLinus Torvalds {
38341da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN)
38351da177e4SLinus Torvalds 		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
38361da177e4SLinus Torvalds 	else {
3837b7ceabd9SLi Zefan 		struct sock *s = sk_entry(v);
38381da177e4SLinus Torvalds 		const struct packet_sock *po = pkt_sk(s);
38391da177e4SLinus Torvalds 
38401da177e4SLinus Torvalds 		seq_printf(seq,
384171338aa7SDan Rosenberg 			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
38421da177e4SLinus Torvalds 			   s,
38431da177e4SLinus Torvalds 			   atomic_read(&s->sk_refcnt),
38441da177e4SLinus Torvalds 			   s->sk_type,
38451da177e4SLinus Torvalds 			   ntohs(po->num),
38461da177e4SLinus Torvalds 			   po->ifindex,
38471da177e4SLinus Torvalds 			   po->running,
38481da177e4SLinus Torvalds 			   atomic_read(&s->sk_rmem_alloc),
38491da177e4SLinus Torvalds 			   sock_i_uid(s),
38501da177e4SLinus Torvalds 			   sock_i_ino(s));
38511da177e4SLinus Torvalds 	}
38521da177e4SLinus Torvalds 
38531da177e4SLinus Torvalds 	return 0;
38541da177e4SLinus Torvalds }
38551da177e4SLinus Torvalds 
385656b3d975SPhilippe De Muyter static const struct seq_operations packet_seq_ops = {
38571da177e4SLinus Torvalds 	.start	= packet_seq_start,
38581da177e4SLinus Torvalds 	.next	= packet_seq_next,
38591da177e4SLinus Torvalds 	.stop	= packet_seq_stop,
38601da177e4SLinus Torvalds 	.show	= packet_seq_show,
38611da177e4SLinus Torvalds };
38621da177e4SLinus Torvalds 
38631da177e4SLinus Torvalds static int packet_seq_open(struct inode *inode, struct file *file)
38641da177e4SLinus Torvalds {
3865e372c414SDenis V. Lunev 	return seq_open_net(inode, file, &packet_seq_ops,
3866e372c414SDenis V. Lunev 			    sizeof(struct seq_net_private));
38671da177e4SLinus Torvalds }
38681da177e4SLinus Torvalds 
3869da7071d7SArjan van de Ven static const struct file_operations packet_seq_fops = {
38701da177e4SLinus Torvalds 	.owner		= THIS_MODULE,
38711da177e4SLinus Torvalds 	.open		= packet_seq_open,
38721da177e4SLinus Torvalds 	.read		= seq_read,
38731da177e4SLinus Torvalds 	.llseek		= seq_lseek,
3874e372c414SDenis V. Lunev 	.release	= seq_release_net,
38751da177e4SLinus Torvalds };
38761da177e4SLinus Torvalds 
38771da177e4SLinus Torvalds #endif
38781da177e4SLinus Torvalds 
38792c8c1e72SAlexey Dobriyan static int __net_init packet_net_init(struct net *net)
3880d12d01d6SDenis V. Lunev {
3881808f5114Sstephen hemminger 	spin_lock_init(&net->packet.sklist_lock);
38822aaef4e4SDenis V. Lunev 	INIT_HLIST_HEAD(&net->packet.sklist);
3883d12d01d6SDenis V. Lunev 
3884d12d01d6SDenis V. Lunev 	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
3885d12d01d6SDenis V. Lunev 		return -ENOMEM;
3886d12d01d6SDenis V. Lunev 
3887d12d01d6SDenis V. Lunev 	return 0;
3888d12d01d6SDenis V. Lunev }
3889d12d01d6SDenis V. Lunev 
38902c8c1e72SAlexey Dobriyan static void __net_exit packet_net_exit(struct net *net)
3891d12d01d6SDenis V. Lunev {
3892d12d01d6SDenis V. Lunev 	proc_net_remove(net, "packet");
3893d12d01d6SDenis V. Lunev }
3894d12d01d6SDenis V. Lunev 
3895d12d01d6SDenis V. Lunev static struct pernet_operations packet_net_ops = {
3896d12d01d6SDenis V. Lunev 	.init = packet_net_init,
3897d12d01d6SDenis V. Lunev 	.exit = packet_net_exit,
3898d12d01d6SDenis V. Lunev };
3899d12d01d6SDenis V. Lunev 
3900d12d01d6SDenis V. Lunev 
39011da177e4SLinus Torvalds static void __exit packet_exit(void)
39021da177e4SLinus Torvalds {
39031da177e4SLinus Torvalds 	unregister_netdevice_notifier(&packet_netdev_notifier);
3904d12d01d6SDenis V. Lunev 	unregister_pernet_subsys(&packet_net_ops);
39051da177e4SLinus Torvalds 	sock_unregister(PF_PACKET);
39061da177e4SLinus Torvalds 	proto_unregister(&packet_proto);
39071da177e4SLinus Torvalds }
39081da177e4SLinus Torvalds 
39091da177e4SLinus Torvalds static int __init packet_init(void)
39101da177e4SLinus Torvalds {
39111da177e4SLinus Torvalds 	int rc = proto_register(&packet_proto, 0);
39121da177e4SLinus Torvalds 
39131da177e4SLinus Torvalds 	if (rc != 0)
39141da177e4SLinus Torvalds 		goto out;
39151da177e4SLinus Torvalds 
39161da177e4SLinus Torvalds 	sock_register(&packet_family_ops);
3917d12d01d6SDenis V. Lunev 	register_pernet_subsys(&packet_net_ops);
39181da177e4SLinus Torvalds 	register_netdevice_notifier(&packet_netdev_notifier);
39191da177e4SLinus Torvalds out:
39201da177e4SLinus Torvalds 	return rc;
39211da177e4SLinus Torvalds }
39221da177e4SLinus Torvalds 
39231da177e4SLinus Torvalds module_init(packet_init);
39241da177e4SLinus Torvalds module_exit(packet_exit);
39251da177e4SLinus Torvalds MODULE_LICENSE("GPL");
39261da177e4SLinus Torvalds MODULE_ALIAS_NETPROTO(PF_PACKET);
3927