tap.c (ebc05ba7e8600b52a2a0c87a43105143368aca2a) tap.c (6fe3faf86757eb7f078ff06b23b206f17dc4fb36)
1#include <linux/etherdevice.h>
1#include <linux/etherdevice.h>
2#include <linux/if_macvlan.h>
2#include <linux/if_tap.h>
3#include <linux/if_vlan.h>
4#include <linux/interrupt.h>
5#include <linux/nsproxy.h>
6#include <linux/compat.h>
7#include <linux/if_tun.h>
8#include <linux/module.h>
9#include <linux/skbuff.h>
10#include <linux/cache.h>

--- 7 unchanged lines hidden (view full) ---

18#include <linux/uio.h>
19
20#include <net/net_namespace.h>
21#include <net/rtnetlink.h>
22#include <net/sock.h>
23#include <linux/virtio_net.h>
24#include <linux/skb_array.h>
25
3#include <linux/if_vlan.h>
4#include <linux/interrupt.h>
5#include <linux/nsproxy.h>
6#include <linux/compat.h>
7#include <linux/if_tun.h>
8#include <linux/module.h>
9#include <linux/skbuff.h>
10#include <linux/cache.h>

--- 7 unchanged lines hidden (view full) ---

18#include <linux/uio.h>
19
20#include <net/net_namespace.h>
21#include <net/rtnetlink.h>
22#include <net/sock.h>
23#include <linux/virtio_net.h>
24#include <linux/skb_array.h>
25
26/*
27 * A tap queue is the central object of this driver, it connects
28 * an open character device to a macvlan interface. There can be
29 * multiple queues on one interface, which map back to queues
30 * implemented in hardware on the underlying device.
31 *
32 * tap_proto is used to allocate queues through the sock allocation
33 * mechanism.
34 *
35 */
36struct tap_queue {
37 struct sock sk; /* embedded sock; tap_open() casts the sk_alloc() result to struct tap_queue *, so keep this first */
38 struct socket sock; /* socket handed out to callers by tap_get_socket() */
39 struct socket_wq wq;
40 int vnet_hdr_sz; /* NOTE(review): presumably the virtio-net header length used for this queue; its users are not visible here */
41 struct macvlan_dev __rcu *vlan; /* back-pointer to the owning device; set in tap_set_queue(), cleared in tap_put_queue()/tap_del_queues(), read via rcu_dereference()/rtnl_dereference() */
42 struct file *file; /* open file that owns this queue; assigned in tap_set_queue() */
43 unsigned int flags; /* IFF_* flags; IFF_VNET_HDR is tested in tap_handle_frame() */
44 u16 queue_index; /* slot of this queue in the owner's taps[] array while enabled */
45 bool enabled; /* true while the queue occupies a taps[] slot; toggled by tap_enable_queue()/tap_disable_queue() */
46 struct list_head next; /* link in the owner's queue_list */
47 struct skb_array skb_array; /* received skbs queued by tap_handle_frame(); sized from dev->tx_queue_len in tap_open() */
48};
49
50#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
51
52#define TAP_VNET_LE 0x80000000
53#define TAP_VNET_BE 0x40000000
54
55#ifdef CONFIG_TUN_VNET_CROSS_LE
56static inline bool tap_legacy_is_little_endian(struct tap_queue *q)
57{

--- 74 unchanged lines hidden (view full) ---

132
133#define GOODCOPY_LEN 128
134
135static const struct proto_ops tap_socket_ops;
136
137#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
138#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST)
139
26#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
27
28#define TAP_VNET_LE 0x80000000
29#define TAP_VNET_BE 0x40000000
30
31#ifdef CONFIG_TUN_VNET_CROSS_LE
32static inline bool tap_legacy_is_little_endian(struct tap_queue *q)
33{

--- 74 unchanged lines hidden (view full) ---

108
109#define GOODCOPY_LEN 128
110
111static const struct proto_ops tap_socket_ops;
112
113#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
114#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST)
115
140static struct macvlan_dev *tap_get_vlan_rcu(const struct net_device *dev)
116static struct tap_dev *tap_dev_get_rcu(const struct net_device *dev)
141{
142 return rcu_dereference(dev->rx_handler_data);
143}
144
145/*
146 * RCU usage:
147 * The tap_queue and the macvlan_dev are loosely coupled, the
148 * pointers from one to the other can only be read while rcu_read_lock

--- 5 unchanged lines hidden (view full) ---

154 * tap_get_queue() fails.
155 *
156 * There may still be references to the struct sock inside of the
157 * queue from outbound SKBs, but these never reference back to the
158 * file or the dev. The data structure is freed through __sk_free
159 * when both our references and any pending SKBs are gone.
160 */
161
117{
118 return rcu_dereference(dev->rx_handler_data);
119}
120
121/*
122 * RCU usage:
123 * The tap_queue and the tap_dev are loosely coupled, the
124 * pointers from one to the other can only be read while rcu_read_lock

--- 5 unchanged lines hidden (view full) ---

130 * tap_get_queue() fails.
131 *
132 * There may still be references to the struct sock inside of the
133 * queue from outbound SKBs, but these never reference back to the
134 * file or the dev. The data structure is freed through __sk_free
135 * when both our references and any pending SKBs are gone.
136 */
137
162static int tap_enable_queue(struct net_device *dev, struct file *file,
138static int tap_enable_queue(struct tap_dev *tap, struct file *file,
163 struct tap_queue *q)
164{
139 struct tap_queue *q)
140{
165 struct macvlan_dev *vlan = netdev_priv(dev);
166 int err = -EINVAL;
167
168 ASSERT_RTNL();
169
170 if (q->enabled)
171 goto out;
172
173 err = 0;
141 int err = -EINVAL;
142
143 ASSERT_RTNL();
144
145 if (q->enabled)
146 goto out;
147
148 err = 0;
174 rcu_assign_pointer(vlan->taps[vlan->numvtaps], q);
175 q->queue_index = vlan->numvtaps;
149 rcu_assign_pointer(tap->taps[tap->numvtaps], q);
150 q->queue_index = tap->numvtaps;
176 q->enabled = true;
177
151 q->enabled = true;
152
178 vlan->numvtaps++;
153 tap->numvtaps++;
179out:
180 return err;
181}
182
183/* Requires RTNL */
154out:
155 return err;
156}
157
158/* Requires RTNL */
184static int tap_set_queue(struct net_device *dev, struct file *file,
159static int tap_set_queue(struct tap_dev *tap, struct file *file,
185 struct tap_queue *q)
186{
160 struct tap_queue *q)
161{
187 struct macvlan_dev *vlan = netdev_priv(dev);
188
189 if (vlan->numqueues == MAX_TAP_QUEUES)
162 if (tap->numqueues == MAX_TAP_QUEUES)
190 return -EBUSY;
191
163 return -EBUSY;
164
192 rcu_assign_pointer(q->vlan, vlan);
193 rcu_assign_pointer(vlan->taps[vlan->numvtaps], q);
165 rcu_assign_pointer(q->tap, tap);
166 rcu_assign_pointer(tap->taps[tap->numvtaps], q);
194 sock_hold(&q->sk);
195
196 q->file = file;
167 sock_hold(&q->sk);
168
169 q->file = file;
197 q->queue_index = vlan->numvtaps;
170 q->queue_index = tap->numvtaps;
198 q->enabled = true;
199 file->private_data = q;
171 q->enabled = true;
172 file->private_data = q;
200 list_add_tail(&q->next, &vlan->queue_list);
173 list_add_tail(&q->next, &tap->queue_list);
201
174
202 vlan->numvtaps++;
203 vlan->numqueues++;
175 tap->numvtaps++;
176 tap->numqueues++;
204
205 return 0;
206}
207
208static int tap_disable_queue(struct tap_queue *q)
209{
177
178 return 0;
179}
180
181static int tap_disable_queue(struct tap_queue *q)
182{
210 struct macvlan_dev *vlan;
183 struct tap_dev *tap;
211 struct tap_queue *nq;
212
213 ASSERT_RTNL();
214 if (!q->enabled)
215 return -EINVAL;
216
184 struct tap_queue *nq;
185
186 ASSERT_RTNL();
187 if (!q->enabled)
188 return -EINVAL;
189
217 vlan = rtnl_dereference(q->vlan);
190 tap = rtnl_dereference(q->tap);
218
191
219 if (vlan) {
192 if (tap) {
220 int index = q->queue_index;
193 int index = q->queue_index;
221 BUG_ON(index >= vlan->numvtaps);
222 nq = rtnl_dereference(vlan->taps[vlan->numvtaps - 1]);
194 BUG_ON(index >= tap->numvtaps);
195 nq = rtnl_dereference(tap->taps[tap->numvtaps - 1]);
223 nq->queue_index = index;
224
196 nq->queue_index = index;
197
225 rcu_assign_pointer(vlan->taps[index], nq);
226 RCU_INIT_POINTER(vlan->taps[vlan->numvtaps - 1], NULL);
198 rcu_assign_pointer(tap->taps[index], nq);
199 RCU_INIT_POINTER(tap->taps[tap->numvtaps - 1], NULL);
227 q->enabled = false;
228
200 q->enabled = false;
201
229 vlan->numvtaps--;
202 tap->numvtaps--;
230 }
231
232 return 0;
233}
234
235/*
236 * The file owning the queue got closed, give up both
237 * the reference that the file holds as well as the
238 * one from the macvlan_dev if that still exists.
239 *
240 * Holding the RTNL lock makes sure that we don't get
241 * to the queue again after destroying it.
242 */
243static void tap_put_queue(struct tap_queue *q)
244{
203 }
204
205 return 0;
206}
207
208/*
209 * The file owning the queue got closed, give up both
210 * the reference that the file holds as well as the
211 * one from the tap_dev if that still exists.
212 *
213 * Holding the RTNL lock makes sure that we don't get
214 * to the queue again after destroying it.
215 */
216static void tap_put_queue(struct tap_queue *q)
217{
245 struct macvlan_dev *vlan;
218 struct tap_dev *tap;
246
247 rtnl_lock();
219
220 rtnl_lock();
248 vlan = rtnl_dereference(q->vlan);
221 tap = rtnl_dereference(q->tap);
249
222
250 if (vlan) {
223 if (tap) {
251 if (q->enabled)
252 BUG_ON(tap_disable_queue(q));
253
224 if (q->enabled)
225 BUG_ON(tap_disable_queue(q));
226
254 vlan->numqueues--;
255 RCU_INIT_POINTER(q->vlan, NULL);
227 tap->numqueues--;
228 RCU_INIT_POINTER(q->tap, NULL);
256 sock_put(&q->sk);
257 list_del_init(&q->next);
258 }
259
260 rtnl_unlock();
261
262 synchronize_rcu();
263 sock_put(&q->sk);
264}
265
266/*
267 * Select a queue based on the rxq of the device on which this packet
268 * arrived. If the incoming device is not mq, calculate a flow hash
269 * to select a queue. If all fails, find the first available queue.
270 * Cache vlan->numvtaps since it can become zero during the execution
271 * of this function.
272 */
229 sock_put(&q->sk);
230 list_del_init(&q->next);
231 }
232
233 rtnl_unlock();
234
235 synchronize_rcu();
236 sock_put(&q->sk);
237}
238
239/*
240 * Select a queue based on the rxq of the device on which this packet
241 * arrived. If the incoming device is not mq, calculate a flow hash
242 * to select a queue. If all fails, find the first available queue.
243 * Cache tap->numvtaps since it can become zero during the execution
244 * of this function.
245 */
273static struct tap_queue *tap_get_queue(struct net_device *dev,
246static struct tap_queue *tap_get_queue(struct tap_dev *tap,
274 struct sk_buff *skb)
275{
247 struct sk_buff *skb)
248{
276 struct macvlan_dev *vlan = netdev_priv(dev);
277 struct tap_queue *tap = NULL;
249 struct tap_queue *queue = NULL;
278 /* Access to taps array is protected by rcu, but access to numvtaps
279 * isn't. Below we use it to lookup a queue, but treat it as a hint
280 * and validate that the result isn't NULL - in case we are
281 * racing against queue removal.
282 */
250 /* Access to taps array is protected by rcu, but access to numvtaps
251 * isn't. Below we use it to lookup a queue, but treat it as a hint
252 * and validate that the result isn't NULL - in case we are
253 * racing against queue removal.
254 */
283 int numvtaps = ACCESS_ONCE(vlan->numvtaps);
255 int numvtaps = ACCESS_ONCE(tap->numvtaps);
284 __u32 rxq;
285
286 if (!numvtaps)
287 goto out;
288
289 if (numvtaps == 1)
290 goto single;
291
292 /* Check if we can use flow to select a queue */
293 rxq = skb_get_hash(skb);
294 if (rxq) {
256 __u32 rxq;
257
258 if (!numvtaps)
259 goto out;
260
261 if (numvtaps == 1)
262 goto single;
263
264 /* Check if we can use flow to select a queue */
265 rxq = skb_get_hash(skb);
266 if (rxq) {
295 tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
267 queue = rcu_dereference(tap->taps[rxq % numvtaps]);
296 goto out;
297 }
298
299 if (likely(skb_rx_queue_recorded(skb))) {
300 rxq = skb_get_rx_queue(skb);
301
302 while (unlikely(rxq >= numvtaps))
303 rxq -= numvtaps;
304
268 goto out;
269 }
270
271 if (likely(skb_rx_queue_recorded(skb))) {
272 rxq = skb_get_rx_queue(skb);
273
274 while (unlikely(rxq >= numvtaps))
275 rxq -= numvtaps;
276
305 tap = rcu_dereference(vlan->taps[rxq]);
277 queue = rcu_dereference(tap->taps[rxq]);
306 goto out;
307 }
308
309single:
278 goto out;
279 }
280
281single:
310 tap = rcu_dereference(vlan->taps[0]);
282 queue = rcu_dereference(tap->taps[0]);
311out:
283out:
312 return tap;
284 return queue;
313}
314
315/*
316 * The net_device is going away, give up the reference
317 * that it holds on all queues and safely set the pointer
318 * from the queues to NULL.
319 */
285}
286
287/*
288 * The net_device is going away, give up the reference
289 * that it holds on all queues and safely set the pointer
290 * from the queues to NULL.
291 */
320void tap_del_queues(struct net_device *dev)
292void tap_del_queues(struct tap_dev *tap)
321{
293{
322 struct macvlan_dev *vlan = netdev_priv(dev);
323 struct tap_queue *q, *tmp;
324
325 ASSERT_RTNL();
294 struct tap_queue *q, *tmp;
295
296 ASSERT_RTNL();
326 list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) {
297 list_for_each_entry_safe(q, tmp, &tap->queue_list, next) {
327 list_del_init(&q->next);
298 list_del_init(&q->next);
328 RCU_INIT_POINTER(q->vlan, NULL);
299 RCU_INIT_POINTER(q->tap, NULL);
329 if (q->enabled)
300 if (q->enabled)
330 vlan->numvtaps--;
331 vlan->numqueues--;
301 tap->numvtaps--;
302 tap->numqueues--;
332 sock_put(&q->sk);
333 }
303 sock_put(&q->sk);
304 }
334 BUG_ON(vlan->numvtaps);
335 BUG_ON(vlan->numqueues);
305 BUG_ON(tap->numvtaps);
306 BUG_ON(tap->numqueues);
336 /* guarantee that any future tap_set_queue will fail */
307 /* guarantee that any future tap_set_queue will fail */
337 vlan->numvtaps = MAX_TAP_QUEUES;
308 tap->numvtaps = MAX_TAP_QUEUES;
338}
339
340rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
341{
342 struct sk_buff *skb = *pskb;
343 struct net_device *dev = skb->dev;
309}
310
311rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
312{
313 struct sk_buff *skb = *pskb;
314 struct net_device *dev = skb->dev;
344 struct macvlan_dev *vlan;
315 struct tap_dev *tap;
345 struct tap_queue *q;
346 netdev_features_t features = TAP_FEATURES;
347
316 struct tap_queue *q;
317 netdev_features_t features = TAP_FEATURES;
318
348 vlan = tap_get_vlan_rcu(dev);
349 if (!vlan)
319 tap = tap_dev_get_rcu(dev);
320 if (!tap)
350 return RX_HANDLER_PASS;
351
321 return RX_HANDLER_PASS;
322
352 q = tap_get_queue(dev, skb);
323 q = tap_get_queue(tap, skb);
353 if (!q)
354 return RX_HANDLER_PASS;
355
356 if (__skb_array_full(&q->skb_array))
357 goto drop;
358
359 skb_push(skb, ETH_HLEN);
360
361 /* Apply the forward feature mask so that we perform segmentation
362 * according to the user's wishes. This only works if VNET_HDR is
363 * enabled.
364 */
365 if (q->flags & IFF_VNET_HDR)
324 if (!q)
325 return RX_HANDLER_PASS;
326
327 if (__skb_array_full(&q->skb_array))
328 goto drop;
329
330 skb_push(skb, ETH_HLEN);
331
332 /* Apply the forward feature mask so that we perform segmentation
333 * according to the user's wishes. This only works if VNET_HDR is
334 * enabled.
335 */
336 if (q->flags & IFF_VNET_HDR)
366 features |= vlan->tap_features;
337 features |= tap->tap_features;
367 if (netif_needs_gso(skb, features)) {
368 struct sk_buff *segs = __skb_gso_segment(skb, features, false);
369
370 if (IS_ERR(segs))
371 goto drop;
372
373 if (!segs) {
374 if (skb_array_produce(&q->skb_array, skb))

--- 28 unchanged lines hidden (view full) ---

403 }
404
405wake_up:
406 wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
407 return RX_HANDLER_CONSUMED;
408
409drop:
410 /* Count errors/drops only here, thus don't care about args. */
338 if (netif_needs_gso(skb, features)) {
339 struct sk_buff *segs = __skb_gso_segment(skb, features, false);
340
341 if (IS_ERR(segs))
342 goto drop;
343
344 if (!segs) {
345 if (skb_array_produce(&q->skb_array, skb))

--- 28 unchanged lines hidden (view full) ---

374 }
375
376wake_up:
377 wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
378 return RX_HANDLER_CONSUMED;
379
380drop:
381 /* Count errors/drops only here, thus don't care about args. */
411 macvlan_count_rx(vlan, 0, 0, 0);
382 if (tap->count_rx_dropped)
383 tap->count_rx_dropped(tap);
412 kfree_skb(skb);
413 return RX_HANDLER_CONSUMED;
414}
415
384 kfree_skb(skb);
385 return RX_HANDLER_CONSUMED;
386}
387
416int tap_get_minor(struct macvlan_dev *vlan)
388int tap_get_minor(struct tap_dev *tap)
417{
418 int retval = -ENOMEM;
419
420 mutex_lock(&macvtap_major.minor_lock);
389{
390 int retval = -ENOMEM;
391
392 mutex_lock(&macvtap_major.minor_lock);
421 retval = idr_alloc(&macvtap_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL);
393 retval = idr_alloc(&macvtap_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL);
422 if (retval >= 0) {
394 if (retval >= 0) {
423 vlan->minor = retval;
395 tap->minor = retval;
424 } else if (retval == -ENOSPC) {
396 } else if (retval == -ENOSPC) {
425 netdev_err(vlan->dev, "Too many tap devices\n");
397 netdev_err(tap->dev, "Too many tap devices\n");
426 retval = -EINVAL;
427 }
428 mutex_unlock(&macvtap_major.minor_lock);
429 return retval < 0 ? retval : 0;
430}
431
398 retval = -EINVAL;
399 }
400 mutex_unlock(&macvtap_major.minor_lock);
401 return retval < 0 ? retval : 0;
402}
403
432void tap_free_minor(struct macvlan_dev *vlan)
404void tap_free_minor(struct tap_dev *tap)
433{
434 mutex_lock(&macvtap_major.minor_lock);
405{
406 mutex_lock(&macvtap_major.minor_lock);
435 if (vlan->minor) {
436 idr_remove(&macvtap_major.minor_idr, vlan->minor);
437 vlan->minor = 0;
407 if (tap->minor) {
408 idr_remove(&macvtap_major.minor_idr, tap->minor);
409 tap->minor = 0;
438 }
439 mutex_unlock(&macvtap_major.minor_lock);
440}
441
410 }
411 mutex_unlock(&macvtap_major.minor_lock);
412}
413
442static struct net_device *dev_get_by_tap_minor(int minor)
414static struct tap_dev *dev_get_by_tap_minor(int minor)
443{
444 struct net_device *dev = NULL;
415{
416 struct net_device *dev = NULL;
445 struct macvlan_dev *vlan;
417 struct tap_dev *tap;
446
447 mutex_lock(&macvtap_major.minor_lock);
418
419 mutex_lock(&macvtap_major.minor_lock);
448 vlan = idr_find(&macvtap_major.minor_idr, minor);
449 if (vlan) {
450 dev = vlan->dev;
420 tap = idr_find(&macvtap_major.minor_idr, minor);
421 if (tap) {
422 dev = tap->dev;
451 dev_hold(dev);
452 }
453 mutex_unlock(&macvtap_major.minor_lock);
423 dev_hold(dev);
424 }
425 mutex_unlock(&macvtap_major.minor_lock);
454 return dev;
426 return tap;
455}
456
457static void tap_sock_write_space(struct sock *sk)
458{
459 wait_queue_head_t *wqueue;
460
461 if (!sock_writeable(sk) ||
462 !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))

--- 9 unchanged lines hidden (view full) ---

472 struct tap_queue *q = container_of(sk, struct tap_queue, sk);
473
474 skb_array_cleanup(&q->skb_array);
475}
476
477static int tap_open(struct inode *inode, struct file *file)
478{
479 struct net *net = current->nsproxy->net_ns;
427}
428
429static void tap_sock_write_space(struct sock *sk)
430{
431 wait_queue_head_t *wqueue;
432
433 if (!sock_writeable(sk) ||
434 !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))

--- 9 unchanged lines hidden (view full) ---

444 struct tap_queue *q = container_of(sk, struct tap_queue, sk);
445
446 skb_array_cleanup(&q->skb_array);
447}
448
449static int tap_open(struct inode *inode, struct file *file)
450{
451 struct net *net = current->nsproxy->net_ns;
480 struct net_device *dev;
452 struct tap_dev *tap;
481 struct tap_queue *q;
482 int err = -ENODEV;
483
484 rtnl_lock();
453 struct tap_queue *q;
454 int err = -ENODEV;
455
456 rtnl_lock();
485 dev = dev_get_by_tap_minor(iminor(inode));
486 if (!dev)
457 tap = dev_get_by_tap_minor(iminor(inode));
458 if (!tap)
487 goto err;
488
489 err = -ENOMEM;
490 q = (struct tap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
491 &tap_proto, 0);
492 if (!q)
493 goto err;
494

--- 11 unchanged lines hidden (view full) ---

506
507 /*
508 * so far only KVM virtio_net uses tap, enable zero copy between
509 * guest kernel and host kernel when lower device supports zerocopy
510 *
511 * The macvlan supports zerocopy iff the lower device supports zero
512 * copy so we don't have to look at the lower device directly.
513 */
459 goto err;
460
461 err = -ENOMEM;
462 q = (struct tap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
463 &tap_proto, 0);
464 if (!q)
465 goto err;
466

--- 11 unchanged lines hidden (view full) ---

478
479 /*
480 * so far only KVM virtio_net uses tap, enable zero copy between
481 * guest kernel and host kernel when lower device supports zerocopy
482 *
483 * The macvlan supports zerocopy iff the lower device supports zero
484 * copy so we don't have to look at the lower device directly.
485 */
514 if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG))
486 if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG))
515 sock_set_flag(&q->sk, SOCK_ZEROCOPY);
516
517 err = -ENOMEM;
487 sock_set_flag(&q->sk, SOCK_ZEROCOPY);
488
489 err = -ENOMEM;
518 if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL))
490 if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL))
519 goto err_array;
520
491 goto err_array;
492
521 err = tap_set_queue(dev, file, q);
493 err = tap_set_queue(tap, file, q);
522 if (err)
523 goto err_queue;
524
494 if (err)
495 goto err_queue;
496
525 dev_put(dev);
497 dev_put(tap->dev);
526
527 rtnl_unlock();
528 return err;
529
530err_queue:
531 skb_array_cleanup(&q->skb_array);
532err_array:
533 sock_put(&q->sk);
534err:
498
499 rtnl_unlock();
500 return err;
501
502err_queue:
503 skb_array_cleanup(&q->skb_array);
504err_array:
505 sock_put(&q->sk);
506err:
535 if (dev)
536 dev_put(dev);
507 if (tap)
508 dev_put(tap->dev);
537
538 rtnl_unlock();
539 return err;
540}
541
542static int tap_release(struct inode *inode, struct file *file)
543{
544 struct tap_queue *q = file->private_data;

--- 51 unchanged lines hidden (view full) ---

596#define TAP_RESERVE HH_DATA_OFF(ETH_HLEN)
597
598/* Get packet from user space buffer */
599static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m,
600 struct iov_iter *from, int noblock)
601{
602 int good_linear = SKB_MAX_HEAD(TAP_RESERVE);
603 struct sk_buff *skb;
509
510 rtnl_unlock();
511 return err;
512}
513
514static int tap_release(struct inode *inode, struct file *file)
515{
516 struct tap_queue *q = file->private_data;

--- 51 unchanged lines hidden (view full) ---

568#define TAP_RESERVE HH_DATA_OFF(ETH_HLEN)
569
570/* Get packet from user space buffer */
571static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m,
572 struct iov_iter *from, int noblock)
573{
574 int good_linear = SKB_MAX_HEAD(TAP_RESERVE);
575 struct sk_buff *skb;
604 struct macvlan_dev *vlan;
576 struct tap_dev *tap;
605 unsigned long total_len = iov_iter_count(from);
606 unsigned long len = total_len;
607 int err;
608 struct virtio_net_hdr vnet_hdr = { 0 };
609 int vnet_hdr_len = 0;
610 int copylen = 0;
611 int depth;
612 bool zerocopy = false;

--- 80 unchanged lines hidden (view full) ---

693
694 /* Move network header to the right position for VLAN tagged packets */
695 if ((skb->protocol == htons(ETH_P_8021Q) ||
696 skb->protocol == htons(ETH_P_8021AD)) &&
697 __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
698 skb_set_network_header(skb, depth);
699
700 rcu_read_lock();
577 unsigned long total_len = iov_iter_count(from);
578 unsigned long len = total_len;
579 int err;
580 struct virtio_net_hdr vnet_hdr = { 0 };
581 int vnet_hdr_len = 0;
582 int copylen = 0;
583 int depth;
584 bool zerocopy = false;

--- 80 unchanged lines hidden (view full) ---

665
666 /* Move network header to the right position for VLAN tagged packets */
667 if ((skb->protocol == htons(ETH_P_8021Q) ||
668 skb->protocol == htons(ETH_P_8021AD)) &&
669 __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
670 skb_set_network_header(skb, depth);
671
672 rcu_read_lock();
701 vlan = rcu_dereference(q->vlan);
673 tap = rcu_dereference(q->tap);
702 /* copy skb_ubuf_info for callback when skb has no error */
703 if (zerocopy) {
704 skb_shinfo(skb)->destructor_arg = m->msg_control;
705 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
706 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
707 } else if (m && m->msg_control) {
708 struct ubuf_info *uarg = m->msg_control;
709 uarg->callback(uarg, false);
710 }
711
674 /* copy skb_ubuf_info for callback when skb has no error */
675 if (zerocopy) {
676 skb_shinfo(skb)->destructor_arg = m->msg_control;
677 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
678 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
679 } else if (m && m->msg_control) {
680 struct ubuf_info *uarg = m->msg_control;
681 uarg->callback(uarg, false);
682 }
683
712 if (vlan) {
713 skb->dev = vlan->dev;
684 if (tap) {
685 skb->dev = tap->dev;
714 dev_queue_xmit(skb);
715 } else {
716 kfree_skb(skb);
717 }
718 rcu_read_unlock();
719
720 return total_len;
721
722err_kfree:
723 kfree_skb(skb);
724
725err:
726 rcu_read_lock();
686 dev_queue_xmit(skb);
687 } else {
688 kfree_skb(skb);
689 }
690 rcu_read_unlock();
691
692 return total_len;
693
694err_kfree:
695 kfree_skb(skb);
696
697err:
698 rcu_read_lock();
727 vlan = rcu_dereference(q->vlan);
728 if (vlan)
729 this_cpu_inc(vlan->pcpu_stats->tx_dropped);
699 tap = rcu_dereference(q->tap);
700 if (tap && tap->count_tx_dropped)
701 tap->count_tx_dropped(tap);
730 rcu_read_unlock();
731
732 return err;
733}
734
735static ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from)
736{
737 struct file *file = iocb->ki_filp;

--- 110 unchanged lines hidden (view full) ---

848
849 ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK);
850 ret = min_t(ssize_t, ret, len);
851 if (ret > 0)
852 iocb->ki_pos = ret;
853 return ret;
854}
855
702 rcu_read_unlock();
703
704 return err;
705}
706
707static ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from)
708{
709 struct file *file = iocb->ki_filp;

--- 110 unchanged lines hidden (view full) ---

820
821 ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK);
822 ret = min_t(ssize_t, ret, len);
823 if (ret > 0)
824 iocb->ki_pos = ret;
825 return ret;
826}
827
856static struct macvlan_dev *tap_get_vlan(struct tap_queue *q)
828static struct tap_dev *tap_get_tap_dev(struct tap_queue *q)
857{
829{
858 struct macvlan_dev *vlan;
830 struct tap_dev *tap;
859
860 ASSERT_RTNL();
831
832 ASSERT_RTNL();
861 vlan = rtnl_dereference(q->vlan);
862 if (vlan)
863 dev_hold(vlan->dev);
833 tap = rtnl_dereference(q->tap);
834 if (tap)
835 dev_hold(tap->dev);
864
836
865 return vlan;
837 return tap;
866}
867
838}
839
868static void tap_put_vlan(struct macvlan_dev *vlan)
840static void tap_put_tap_dev(struct tap_dev *tap)
869{
841{
870 dev_put(vlan->dev);
842 dev_put(tap->dev);
871}
872
873static int tap_ioctl_set_queue(struct file *file, unsigned int flags)
874{
875 struct tap_queue *q = file->private_data;
843}
844
845static int tap_ioctl_set_queue(struct file *file, unsigned int flags)
846{
847 struct tap_queue *q = file->private_data;
876 struct macvlan_dev *vlan;
848 struct tap_dev *tap;
877 int ret;
878
849 int ret;
850
879 vlan = tap_get_vlan(q);
880 if (!vlan)
851 tap = tap_get_tap_dev(q);
852 if (!tap)
881 return -EINVAL;
882
883 if (flags & IFF_ATTACH_QUEUE)
853 return -EINVAL;
854
855 if (flags & IFF_ATTACH_QUEUE)
884 ret = tap_enable_queue(vlan->dev, file, q);
856 ret = tap_enable_queue(tap, file, q);
885 else if (flags & IFF_DETACH_QUEUE)
886 ret = tap_disable_queue(q);
887 else
888 ret = -EINVAL;
889
857 else if (flags & IFF_DETACH_QUEUE)
858 ret = tap_disable_queue(q);
859 else
860 ret = -EINVAL;
861
890 tap_put_vlan(vlan);
862 tap_put_tap_dev(tap);
891 return ret;
892}
893
894static int set_offload(struct tap_queue *q, unsigned long arg)
895{
863 return ret;
864}
865
866static int set_offload(struct tap_queue *q, unsigned long arg)
867{
896 struct macvlan_dev *vlan;
868 struct tap_dev *tap;
897 netdev_features_t features;
898 netdev_features_t feature_mask = 0;
899
869 netdev_features_t features;
870 netdev_features_t feature_mask = 0;
871
900 vlan = rtnl_dereference(q->vlan);
901 if (!vlan)
872 tap = rtnl_dereference(q->tap);
873 if (!tap)
902 return -ENOLINK;
903
874 return -ENOLINK;
875
904 features = vlan->dev->features;
876 features = tap->dev->features;
905
906 if (arg & TUN_F_CSUM) {
907 feature_mask = NETIF_F_HW_CSUM;
908
909 if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
910 if (arg & TUN_F_TSO_ECN)
911 feature_mask |= NETIF_F_TSO_ECN;
912 if (arg & TUN_F_TSO4)

--- 17 unchanged lines hidden (view full) ---

930 if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO))
931 features |= RX_OFFLOADS;
932 else
933 features &= ~RX_OFFLOADS;
934
935 /* tap_features are the same as features on tun/tap and
936 * reflect user expectations.
937 */
877
878 if (arg & TUN_F_CSUM) {
879 feature_mask = NETIF_F_HW_CSUM;
880
881 if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
882 if (arg & TUN_F_TSO_ECN)
883 feature_mask |= NETIF_F_TSO_ECN;
884 if (arg & TUN_F_TSO4)

--- 17 unchanged lines hidden (view full) ---

902 if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO))
903 features |= RX_OFFLOADS;
904 else
905 features &= ~RX_OFFLOADS;
906
907 /* tap_features are the same as features on tun/tap and
908 * reflect user expectations.
909 */
938 vlan->tap_features = feature_mask;
939 vlan->set_features = features;
940 netdev_update_features(vlan->dev);
910 tap->tap_features = feature_mask;
911 if (tap->update_features)
912 tap->update_features(tap, features);
941
942 return 0;
943}
944
945/*
946 * provide compatibility with generic tun/tap interface
947 */
948static long tap_ioctl(struct file *file, unsigned int cmd,
949 unsigned long arg)
950{
951 struct tap_queue *q = file->private_data;
913
914 return 0;
915}
916
917/*
918 * provide compatibility with generic tun/tap interface
919 */
920static long tap_ioctl(struct file *file, unsigned int cmd,
921 unsigned long arg)
922{
923 struct tap_queue *q = file->private_data;
952 struct macvlan_dev *vlan;
924 struct tap_dev *tap;
953 void __user *argp = (void __user *)arg;
954 struct ifreq __user *ifr = argp;
955 unsigned int __user *up = argp;
956 unsigned short u;
957 int __user *sp = argp;
958 struct sockaddr sa;
959 int s;
960 int ret;

--- 9 unchanged lines hidden (view full) ---

970 ret = -EINVAL;
971 else
972 q->flags = (q->flags & ~TAP_IFFEATURES) | u;
973
974 return ret;
975
976 case TUNGETIFF:
977 rtnl_lock();
925 void __user *argp = (void __user *)arg;
926 struct ifreq __user *ifr = argp;
927 unsigned int __user *up = argp;
928 unsigned short u;
929 int __user *sp = argp;
930 struct sockaddr sa;
931 int s;
932 int ret;

--- 9 unchanged lines hidden (view full) ---

942 ret = -EINVAL;
943 else
944 q->flags = (q->flags & ~TAP_IFFEATURES) | u;
945
946 return ret;
947
948 case TUNGETIFF:
949 rtnl_lock();
978 vlan = tap_get_vlan(q);
979 if (!vlan) {
950 tap = tap_get_tap_dev(q);
951 if (!tap) {
980 rtnl_unlock();
981 return -ENOLINK;
982 }
983
984 ret = 0;
985 u = q->flags;
952 rtnl_unlock();
953 return -ENOLINK;
954 }
955
956 ret = 0;
957 u = q->flags;
986 if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
958 if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) ||
987 put_user(u, &ifr->ifr_flags))
988 ret = -EFAULT;
959 put_user(u, &ifr->ifr_flags))
960 ret = -EFAULT;
989 tap_put_vlan(vlan);
961 tap_put_tap_dev(tap);
990 rtnl_unlock();
991 return ret;
992
993 case TUNSETQUEUE:
994 if (get_user(u, &ifr->ifr_flags))
995 return -EFAULT;
996 rtnl_lock();
997 ret = tap_ioctl_set_queue(file, u);

--- 56 unchanged lines hidden (view full) ---

1054
1055 rtnl_lock();
1056 ret = set_offload(q, arg);
1057 rtnl_unlock();
1058 return ret;
1059
1060 case SIOCGIFHWADDR:
1061 rtnl_lock();
962 rtnl_unlock();
963 return ret;
964
965 case TUNSETQUEUE:
966 if (get_user(u, &ifr->ifr_flags))
967 return -EFAULT;
968 rtnl_lock();
969 ret = tap_ioctl_set_queue(file, u);

--- 56 unchanged lines hidden (view full) ---

1026
1027 rtnl_lock();
1028 ret = set_offload(q, arg);
1029 rtnl_unlock();
1030 return ret;
1031
1032 case SIOCGIFHWADDR:
1033 rtnl_lock();
1062 vlan = tap_get_vlan(q);
1063 if (!vlan) {
1034 tap = tap_get_tap_dev(q);
1035 if (!tap) {
1064 rtnl_unlock();
1065 return -ENOLINK;
1066 }
1067 ret = 0;
1036 rtnl_unlock();
1037 return -ENOLINK;
1038 }
1039 ret = 0;
1068 u = vlan->dev->type;
1069 if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
1070 copy_to_user(&ifr->ifr_hwaddr.sa_data, vlan->dev->dev_addr, ETH_ALEN) ||
1040 u = tap->dev->type;
1041 if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) ||
1042 copy_to_user(&ifr->ifr_hwaddr.sa_data, tap->dev->dev_addr, ETH_ALEN) ||
1071 put_user(u, &ifr->ifr_hwaddr.sa_family))
1072 ret = -EFAULT;
1043 put_user(u, &ifr->ifr_hwaddr.sa_family))
1044 ret = -EFAULT;
1073 tap_put_vlan(vlan);
1045 tap_put_tap_dev(tap);
1074 rtnl_unlock();
1075 return ret;
1076
1077 case SIOCSIFHWADDR:
1078 if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa)))
1079 return -EFAULT;
1080 rtnl_lock();
1046 rtnl_unlock();
1047 return ret;
1048
1049 case SIOCSIFHWADDR:
1050 if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa)))
1051 return -EFAULT;
1052 rtnl_lock();
1081 vlan = tap_get_vlan(q);
1082 if (!vlan) {
1053 tap = tap_get_tap_dev(q);
1054 if (!tap) {
1083 rtnl_unlock();
1084 return -ENOLINK;
1085 }
1055 rtnl_unlock();
1056 return -ENOLINK;
1057 }
1086 ret = dev_set_mac_address(vlan->dev, &sa);
1087 tap_put_vlan(vlan);
1058 ret = dev_set_mac_address(tap->dev, &sa);
1059 tap_put_tap_dev(tap);
1088 rtnl_unlock();
1089 return ret;
1090
1091 default:
1092 return -EINVAL;
1093 }
1094}
1095

--- 66 unchanged lines hidden (view full) ---

1162 return ERR_PTR(-EINVAL);
1163 q = file->private_data;
1164 if (!q)
1165 return ERR_PTR(-EBADFD);
1166 return &q->sock;
1167}
1168EXPORT_SYMBOL_GPL(tap_get_socket);
1169
1060 rtnl_unlock();
1061 return ret;
1062
1063 default:
1064 return -EINVAL;
1065 }
1066}
1067

--- 66 unchanged lines hidden (view full) ---

1134 return ERR_PTR(-EINVAL);
1135 q = file->private_data;
1136 if (!q)
1137 return ERR_PTR(-EBADFD);
1138 return &q->sock;
1139}
1140EXPORT_SYMBOL_GPL(tap_get_socket);
1141
1170int tap_queue_resize(struct macvlan_dev *vlan)
1142int tap_queue_resize(struct tap_dev *tap)
1171{
1143{
1172 struct net_device *dev = vlan->dev;
1144 struct net_device *dev = tap->dev;
1173 struct tap_queue *q;
1174 struct skb_array **arrays;
1145 struct tap_queue *q;
1146 struct skb_array **arrays;
1175 int n = vlan->numqueues;
1147 int n = tap->numqueues;
1176 int ret, i = 0;
1177
1178 arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
1179 if (!arrays)
1180 return -ENOMEM;
1181
1148 int ret, i = 0;
1149
1150 arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
1151 if (!arrays)
1152 return -ENOMEM;
1153
1182 list_for_each_entry(q, &vlan->queue_list, next)
1154 list_for_each_entry(q, &tap->queue_list, next)
1183 arrays[i++] = &q->skb_array;
1184
1185 ret = skb_array_resize_multiple(arrays, n,
1186 dev->tx_queue_len, GFP_KERNEL);
1187
1188 kfree(arrays);
1189 return ret;
1190}

--- 36 unchanged lines hidden ---
1155 arrays[i++] = &q->skb_array;
1156
1157 ret = skb_array_resize_multiple(arrays, n,
1158 dev->tx_queue_len, GFP_KERNEL);
1159
1160 kfree(arrays);
1161 return ret;
1162}

--- 36 unchanged lines hidden ---