tun.c (14a4467a0a5eacb2ebbe3aab1b4e25af3519c76a) tun.c (83b1bc122cab87547731a154db5feec5b9d4807c)
1/*
2 * TUN - Universal TUN/TAP device driver.
3 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.

--- 174 unchanged lines hidden (view full) ---

183 bool napi_frags_enabled;
184 struct mutex napi_mutex; /* Protects access to the above napi */
185 struct list_head next;
186 struct tun_struct *detached;
187 struct ptr_ring tx_ring;
188 struct xdp_rxq_info xdp_rxq;
189};
190
1/*
2 * TUN - Universal TUN/TAP device driver.
3 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.

--- 174 unchanged lines hidden (view full) ---

183 bool napi_frags_enabled;
184 struct mutex napi_mutex; /* Protects access to the above napi */
185 struct list_head next;
186 struct tun_struct *detached;
187 struct ptr_ring tx_ring;
188 struct xdp_rxq_info xdp_rxq;
189};
190
/*
 * Accumulator used to batch page releases across a run of XDP buffers.
 *
 * tun_xdp_one() bumps @count while consecutive dropped buffers resolve to
 * the same head page, and calls tun_put_page() to flush the pending count
 * whenever a different page shows up. The sendmsg batch path zeroes the
 * structure before the loop and flushes it once more after the loop.
 */
191struct tun_page {
192 struct page *page;	/* head page being accumulated; NULL when nothing is pending */
193 int count;	/* pending count handed to __page_frag_cache_drain() for @page */
194};
195
191struct tun_flow_entry {
192 struct hlist_node hash_link;
193 struct rcu_head rcu;
194 struct tun_struct *tun;
195
196 u32 rxhash;
197 u32 rps_rxhash;
198 int queue_index;
196struct tun_flow_entry {
197 struct hlist_node hash_link;
198 struct rcu_head rcu;
199 struct tun_struct *tun;
200
201 u32 rxhash;
202 u32 rps_rxhash;
203 int queue_index;
199 unsigned long updated;
204 unsigned long updated ____cacheline_aligned_in_smp;
200};
201
202#define TUN_NUM_FLOW_ENTRIES 1024
203#define TUN_MASK_FLOW_ENTRIES (TUN_NUM_FLOW_ENTRIES - 1)
204
205struct tun_prog {
206 struct rcu_head rcu;
207 struct bpf_prog *prog;

--- 321 unchanged lines hidden (view full) ---

529 else
530 head = &tun->flows[tun_hashfn(rxhash)];
531
532 rcu_read_lock();
533
534 e = tun_flow_find(head, rxhash);
535 if (likely(e)) {
536 /* TODO: keep queueing to old queue until it's empty? */
205};
206
207#define TUN_NUM_FLOW_ENTRIES 1024
208#define TUN_MASK_FLOW_ENTRIES (TUN_NUM_FLOW_ENTRIES - 1)
209
210struct tun_prog {
211 struct rcu_head rcu;
212 struct bpf_prog *prog;

--- 321 unchanged lines hidden (view full) ---

534 else
535 head = &tun->flows[tun_hashfn(rxhash)];
536
537 rcu_read_lock();
538
539 e = tun_flow_find(head, rxhash);
540 if (likely(e)) {
541 /* TODO: keep queueing to old queue until it's empty? */
537 e->queue_index = queue_index;
538 e->updated = jiffies;
542 if (e->queue_index != queue_index)
543 e->queue_index = queue_index;
544 if (e->updated != jiffies)
545 e->updated = jiffies;
539 sock_rps_record_flow_hash(e->rps_rxhash);
540 } else {
541 spin_lock_bh(&tun->lock);
542 if (!tun_flow_find(head, rxhash) &&
543 tun->flow_count < MAX_TAP_FLOWS)
544 tun_flow_create(tun, head, rxhash, queue_index);
545
546 if (!timer_pending(&tun->flow_gc_timer))

--- 697 unchanged lines hidden (view full) ---

1244 case XDP_QUERY_PROG:
1245 xdp->prog_id = tun_xdp_query(dev);
1246 return 0;
1247 default:
1248 return -EINVAL;
1249 }
1250}
1251
546 sock_rps_record_flow_hash(e->rps_rxhash);
547 } else {
548 spin_lock_bh(&tun->lock);
549 if (!tun_flow_find(head, rxhash) &&
550 tun->flow_count < MAX_TAP_FLOWS)
551 tun_flow_create(tun, head, rxhash, queue_index);
552
553 if (!timer_pending(&tun->flow_gc_timer))

--- 697 unchanged lines hidden (view full) ---

1251 case XDP_QUERY_PROG:
1252 xdp->prog_id = tun_xdp_query(dev);
1253 return 0;
1254 default:
1255 return -EINVAL;
1256 }
1257}
1258
1259static int tun_net_change_carrier(struct net_device *dev, bool new_carrier)
1260{
1261 if (new_carrier) {
1262 struct tun_struct *tun = netdev_priv(dev);
1263
1264 if (!tun->numqueues)
1265 return -EPERM;
1266
1267 netif_carrier_on(dev);
1268 } else {
1269 netif_carrier_off(dev);
1270 }
1271 return 0;
1272}
1273
1252static const struct net_device_ops tun_netdev_ops = {
1253 .ndo_uninit = tun_net_uninit,
1254 .ndo_open = tun_net_open,
1255 .ndo_stop = tun_net_close,
1256 .ndo_start_xmit = tun_net_xmit,
1257 .ndo_fix_features = tun_net_fix_features,
1258 .ndo_select_queue = tun_select_queue,
1259 .ndo_set_rx_headroom = tun_set_headroom,
1260 .ndo_get_stats64 = tun_net_get_stats64,
1274static const struct net_device_ops tun_netdev_ops = {
1275 .ndo_uninit = tun_net_uninit,
1276 .ndo_open = tun_net_open,
1277 .ndo_stop = tun_net_close,
1278 .ndo_start_xmit = tun_net_xmit,
1279 .ndo_fix_features = tun_net_fix_features,
1280 .ndo_select_queue = tun_select_queue,
1281 .ndo_set_rx_headroom = tun_set_headroom,
1282 .ndo_get_stats64 = tun_net_get_stats64,
1283 .ndo_change_carrier = tun_net_change_carrier,
1261};
1262
1263static void __tun_xdp_flush_tfile(struct tun_file *tfile)
1264{
1265 /* Notify and wake up reader process */
1266 if (tfile->flags & TUN_FASYNC)
1267 kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1268 tfile->socket.sk->sk_data_ready(tfile->socket.sk);

--- 66 unchanged lines hidden (view full) ---

1335 .ndo_set_mac_address = eth_mac_addr,
1336 .ndo_validate_addr = eth_validate_addr,
1337 .ndo_select_queue = tun_select_queue,
1338 .ndo_features_check = passthru_features_check,
1339 .ndo_set_rx_headroom = tun_set_headroom,
1340 .ndo_get_stats64 = tun_net_get_stats64,
1341 .ndo_bpf = tun_xdp,
1342 .ndo_xdp_xmit = tun_xdp_xmit,
1284};
1285
1286static void __tun_xdp_flush_tfile(struct tun_file *tfile)
1287{
1288 /* Notify and wake up reader process */
1289 if (tfile->flags & TUN_FASYNC)
1290 kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1291 tfile->socket.sk->sk_data_ready(tfile->socket.sk);

--- 66 unchanged lines hidden (view full) ---

1358 .ndo_set_mac_address = eth_mac_addr,
1359 .ndo_validate_addr = eth_validate_addr,
1360 .ndo_select_queue = tun_select_queue,
1361 .ndo_features_check = passthru_features_check,
1362 .ndo_set_rx_headroom = tun_set_headroom,
1363 .ndo_get_stats64 = tun_net_get_stats64,
1364 .ndo_bpf = tun_xdp,
1365 .ndo_xdp_xmit = tun_xdp_xmit,
1366 .ndo_change_carrier = tun_net_change_carrier,
1343};
1344
1345static void tun_flow_init(struct tun_struct *tun)
1346{
1347 int i;
1348
1349 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
1350 INIT_HLIST_HEAD(&tun->flows[i]);

--- 117 unchanged lines hidden (view full) ---

1468 if (err)
1469 goto free;
1470
1471 skb->len = len;
1472 skb->data_len = len - linear;
1473 skb->truesize += skb->data_len;
1474
1475 for (i = 1; i < it->nr_segs; i++) {
1367};
1368
1369static void tun_flow_init(struct tun_struct *tun)
1370{
1371 int i;
1372
1373 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
1374 INIT_HLIST_HEAD(&tun->flows[i]);

--- 117 unchanged lines hidden (view full) ---

1492 if (err)
1493 goto free;
1494
1495 skb->len = len;
1496 skb->data_len = len - linear;
1497 skb->truesize += skb->data_len;
1498
1499 for (i = 1; i < it->nr_segs; i++) {
1476 struct page_frag *pfrag = &current->task_frag;
1477 size_t fragsz = it->iov[i].iov_len;
1500 size_t fragsz = it->iov[i].iov_len;
1501 struct page *page;
1502 void *frag;
1478
1479 if (fragsz == 0 || fragsz > PAGE_SIZE) {
1480 err = -EINVAL;
1481 goto free;
1482 }
1503
1504 if (fragsz == 0 || fragsz > PAGE_SIZE) {
1505 err = -EINVAL;
1506 goto free;
1507 }
1483
1484 if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
1508 frag = netdev_alloc_frag(fragsz);
1509 if (!frag) {
1485 err = -ENOMEM;
1486 goto free;
1487 }
1510 err = -ENOMEM;
1511 goto free;
1512 }
1488
1489 skb_fill_page_desc(skb, i - 1, pfrag->page,
1490 pfrag->offset, fragsz);
1491 page_ref_inc(pfrag->page);
1492 pfrag->offset += fragsz;
1513 page = virt_to_head_page(frag);
1514 skb_fill_page_desc(skb, i - 1, page,
1515 frag - page_address(page), fragsz);
1493 }
1494
1495 return skb;
1496free:
1497 /* frees skb and all frags allocated with napi_alloc_frag() */
1498 napi_free_frags(&tfile->napi);
1499 return ERR_PTR(err);
1500}

--- 875 unchanged lines hidden (view full) ---

2376 if (wqueue && waitqueue_active(wqueue))
2377 wake_up_interruptible_sync_poll(wqueue, EPOLLOUT |
2378 EPOLLWRNORM | EPOLLWRBAND);
2379
2380 tfile = container_of(sk, struct tun_file, sk);
2381 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
2382}
2383
1516 }
1517
1518 return skb;
1519free:
1520 /* frees skb and all frags allocated with napi_alloc_frag() */
1521 napi_free_frags(&tfile->napi);
1522 return ERR_PTR(err);
1523}

--- 875 unchanged lines hidden (view full) ---

2399 if (wqueue && waitqueue_active(wqueue))
2400 wake_up_interruptible_sync_poll(wqueue, EPOLLOUT |
2401 EPOLLWRNORM | EPOLLWRBAND);
2402
2403 tfile = container_of(sk, struct tun_file, sk);
2404 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
2405}
2406
2407static void tun_put_page(struct tun_page *tpage)
2408{
2409 if (tpage->page)
2410 __page_frag_cache_drain(tpage->page, tpage->count);
2411}
2412
2384static int tun_xdp_one(struct tun_struct *tun,
2385 struct tun_file *tfile,
2413static int tun_xdp_one(struct tun_struct *tun,
2414 struct tun_file *tfile,
2386 struct xdp_buff *xdp, int *flush)
2415 struct xdp_buff *xdp, int *flush,
2416 struct tun_page *tpage)
2387{
2388 struct tun_xdp_hdr *hdr = xdp->data_hard_start;
2389 struct virtio_net_hdr *gso = &hdr->gso;
2390 struct tun_pcpu_stats *stats;
2391 struct bpf_prog *xdp_prog;
2392 struct sk_buff *skb = NULL;
2393 u32 rxhash = 0, act;
2394 int buflen = hdr->buflen;
2395 int err = 0;
2396 bool skb_xdp = false;
2417{
2418 struct tun_xdp_hdr *hdr = xdp->data_hard_start;
2419 struct virtio_net_hdr *gso = &hdr->gso;
2420 struct tun_pcpu_stats *stats;
2421 struct bpf_prog *xdp_prog;
2422 struct sk_buff *skb = NULL;
2423 u32 rxhash = 0, act;
2424 int buflen = hdr->buflen;
2425 int err = 0;
2426 bool skb_xdp = false;
2427 struct page *page;
2397
2398 xdp_prog = rcu_dereference(tun->xdp_prog);
2399 if (xdp_prog) {
2400 if (gso->gso_type) {
2401 skb_xdp = true;
2402 goto build;
2403 }
2404 xdp_set_data_meta_invalid(xdp);

--- 10 unchanged lines hidden (view full) ---

2415 case XDP_REDIRECT:
2416 *flush = true;
2417 /* fall through */
2418 case XDP_TX:
2419 return 0;
2420 case XDP_PASS:
2421 break;
2422 default:
2428
2429 xdp_prog = rcu_dereference(tun->xdp_prog);
2430 if (xdp_prog) {
2431 if (gso->gso_type) {
2432 skb_xdp = true;
2433 goto build;
2434 }
2435 xdp_set_data_meta_invalid(xdp);

--- 10 unchanged lines hidden (view full) ---

2446 case XDP_REDIRECT:
2447 *flush = true;
2448 /* fall through */
2449 case XDP_TX:
2450 return 0;
2451 case XDP_PASS:
2452 break;
2453 default:
2423 put_page(virt_to_head_page(xdp->data));
2454 page = virt_to_head_page(xdp->data);
2455 if (tpage->page == page) {
2456 ++tpage->count;
2457 } else {
2458 tun_put_page(tpage);
2459 tpage->page = page;
2460 tpage->count = 1;
2461 }
2424 return 0;
2425 }
2426 }
2427
2428build:
2429 skb = build_skb(xdp->data_hard_start, buflen);
2430 if (!skb) {
2431 err = -ENOMEM;

--- 15 unchanged lines hidden (view full) ---

2447 skb_probe_transport_header(skb, 0);
2448
2449 if (skb_xdp) {
2450 err = do_xdp_generic(xdp_prog, skb);
2451 if (err != XDP_PASS)
2452 goto out;
2453 }
2454
2462 return 0;
2463 }
2464 }
2465
2466build:
2467 skb = build_skb(xdp->data_hard_start, buflen);
2468 if (!skb) {
2469 err = -ENOMEM;

--- 15 unchanged lines hidden (view full) ---

2485 skb_probe_transport_header(skb, 0);
2486
2487 if (skb_xdp) {
2488 err = do_xdp_generic(xdp_prog, skb);
2489 if (err != XDP_PASS)
2490 goto out;
2491 }
2492
2455 if (!rcu_dereference(tun->steering_prog))
2493 if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
2494 !tfile->detached)
2456 rxhash = __skb_get_hash_symmetric(skb);
2457
2458 skb_record_rx_queue(skb, tfile->queue_index);
2459 netif_receive_skb(skb);
2460
2461 stats = get_cpu_ptr(tun->pcpu_stats);
2462 u64_stats_update_begin(&stats->syncp);
2463 stats->rx_packets++;

--- 15 unchanged lines hidden (view full) ---

2479 struct tun_struct *tun = tun_get(tfile);
2480 struct tun_msg_ctl *ctl = m->msg_control;
2481 struct xdp_buff *xdp;
2482
2483 if (!tun)
2484 return -EBADFD;
2485
2486 if (ctl && (ctl->type == TUN_MSG_PTR)) {
2495 rxhash = __skb_get_hash_symmetric(skb);
2496
2497 skb_record_rx_queue(skb, tfile->queue_index);
2498 netif_receive_skb(skb);
2499
2500 stats = get_cpu_ptr(tun->pcpu_stats);
2501 u64_stats_update_begin(&stats->syncp);
2502 stats->rx_packets++;

--- 15 unchanged lines hidden (view full) ---

2518 struct tun_struct *tun = tun_get(tfile);
2519 struct tun_msg_ctl *ctl = m->msg_control;
2520 struct xdp_buff *xdp;
2521
2522 if (!tun)
2523 return -EBADFD;
2524
2525 if (ctl && (ctl->type == TUN_MSG_PTR)) {
2526 struct tun_page tpage;
2487 int n = ctl->num;
2488 int flush = 0;
2489
2527 int n = ctl->num;
2528 int flush = 0;
2529
2530 memset(&tpage, 0, sizeof(tpage));
2531
2490 local_bh_disable();
2491 rcu_read_lock();
2492
2493 for (i = 0; i < n; i++) {
2494 xdp = &((struct xdp_buff *)ctl->ptr)[i];
2532 local_bh_disable();
2533 rcu_read_lock();
2534
2535 for (i = 0; i < n; i++) {
2536 xdp = &((struct xdp_buff *)ctl->ptr)[i];
2495 tun_xdp_one(tun, tfile, xdp, &flush);
2537 tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
2496 }
2497
2498 if (flush)
2499 xdp_do_flush_map();
2500
2501 rcu_read_unlock();
2502 local_bh_enable();
2503
2538 }
2539
2540 if (flush)
2541 xdp_do_flush_map();
2542
2543 rcu_read_unlock();
2544 local_bh_enable();
2545
2546 tun_put_page(&tpage);
2547
2504 ret = total_len;
2505 goto out;
2506 }
2507
2508 ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
2509 m->msg_flags & MSG_DONTWAIT,
2510 m->msg_flags & MSG_MORE);
2511out:

--- 460 unchanged lines hidden (view full) ---

2972
2973static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
2974 unsigned long arg, int ifreq_len)
2975{
2976 struct tun_file *tfile = file->private_data;
2977 struct net *net = sock_net(&tfile->sk);
2978 struct tun_struct *tun;
2979 void __user* argp = (void __user*)arg;
2548 ret = total_len;
2549 goto out;
2550 }
2551
2552 ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
2553 m->msg_flags & MSG_DONTWAIT,
2554 m->msg_flags & MSG_MORE);
2555out:

--- 460 unchanged lines hidden (view full) ---

3016
3017static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
3018 unsigned long arg, int ifreq_len)
3019{
3020 struct tun_file *tfile = file->private_data;
3021 struct net *net = sock_net(&tfile->sk);
3022 struct tun_struct *tun;
3023 void __user* argp = (void __user*)arg;
3024 unsigned int ifindex, carrier;
2980 struct ifreq ifr;
2981 kuid_t owner;
2982 kgid_t group;
2983 int sndbuf;
2984 int vnet_hdr_sz;
3025 struct ifreq ifr;
3026 kuid_t owner;
3027 kgid_t group;
3028 int sndbuf;
3029 int vnet_hdr_sz;
2985 unsigned int ifindex;
2986 int le;
2987 int ret;
2988 bool do_notify = false;
2989
2990 if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
2991 (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
2992 if (copy_from_user(&ifr, argp, ifreq_len))
2993 return -EFAULT;

--- 267 unchanged lines hidden (view full) ---

3261 case TUNSETSTEERINGEBPF:
3262 ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
3263 break;
3264
3265 case TUNSETFILTEREBPF:
3266 ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
3267 break;
3268
3030 int le;
3031 int ret;
3032 bool do_notify = false;
3033
3034 if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
3035 (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
3036 if (copy_from_user(&ifr, argp, ifreq_len))
3037 return -EFAULT;

--- 267 unchanged lines hidden (view full) ---

3305 case TUNSETSTEERINGEBPF:
3306 ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
3307 break;
3308
3309 case TUNSETFILTEREBPF:
3310 ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
3311 break;
3312
3313 case TUNSETCARRIER:
3314 ret = -EFAULT;
3315 if (copy_from_user(&carrier, argp, sizeof(carrier)))
3316 goto unlock;
3317
3318 ret = tun_net_change_carrier(tun->dev, (bool)carrier);
3319 break;
3320
3269 default:
3270 ret = -EINVAL;
3271 break;
3272 }
3273
3274 if (do_notify)
3275 netdev_state_change(tun->dev);
3276

--- 387 unchanged lines hidden ---
3321 default:
3322 ret = -EINVAL;
3323 break;
3324 }
3325
3326 if (do_notify)
3327 netdev_state_change(tun->dev);
3328

--- 387 unchanged lines hidden ---