1c28b1c10SMark McLoughlin /* 2c28b1c10SMark McLoughlin * QEMU System Emulator 3c28b1c10SMark McLoughlin * 4c28b1c10SMark McLoughlin * Copyright (c) 2003-2008 Fabrice Bellard 5c28b1c10SMark McLoughlin * Copyright (c) 2009 Red Hat, Inc. 6c28b1c10SMark McLoughlin * 7c28b1c10SMark McLoughlin * Permission is hereby granted, free of charge, to any person obtaining a copy 8c28b1c10SMark McLoughlin * of this software and associated documentation files (the "Software"), to deal 9c28b1c10SMark McLoughlin * in the Software without restriction, including without limitation the rights 10c28b1c10SMark McLoughlin * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11c28b1c10SMark McLoughlin * copies of the Software, and to permit persons to whom the Software is 12c28b1c10SMark McLoughlin * furnished to do so, subject to the following conditions: 13c28b1c10SMark McLoughlin * 14c28b1c10SMark McLoughlin * The above copyright notice and this permission notice shall be included in 15c28b1c10SMark McLoughlin * all copies or substantial portions of the Software. 16c28b1c10SMark McLoughlin * 17c28b1c10SMark McLoughlin * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18c28b1c10SMark McLoughlin * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19c28b1c10SMark McLoughlin * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20c28b1c10SMark McLoughlin * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21c28b1c10SMark McLoughlin * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22c28b1c10SMark McLoughlin * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23c28b1c10SMark McLoughlin * THE SOFTWARE. 24c28b1c10SMark McLoughlin */ 25c28b1c10SMark McLoughlin 261422e32dSPaolo Bonzini #include "tap_int.h" 271422e32dSPaolo Bonzini #include "tap-linux.h" 28c28b1c10SMark McLoughlin #include "net/tap.h" 29c28b1c10SMark McLoughlin 30c28b1c10SMark McLoughlin #include <net/if.h> 31c28b1c10SMark McLoughlin #include <sys/ioctl.h> 32c28b1c10SMark McLoughlin 339c17d615SPaolo Bonzini #include "sysemu/sysemu.h" 34c28b1c10SMark McLoughlin #include "qemu-common.h" 351de7afc9SPaolo Bonzini #include "qemu/error-report.h" 36c28b1c10SMark McLoughlin 3791ca60e0SMichael Tokarev #define PATH_NET_TUN "/dev/net/tun" 3891ca60e0SMichael Tokarev 39264986e2SJason Wang int tap_open(char *ifname, int ifname_size, int *vnet_hdr, 40264986e2SJason Wang int vnet_hdr_required, int mq_required) 41c28b1c10SMark McLoughlin { 42c28b1c10SMark McLoughlin struct ifreq ifr; 43c28b1c10SMark McLoughlin int fd, ret; 4489e6d68eSMichael S. Tsirkin int len = sizeof(struct virtio_net_hdr); 45d26e445cSPeter Lieven unsigned int features; 46c28b1c10SMark McLoughlin 4791ca60e0SMichael Tokarev TFR(fd = open(PATH_NET_TUN, O_RDWR)); 48c28b1c10SMark McLoughlin if (fd < 0) { 4991ca60e0SMichael Tokarev error_report("could not open %s: %m", PATH_NET_TUN); 50c28b1c10SMark McLoughlin return -1; 51c28b1c10SMark McLoughlin } 52c28b1c10SMark McLoughlin memset(&ifr, 0, sizeof(ifr)); 53c28b1c10SMark McLoughlin ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 54c28b1c10SMark McLoughlin 551f149e72SKusanagi Kouichi if (ioctl(fd, TUNGETFEATURES, &features) == -1) { 561f149e72SKusanagi Kouichi error_report("warning: TUNGETFEATURES failed: %s", strerror(errno)); 571f149e72SKusanagi Kouichi features = 0; 581f149e72SKusanagi Kouichi } 591f149e72SKusanagi Kouichi 601f149e72SKusanagi Kouichi if (features & IFF_ONE_QUEUE) { 61d26e445cSPeter Lieven ifr.ifr_flags |= IFF_ONE_QUEUE; 62d26e445cSPeter Lieven } 63c28b1c10SMark McLoughlin 64d26e445cSPeter Lieven if (*vnet_hdr) { 651f149e72SKusanagi Kouichi if (features & IFF_VNET_HDR) { 66c28b1c10SMark McLoughlin *vnet_hdr = 1; 67c28b1c10SMark McLoughlin ifr.ifr_flags |= IFF_VNET_HDR; 686720b35bSPierre Riteau } else { 696720b35bSPierre Riteau *vnet_hdr = 0; 70c28b1c10SMark McLoughlin } 71c28b1c10SMark McLoughlin 72c28b1c10SMark McLoughlin if (vnet_hdr_required && !*vnet_hdr) { 731ecda02bSMarkus Armbruster error_report("vnet_hdr=1 requested, but no kernel " 74c28b1c10SMark McLoughlin "support for IFF_VNET_HDR available"); 75c28b1c10SMark McLoughlin close(fd); 76c28b1c10SMark McLoughlin return -1; 77c28b1c10SMark McLoughlin } 7889e6d68eSMichael S. Tsirkin /* 7989e6d68eSMichael S. Tsirkin * Make sure vnet header size has the default value: for a persistent 8089e6d68eSMichael S. Tsirkin * tap it might have been modified e.g. by another instance of qemu. 8189e6d68eSMichael S. Tsirkin * Ignore errors since old kernels do not support this ioctl: in this 8289e6d68eSMichael S. Tsirkin * case the header size implicitly has the correct value. 8389e6d68eSMichael S. Tsirkin */ 8489e6d68eSMichael S. Tsirkin ioctl(fd, TUNSETVNETHDRSZ, &len); 85c28b1c10SMark McLoughlin } 86c28b1c10SMark McLoughlin 8794fdc6d0SJason Wang if (mq_required) { 881f149e72SKusanagi Kouichi if (!(features & IFF_MULTI_QUEUE)) { 8994fdc6d0SJason Wang error_report("multiqueue required, but no kernel " 9094fdc6d0SJason Wang "support for IFF_MULTI_QUEUE available"); 9194fdc6d0SJason Wang close(fd); 9294fdc6d0SJason Wang return -1; 9394fdc6d0SJason Wang } else { 9494fdc6d0SJason Wang ifr.ifr_flags |= IFF_MULTI_QUEUE; 9594fdc6d0SJason Wang } 9694fdc6d0SJason Wang } 9794fdc6d0SJason Wang 98c28b1c10SMark McLoughlin if (ifname[0] != '\0') 99c28b1c10SMark McLoughlin pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname); 100c28b1c10SMark McLoughlin else 101c28b1c10SMark McLoughlin pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d"); 102c28b1c10SMark McLoughlin ret = ioctl(fd, TUNSETIFF, (void *) &ifr); 103c28b1c10SMark McLoughlin if (ret != 0) { 10493a7320eSLuiz Capitulino if (ifname[0] != '\0') { 10591ca60e0SMichael Tokarev error_report("could not configure %s (%s): %m", PATH_NET_TUN, ifr.ifr_name); 10693a7320eSLuiz Capitulino } else { 10793a7320eSLuiz Capitulino error_report("could not configure %s: %m", PATH_NET_TUN); 10893a7320eSLuiz Capitulino } 109c28b1c10SMark McLoughlin close(fd); 110c28b1c10SMark McLoughlin return -1; 111c28b1c10SMark McLoughlin } 112c28b1c10SMark McLoughlin pstrcpy(ifname, ifname_size, ifr.ifr_name); 113c28b1c10SMark McLoughlin fcntl(fd, F_SETFL, O_NONBLOCK); 114c28b1c10SMark McLoughlin return fd; 115c28b1c10SMark McLoughlin } 11615ac913bSMark McLoughlin 117f157ed20SMichael S. Tsirkin /* sndbuf implements a kind of flow control for tap. 118f157ed20SMichael S. Tsirkin * Unfortunately when it's enabled, and packets are sent 119f157ed20SMichael S. Tsirkin * to other guests on the same host, the receiver 120f157ed20SMichael S. Tsirkin * can lock up the transmitter indefinitely. 121f157ed20SMichael S. Tsirkin * 122f157ed20SMichael S. Tsirkin * To avoid packet loss, sndbuf should be set to a value lower than the tx 123f157ed20SMichael S. Tsirkin * queue capacity of any destination network interface. 12415ac913bSMark McLoughlin * Ethernet NICs generally have txqueuelen=1000, so 1Mb is 125f157ed20SMichael S. Tsirkin * a good value, given a 1500 byte MTU. 12615ac913bSMark McLoughlin */ 127f157ed20SMichael S. Tsirkin #define TAP_DEFAULT_SNDBUF 0 12815ac913bSMark McLoughlin 129*80b832c3SMarkus Armbruster void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) 13015ac913bSMark McLoughlin { 13115ac913bSMark McLoughlin int sndbuf; 13215ac913bSMark McLoughlin 13308c573a8SLaszlo Ersek sndbuf = !tap->has_sndbuf ? TAP_DEFAULT_SNDBUF : 13408c573a8SLaszlo Ersek tap->sndbuf > INT_MAX ? INT_MAX : 13508c573a8SLaszlo Ersek tap->sndbuf; 13608c573a8SLaszlo Ersek 13715ac913bSMark McLoughlin if (!sndbuf) { 13815ac913bSMark McLoughlin sndbuf = INT_MAX; 13915ac913bSMark McLoughlin } 14015ac913bSMark McLoughlin 14108c573a8SLaszlo Ersek if (ioctl(fd, TUNSETSNDBUF, &sndbuf) == -1 && tap->has_sndbuf) { 142*80b832c3SMarkus Armbruster error_setg_errno(errp, errno, "TUNSETSNDBUF ioctl failed"); 14315ac913bSMark McLoughlin } 14415ac913bSMark McLoughlin } 145dc69004cSMark McLoughlin 146dc69004cSMark McLoughlin int tap_probe_vnet_hdr(int fd) 147dc69004cSMark McLoughlin { 148dc69004cSMark McLoughlin struct ifreq ifr; 149dc69004cSMark McLoughlin 150dc69004cSMark McLoughlin if (ioctl(fd, TUNGETIFF, &ifr) != 0) { 1511ecda02bSMarkus Armbruster error_report("TUNGETIFF ioctl() failed: %s", strerror(errno)); 152dc69004cSMark McLoughlin return 0; 153dc69004cSMark McLoughlin } 154dc69004cSMark McLoughlin 155dc69004cSMark McLoughlin return ifr.ifr_flags & IFF_VNET_HDR; 156dc69004cSMark McLoughlin } 1571faac1f7SMark McLoughlin 1589c282718SMark McLoughlin int tap_probe_has_ufo(int fd) 1599c282718SMark McLoughlin { 1609c282718SMark McLoughlin unsigned offload; 1619c282718SMark McLoughlin 1629c282718SMark McLoughlin offload = TUN_F_CSUM | TUN_F_UFO; 1639c282718SMark McLoughlin 1649c282718SMark McLoughlin if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) 1659c282718SMark McLoughlin return 0; 1669c282718SMark McLoughlin 1679c282718SMark McLoughlin return 1; 1689c282718SMark McLoughlin } 1699c282718SMark McLoughlin 170445d892fSMichael S. Tsirkin /* Verify that we can assign given length */ 171445d892fSMichael S. Tsirkin int tap_probe_vnet_hdr_len(int fd, int len) 172445d892fSMichael S. Tsirkin { 173445d892fSMichael S. Tsirkin int orig; 174445d892fSMichael S. Tsirkin if (ioctl(fd, TUNGETVNETHDRSZ, &orig) == -1) { 175445d892fSMichael S. Tsirkin return 0; 176445d892fSMichael S. Tsirkin } 177445d892fSMichael S. Tsirkin if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) { 178445d892fSMichael S. Tsirkin return 0; 179445d892fSMichael S. Tsirkin } 180445d892fSMichael S. Tsirkin /* Restore original length: we can't handle failure. */ 181445d892fSMichael S. Tsirkin if (ioctl(fd, TUNSETVNETHDRSZ, &orig) == -1) { 182445d892fSMichael S. Tsirkin fprintf(stderr, "TUNGETVNETHDRSZ ioctl() failed: %s. Exiting.\n", 183445d892fSMichael S. Tsirkin strerror(errno)); 18428a65891SJason Wang abort(); 185445d892fSMichael S. Tsirkin return -errno; 186445d892fSMichael S. Tsirkin } 187445d892fSMichael S. Tsirkin return 1; 188445d892fSMichael S. Tsirkin } 189445d892fSMichael S. Tsirkin 190445d892fSMichael S. Tsirkin void tap_fd_set_vnet_hdr_len(int fd, int len) 191445d892fSMichael S. Tsirkin { 192445d892fSMichael S. Tsirkin if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) { 193445d892fSMichael S. Tsirkin fprintf(stderr, "TUNSETVNETHDRSZ ioctl() failed: %s. Exiting.\n", 194445d892fSMichael S. Tsirkin strerror(errno)); 19528a65891SJason Wang abort(); 196445d892fSMichael S. Tsirkin } 197445d892fSMichael S. Tsirkin } 198445d892fSMichael S. Tsirkin 1991faac1f7SMark McLoughlin void tap_fd_set_offload(int fd, int csum, int tso4, 2001faac1f7SMark McLoughlin int tso6, int ecn, int ufo) 2011faac1f7SMark McLoughlin { 2021faac1f7SMark McLoughlin unsigned int offload = 0; 2031faac1f7SMark McLoughlin 2042e50326cSPierre Riteau /* Check if our kernel supports TUNSETOFFLOAD */ 2052e50326cSPierre Riteau if (ioctl(fd, TUNSETOFFLOAD, 0) != 0 && errno == EINVAL) { 2062e50326cSPierre Riteau return; 2072e50326cSPierre Riteau } 2082e50326cSPierre Riteau 2091faac1f7SMark McLoughlin if (csum) { 2101faac1f7SMark McLoughlin offload |= TUN_F_CSUM; 2111faac1f7SMark McLoughlin if (tso4) 2121faac1f7SMark McLoughlin offload |= TUN_F_TSO4; 2131faac1f7SMark McLoughlin if (tso6) 2141faac1f7SMark McLoughlin offload |= TUN_F_TSO6; 2151faac1f7SMark McLoughlin if ((tso4 || tso6) && ecn) 2161faac1f7SMark McLoughlin offload |= TUN_F_TSO_ECN; 2171faac1f7SMark McLoughlin if (ufo) 2181faac1f7SMark McLoughlin offload |= TUN_F_UFO; 2191faac1f7SMark McLoughlin } 2201faac1f7SMark McLoughlin 2211faac1f7SMark McLoughlin if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { 2221faac1f7SMark McLoughlin offload &= ~TUN_F_UFO; 2231faac1f7SMark McLoughlin if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { 2241faac1f7SMark McLoughlin fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n", 2251faac1f7SMark McLoughlin strerror(errno)); 2261faac1f7SMark McLoughlin } 2271faac1f7SMark McLoughlin } 2281faac1f7SMark McLoughlin } 22994fdc6d0SJason Wang 23094fdc6d0SJason Wang /* Enable a specific queue of tap. */ 23194fdc6d0SJason Wang int tap_fd_enable(int fd) 23294fdc6d0SJason Wang { 23394fdc6d0SJason Wang struct ifreq ifr; 23494fdc6d0SJason Wang int ret; 23594fdc6d0SJason Wang 23694fdc6d0SJason Wang memset(&ifr, 0, sizeof(ifr)); 23794fdc6d0SJason Wang 23894fdc6d0SJason Wang ifr.ifr_flags = IFF_ATTACH_QUEUE; 23994fdc6d0SJason Wang ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr); 24094fdc6d0SJason Wang 24194fdc6d0SJason Wang if (ret != 0) { 24294fdc6d0SJason Wang error_report("could not enable queue"); 24394fdc6d0SJason Wang } 24494fdc6d0SJason Wang 24594fdc6d0SJason Wang return ret; 24694fdc6d0SJason Wang } 24794fdc6d0SJason Wang 24894fdc6d0SJason Wang /* Disable a specific queue of tap/ */ 24994fdc6d0SJason Wang int tap_fd_disable(int fd) 25094fdc6d0SJason Wang { 25194fdc6d0SJason Wang struct ifreq ifr; 25294fdc6d0SJason Wang int ret; 25394fdc6d0SJason Wang 25494fdc6d0SJason Wang memset(&ifr, 0, sizeof(ifr)); 25594fdc6d0SJason Wang 25694fdc6d0SJason Wang ifr.ifr_flags = IFF_DETACH_QUEUE; 25794fdc6d0SJason Wang ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr); 25894fdc6d0SJason Wang 25994fdc6d0SJason Wang if (ret != 0) { 26094fdc6d0SJason Wang error_report("could not disable queue"); 26194fdc6d0SJason Wang } 26294fdc6d0SJason Wang 26394fdc6d0SJason Wang return ret; 26494fdc6d0SJason Wang } 26594fdc6d0SJason Wang 266e5dc0b40SJason Wang int tap_fd_get_ifname(int fd, char *ifname) 267e5dc0b40SJason Wang { 268e5dc0b40SJason Wang struct ifreq ifr; 269e5dc0b40SJason Wang 270e5dc0b40SJason Wang if (ioctl(fd, TUNGETIFF, &ifr) != 0) { 271e5dc0b40SJason Wang error_report("TUNGETIFF ioctl() failed: %s", 272e5dc0b40SJason Wang strerror(errno)); 273e5dc0b40SJason Wang return -1; 274e5dc0b40SJason Wang } 275e5dc0b40SJason Wang 276e5dc0b40SJason Wang pstrcpy(ifname, sizeof(ifr.ifr_name), ifr.ifr_name); 277e5dc0b40SJason Wang return 0; 278e5dc0b40SJason Wang } 279