/*
 * netmap access for qemu
 *
 * Copyright (c) 2012-2013 Luigi Rizzo
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */


#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <net/if.h>
#define NETMAP_WITH_LIBS
#include <net/netmap.h>
#include <net/netmap_user.h>

#include "net/net.h"
#include "net/tap.h"
#include "clients.h"
#include "sysemu/sysemu.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/iov.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"

typedef struct NetmapState {
    NetClientState nc;
    struct nm_desc *nmd;
    char ifname[IFNAMSIZ];
    struct netmap_ring *tx;
    struct netmap_ring *rx;
    bool read_poll;
    bool write_poll;
    struct iovec iov[IOV_MAX];
    int vnet_hdr_len;  /* Current virtio-net header length. */
} NetmapState;

#ifndef __FreeBSD__
#define pkt_copy bcopy
#else
/* A fast copy routine only for multiples of 64 bytes, non-overlapping. */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
    const uint64_t *src = _src;
    uint64_t *dst = _dst;
    if (unlikely(l >= 1024)) {
        bcopy(src, dst, l);
        return;
    }
    for (; l > 0; l -= 64) {
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
    }
}
#endif /* __FreeBSD__ */

/*
 * Open a netmap device. We assume there is only one queue
 * (which is the case for the VALE bridge).
 */
static struct nm_desc *netmap_open(const NetdevNetmapOptions *nm_opts,
                                   Error **errp)
{
    struct nm_desc *nmd;
    struct nmreq req;

    memset(&req, 0, sizeof(req));

    nmd = nm_open(nm_opts->ifname, &req, NETMAP_NO_TX_POLL,
                  NULL);
    if (nmd == NULL) {
        error_setg_errno(errp, errno, "Failed to nm_open() %s",
                         nm_opts->ifname);
        return NULL;
    }

    return nmd;
}

static void netmap_send(void *opaque);
static void netmap_writable(void *opaque);

/* Set the event-loop handlers for the netmap backend. */
static void netmap_update_fd_handler(NetmapState *s)
{
    qemu_set_fd_handler(s->nmd->fd,
                        s->read_poll ? netmap_send : NULL,
                        s->write_poll ? netmap_writable : NULL,
                        s);
}

/* Update the read handler. */
static void netmap_read_poll(NetmapState *s, bool enable)
{
    if (s->read_poll != enable) { /* Do nothing if not changed. */
        s->read_poll = enable;
        netmap_update_fd_handler(s);
    }
}

/* Update the write handler. */
static void netmap_write_poll(NetmapState *s, bool enable)
{
    if (s->write_poll != enable) {
        s->write_poll = enable;
        netmap_update_fd_handler(s);
    }
}

static void netmap_poll(NetClientState *nc, bool enable)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    if (s->read_poll != enable || s->write_poll != enable) {
        s->write_poll = enable;
        s->read_poll = enable;
        netmap_update_fd_handler(s);
    }
}

/*
 * The fd_write() callback, invoked if the fd is marked as
 * writable after a poll. Unregister the handler and flush any
 * buffered packets.
 */
static void netmap_writable(void *opaque)
{
    NetmapState *s = opaque;

    netmap_write_poll(s, false);
    qemu_flush_queued_packets(&s->nc);
}
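/*
 * Transmit path (peer --> netmap port).
 *
 * A short summary of the netmap ring conventions relied upon below (see
 * netmap(4) for the authoritative description): the slots in the range
 * [ring->head, ring->tail) are owned by userspace; advancing ring->head
 * hands slots back to the kernel (filled slots on TX, consumed buffers on
 * RX), while ring->cur is only a wakeup point telling the kernel how far
 * we have looked at the ring.
 *
 * Flow control towards the peer: when the TX ring does not have enough
 * free slots, netmap_receive_iov() returns 0 and the QEMU net core queues
 * the packet; once the fd is reported writable, netmap_writable() flushes
 * the queue and the packet is retried.
 */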
static ssize_t netmap_receive_iov(NetClientState *nc,
                                  const struct iovec *iov, int iovcnt)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
    struct netmap_ring *ring = s->tx;
    unsigned int tail = ring->tail;
    ssize_t totlen = 0;
    uint32_t last;
    uint32_t idx;
    uint8_t *dst;
    int j;
    uint32_t i;

    last = i = ring->head;

    if (nm_ring_space(ring) < iovcnt) {
        /* Not enough netmap slots. Tell the kernel that we have seen the new
         * available slots (so that it notifies us again when more become
         * available), but without publishing any new slots to be processed
         * (e.g., we don't advance ring->head). */
        ring->cur = tail;
        netmap_write_poll(s, true);
        return 0;
    }

    for (j = 0; j < iovcnt; j++) {
        int iov_frag_size = iov[j].iov_len;
        int offset = 0;
        int nm_frag_size;

        totlen += iov_frag_size;

        /* Split each iovec fragment across multiple netmap slots, if
           necessary. */
        while (iov_frag_size) {
            nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);

            if (unlikely(i == tail)) {
                /* We ran out of netmap slots while splitting the
                   iovec fragments. */
                ring->cur = tail;
                netmap_write_poll(s, true);
                return 0;
            }

            idx = ring->slot[i].buf_idx;
            dst = (uint8_t *)NETMAP_BUF(ring, idx);

            ring->slot[i].len = nm_frag_size;
            ring->slot[i].flags = NS_MOREFRAG;
            pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size);

            last = i;
            i = nm_ring_next(ring, i);

            offset += nm_frag_size;
            iov_frag_size -= nm_frag_size;
        }
    }
    /* The last slot must not have NS_MOREFRAG set. */
    ring->slot[last].flags &= ~NS_MOREFRAG;

    /* Now update ring->head and ring->cur to publish the new slots and
     * the new wakeup point. */
    ring->head = ring->cur = i;

    ioctl(s->nmd->fd, NIOCTXSYNC, NULL);

    return totlen;
}

static ssize_t netmap_receive(NetClientState *nc,
                              const uint8_t *buf, size_t size)
{
    struct iovec iov;

    iov.iov_base = (void *)buf;
    iov.iov_len = size;

    return netmap_receive_iov(nc, &iov, 1);
}

/* Complete a previous send (backend --> guest) and enable the
   fd_read callback. */
static void netmap_send_completed(NetClientState *nc, ssize_t len)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    netmap_read_poll(s, true);
}
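/*
 * Receive path (netmap port --> peer).
 *
 * netmap_send() below is the fd read handler, invoked when the netmap RX
 * ring has packets to deliver. If the peer cannot accept a packet right
 * away, qemu_sendv_packet_async() returns 0 and keeps the packet queued;
 * in that case we stop polling for reads until netmap_send_completed()
 * re-enables the read handler, so further packets wait in the RX ring
 * (backpressure rather than userspace drops).
 */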
static void netmap_send(void *opaque)
{
    NetmapState *s = opaque;
    struct netmap_ring *ring = s->rx;
    unsigned int tail = ring->tail;

    /* Keep sending while there are available slots in the netmap
       RX ring and the forwarding path towards the peer is open. */
    while (ring->head != tail) {
        uint32_t i = ring->head;
        uint32_t idx;
        bool morefrag;
        int iovcnt = 0;
        int iovsize;

        /* Get a (possibly multi-slot) packet. */
        do {
            idx = ring->slot[i].buf_idx;
            morefrag = (ring->slot[i].flags & NS_MOREFRAG);
            s->iov[iovcnt].iov_base = (void *)NETMAP_BUF(ring, idx);
            s->iov[iovcnt].iov_len = ring->slot[i].len;
            iovcnt++;
            i = nm_ring_next(ring, i);
        } while (i != tail && morefrag);

        /* Advance ring->cur to tell the kernel that we have seen the slots. */
        ring->cur = i;

        if (unlikely(morefrag)) {
            /* This is a truncated packet, so we can stop without releasing the
             * incomplete slots by updating ring->head. We will hopefully
             * re-read the complete packet the next time we are called. */
            break;
        }

        iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt,
                                          netmap_send_completed);

        /* Release the slots to the kernel. */
        ring->head = i;

        if (iovsize == 0) {
            /* The peer cannot receive any more packets. The packet has been
             * queued, so stop reading from the backend until
             * netmap_send_completed() runs. */
            netmap_read_poll(s, false);
            break;
        }
    }
}

/* Flush and close. */
static void netmap_cleanup(NetClientState *nc)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    qemu_purge_queued_packets(nc);

    netmap_poll(nc, false);
    nm_close(s->nmd);
    s->nmd = NULL;
}

/* Offloading manipulation support callbacks. */
static int netmap_fd_set_vnet_hdr_len(NetmapState *s, int len)
{
    struct nmreq req;

    /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
     * length for the netmap adapter associated with 's->ifname'.
     */
    memset(&req, 0, sizeof(req));
    pstrcpy(req.nr_name, sizeof(req.nr_name), s->ifname);
    req.nr_version = NETMAP_API;
    req.nr_cmd = NETMAP_BDG_VNET_HDR;
    req.nr_arg1 = len;

    return ioctl(s->nmd->fd, NIOCREGIF, &req);
}

static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
    int prev_len = s->vnet_hdr_len;

    /* Check that we can set the new length. */
    if (netmap_fd_set_vnet_hdr_len(s, len)) {
        return false;
    }

    /* Restore the previous length. */
    if (netmap_fd_set_vnet_hdr_len(s, prev_len)) {
        error_report("Failed to restore vnet-hdr length %d on %s: %s",
                     prev_len, s->ifname, strerror(errno));
        abort();
    }

    return true;
}

/* A netmap interface that supports virtio-net headers always
 * supports UFO, so we use this callback also for the has_ufo hook. */
static bool netmap_has_vnet_hdr(NetClientState *nc)
{
    return netmap_has_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
}

static void netmap_using_vnet_hdr(NetClientState *nc, bool enable)
{
}

static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
    int err;

    err = netmap_fd_set_vnet_hdr_len(s, len);
    if (err) {
        error_report("Unable to set vnet-hdr length %d on %s: %s",
                     len, s->ifname, strerror(errno));
    } else {
        /* Keep track of the current length. */
        s->vnet_hdr_len = len;
    }
}

static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
                               int ecn, int ufo)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    /* Setting a virtio-net header length greater than zero automatically
     * enables the offloads. */
    if (!s->vnet_hdr_len) {
        netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
    }
}

/* NetClientInfo methods */
static NetClientInfo net_netmap_info = {
    .type = NET_CLIENT_DRIVER_NETMAP,
    .size = sizeof(NetmapState),
    .receive = netmap_receive,
    .receive_iov = netmap_receive_iov,
    .poll = netmap_poll,
    .cleanup = netmap_cleanup,
    .has_ufo = netmap_has_vnet_hdr,
    .has_vnet_hdr = netmap_has_vnet_hdr,
    .has_vnet_hdr_len = netmap_has_vnet_hdr_len,
    .using_vnet_hdr = netmap_using_vnet_hdr,
    .set_offload = netmap_set_offload,
    .set_vnet_hdr_len = netmap_set_vnet_hdr_len,
};
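/*
 * Example invocation (illustrative only: it assumes a QEMU binary built
 * with netmap support and an existing VALE switch port; a physical NIC in
 * netmap mode would instead be named e.g. "netmap:em0"):
 *
 *   qemu-system-x86_64 ... \
 *       -netdev netmap,id=nm0,ifname=vale0:1 \
 *       -device virtio-net-pci,netdev=nm0
 */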
/* The exported init function
 *
 * ... -net netmap,ifname="..."
 */
int net_init_netmap(const Netdev *netdev,
                    const char *name, NetClientState *peer, Error **errp)
{
    const NetdevNetmapOptions *netmap_opts = &netdev->u.netmap;
    struct nm_desc *nmd;
    NetClientState *nc;
    Error *err = NULL;
    NetmapState *s;

    nmd = netmap_open(netmap_opts, &err);
    if (err) {
        error_propagate(errp, err);
        return -1;
    }
    /* Create the object. */
    nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name);
    s = DO_UPCAST(NetmapState, nc, nc);
    s->nmd = nmd;
    s->tx = NETMAP_TXRING(nmd->nifp, 0);
    s->rx = NETMAP_RXRING(nmd->nifp, 0);
    s->vnet_hdr_len = 0;
    pstrcpy(s->ifname, sizeof(s->ifname), netmap_opts->ifname);
    netmap_read_poll(s, true); /* Initially only poll for reads. */

    return 0;
}