1 /* 2 * netmap access for qemu 3 * 4 * Copyright (c) 2012-2013 Luigi Rizzo 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 26 #include <sys/ioctl.h> 27 #include <net/if.h> 28 #include <sys/mman.h> 29 #include <stdint.h> 30 #include <stdio.h> 31 #define NETMAP_WITH_LIBS 32 #include <net/netmap.h> 33 #include <net/netmap_user.h> 34 35 #include "net/net.h" 36 #include "net/tap.h" 37 #include "clients.h" 38 #include "sysemu/sysemu.h" 39 #include "qemu/error-report.h" 40 #include "qemu/iov.h" 41 42 /* Private netmap device info. */ 43 typedef struct NetmapPriv { 44 int fd; 45 size_t memsize; 46 void *mem; 47 struct netmap_if *nifp; 48 struct netmap_ring *rx; 49 struct netmap_ring *tx; 50 char fdname[PATH_MAX]; /* Normally "/dev/netmap". */ 51 char ifname[IFNAMSIZ]; 52 } NetmapPriv; 53 54 typedef struct NetmapState { 55 NetClientState nc; 56 NetmapPriv me; 57 bool read_poll; 58 bool write_poll; 59 struct iovec iov[IOV_MAX]; 60 int vnet_hdr_len; /* Current virtio-net header length. */ 61 } NetmapState; 62 63 #ifndef __FreeBSD__ 64 #define pkt_copy bcopy 65 #else 66 /* A fast copy routine only for multiples of 64 bytes, non overlapped. */ 67 static inline void 68 pkt_copy(const void *_src, void *_dst, int l) 69 { 70 const uint64_t *src = _src; 71 uint64_t *dst = _dst; 72 if (unlikely(l >= 1024)) { 73 bcopy(src, dst, l); 74 return; 75 } 76 for (; l > 0; l -= 64) { 77 *dst++ = *src++; 78 *dst++ = *src++; 79 *dst++ = *src++; 80 *dst++ = *src++; 81 *dst++ = *src++; 82 *dst++ = *src++; 83 *dst++ = *src++; 84 *dst++ = *src++; 85 } 86 } 87 #endif /* __FreeBSD__ */ 88 89 /* 90 * Open a netmap device. We assume there is only one queue 91 * (which is the case for the VALE bridge). 92 */ 93 static int netmap_open(NetmapPriv *me) 94 { 95 int fd; 96 int err; 97 size_t l; 98 struct nmreq req; 99 100 me->fd = fd = open(me->fdname, O_RDWR); 101 if (fd < 0) { 102 error_report("Unable to open netmap device '%s' (%s)", 103 me->fdname, strerror(errno)); 104 return -1; 105 } 106 memset(&req, 0, sizeof(req)); 107 pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname); 108 req.nr_ringid = NETMAP_NO_TX_POLL; 109 req.nr_version = NETMAP_API; 110 err = ioctl(fd, NIOCREGIF, &req); 111 if (err) { 112 error_report("Unable to register %s: %s", me->ifname, strerror(errno)); 113 goto error; 114 } 115 l = me->memsize = req.nr_memsize; 116 117 me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); 118 if (me->mem == MAP_FAILED) { 119 error_report("Unable to mmap netmap shared memory: %s", 120 strerror(errno)); 121 me->mem = NULL; 122 goto error; 123 } 124 125 me->nifp = NETMAP_IF(me->mem, req.nr_offset); 126 me->tx = NETMAP_TXRING(me->nifp, 0); 127 me->rx = NETMAP_RXRING(me->nifp, 0); 128 return 0; 129 130 error: 131 close(me->fd); 132 return -1; 133 } 134 135 static void netmap_send(void *opaque); 136 static void netmap_writable(void *opaque); 137 138 /* Set the event-loop handlers for the netmap backend. */ 139 static void netmap_update_fd_handler(NetmapState *s) 140 { 141 qemu_set_fd_handler(s->me.fd, 142 s->read_poll ? netmap_send : NULL, 143 s->write_poll ? netmap_writable : NULL, 144 s); 145 } 146 147 /* Update the read handler. */ 148 static void netmap_read_poll(NetmapState *s, bool enable) 149 { 150 if (s->read_poll != enable) { /* Do nothing if not changed. */ 151 s->read_poll = enable; 152 netmap_update_fd_handler(s); 153 } 154 } 155 156 /* Update the write handler. */ 157 static void netmap_write_poll(NetmapState *s, bool enable) 158 { 159 if (s->write_poll != enable) { 160 s->write_poll = enable; 161 netmap_update_fd_handler(s); 162 } 163 } 164 165 static void netmap_poll(NetClientState *nc, bool enable) 166 { 167 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 168 169 if (s->read_poll != enable || s->write_poll != enable) { 170 s->write_poll = enable; 171 s->read_poll = enable; 172 netmap_update_fd_handler(s); 173 } 174 } 175 176 /* 177 * The fd_write() callback, invoked if the fd is marked as 178 * writable after a poll. Unregister the handler and flush any 179 * buffered packets. 180 */ 181 static void netmap_writable(void *opaque) 182 { 183 NetmapState *s = opaque; 184 185 netmap_write_poll(s, false); 186 qemu_flush_queued_packets(&s->nc); 187 } 188 189 static ssize_t netmap_receive(NetClientState *nc, 190 const uint8_t *buf, size_t size) 191 { 192 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 193 struct netmap_ring *ring = s->me.tx; 194 uint32_t i; 195 uint32_t idx; 196 uint8_t *dst; 197 198 if (unlikely(!ring)) { 199 /* Drop. */ 200 return size; 201 } 202 203 if (unlikely(size > ring->nr_buf_size)) { 204 RD(5, "[netmap_receive] drop packet of size %d > %d\n", 205 (int)size, ring->nr_buf_size); 206 return size; 207 } 208 209 if (nm_ring_empty(ring)) { 210 /* No available slots in the netmap TX ring. */ 211 netmap_write_poll(s, true); 212 return 0; 213 } 214 215 i = ring->cur; 216 idx = ring->slot[i].buf_idx; 217 dst = (uint8_t *)NETMAP_BUF(ring, idx); 218 219 ring->slot[i].len = size; 220 ring->slot[i].flags = 0; 221 pkt_copy(buf, dst, size); 222 ring->cur = ring->head = nm_ring_next(ring, i); 223 ioctl(s->me.fd, NIOCTXSYNC, NULL); 224 225 return size; 226 } 227 228 static ssize_t netmap_receive_iov(NetClientState *nc, 229 const struct iovec *iov, int iovcnt) 230 { 231 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 232 struct netmap_ring *ring = s->me.tx; 233 uint32_t last; 234 uint32_t idx; 235 uint8_t *dst; 236 int j; 237 uint32_t i; 238 239 if (unlikely(!ring)) { 240 /* Drop the packet. */ 241 return iov_size(iov, iovcnt); 242 } 243 244 last = i = ring->cur; 245 246 if (nm_ring_space(ring) < iovcnt) { 247 /* Not enough netmap slots. */ 248 netmap_write_poll(s, true); 249 return 0; 250 } 251 252 for (j = 0; j < iovcnt; j++) { 253 int iov_frag_size = iov[j].iov_len; 254 int offset = 0; 255 int nm_frag_size; 256 257 /* Split each iovec fragment over more netmap slots, if 258 necessary. */ 259 while (iov_frag_size) { 260 nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size); 261 262 if (unlikely(nm_ring_empty(ring))) { 263 /* We run out of netmap slots while splitting the 264 iovec fragments. */ 265 netmap_write_poll(s, true); 266 return 0; 267 } 268 269 idx = ring->slot[i].buf_idx; 270 dst = (uint8_t *)NETMAP_BUF(ring, idx); 271 272 ring->slot[i].len = nm_frag_size; 273 ring->slot[i].flags = NS_MOREFRAG; 274 pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size); 275 276 last = i; 277 i = nm_ring_next(ring, i); 278 279 offset += nm_frag_size; 280 iov_frag_size -= nm_frag_size; 281 } 282 } 283 /* The last slot must not have NS_MOREFRAG set. */ 284 ring->slot[last].flags &= ~NS_MOREFRAG; 285 286 /* Now update ring->cur and ring->head. */ 287 ring->cur = ring->head = i; 288 289 ioctl(s->me.fd, NIOCTXSYNC, NULL); 290 291 return iov_size(iov, iovcnt); 292 } 293 294 /* Complete a previous send (backend --> guest) and enable the 295 fd_read callback. */ 296 static void netmap_send_completed(NetClientState *nc, ssize_t len) 297 { 298 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 299 300 netmap_read_poll(s, true); 301 } 302 303 static void netmap_send(void *opaque) 304 { 305 NetmapState *s = opaque; 306 struct netmap_ring *ring = s->me.rx; 307 308 /* Keep sending while there are available packets into the netmap 309 RX ring and the forwarding path towards the peer is open. */ 310 while (!nm_ring_empty(ring)) { 311 uint32_t i; 312 uint32_t idx; 313 bool morefrag; 314 int iovcnt = 0; 315 int iovsize; 316 317 do { 318 i = ring->cur; 319 idx = ring->slot[i].buf_idx; 320 morefrag = (ring->slot[i].flags & NS_MOREFRAG); 321 s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx); 322 s->iov[iovcnt].iov_len = ring->slot[i].len; 323 iovcnt++; 324 325 ring->cur = ring->head = nm_ring_next(ring, i); 326 } while (!nm_ring_empty(ring) && morefrag); 327 328 if (unlikely(nm_ring_empty(ring) && morefrag)) { 329 RD(5, "[netmap_send] ran out of slots, with a pending" 330 "incomplete packet\n"); 331 } 332 333 iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt, 334 netmap_send_completed); 335 336 if (iovsize == 0) { 337 /* The peer does not receive anymore. Packet is queued, stop 338 * reading from the backend until netmap_send_completed() 339 */ 340 netmap_read_poll(s, false); 341 break; 342 } 343 } 344 } 345 346 /* Flush and close. */ 347 static void netmap_cleanup(NetClientState *nc) 348 { 349 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 350 351 qemu_purge_queued_packets(nc); 352 353 netmap_poll(nc, false); 354 munmap(s->me.mem, s->me.memsize); 355 close(s->me.fd); 356 357 s->me.fd = -1; 358 } 359 360 /* Offloading manipulation support callbacks. */ 361 static bool netmap_has_ufo(NetClientState *nc) 362 { 363 return true; 364 } 365 366 static bool netmap_has_vnet_hdr(NetClientState *nc) 367 { 368 return true; 369 } 370 371 static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len) 372 { 373 return len == 0 || len == sizeof(struct virtio_net_hdr) || 374 len == sizeof(struct virtio_net_hdr_mrg_rxbuf); 375 } 376 377 static void netmap_using_vnet_hdr(NetClientState *nc, bool enable) 378 { 379 } 380 381 static void netmap_set_vnet_hdr_len(NetClientState *nc, int len) 382 { 383 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 384 int err; 385 struct nmreq req; 386 387 /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header 388 * length for the netmap adapter associated to 'me->ifname'. 389 */ 390 memset(&req, 0, sizeof(req)); 391 pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname); 392 req.nr_version = NETMAP_API; 393 req.nr_cmd = NETMAP_BDG_VNET_HDR; 394 req.nr_arg1 = len; 395 err = ioctl(s->me.fd, NIOCREGIF, &req); 396 if (err) { 397 error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s", 398 s->me.ifname, strerror(errno)); 399 } else { 400 /* Keep track of the current length. */ 401 s->vnet_hdr_len = len; 402 } 403 } 404 405 static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, 406 int ecn, int ufo) 407 { 408 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 409 410 /* Setting a virtio-net header length greater than zero automatically 411 * enables the offloadings. 412 */ 413 if (!s->vnet_hdr_len) { 414 netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr)); 415 } 416 } 417 418 /* NetClientInfo methods */ 419 static NetClientInfo net_netmap_info = { 420 .type = NET_CLIENT_OPTIONS_KIND_NETMAP, 421 .size = sizeof(NetmapState), 422 .receive = netmap_receive, 423 .receive_iov = netmap_receive_iov, 424 .poll = netmap_poll, 425 .cleanup = netmap_cleanup, 426 .has_ufo = netmap_has_ufo, 427 .has_vnet_hdr = netmap_has_vnet_hdr, 428 .has_vnet_hdr_len = netmap_has_vnet_hdr_len, 429 .using_vnet_hdr = netmap_using_vnet_hdr, 430 .set_offload = netmap_set_offload, 431 .set_vnet_hdr_len = netmap_set_vnet_hdr_len, 432 }; 433 434 /* The exported init function 435 * 436 * ... -net netmap,ifname="..." 437 */ 438 int net_init_netmap(const NetClientOptions *opts, 439 const char *name, NetClientState *peer, Error **errp) 440 { 441 /* FIXME error_setg(errp, ...) on failure */ 442 const NetdevNetmapOptions *netmap_opts = opts->netmap; 443 NetClientState *nc; 444 NetmapPriv me; 445 NetmapState *s; 446 447 pstrcpy(me.fdname, sizeof(me.fdname), 448 netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap"); 449 /* Set default name for the port if not supplied. */ 450 pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname); 451 if (netmap_open(&me)) { 452 return -1; 453 } 454 /* Create the object. */ 455 nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name); 456 s = DO_UPCAST(NetmapState, nc, nc); 457 s->me = me; 458 s->vnet_hdr_len = 0; 459 netmap_read_poll(s, true); /* Initially only poll for reads. */ 460 461 return 0; 462 } 463 464