1 /* 2 * netmap access for qemu 3 * 4 * Copyright (c) 2012-2013 Luigi Rizzo 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 26 #include <sys/ioctl.h> 27 #include <net/if.h> 28 #include <sys/mman.h> 29 #include <stdint.h> 30 #include <stdio.h> 31 #define NETMAP_WITH_LIBS 32 #include <net/netmap.h> 33 #include <net/netmap_user.h> 34 35 #include "net/net.h" 36 #include "net/tap.h" 37 #include "clients.h" 38 #include "sysemu/sysemu.h" 39 #include "qemu/error-report.h" 40 #include "qemu/iov.h" 41 42 /* Private netmap device info. */ 43 typedef struct NetmapPriv { 44 int fd; 45 size_t memsize; 46 void *mem; 47 struct netmap_if *nifp; 48 struct netmap_ring *rx; 49 struct netmap_ring *tx; 50 char fdname[PATH_MAX]; /* Normally "/dev/netmap". */ 51 char ifname[IFNAMSIZ]; 52 } NetmapPriv; 53 54 typedef struct NetmapState { 55 NetClientState nc; 56 NetmapPriv me; 57 bool read_poll; 58 bool write_poll; 59 struct iovec iov[IOV_MAX]; 60 int vnet_hdr_len; /* Current virtio-net header length. */ 61 } NetmapState; 62 63 #ifndef __FreeBSD__ 64 #define pkt_copy bcopy 65 #else 66 /* A fast copy routine only for multiples of 64 bytes, non overlapped. */ 67 static inline void 68 pkt_copy(const void *_src, void *_dst, int l) 69 { 70 const uint64_t *src = _src; 71 uint64_t *dst = _dst; 72 if (unlikely(l >= 1024)) { 73 bcopy(src, dst, l); 74 return; 75 } 76 for (; l > 0; l -= 64) { 77 *dst++ = *src++; 78 *dst++ = *src++; 79 *dst++ = *src++; 80 *dst++ = *src++; 81 *dst++ = *src++; 82 *dst++ = *src++; 83 *dst++ = *src++; 84 *dst++ = *src++; 85 } 86 } 87 #endif /* __FreeBSD__ */ 88 89 /* 90 * Open a netmap device. We assume there is only one queue 91 * (which is the case for the VALE bridge). 92 */ 93 static void netmap_open(NetmapPriv *me, Error **errp) 94 { 95 int fd; 96 int err; 97 size_t l; 98 struct nmreq req; 99 100 me->fd = fd = open(me->fdname, O_RDWR); 101 if (fd < 0) { 102 error_setg_file_open(errp, errno, me->fdname); 103 return; 104 } 105 memset(&req, 0, sizeof(req)); 106 pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname); 107 req.nr_ringid = NETMAP_NO_TX_POLL; 108 req.nr_version = NETMAP_API; 109 err = ioctl(fd, NIOCREGIF, &req); 110 if (err) { 111 error_setg_errno(errp, errno, "Unable to register %s", me->ifname); 112 goto error; 113 } 114 l = me->memsize = req.nr_memsize; 115 116 me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); 117 if (me->mem == MAP_FAILED) { 118 error_setg_errno(errp, errno, "Unable to mmap netmap shared memory"); 119 me->mem = NULL; 120 goto error; 121 } 122 123 me->nifp = NETMAP_IF(me->mem, req.nr_offset); 124 me->tx = NETMAP_TXRING(me->nifp, 0); 125 me->rx = NETMAP_RXRING(me->nifp, 0); 126 127 return; 128 129 error: 130 close(me->fd); 131 } 132 133 static void netmap_send(void *opaque); 134 static void netmap_writable(void *opaque); 135 136 /* Set the event-loop handlers for the netmap backend. */ 137 static void netmap_update_fd_handler(NetmapState *s) 138 { 139 qemu_set_fd_handler(s->me.fd, 140 s->read_poll ? netmap_send : NULL, 141 s->write_poll ? netmap_writable : NULL, 142 s); 143 } 144 145 /* Update the read handler. */ 146 static void netmap_read_poll(NetmapState *s, bool enable) 147 { 148 if (s->read_poll != enable) { /* Do nothing if not changed. */ 149 s->read_poll = enable; 150 netmap_update_fd_handler(s); 151 } 152 } 153 154 /* Update the write handler. */ 155 static void netmap_write_poll(NetmapState *s, bool enable) 156 { 157 if (s->write_poll != enable) { 158 s->write_poll = enable; 159 netmap_update_fd_handler(s); 160 } 161 } 162 163 static void netmap_poll(NetClientState *nc, bool enable) 164 { 165 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 166 167 if (s->read_poll != enable || s->write_poll != enable) { 168 s->write_poll = enable; 169 s->read_poll = enable; 170 netmap_update_fd_handler(s); 171 } 172 } 173 174 /* 175 * The fd_write() callback, invoked if the fd is marked as 176 * writable after a poll. Unregister the handler and flush any 177 * buffered packets. 178 */ 179 static void netmap_writable(void *opaque) 180 { 181 NetmapState *s = opaque; 182 183 netmap_write_poll(s, false); 184 qemu_flush_queued_packets(&s->nc); 185 } 186 187 static ssize_t netmap_receive(NetClientState *nc, 188 const uint8_t *buf, size_t size) 189 { 190 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 191 struct netmap_ring *ring = s->me.tx; 192 uint32_t i; 193 uint32_t idx; 194 uint8_t *dst; 195 196 if (unlikely(!ring)) { 197 /* Drop. */ 198 return size; 199 } 200 201 if (unlikely(size > ring->nr_buf_size)) { 202 RD(5, "[netmap_receive] drop packet of size %d > %d\n", 203 (int)size, ring->nr_buf_size); 204 return size; 205 } 206 207 if (nm_ring_empty(ring)) { 208 /* No available slots in the netmap TX ring. */ 209 netmap_write_poll(s, true); 210 return 0; 211 } 212 213 i = ring->cur; 214 idx = ring->slot[i].buf_idx; 215 dst = (uint8_t *)NETMAP_BUF(ring, idx); 216 217 ring->slot[i].len = size; 218 ring->slot[i].flags = 0; 219 pkt_copy(buf, dst, size); 220 ring->cur = ring->head = nm_ring_next(ring, i); 221 ioctl(s->me.fd, NIOCTXSYNC, NULL); 222 223 return size; 224 } 225 226 static ssize_t netmap_receive_iov(NetClientState *nc, 227 const struct iovec *iov, int iovcnt) 228 { 229 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 230 struct netmap_ring *ring = s->me.tx; 231 uint32_t last; 232 uint32_t idx; 233 uint8_t *dst; 234 int j; 235 uint32_t i; 236 237 if (unlikely(!ring)) { 238 /* Drop the packet. */ 239 return iov_size(iov, iovcnt); 240 } 241 242 last = i = ring->cur; 243 244 if (nm_ring_space(ring) < iovcnt) { 245 /* Not enough netmap slots. */ 246 netmap_write_poll(s, true); 247 return 0; 248 } 249 250 for (j = 0; j < iovcnt; j++) { 251 int iov_frag_size = iov[j].iov_len; 252 int offset = 0; 253 int nm_frag_size; 254 255 /* Split each iovec fragment over more netmap slots, if 256 necessary. */ 257 while (iov_frag_size) { 258 nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size); 259 260 if (unlikely(nm_ring_empty(ring))) { 261 /* We run out of netmap slots while splitting the 262 iovec fragments. */ 263 netmap_write_poll(s, true); 264 return 0; 265 } 266 267 idx = ring->slot[i].buf_idx; 268 dst = (uint8_t *)NETMAP_BUF(ring, idx); 269 270 ring->slot[i].len = nm_frag_size; 271 ring->slot[i].flags = NS_MOREFRAG; 272 pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size); 273 274 last = i; 275 i = nm_ring_next(ring, i); 276 277 offset += nm_frag_size; 278 iov_frag_size -= nm_frag_size; 279 } 280 } 281 /* The last slot must not have NS_MOREFRAG set. */ 282 ring->slot[last].flags &= ~NS_MOREFRAG; 283 284 /* Now update ring->cur and ring->head. */ 285 ring->cur = ring->head = i; 286 287 ioctl(s->me.fd, NIOCTXSYNC, NULL); 288 289 return iov_size(iov, iovcnt); 290 } 291 292 /* Complete a previous send (backend --> guest) and enable the 293 fd_read callback. */ 294 static void netmap_send_completed(NetClientState *nc, ssize_t len) 295 { 296 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 297 298 netmap_read_poll(s, true); 299 } 300 301 static void netmap_send(void *opaque) 302 { 303 NetmapState *s = opaque; 304 struct netmap_ring *ring = s->me.rx; 305 306 /* Keep sending while there are available packets into the netmap 307 RX ring and the forwarding path towards the peer is open. */ 308 while (!nm_ring_empty(ring)) { 309 uint32_t i; 310 uint32_t idx; 311 bool morefrag; 312 int iovcnt = 0; 313 int iovsize; 314 315 do { 316 i = ring->cur; 317 idx = ring->slot[i].buf_idx; 318 morefrag = (ring->slot[i].flags & NS_MOREFRAG); 319 s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx); 320 s->iov[iovcnt].iov_len = ring->slot[i].len; 321 iovcnt++; 322 323 ring->cur = ring->head = nm_ring_next(ring, i); 324 } while (!nm_ring_empty(ring) && morefrag); 325 326 if (unlikely(nm_ring_empty(ring) && morefrag)) { 327 RD(5, "[netmap_send] ran out of slots, with a pending" 328 "incomplete packet\n"); 329 } 330 331 iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt, 332 netmap_send_completed); 333 334 if (iovsize == 0) { 335 /* The peer does not receive anymore. Packet is queued, stop 336 * reading from the backend until netmap_send_completed() 337 */ 338 netmap_read_poll(s, false); 339 break; 340 } 341 } 342 } 343 344 /* Flush and close. */ 345 static void netmap_cleanup(NetClientState *nc) 346 { 347 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 348 349 qemu_purge_queued_packets(nc); 350 351 netmap_poll(nc, false); 352 munmap(s->me.mem, s->me.memsize); 353 close(s->me.fd); 354 355 s->me.fd = -1; 356 } 357 358 /* Offloading manipulation support callbacks. */ 359 static bool netmap_has_ufo(NetClientState *nc) 360 { 361 return true; 362 } 363 364 static bool netmap_has_vnet_hdr(NetClientState *nc) 365 { 366 return true; 367 } 368 369 static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len) 370 { 371 return len == 0 || len == sizeof(struct virtio_net_hdr) || 372 len == sizeof(struct virtio_net_hdr_mrg_rxbuf); 373 } 374 375 static void netmap_using_vnet_hdr(NetClientState *nc, bool enable) 376 { 377 } 378 379 static void netmap_set_vnet_hdr_len(NetClientState *nc, int len) 380 { 381 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 382 int err; 383 struct nmreq req; 384 385 /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header 386 * length for the netmap adapter associated to 'me->ifname'. 387 */ 388 memset(&req, 0, sizeof(req)); 389 pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname); 390 req.nr_version = NETMAP_API; 391 req.nr_cmd = NETMAP_BDG_VNET_HDR; 392 req.nr_arg1 = len; 393 err = ioctl(s->me.fd, NIOCREGIF, &req); 394 if (err) { 395 error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s", 396 s->me.ifname, strerror(errno)); 397 } else { 398 /* Keep track of the current length. */ 399 s->vnet_hdr_len = len; 400 } 401 } 402 403 static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, 404 int ecn, int ufo) 405 { 406 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 407 408 /* Setting a virtio-net header length greater than zero automatically 409 * enables the offloadings. 410 */ 411 if (!s->vnet_hdr_len) { 412 netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr)); 413 } 414 } 415 416 /* NetClientInfo methods */ 417 static NetClientInfo net_netmap_info = { 418 .type = NET_CLIENT_OPTIONS_KIND_NETMAP, 419 .size = sizeof(NetmapState), 420 .receive = netmap_receive, 421 .receive_iov = netmap_receive_iov, 422 .poll = netmap_poll, 423 .cleanup = netmap_cleanup, 424 .has_ufo = netmap_has_ufo, 425 .has_vnet_hdr = netmap_has_vnet_hdr, 426 .has_vnet_hdr_len = netmap_has_vnet_hdr_len, 427 .using_vnet_hdr = netmap_using_vnet_hdr, 428 .set_offload = netmap_set_offload, 429 .set_vnet_hdr_len = netmap_set_vnet_hdr_len, 430 }; 431 432 /* The exported init function 433 * 434 * ... -net netmap,ifname="..." 435 */ 436 int net_init_netmap(const NetClientOptions *opts, 437 const char *name, NetClientState *peer, Error **errp) 438 { 439 const NetdevNetmapOptions *netmap_opts = opts->u.netmap; 440 NetClientState *nc; 441 Error *err = NULL; 442 NetmapPriv me; 443 NetmapState *s; 444 445 pstrcpy(me.fdname, sizeof(me.fdname), 446 netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap"); 447 /* Set default name for the port if not supplied. */ 448 pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname); 449 netmap_open(&me, &err); 450 if (err) { 451 error_propagate(errp, err); 452 return -1; 453 } 454 /* Create the object. */ 455 nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name); 456 s = DO_UPCAST(NetmapState, nc, nc); 457 s->me = me; 458 s->vnet_hdr_len = 0; 459 netmap_read_poll(s, true); /* Initially only poll for reads. */ 460 461 return 0; 462 } 463 464