1 /* 2 * netmap access for qemu 3 * 4 * Copyright (c) 2012-2013 Luigi Rizzo 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 26 #include <sys/ioctl.h> 27 #include <net/if.h> 28 #include <sys/mman.h> 29 #include <stdint.h> 30 #include <stdio.h> 31 #define NETMAP_WITH_LIBS 32 #include <net/netmap.h> 33 #include <net/netmap_user.h> 34 35 #include "net/net.h" 36 #include "net/tap.h" 37 #include "clients.h" 38 #include "sysemu/sysemu.h" 39 #include "qemu/error-report.h" 40 #include "qemu/iov.h" 41 42 /* Private netmap device info. */ 43 typedef struct NetmapPriv { 44 int fd; 45 size_t memsize; 46 void *mem; 47 struct netmap_if *nifp; 48 struct netmap_ring *rx; 49 struct netmap_ring *tx; 50 char fdname[PATH_MAX]; /* Normally "/dev/netmap". */ 51 char ifname[IFNAMSIZ]; 52 } NetmapPriv; 53 54 typedef struct NetmapState { 55 NetClientState nc; 56 NetmapPriv me; 57 bool read_poll; 58 bool write_poll; 59 struct iovec iov[IOV_MAX]; 60 int vnet_hdr_len; /* Current virtio-net header length. */ 61 } NetmapState; 62 63 #ifndef __FreeBSD__ 64 #define pkt_copy bcopy 65 #else 66 /* A fast copy routine only for multiples of 64 bytes, non overlapped. */ 67 static inline void 68 pkt_copy(const void *_src, void *_dst, int l) 69 { 70 const uint64_t *src = _src; 71 uint64_t *dst = _dst; 72 if (unlikely(l >= 1024)) { 73 bcopy(src, dst, l); 74 return; 75 } 76 for (; l > 0; l -= 64) { 77 *dst++ = *src++; 78 *dst++ = *src++; 79 *dst++ = *src++; 80 *dst++ = *src++; 81 *dst++ = *src++; 82 *dst++ = *src++; 83 *dst++ = *src++; 84 *dst++ = *src++; 85 } 86 } 87 #endif /* __FreeBSD__ */ 88 89 /* 90 * Open a netmap device. We assume there is only one queue 91 * (which is the case for the VALE bridge). 92 */ 93 static int netmap_open(NetmapPriv *me) 94 { 95 int fd; 96 int err; 97 size_t l; 98 struct nmreq req; 99 100 me->fd = fd = open(me->fdname, O_RDWR); 101 if (fd < 0) { 102 error_report("Unable to open netmap device '%s' (%s)", 103 me->fdname, strerror(errno)); 104 return -1; 105 } 106 memset(&req, 0, sizeof(req)); 107 pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname); 108 req.nr_ringid = NETMAP_NO_TX_POLL; 109 req.nr_version = NETMAP_API; 110 err = ioctl(fd, NIOCREGIF, &req); 111 if (err) { 112 error_report("Unable to register %s: %s", me->ifname, strerror(errno)); 113 goto error; 114 } 115 l = me->memsize = req.nr_memsize; 116 117 me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); 118 if (me->mem == MAP_FAILED) { 119 error_report("Unable to mmap netmap shared memory: %s", 120 strerror(errno)); 121 me->mem = NULL; 122 goto error; 123 } 124 125 me->nifp = NETMAP_IF(me->mem, req.nr_offset); 126 me->tx = NETMAP_TXRING(me->nifp, 0); 127 me->rx = NETMAP_RXRING(me->nifp, 0); 128 return 0; 129 130 error: 131 close(me->fd); 132 return -1; 133 } 134 135 /* Tell the event-loop if the netmap backend can send packets 136 to the frontend. */ 137 static int netmap_can_send(void *opaque) 138 { 139 NetmapState *s = opaque; 140 141 return qemu_can_send_packet(&s->nc); 142 } 143 144 static void netmap_send(void *opaque); 145 static void netmap_writable(void *opaque); 146 147 /* Set the event-loop handlers for the netmap backend. */ 148 static void netmap_update_fd_handler(NetmapState *s) 149 { 150 qemu_set_fd_handler2(s->me.fd, 151 s->read_poll ? netmap_can_send : NULL, 152 s->read_poll ? netmap_send : NULL, 153 s->write_poll ? netmap_writable : NULL, 154 s); 155 } 156 157 /* Update the read handler. */ 158 static void netmap_read_poll(NetmapState *s, bool enable) 159 { 160 if (s->read_poll != enable) { /* Do nothing if not changed. */ 161 s->read_poll = enable; 162 netmap_update_fd_handler(s); 163 } 164 } 165 166 /* Update the write handler. */ 167 static void netmap_write_poll(NetmapState *s, bool enable) 168 { 169 if (s->write_poll != enable) { 170 s->write_poll = enable; 171 netmap_update_fd_handler(s); 172 } 173 } 174 175 static void netmap_poll(NetClientState *nc, bool enable) 176 { 177 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 178 179 if (s->read_poll != enable || s->write_poll != enable) { 180 s->write_poll = enable; 181 s->read_poll = enable; 182 netmap_update_fd_handler(s); 183 } 184 } 185 186 /* 187 * The fd_write() callback, invoked if the fd is marked as 188 * writable after a poll. Unregister the handler and flush any 189 * buffered packets. 190 */ 191 static void netmap_writable(void *opaque) 192 { 193 NetmapState *s = opaque; 194 195 netmap_write_poll(s, false); 196 qemu_flush_queued_packets(&s->nc); 197 } 198 199 static ssize_t netmap_receive(NetClientState *nc, 200 const uint8_t *buf, size_t size) 201 { 202 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 203 struct netmap_ring *ring = s->me.tx; 204 uint32_t i; 205 uint32_t idx; 206 uint8_t *dst; 207 208 if (unlikely(!ring)) { 209 /* Drop. */ 210 return size; 211 } 212 213 if (unlikely(size > ring->nr_buf_size)) { 214 RD(5, "[netmap_receive] drop packet of size %d > %d\n", 215 (int)size, ring->nr_buf_size); 216 return size; 217 } 218 219 if (nm_ring_empty(ring)) { 220 /* No available slots in the netmap TX ring. */ 221 netmap_write_poll(s, true); 222 return 0; 223 } 224 225 i = ring->cur; 226 idx = ring->slot[i].buf_idx; 227 dst = (uint8_t *)NETMAP_BUF(ring, idx); 228 229 ring->slot[i].len = size; 230 ring->slot[i].flags = 0; 231 pkt_copy(buf, dst, size); 232 ring->cur = ring->head = nm_ring_next(ring, i); 233 ioctl(s->me.fd, NIOCTXSYNC, NULL); 234 235 return size; 236 } 237 238 static ssize_t netmap_receive_iov(NetClientState *nc, 239 const struct iovec *iov, int iovcnt) 240 { 241 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 242 struct netmap_ring *ring = s->me.tx; 243 uint32_t last; 244 uint32_t idx; 245 uint8_t *dst; 246 int j; 247 uint32_t i; 248 249 if (unlikely(!ring)) { 250 /* Drop the packet. */ 251 return iov_size(iov, iovcnt); 252 } 253 254 last = i = ring->cur; 255 256 if (nm_ring_space(ring) < iovcnt) { 257 /* Not enough netmap slots. */ 258 netmap_write_poll(s, true); 259 return 0; 260 } 261 262 for (j = 0; j < iovcnt; j++) { 263 int iov_frag_size = iov[j].iov_len; 264 int offset = 0; 265 int nm_frag_size; 266 267 /* Split each iovec fragment over more netmap slots, if 268 necessary. */ 269 while (iov_frag_size) { 270 nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size); 271 272 if (unlikely(nm_ring_empty(ring))) { 273 /* We run out of netmap slots while splitting the 274 iovec fragments. */ 275 netmap_write_poll(s, true); 276 return 0; 277 } 278 279 idx = ring->slot[i].buf_idx; 280 dst = (uint8_t *)NETMAP_BUF(ring, idx); 281 282 ring->slot[i].len = nm_frag_size; 283 ring->slot[i].flags = NS_MOREFRAG; 284 pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size); 285 286 last = i; 287 i = nm_ring_next(ring, i); 288 289 offset += nm_frag_size; 290 iov_frag_size -= nm_frag_size; 291 } 292 } 293 /* The last slot must not have NS_MOREFRAG set. */ 294 ring->slot[last].flags &= ~NS_MOREFRAG; 295 296 /* Now update ring->cur and ring->head. */ 297 ring->cur = ring->head = i; 298 299 ioctl(s->me.fd, NIOCTXSYNC, NULL); 300 301 return iov_size(iov, iovcnt); 302 } 303 304 /* Complete a previous send (backend --> guest) and enable the 305 fd_read callback. */ 306 static void netmap_send_completed(NetClientState *nc, ssize_t len) 307 { 308 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 309 310 netmap_read_poll(s, true); 311 } 312 313 static void netmap_send(void *opaque) 314 { 315 NetmapState *s = opaque; 316 struct netmap_ring *ring = s->me.rx; 317 318 /* Keep sending while there are available packets into the netmap 319 RX ring and the forwarding path towards the peer is open. */ 320 while (!nm_ring_empty(ring) && qemu_can_send_packet(&s->nc)) { 321 uint32_t i; 322 uint32_t idx; 323 bool morefrag; 324 int iovcnt = 0; 325 int iovsize; 326 327 do { 328 i = ring->cur; 329 idx = ring->slot[i].buf_idx; 330 morefrag = (ring->slot[i].flags & NS_MOREFRAG); 331 s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx); 332 s->iov[iovcnt].iov_len = ring->slot[i].len; 333 iovcnt++; 334 335 ring->cur = ring->head = nm_ring_next(ring, i); 336 } while (!nm_ring_empty(ring) && morefrag); 337 338 if (unlikely(nm_ring_empty(ring) && morefrag)) { 339 RD(5, "[netmap_send] ran out of slots, with a pending" 340 "incomplete packet\n"); 341 } 342 343 iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt, 344 netmap_send_completed); 345 346 if (iovsize == 0) { 347 /* The peer does not receive anymore. Packet is queued, stop 348 * reading from the backend until netmap_send_completed() 349 */ 350 netmap_read_poll(s, false); 351 break; 352 } 353 } 354 } 355 356 /* Flush and close. */ 357 static void netmap_cleanup(NetClientState *nc) 358 { 359 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 360 361 qemu_purge_queued_packets(nc); 362 363 netmap_poll(nc, false); 364 munmap(s->me.mem, s->me.memsize); 365 close(s->me.fd); 366 367 s->me.fd = -1; 368 } 369 370 /* Offloading manipulation support callbacks. */ 371 static bool netmap_has_ufo(NetClientState *nc) 372 { 373 return true; 374 } 375 376 static bool netmap_has_vnet_hdr(NetClientState *nc) 377 { 378 return true; 379 } 380 381 static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len) 382 { 383 return len == 0 || len == sizeof(struct virtio_net_hdr) || 384 len == sizeof(struct virtio_net_hdr_mrg_rxbuf); 385 } 386 387 static void netmap_using_vnet_hdr(NetClientState *nc, bool enable) 388 { 389 } 390 391 static void netmap_set_vnet_hdr_len(NetClientState *nc, int len) 392 { 393 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 394 int err; 395 struct nmreq req; 396 397 /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header 398 * length for the netmap adapter associated to 'me->ifname'. 399 */ 400 memset(&req, 0, sizeof(req)); 401 pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname); 402 req.nr_version = NETMAP_API; 403 req.nr_cmd = NETMAP_BDG_VNET_HDR; 404 req.nr_arg1 = len; 405 err = ioctl(s->me.fd, NIOCREGIF, &req); 406 if (err) { 407 error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s", 408 s->me.ifname, strerror(errno)); 409 } else { 410 /* Keep track of the current length. */ 411 s->vnet_hdr_len = len; 412 } 413 } 414 415 static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, 416 int ecn, int ufo) 417 { 418 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 419 420 /* Setting a virtio-net header length greater than zero automatically 421 * enables the offloadings. 422 */ 423 if (!s->vnet_hdr_len) { 424 netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr)); 425 } 426 } 427 428 /* NetClientInfo methods */ 429 static NetClientInfo net_netmap_info = { 430 .type = NET_CLIENT_OPTIONS_KIND_NETMAP, 431 .size = sizeof(NetmapState), 432 .receive = netmap_receive, 433 .receive_iov = netmap_receive_iov, 434 .poll = netmap_poll, 435 .cleanup = netmap_cleanup, 436 .has_ufo = netmap_has_ufo, 437 .has_vnet_hdr = netmap_has_vnet_hdr, 438 .has_vnet_hdr_len = netmap_has_vnet_hdr_len, 439 .using_vnet_hdr = netmap_using_vnet_hdr, 440 .set_offload = netmap_set_offload, 441 .set_vnet_hdr_len = netmap_set_vnet_hdr_len, 442 }; 443 444 /* The exported init function 445 * 446 * ... -net netmap,ifname="..." 447 */ 448 int net_init_netmap(const NetClientOptions *opts, 449 const char *name, NetClientState *peer) 450 { 451 const NetdevNetmapOptions *netmap_opts = opts->netmap; 452 NetClientState *nc; 453 NetmapPriv me; 454 NetmapState *s; 455 456 pstrcpy(me.fdname, sizeof(me.fdname), 457 netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap"); 458 /* Set default name for the port if not supplied. */ 459 pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname); 460 if (netmap_open(&me)) { 461 return -1; 462 } 463 /* Create the object. */ 464 nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name); 465 s = DO_UPCAST(NetmapState, nc, nc); 466 s->me = me; 467 s->vnet_hdr_len = 0; 468 netmap_read_poll(s, true); /* Initially only poll for reads. */ 469 470 return 0; 471 } 472 473