1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2009 Red Hat, Inc. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu/osdep.h" 27 #include "tap_int.h" 28 29 30 #include <sys/ioctl.h> 31 #include <sys/wait.h> 32 #include <sys/socket.h> 33 #include <net/if.h> 34 35 #include "net/eth.h" 36 #include "net/net.h" 37 #include "clients.h" 38 #include "monitor/monitor.h" 39 #include "sysemu/sysemu.h" 40 #include "qapi/error.h" 41 #include "qemu/cutils.h" 42 #include "qemu/error-report.h" 43 #include "qemu/main-loop.h" 44 #include "qemu/sockets.h" 45 46 #include "net/tap.h" 47 48 #include "net/vhost_net.h" 49 50 typedef struct TAPState { 51 NetClientState nc; 52 int fd; 53 char down_script[1024]; 54 char down_script_arg[128]; 55 uint8_t buf[NET_BUFSIZE]; 56 bool read_poll; 57 bool write_poll; 58 bool using_vnet_hdr; 59 bool has_ufo; 60 bool enabled; 61 VHostNetState *vhost_net; 62 unsigned host_vnet_hdr_len; 63 Notifier exit; 64 } TAPState; 65 66 static void launch_script(const char *setup_script, const char *ifname, 67 int fd, Error **errp); 68 69 static void tap_send(void *opaque); 70 static void tap_writable(void *opaque); 71 72 static void tap_update_fd_handler(TAPState *s) 73 { 74 qemu_set_fd_handler(s->fd, 75 s->read_poll && s->enabled ? tap_send : NULL, 76 s->write_poll && s->enabled ? tap_writable : NULL, 77 s); 78 } 79 80 static void tap_read_poll(TAPState *s, bool enable) 81 { 82 s->read_poll = enable; 83 tap_update_fd_handler(s); 84 } 85 86 static void tap_write_poll(TAPState *s, bool enable) 87 { 88 s->write_poll = enable; 89 tap_update_fd_handler(s); 90 } 91 92 static void tap_writable(void *opaque) 93 { 94 TAPState *s = opaque; 95 96 tap_write_poll(s, false); 97 98 qemu_flush_queued_packets(&s->nc); 99 } 100 101 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) 102 { 103 ssize_t len; 104 105 len = RETRY_ON_EINTR(writev(s->fd, iov, iovcnt)); 106 107 if (len == -1 && errno == EAGAIN) { 108 tap_write_poll(s, true); 109 return 0; 110 } 111 112 return len; 113 } 114 115 static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov, 116 int iovcnt) 117 { 118 TAPState *s = DO_UPCAST(TAPState, nc, nc); 119 const struct iovec *iovp = iov; 120 struct iovec iov_copy[iovcnt + 1]; 121 struct virtio_net_hdr_mrg_rxbuf hdr = { }; 122 123 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 124 iov_copy[0].iov_base = &hdr; 125 iov_copy[0].iov_len = s->host_vnet_hdr_len; 126 memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); 127 iovp = iov_copy; 128 iovcnt++; 129 } 130 131 return tap_write_packet(s, iovp, iovcnt); 132 } 133 134 static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size) 135 { 136 TAPState *s = DO_UPCAST(TAPState, nc, nc); 137 struct iovec iov[2]; 138 int iovcnt = 0; 139 struct virtio_net_hdr_mrg_rxbuf hdr = { }; 140 141 if (s->host_vnet_hdr_len) { 142 iov[iovcnt].iov_base = &hdr; 143 iov[iovcnt].iov_len = s->host_vnet_hdr_len; 144 iovcnt++; 145 } 146 147 iov[iovcnt].iov_base = (char *)buf; 148 iov[iovcnt].iov_len = size; 149 iovcnt++; 150 151 return tap_write_packet(s, iov, iovcnt); 152 } 153 154 static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) 155 { 156 TAPState *s = DO_UPCAST(TAPState, nc, nc); 157 struct iovec iov[1]; 158 159 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 160 return tap_receive_raw(nc, buf, size); 161 } 162 163 iov[0].iov_base = (char *)buf; 164 iov[0].iov_len = size; 165 166 return tap_write_packet(s, iov, 1); 167 } 168 169 #ifndef __sun__ 170 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) 171 { 172 return read(tapfd, buf, maxlen); 173 } 174 #endif 175 176 static void tap_send_completed(NetClientState *nc, ssize_t len) 177 { 178 TAPState *s = DO_UPCAST(TAPState, nc, nc); 179 tap_read_poll(s, true); 180 } 181 182 static void tap_send(void *opaque) 183 { 184 TAPState *s = opaque; 185 int size; 186 int packets = 0; 187 188 while (true) { 189 uint8_t *buf = s->buf; 190 uint8_t min_pkt[ETH_ZLEN]; 191 size_t min_pktsz = sizeof(min_pkt); 192 193 size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); 194 if (size <= 0) { 195 break; 196 } 197 198 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 199 buf += s->host_vnet_hdr_len; 200 size -= s->host_vnet_hdr_len; 201 } 202 203 if (net_peer_needs_padding(&s->nc)) { 204 if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) { 205 buf = min_pkt; 206 size = min_pktsz; 207 } 208 } 209 210 size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); 211 if (size == 0) { 212 tap_read_poll(s, false); 213 break; 214 } else if (size < 0) { 215 break; 216 } 217 218 /* 219 * When the host keeps receiving more packets while tap_send() is 220 * running we can hog the QEMU global mutex. Limit the number of 221 * packets that are processed per tap_send() callback to prevent 222 * stalling the guest. 223 */ 224 packets++; 225 if (packets >= 50) { 226 break; 227 } 228 } 229 } 230 231 static bool tap_has_ufo(NetClientState *nc) 232 { 233 TAPState *s = DO_UPCAST(TAPState, nc, nc); 234 235 assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 236 237 return s->has_ufo; 238 } 239 240 static bool tap_has_vnet_hdr(NetClientState *nc) 241 { 242 TAPState *s = DO_UPCAST(TAPState, nc, nc); 243 244 assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 245 246 return !!s->host_vnet_hdr_len; 247 } 248 249 static bool tap_has_vnet_hdr_len(NetClientState *nc, int len) 250 { 251 TAPState *s = DO_UPCAST(TAPState, nc, nc); 252 253 assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 254 255 return !!tap_probe_vnet_hdr_len(s->fd, len); 256 } 257 258 static int tap_get_vnet_hdr_len(NetClientState *nc) 259 { 260 TAPState *s = DO_UPCAST(TAPState, nc, nc); 261 262 return s->host_vnet_hdr_len; 263 } 264 265 static void tap_set_vnet_hdr_len(NetClientState *nc, int len) 266 { 267 TAPState *s = DO_UPCAST(TAPState, nc, nc); 268 269 assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 270 assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || 271 len == sizeof(struct virtio_net_hdr) || 272 len == sizeof(struct virtio_net_hdr_v1_hash)); 273 274 tap_fd_set_vnet_hdr_len(s->fd, len); 275 s->host_vnet_hdr_len = len; 276 } 277 278 static bool tap_get_using_vnet_hdr(NetClientState *nc) 279 { 280 TAPState *s = DO_UPCAST(TAPState, nc, nc); 281 282 return s->using_vnet_hdr; 283 } 284 285 static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) 286 { 287 TAPState *s = DO_UPCAST(TAPState, nc, nc); 288 289 assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 290 assert(!!s->host_vnet_hdr_len == using_vnet_hdr); 291 292 s->using_vnet_hdr = using_vnet_hdr; 293 } 294 295 static int tap_set_vnet_le(NetClientState *nc, bool is_le) 296 { 297 TAPState *s = DO_UPCAST(TAPState, nc, nc); 298 299 return tap_fd_set_vnet_le(s->fd, is_le); 300 } 301 302 static int tap_set_vnet_be(NetClientState *nc, bool is_be) 303 { 304 TAPState *s = DO_UPCAST(TAPState, nc, nc); 305 306 return tap_fd_set_vnet_be(s->fd, is_be); 307 } 308 309 static void tap_set_offload(NetClientState *nc, int csum, int tso4, 310 int tso6, int ecn, int ufo) 311 { 312 TAPState *s = DO_UPCAST(TAPState, nc, nc); 313 if (s->fd < 0) { 314 return; 315 } 316 317 tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); 318 } 319 320 static void tap_exit_notify(Notifier *notifier, void *data) 321 { 322 TAPState *s = container_of(notifier, TAPState, exit); 323 Error *err = NULL; 324 325 if (s->down_script[0]) { 326 launch_script(s->down_script, s->down_script_arg, s->fd, &err); 327 if (err) { 328 error_report_err(err); 329 } 330 } 331 } 332 333 static void tap_cleanup(NetClientState *nc) 334 { 335 TAPState *s = DO_UPCAST(TAPState, nc, nc); 336 337 if (s->vhost_net) { 338 vhost_net_cleanup(s->vhost_net); 339 g_free(s->vhost_net); 340 s->vhost_net = NULL; 341 } 342 343 qemu_purge_queued_packets(nc); 344 345 tap_exit_notify(&s->exit, NULL); 346 qemu_remove_exit_notifier(&s->exit); 347 348 tap_read_poll(s, false); 349 tap_write_poll(s, false); 350 close(s->fd); 351 s->fd = -1; 352 } 353 354 static void tap_poll(NetClientState *nc, bool enable) 355 { 356 TAPState *s = DO_UPCAST(TAPState, nc, nc); 357 tap_read_poll(s, enable); 358 tap_write_poll(s, enable); 359 } 360 361 static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd) 362 { 363 TAPState *s = DO_UPCAST(TAPState, nc, nc); 364 assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 365 366 return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0; 367 } 368 369 int tap_get_fd(NetClientState *nc) 370 { 371 TAPState *s = DO_UPCAST(TAPState, nc, nc); 372 assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 373 return s->fd; 374 } 375 376 /* fd support */ 377 378 static NetClientInfo net_tap_info = { 379 .type = NET_CLIENT_DRIVER_TAP, 380 .size = sizeof(TAPState), 381 .receive = tap_receive, 382 .receive_raw = tap_receive_raw, 383 .receive_iov = tap_receive_iov, 384 .poll = tap_poll, 385 .cleanup = tap_cleanup, 386 .has_ufo = tap_has_ufo, 387 .has_vnet_hdr = tap_has_vnet_hdr, 388 .has_vnet_hdr_len = tap_has_vnet_hdr_len, 389 .get_using_vnet_hdr = tap_get_using_vnet_hdr, 390 .using_vnet_hdr = tap_using_vnet_hdr, 391 .set_offload = tap_set_offload, 392 .get_vnet_hdr_len = tap_get_vnet_hdr_len, 393 .set_vnet_hdr_len = tap_set_vnet_hdr_len, 394 .set_vnet_le = tap_set_vnet_le, 395 .set_vnet_be = tap_set_vnet_be, 396 .set_steering_ebpf = tap_set_steering_ebpf, 397 }; 398 399 static TAPState *net_tap_fd_init(NetClientState *peer, 400 const char *model, 401 const char *name, 402 int fd, 403 int vnet_hdr) 404 { 405 NetClientState *nc; 406 TAPState *s; 407 408 nc = qemu_new_net_client(&net_tap_info, peer, model, name); 409 410 s = DO_UPCAST(TAPState, nc, nc); 411 412 s->fd = fd; 413 s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; 414 s->using_vnet_hdr = false; 415 s->has_ufo = tap_probe_has_ufo(s->fd); 416 s->enabled = true; 417 tap_set_offload(&s->nc, 0, 0, 0, 0, 0); 418 /* 419 * Make sure host header length is set correctly in tap: 420 * it might have been modified by another instance of qemu. 421 */ 422 if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) { 423 tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len); 424 } 425 tap_read_poll(s, true); 426 s->vhost_net = NULL; 427 428 s->exit.notify = tap_exit_notify; 429 qemu_add_exit_notifier(&s->exit); 430 431 return s; 432 } 433 434 static void launch_script(const char *setup_script, const char *ifname, 435 int fd, Error **errp) 436 { 437 int pid, status; 438 char *args[3]; 439 char **parg; 440 441 /* try to launch network script */ 442 pid = fork(); 443 if (pid < 0) { 444 error_setg_errno(errp, errno, "could not launch network script %s", 445 setup_script); 446 return; 447 } 448 if (pid == 0) { 449 int open_max = sysconf(_SC_OPEN_MAX), i; 450 451 for (i = 3; i < open_max; i++) { 452 if (i != fd) { 453 close(i); 454 } 455 } 456 parg = args; 457 *parg++ = (char *)setup_script; 458 *parg++ = (char *)ifname; 459 *parg = NULL; 460 execv(setup_script, args); 461 _exit(1); 462 } else { 463 while (waitpid(pid, &status, 0) != pid) { 464 /* loop */ 465 } 466 467 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 468 return; 469 } 470 error_setg(errp, "network script %s failed with status %d", 471 setup_script, status); 472 } 473 } 474 475 static int recv_fd(int c) 476 { 477 int fd; 478 uint8_t msgbuf[CMSG_SPACE(sizeof(fd))]; 479 struct msghdr msg = { 480 .msg_control = msgbuf, 481 .msg_controllen = sizeof(msgbuf), 482 }; 483 struct cmsghdr *cmsg; 484 struct iovec iov; 485 uint8_t req[1]; 486 ssize_t len; 487 488 cmsg = CMSG_FIRSTHDR(&msg); 489 cmsg->cmsg_level = SOL_SOCKET; 490 cmsg->cmsg_type = SCM_RIGHTS; 491 cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); 492 msg.msg_controllen = cmsg->cmsg_len; 493 494 iov.iov_base = req; 495 iov.iov_len = sizeof(req); 496 497 msg.msg_iov = &iov; 498 msg.msg_iovlen = 1; 499 500 len = recvmsg(c, &msg, 0); 501 if (len > 0) { 502 memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); 503 return fd; 504 } 505 506 return len; 507 } 508 509 static int net_bridge_run_helper(const char *helper, const char *bridge, 510 Error **errp) 511 { 512 sigset_t oldmask, mask; 513 g_autofree char *default_helper = NULL; 514 int pid, status; 515 char *args[5]; 516 char **parg; 517 int sv[2]; 518 519 sigemptyset(&mask); 520 sigaddset(&mask, SIGCHLD); 521 sigprocmask(SIG_BLOCK, &mask, &oldmask); 522 523 if (!helper) { 524 helper = default_helper = get_relocated_path(DEFAULT_BRIDGE_HELPER); 525 } 526 527 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 528 error_setg_errno(errp, errno, "socketpair() failed"); 529 return -1; 530 } 531 532 /* try to launch bridge helper */ 533 pid = fork(); 534 if (pid < 0) { 535 error_setg_errno(errp, errno, "Can't fork bridge helper"); 536 return -1; 537 } 538 if (pid == 0) { 539 int open_max = sysconf(_SC_OPEN_MAX), i; 540 char *fd_buf = NULL; 541 char *br_buf = NULL; 542 char *helper_cmd = NULL; 543 544 for (i = 3; i < open_max; i++) { 545 if (i != sv[1]) { 546 close(i); 547 } 548 } 549 550 fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]); 551 552 if (strrchr(helper, ' ') || strrchr(helper, '\t')) { 553 /* assume helper is a command */ 554 555 if (strstr(helper, "--br=") == NULL) { 556 br_buf = g_strdup_printf("%s%s", "--br=", bridge); 557 } 558 559 helper_cmd = g_strdup_printf("%s %s %s %s", helper, 560 "--use-vnet", fd_buf, br_buf ? br_buf : ""); 561 562 parg = args; 563 *parg++ = (char *)"sh"; 564 *parg++ = (char *)"-c"; 565 *parg++ = helper_cmd; 566 *parg++ = NULL; 567 568 execv("/bin/sh", args); 569 g_free(helper_cmd); 570 } else { 571 /* assume helper is just the executable path name */ 572 573 br_buf = g_strdup_printf("%s%s", "--br=", bridge); 574 575 parg = args; 576 *parg++ = (char *)helper; 577 *parg++ = (char *)"--use-vnet"; 578 *parg++ = fd_buf; 579 *parg++ = br_buf; 580 *parg++ = NULL; 581 582 execv(helper, args); 583 } 584 g_free(fd_buf); 585 g_free(br_buf); 586 _exit(1); 587 588 } else { 589 int fd; 590 int saved_errno; 591 592 close(sv[1]); 593 594 fd = RETRY_ON_EINTR(recv_fd(sv[0])); 595 saved_errno = errno; 596 597 close(sv[0]); 598 599 while (waitpid(pid, &status, 0) != pid) { 600 /* loop */ 601 } 602 sigprocmask(SIG_SETMASK, &oldmask, NULL); 603 if (fd < 0) { 604 error_setg_errno(errp, saved_errno, 605 "failed to recv file descriptor"); 606 return -1; 607 } 608 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { 609 error_setg(errp, "bridge helper failed"); 610 return -1; 611 } 612 return fd; 613 } 614 } 615 616 int net_init_bridge(const Netdev *netdev, const char *name, 617 NetClientState *peer, Error **errp) 618 { 619 const NetdevBridgeOptions *bridge; 620 const char *helper, *br; 621 TAPState *s; 622 int fd, vnet_hdr; 623 624 assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE); 625 bridge = &netdev->u.bridge; 626 helper = bridge->helper; 627 br = bridge->br ?: DEFAULT_BRIDGE_INTERFACE; 628 629 fd = net_bridge_run_helper(helper, br, errp); 630 if (fd == -1) { 631 return -1; 632 } 633 634 if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { 635 error_setg_errno(errp, errno, "Failed to set FD nonblocking"); 636 return -1; 637 } 638 vnet_hdr = tap_probe_vnet_hdr(fd, errp); 639 if (vnet_hdr < 0) { 640 close(fd); 641 return -1; 642 } 643 s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); 644 645 qemu_set_info_str(&s->nc, "helper=%s,br=%s", helper, br); 646 647 return 0; 648 } 649 650 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, 651 const char *setup_script, char *ifname, 652 size_t ifname_sz, int mq_required, Error **errp) 653 { 654 Error *err = NULL; 655 int fd, vnet_hdr_required; 656 657 if (tap->has_vnet_hdr) { 658 *vnet_hdr = tap->vnet_hdr; 659 vnet_hdr_required = *vnet_hdr; 660 } else { 661 *vnet_hdr = 1; 662 vnet_hdr_required = 0; 663 } 664 665 fd = RETRY_ON_EINTR(tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required, 666 mq_required, errp)); 667 if (fd < 0) { 668 return -1; 669 } 670 671 if (setup_script && 672 setup_script[0] != '\0' && 673 strcmp(setup_script, "no") != 0) { 674 launch_script(setup_script, ifname, fd, &err); 675 if (err) { 676 error_propagate(errp, err); 677 close(fd); 678 return -1; 679 } 680 } 681 682 return fd; 683 } 684 685 #define MAX_TAP_QUEUES 1024 686 687 static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, 688 const char *model, const char *name, 689 const char *ifname, const char *script, 690 const char *downscript, const char *vhostfdname, 691 int vnet_hdr, int fd, Error **errp) 692 { 693 Error *err = NULL; 694 TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); 695 int vhostfd; 696 697 tap_set_sndbuf(s->fd, tap, &err); 698 if (err) { 699 error_propagate(errp, err); 700 goto failed; 701 } 702 703 if (tap->fd || tap->fds) { 704 qemu_set_info_str(&s->nc, "fd=%d", fd); 705 } else if (tap->helper) { 706 qemu_set_info_str(&s->nc, "helper=%s", tap->helper); 707 } else { 708 qemu_set_info_str(&s->nc, "ifname=%s,script=%s,downscript=%s", ifname, 709 script, downscript); 710 711 if (strcmp(downscript, "no") != 0) { 712 snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); 713 snprintf(s->down_script_arg, sizeof(s->down_script_arg), 714 "%s", ifname); 715 } 716 } 717 718 if (tap->has_vhost ? tap->vhost : 719 vhostfdname || (tap->has_vhostforce && tap->vhostforce)) { 720 VhostNetOptions options; 721 722 options.backend_type = VHOST_BACKEND_TYPE_KERNEL; 723 options.net_backend = &s->nc; 724 if (tap->has_poll_us) { 725 options.busyloop_timeout = tap->poll_us; 726 } else { 727 options.busyloop_timeout = 0; 728 } 729 730 if (vhostfdname) { 731 vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err); 732 if (vhostfd == -1) { 733 if (tap->has_vhostforce && tap->vhostforce) { 734 error_propagate(errp, err); 735 } else { 736 warn_report_err(err); 737 } 738 goto failed; 739 } 740 if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) { 741 error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", 742 name, fd); 743 goto failed; 744 } 745 } else { 746 vhostfd = open("/dev/vhost-net", O_RDWR); 747 if (vhostfd < 0) { 748 if (tap->has_vhostforce && tap->vhostforce) { 749 error_setg_errno(errp, errno, 750 "tap: open vhost char device failed"); 751 } else { 752 warn_report("tap: open vhost char device failed: %s", 753 strerror(errno)); 754 } 755 goto failed; 756 } 757 if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) { 758 error_setg_errno(errp, errno, "Failed to set FD nonblocking"); 759 goto failed; 760 } 761 } 762 options.opaque = (void *)(uintptr_t)vhostfd; 763 options.nvqs = 2; 764 765 s->vhost_net = vhost_net_init(&options); 766 if (!s->vhost_net) { 767 if (tap->has_vhostforce && tap->vhostforce) { 768 error_setg(errp, VHOST_NET_INIT_FAILED); 769 } else { 770 warn_report(VHOST_NET_INIT_FAILED); 771 } 772 goto failed; 773 } 774 } else if (vhostfdname) { 775 error_setg(errp, "vhostfd(s)= is not valid without vhost"); 776 goto failed; 777 } 778 779 return; 780 781 failed: 782 qemu_del_net_client(&s->nc); 783 } 784 785 static int get_fds(char *str, char *fds[], int max) 786 { 787 char *ptr = str, *this; 788 size_t len = strlen(str); 789 int i = 0; 790 791 while (i < max && ptr < str + len) { 792 this = strchr(ptr, ':'); 793 794 if (this == NULL) { 795 fds[i] = g_strdup(ptr); 796 } else { 797 fds[i] = g_strndup(ptr, this - ptr); 798 } 799 800 i++; 801 if (this == NULL) { 802 break; 803 } else { 804 ptr = this + 1; 805 } 806 } 807 808 return i; 809 } 810 811 int net_init_tap(const Netdev *netdev, const char *name, 812 NetClientState *peer, Error **errp) 813 { 814 const NetdevTapOptions *tap; 815 int fd, vnet_hdr = 0, i = 0, queues; 816 /* for the no-fd, no-helper case */ 817 const char *script; 818 const char *downscript; 819 Error *err = NULL; 820 const char *vhostfdname; 821 char ifname[128]; 822 int ret = 0; 823 824 assert(netdev->type == NET_CLIENT_DRIVER_TAP); 825 tap = &netdev->u.tap; 826 queues = tap->has_queues ? tap->queues : 1; 827 vhostfdname = tap->vhostfd; 828 script = tap->script; 829 downscript = tap->downscript; 830 831 /* QEMU hubs do not support multiqueue tap, in this case peer is set. 832 * For -netdev, peer is always NULL. */ 833 if (peer && (tap->has_queues || tap->fds || tap->vhostfds)) { 834 error_setg(errp, "Multiqueue tap cannot be used with hubs"); 835 return -1; 836 } 837 838 if (tap->fd) { 839 if (tap->ifname || tap->script || tap->downscript || 840 tap->has_vnet_hdr || tap->helper || tap->has_queues || 841 tap->fds || tap->vhostfds) { 842 error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " 843 "helper=, queues=, fds=, and vhostfds= " 844 "are invalid with fd="); 845 return -1; 846 } 847 848 fd = monitor_fd_param(monitor_cur(), tap->fd, errp); 849 if (fd == -1) { 850 return -1; 851 } 852 853 if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { 854 error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", 855 name, fd); 856 close(fd); 857 return -1; 858 } 859 860 vnet_hdr = tap_probe_vnet_hdr(fd, errp); 861 if (vnet_hdr < 0) { 862 close(fd); 863 return -1; 864 } 865 866 net_init_tap_one(tap, peer, "tap", name, NULL, 867 script, downscript, 868 vhostfdname, vnet_hdr, fd, &err); 869 if (err) { 870 error_propagate(errp, err); 871 close(fd); 872 return -1; 873 } 874 } else if (tap->fds) { 875 char **fds; 876 char **vhost_fds; 877 int nfds = 0, nvhosts = 0; 878 879 if (tap->ifname || tap->script || tap->downscript || 880 tap->has_vnet_hdr || tap->helper || tap->has_queues || 881 tap->vhostfd) { 882 error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " 883 "helper=, queues=, and vhostfd= " 884 "are invalid with fds="); 885 return -1; 886 } 887 888 fds = g_new0(char *, MAX_TAP_QUEUES); 889 vhost_fds = g_new0(char *, MAX_TAP_QUEUES); 890 891 nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES); 892 if (tap->vhostfds) { 893 nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES); 894 if (nfds != nvhosts) { 895 error_setg(errp, "The number of fds passed does not match " 896 "the number of vhostfds passed"); 897 ret = -1; 898 goto free_fail; 899 } 900 } 901 902 for (i = 0; i < nfds; i++) { 903 fd = monitor_fd_param(monitor_cur(), fds[i], errp); 904 if (fd == -1) { 905 ret = -1; 906 goto free_fail; 907 } 908 909 ret = g_unix_set_fd_nonblocking(fd, true, NULL); 910 if (!ret) { 911 error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", 912 name, fd); 913 goto free_fail; 914 } 915 916 if (i == 0) { 917 vnet_hdr = tap_probe_vnet_hdr(fd, errp); 918 if (vnet_hdr < 0) { 919 ret = -1; 920 goto free_fail; 921 } 922 } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) { 923 error_setg(errp, 924 "vnet_hdr not consistent across given tap fds"); 925 ret = -1; 926 goto free_fail; 927 } 928 929 net_init_tap_one(tap, peer, "tap", name, ifname, 930 script, downscript, 931 tap->vhostfds ? vhost_fds[i] : NULL, 932 vnet_hdr, fd, &err); 933 if (err) { 934 error_propagate(errp, err); 935 ret = -1; 936 goto free_fail; 937 } 938 } 939 940 free_fail: 941 for (i = 0; i < nvhosts; i++) { 942 g_free(vhost_fds[i]); 943 } 944 for (i = 0; i < nfds; i++) { 945 g_free(fds[i]); 946 } 947 g_free(fds); 948 g_free(vhost_fds); 949 return ret; 950 } else if (tap->helper) { 951 if (tap->ifname || tap->script || tap->downscript || 952 tap->has_vnet_hdr || tap->has_queues || tap->vhostfds) { 953 error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " 954 "queues=, and vhostfds= are invalid with helper="); 955 return -1; 956 } 957 958 fd = net_bridge_run_helper(tap->helper, 959 tap->br ?: DEFAULT_BRIDGE_INTERFACE, 960 errp); 961 if (fd == -1) { 962 return -1; 963 } 964 965 if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { 966 error_setg_errno(errp, errno, "Failed to set FD nonblocking"); 967 return -1; 968 } 969 vnet_hdr = tap_probe_vnet_hdr(fd, errp); 970 if (vnet_hdr < 0) { 971 close(fd); 972 return -1; 973 } 974 975 net_init_tap_one(tap, peer, "bridge", name, ifname, 976 script, downscript, vhostfdname, 977 vnet_hdr, fd, &err); 978 if (err) { 979 error_propagate(errp, err); 980 close(fd); 981 return -1; 982 } 983 } else { 984 g_autofree char *default_script = NULL; 985 g_autofree char *default_downscript = NULL; 986 if (tap->vhostfds) { 987 error_setg(errp, "vhostfds= is invalid if fds= wasn't specified"); 988 return -1; 989 } 990 991 if (!script) { 992 script = default_script = get_relocated_path(DEFAULT_NETWORK_SCRIPT); 993 } 994 if (!downscript) { 995 downscript = default_downscript = 996 get_relocated_path(DEFAULT_NETWORK_DOWN_SCRIPT); 997 } 998 999 if (tap->ifname) { 1000 pstrcpy(ifname, sizeof ifname, tap->ifname); 1001 } else { 1002 ifname[0] = '\0'; 1003 } 1004 1005 for (i = 0; i < queues; i++) { 1006 fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script, 1007 ifname, sizeof ifname, queues > 1, errp); 1008 if (fd == -1) { 1009 return -1; 1010 } 1011 1012 if (queues > 1 && i == 0 && !tap->ifname) { 1013 if (tap_fd_get_ifname(fd, ifname)) { 1014 error_setg(errp, "Fail to get ifname"); 1015 close(fd); 1016 return -1; 1017 } 1018 } 1019 1020 net_init_tap_one(tap, peer, "tap", name, ifname, 1021 i >= 1 ? "no" : script, 1022 i >= 1 ? "no" : downscript, 1023 vhostfdname, vnet_hdr, fd, &err); 1024 if (err) { 1025 error_propagate(errp, err); 1026 close(fd); 1027 return -1; 1028 } 1029 } 1030 } 1031 1032 return 0; 1033 } 1034 1035 VHostNetState *tap_get_vhost_net(NetClientState *nc) 1036 { 1037 TAPState *s = DO_UPCAST(TAPState, nc, nc); 1038 assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 1039 return s->vhost_net; 1040 } 1041 1042 int tap_enable(NetClientState *nc) 1043 { 1044 TAPState *s = DO_UPCAST(TAPState, nc, nc); 1045 int ret; 1046 1047 if (s->enabled) { 1048 return 0; 1049 } else { 1050 ret = tap_fd_enable(s->fd); 1051 if (ret == 0) { 1052 s->enabled = true; 1053 tap_update_fd_handler(s); 1054 } 1055 return ret; 1056 } 1057 } 1058 1059 int tap_disable(NetClientState *nc) 1060 { 1061 TAPState *s = DO_UPCAST(TAPState, nc, nc); 1062 int ret; 1063 1064 if (s->enabled == 0) { 1065 return 0; 1066 } else { 1067 ret = tap_fd_disable(s->fd); 1068 if (ret == 0) { 1069 qemu_purge_queued_packets(nc); 1070 s->enabled = false; 1071 tap_update_fd_handler(s); 1072 } 1073 return ret; 1074 } 1075 } 1076