1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2009 Red Hat, Inc. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "tap.h" 27 28 #include "config-host.h" 29 30 #include <sys/ioctl.h> 31 #include <sys/stat.h> 32 #include <sys/wait.h> 33 #include <sys/socket.h> 34 #include <net/if.h> 35 36 #include "net.h" 37 #include "clients.h" 38 #include "monitor.h" 39 #include "sysemu.h" 40 #include "qemu-char.h" 41 #include "qemu-common.h" 42 #include "qemu-error.h" 43 44 #include "net/tap-linux.h" 45 46 #include "hw/vhost_net.h" 47 48 /* Maximum GSO packet size (64k) plus plenty of room for 49 * the ethernet and virtio_net headers 50 */ 51 #define TAP_BUFSIZE (4096 + 65536) 52 53 typedef struct TAPState { 54 NetClientState nc; 55 int fd; 56 char down_script[1024]; 57 char down_script_arg[128]; 58 uint8_t buf[TAP_BUFSIZE]; 59 unsigned int read_poll : 1; 60 unsigned int write_poll : 1; 61 unsigned int using_vnet_hdr : 1; 62 unsigned int has_ufo: 1; 63 VHostNetState *vhost_net; 64 unsigned host_vnet_hdr_len; 65 } TAPState; 66 67 static int launch_script(const char *setup_script, const char *ifname, int fd); 68 69 static int tap_can_send(void *opaque); 70 static void tap_send(void *opaque); 71 static void tap_writable(void *opaque); 72 73 static void tap_update_fd_handler(TAPState *s) 74 { 75 qemu_set_fd_handler2(s->fd, 76 s->read_poll ? tap_can_send : NULL, 77 s->read_poll ? tap_send : NULL, 78 s->write_poll ? tap_writable : NULL, 79 s); 80 } 81 82 static void tap_read_poll(TAPState *s, int enable) 83 { 84 s->read_poll = !!enable; 85 tap_update_fd_handler(s); 86 } 87 88 static void tap_write_poll(TAPState *s, int enable) 89 { 90 s->write_poll = !!enable; 91 tap_update_fd_handler(s); 92 } 93 94 static void tap_writable(void *opaque) 95 { 96 TAPState *s = opaque; 97 98 tap_write_poll(s, 0); 99 100 qemu_flush_queued_packets(&s->nc); 101 } 102 103 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) 104 { 105 ssize_t len; 106 107 do { 108 len = writev(s->fd, iov, iovcnt); 109 } while (len == -1 && errno == EINTR); 110 111 if (len == -1 && errno == EAGAIN) { 112 tap_write_poll(s, 1); 113 return 0; 114 } 115 116 return len; 117 } 118 119 static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov, 120 int iovcnt) 121 { 122 TAPState *s = DO_UPCAST(TAPState, nc, nc); 123 const struct iovec *iovp = iov; 124 struct iovec iov_copy[iovcnt + 1]; 125 struct virtio_net_hdr_mrg_rxbuf hdr = { }; 126 127 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 128 iov_copy[0].iov_base = &hdr; 129 iov_copy[0].iov_len = s->host_vnet_hdr_len; 130 memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); 131 iovp = iov_copy; 132 iovcnt++; 133 } 134 135 return tap_write_packet(s, iovp, iovcnt); 136 } 137 138 static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size) 139 { 140 TAPState *s = DO_UPCAST(TAPState, nc, nc); 141 struct iovec iov[2]; 142 int iovcnt = 0; 143 struct virtio_net_hdr_mrg_rxbuf hdr = { }; 144 145 if (s->host_vnet_hdr_len) { 146 iov[iovcnt].iov_base = &hdr; 147 iov[iovcnt].iov_len = s->host_vnet_hdr_len; 148 iovcnt++; 149 } 150 151 iov[iovcnt].iov_base = (char *)buf; 152 iov[iovcnt].iov_len = size; 153 iovcnt++; 154 155 return tap_write_packet(s, iov, iovcnt); 156 } 157 158 static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) 159 { 160 TAPState *s = DO_UPCAST(TAPState, nc, nc); 161 struct iovec iov[1]; 162 163 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 164 return tap_receive_raw(nc, buf, size); 165 } 166 167 iov[0].iov_base = (char *)buf; 168 iov[0].iov_len = size; 169 170 return tap_write_packet(s, iov, 1); 171 } 172 173 static int tap_can_send(void *opaque) 174 { 175 TAPState *s = opaque; 176 177 return qemu_can_send_packet(&s->nc); 178 } 179 180 #ifndef __sun__ 181 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) 182 { 183 return read(tapfd, buf, maxlen); 184 } 185 #endif 186 187 static void tap_send_completed(NetClientState *nc, ssize_t len) 188 { 189 TAPState *s = DO_UPCAST(TAPState, nc, nc); 190 tap_read_poll(s, 1); 191 } 192 193 static void tap_send(void *opaque) 194 { 195 TAPState *s = opaque; 196 int size; 197 198 do { 199 uint8_t *buf = s->buf; 200 201 size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); 202 if (size <= 0) { 203 break; 204 } 205 206 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 207 buf += s->host_vnet_hdr_len; 208 size -= s->host_vnet_hdr_len; 209 } 210 211 size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); 212 if (size == 0) { 213 tap_read_poll(s, 0); 214 } 215 } while (size > 0 && qemu_can_send_packet(&s->nc)); 216 } 217 218 int tap_has_ufo(NetClientState *nc) 219 { 220 TAPState *s = DO_UPCAST(TAPState, nc, nc); 221 222 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 223 224 return s->has_ufo; 225 } 226 227 int tap_has_vnet_hdr(NetClientState *nc) 228 { 229 TAPState *s = DO_UPCAST(TAPState, nc, nc); 230 231 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 232 233 return !!s->host_vnet_hdr_len; 234 } 235 236 int tap_has_vnet_hdr_len(NetClientState *nc, int len) 237 { 238 TAPState *s = DO_UPCAST(TAPState, nc, nc); 239 240 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 241 242 return tap_probe_vnet_hdr_len(s->fd, len); 243 } 244 245 void tap_set_vnet_hdr_len(NetClientState *nc, int len) 246 { 247 TAPState *s = DO_UPCAST(TAPState, nc, nc); 248 249 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 250 assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || 251 len == sizeof(struct virtio_net_hdr)); 252 253 tap_fd_set_vnet_hdr_len(s->fd, len); 254 s->host_vnet_hdr_len = len; 255 } 256 257 void tap_using_vnet_hdr(NetClientState *nc, int using_vnet_hdr) 258 { 259 TAPState *s = DO_UPCAST(TAPState, nc, nc); 260 261 using_vnet_hdr = using_vnet_hdr != 0; 262 263 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 264 assert(!!s->host_vnet_hdr_len == using_vnet_hdr); 265 266 s->using_vnet_hdr = using_vnet_hdr; 267 } 268 269 void tap_set_offload(NetClientState *nc, int csum, int tso4, 270 int tso6, int ecn, int ufo) 271 { 272 TAPState *s = DO_UPCAST(TAPState, nc, nc); 273 if (s->fd < 0) { 274 return; 275 } 276 277 tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); 278 } 279 280 static void tap_cleanup(NetClientState *nc) 281 { 282 TAPState *s = DO_UPCAST(TAPState, nc, nc); 283 284 if (s->vhost_net) { 285 vhost_net_cleanup(s->vhost_net); 286 s->vhost_net = NULL; 287 } 288 289 qemu_purge_queued_packets(nc); 290 291 if (s->down_script[0]) 292 launch_script(s->down_script, s->down_script_arg, s->fd); 293 294 tap_read_poll(s, 0); 295 tap_write_poll(s, 0); 296 close(s->fd); 297 s->fd = -1; 298 } 299 300 static void tap_poll(NetClientState *nc, bool enable) 301 { 302 TAPState *s = DO_UPCAST(TAPState, nc, nc); 303 tap_read_poll(s, enable); 304 tap_write_poll(s, enable); 305 } 306 307 int tap_get_fd(NetClientState *nc) 308 { 309 TAPState *s = DO_UPCAST(TAPState, nc, nc); 310 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 311 return s->fd; 312 } 313 314 /* fd support */ 315 316 static NetClientInfo net_tap_info = { 317 .type = NET_CLIENT_OPTIONS_KIND_TAP, 318 .size = sizeof(TAPState), 319 .receive = tap_receive, 320 .receive_raw = tap_receive_raw, 321 .receive_iov = tap_receive_iov, 322 .poll = tap_poll, 323 .cleanup = tap_cleanup, 324 }; 325 326 static TAPState *net_tap_fd_init(NetClientState *peer, 327 const char *model, 328 const char *name, 329 int fd, 330 int vnet_hdr) 331 { 332 NetClientState *nc; 333 TAPState *s; 334 335 nc = qemu_new_net_client(&net_tap_info, peer, model, name); 336 337 s = DO_UPCAST(TAPState, nc, nc); 338 339 s->fd = fd; 340 s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; 341 s->using_vnet_hdr = 0; 342 s->has_ufo = tap_probe_has_ufo(s->fd); 343 tap_set_offload(&s->nc, 0, 0, 0, 0, 0); 344 tap_read_poll(s, 1); 345 s->vhost_net = NULL; 346 return s; 347 } 348 349 static int launch_script(const char *setup_script, const char *ifname, int fd) 350 { 351 int pid, status; 352 char *args[3]; 353 char **parg; 354 355 /* try to launch network script */ 356 pid = fork(); 357 if (pid == 0) { 358 int open_max = sysconf(_SC_OPEN_MAX), i; 359 360 for (i = 0; i < open_max; i++) { 361 if (i != STDIN_FILENO && 362 i != STDOUT_FILENO && 363 i != STDERR_FILENO && 364 i != fd) { 365 close(i); 366 } 367 } 368 parg = args; 369 *parg++ = (char *)setup_script; 370 *parg++ = (char *)ifname; 371 *parg = NULL; 372 execv(setup_script, args); 373 _exit(1); 374 } else if (pid > 0) { 375 while (waitpid(pid, &status, 0) != pid) { 376 /* loop */ 377 } 378 379 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 380 return 0; 381 } 382 } 383 fprintf(stderr, "%s: could not launch network script\n", setup_script); 384 return -1; 385 } 386 387 static int recv_fd(int c) 388 { 389 int fd; 390 uint8_t msgbuf[CMSG_SPACE(sizeof(fd))]; 391 struct msghdr msg = { 392 .msg_control = msgbuf, 393 .msg_controllen = sizeof(msgbuf), 394 }; 395 struct cmsghdr *cmsg; 396 struct iovec iov; 397 uint8_t req[1]; 398 ssize_t len; 399 400 cmsg = CMSG_FIRSTHDR(&msg); 401 cmsg->cmsg_level = SOL_SOCKET; 402 cmsg->cmsg_type = SCM_RIGHTS; 403 cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); 404 msg.msg_controllen = cmsg->cmsg_len; 405 406 iov.iov_base = req; 407 iov.iov_len = sizeof(req); 408 409 msg.msg_iov = &iov; 410 msg.msg_iovlen = 1; 411 412 len = recvmsg(c, &msg, 0); 413 if (len > 0) { 414 memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); 415 return fd; 416 } 417 418 return len; 419 } 420 421 static int net_bridge_run_helper(const char *helper, const char *bridge) 422 { 423 sigset_t oldmask, mask; 424 int pid, status; 425 char *args[5]; 426 char **parg; 427 int sv[2]; 428 429 sigemptyset(&mask); 430 sigaddset(&mask, SIGCHLD); 431 sigprocmask(SIG_BLOCK, &mask, &oldmask); 432 433 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 434 return -1; 435 } 436 437 /* try to launch bridge helper */ 438 pid = fork(); 439 if (pid == 0) { 440 int open_max = sysconf(_SC_OPEN_MAX), i; 441 char fd_buf[6+10]; 442 char br_buf[6+IFNAMSIZ] = {0}; 443 char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15]; 444 445 for (i = 0; i < open_max; i++) { 446 if (i != STDIN_FILENO && 447 i != STDOUT_FILENO && 448 i != STDERR_FILENO && 449 i != sv[1]) { 450 close(i); 451 } 452 } 453 454 snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]); 455 456 if (strrchr(helper, ' ') || strrchr(helper, '\t')) { 457 /* assume helper is a command */ 458 459 if (strstr(helper, "--br=") == NULL) { 460 snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); 461 } 462 463 snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s", 464 helper, "--use-vnet", fd_buf, br_buf); 465 466 parg = args; 467 *parg++ = (char *)"sh"; 468 *parg++ = (char *)"-c"; 469 *parg++ = helper_cmd; 470 *parg++ = NULL; 471 472 execv("/bin/sh", args); 473 } else { 474 /* assume helper is just the executable path name */ 475 476 snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); 477 478 parg = args; 479 *parg++ = (char *)helper; 480 *parg++ = (char *)"--use-vnet"; 481 *parg++ = fd_buf; 482 *parg++ = br_buf; 483 *parg++ = NULL; 484 485 execv(helper, args); 486 } 487 _exit(1); 488 489 } else if (pid > 0) { 490 int fd; 491 492 close(sv[1]); 493 494 do { 495 fd = recv_fd(sv[0]); 496 } while (fd == -1 && errno == EINTR); 497 498 close(sv[0]); 499 500 while (waitpid(pid, &status, 0) != pid) { 501 /* loop */ 502 } 503 sigprocmask(SIG_SETMASK, &oldmask, NULL); 504 if (fd < 0) { 505 fprintf(stderr, "failed to recv file descriptor\n"); 506 return -1; 507 } 508 509 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 510 return fd; 511 } 512 } 513 fprintf(stderr, "failed to launch bridge helper\n"); 514 return -1; 515 } 516 517 int net_init_bridge(const NetClientOptions *opts, const char *name, 518 NetClientState *peer) 519 { 520 const NetdevBridgeOptions *bridge; 521 const char *helper, *br; 522 523 TAPState *s; 524 int fd, vnet_hdr; 525 526 assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE); 527 bridge = opts->bridge; 528 529 helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; 530 br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; 531 532 fd = net_bridge_run_helper(helper, br); 533 if (fd == -1) { 534 return -1; 535 } 536 537 fcntl(fd, F_SETFL, O_NONBLOCK); 538 539 vnet_hdr = tap_probe_vnet_hdr(fd); 540 541 s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); 542 if (!s) { 543 close(fd); 544 return -1; 545 } 546 547 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, 548 br); 549 550 return 0; 551 } 552 553 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, 554 const char *setup_script, char *ifname, 555 size_t ifname_sz) 556 { 557 int fd, vnet_hdr_required; 558 559 if (tap->has_ifname) { 560 pstrcpy(ifname, ifname_sz, tap->ifname); 561 } else { 562 assert(ifname_sz > 0); 563 ifname[0] = '\0'; 564 } 565 566 if (tap->has_vnet_hdr) { 567 *vnet_hdr = tap->vnet_hdr; 568 vnet_hdr_required = *vnet_hdr; 569 } else { 570 *vnet_hdr = 1; 571 vnet_hdr_required = 0; 572 } 573 574 TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required)); 575 if (fd < 0) { 576 return -1; 577 } 578 579 if (setup_script && 580 setup_script[0] != '\0' && 581 strcmp(setup_script, "no") != 0 && 582 launch_script(setup_script, ifname, fd)) { 583 close(fd); 584 return -1; 585 } 586 587 return fd; 588 } 589 590 int net_init_tap(const NetClientOptions *opts, const char *name, 591 NetClientState *peer) 592 { 593 const NetdevTapOptions *tap; 594 595 int fd, vnet_hdr = 0; 596 const char *model; 597 TAPState *s; 598 599 /* for the no-fd, no-helper case */ 600 const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */ 601 char ifname[128]; 602 603 assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP); 604 tap = opts->tap; 605 606 if (tap->has_fd) { 607 if (tap->has_ifname || tap->has_script || tap->has_downscript || 608 tap->has_vnet_hdr || tap->has_helper) { 609 error_report("ifname=, script=, downscript=, vnet_hdr=, " 610 "and helper= are invalid with fd="); 611 return -1; 612 } 613 614 fd = monitor_handle_fd_param(cur_mon, tap->fd); 615 if (fd == -1) { 616 return -1; 617 } 618 619 fcntl(fd, F_SETFL, O_NONBLOCK); 620 621 vnet_hdr = tap_probe_vnet_hdr(fd); 622 623 model = "tap"; 624 625 } else if (tap->has_helper) { 626 if (tap->has_ifname || tap->has_script || tap->has_downscript || 627 tap->has_vnet_hdr) { 628 error_report("ifname=, script=, downscript=, and vnet_hdr= " 629 "are invalid with helper="); 630 return -1; 631 } 632 633 fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE); 634 if (fd == -1) { 635 return -1; 636 } 637 638 fcntl(fd, F_SETFL, O_NONBLOCK); 639 640 vnet_hdr = tap_probe_vnet_hdr(fd); 641 642 model = "bridge"; 643 644 } else { 645 script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT; 646 fd = net_tap_init(tap, &vnet_hdr, script, ifname, sizeof ifname); 647 if (fd == -1) { 648 return -1; 649 } 650 651 model = "tap"; 652 } 653 654 s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); 655 if (!s) { 656 close(fd); 657 return -1; 658 } 659 660 if (tap_set_sndbuf(s->fd, tap) < 0) { 661 return -1; 662 } 663 664 if (tap->has_fd) { 665 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd); 666 } else if (tap->has_helper) { 667 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s", 668 tap->helper); 669 } else { 670 const char *downscript; 671 672 downscript = tap->has_downscript ? tap->downscript : 673 DEFAULT_NETWORK_DOWN_SCRIPT; 674 675 snprintf(s->nc.info_str, sizeof(s->nc.info_str), 676 "ifname=%s,script=%s,downscript=%s", ifname, script, 677 downscript); 678 679 if (strcmp(downscript, "no") != 0) { 680 snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); 681 snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname); 682 } 683 } 684 685 if (tap->has_vhost ? tap->vhost : 686 tap->has_vhostfd || (tap->has_vhostforce && tap->vhostforce)) { 687 int vhostfd; 688 689 if (tap->has_vhostfd) { 690 vhostfd = monitor_handle_fd_param(cur_mon, tap->vhostfd); 691 if (vhostfd == -1) { 692 return -1; 693 } 694 } else { 695 vhostfd = -1; 696 } 697 698 s->vhost_net = vhost_net_init(&s->nc, vhostfd, 699 tap->has_vhostforce && tap->vhostforce); 700 if (!s->vhost_net) { 701 error_report("vhost-net requested but could not be initialized"); 702 return -1; 703 } 704 } else if (tap->has_vhostfd) { 705 error_report("vhostfd= is not valid without vhost"); 706 return -1; 707 } 708 709 return 0; 710 } 711 712 VHostNetState *tap_get_vhost_net(NetClientState *nc) 713 { 714 TAPState *s = DO_UPCAST(TAPState, nc, nc); 715 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 716 return s->vhost_net; 717 } 718