1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2012-2014 Cisco Systems 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu/osdep.h" 27 #include <linux/ip.h> 28 #include <netdb.h> 29 #include "net/net.h" 30 #include "clients.h" 31 #include "qapi/error.h" 32 #include "qemu-common.h" 33 #include "qemu/error-report.h" 34 #include "qemu/option.h" 35 #include "qemu/sockets.h" 36 #include "qemu/iov.h" 37 #include "qemu/main-loop.h" 38 39 40 /* The buffer size needs to be investigated for optimum numbers and 41 * optimum means of paging in on different systems. This size is 42 * chosen to be sufficient to accommodate one packet with some headers 43 */ 44 45 #define BUFFER_ALIGN sysconf(_SC_PAGESIZE) 46 #define BUFFER_SIZE 2048 47 #define IOVSIZE 2 48 #define MAX_L2TPV3_MSGCNT 64 49 #define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE) 50 51 /* Header set to 0x30000 signifies a data packet */ 52 53 #define L2TPV3_DATA_PACKET 0x30000 54 55 /* IANA-assigned IP protocol ID for L2TPv3 */ 56 57 #ifndef IPPROTO_L2TP 58 #define IPPROTO_L2TP 0x73 59 #endif 60 61 typedef struct NetL2TPV3State { 62 NetClientState nc; 63 int fd; 64 65 /* 66 * these are used for xmit - that happens packet a time 67 * and for first sign of life packet (easier to parse that once) 68 */ 69 70 uint8_t *header_buf; 71 struct iovec *vec; 72 73 /* 74 * these are used for receive - try to "eat" up to 32 packets at a time 75 */ 76 77 struct mmsghdr *msgvec; 78 79 /* 80 * peer address 81 */ 82 83 struct sockaddr_storage *dgram_dst; 84 uint32_t dst_size; 85 86 /* 87 * L2TPv3 parameters 88 */ 89 90 uint64_t rx_cookie; 91 uint64_t tx_cookie; 92 uint32_t rx_session; 93 uint32_t tx_session; 94 uint32_t header_size; 95 uint32_t counter; 96 97 /* 98 * DOS avoidance in error handling 99 */ 100 101 bool header_mismatch; 102 103 /* 104 * Ring buffer handling 105 */ 106 107 int queue_head; 108 int queue_tail; 109 int queue_depth; 110 111 /* 112 * Precomputed offsets 113 */ 114 115 uint32_t offset; 116 uint32_t cookie_offset; 117 uint32_t counter_offset; 118 uint32_t session_offset; 119 120 /* Poll Control */ 121 122 bool read_poll; 123 bool write_poll; 124 125 /* Flags */ 126 127 bool ipv6; 128 bool udp; 129 bool has_counter; 130 bool pin_counter; 131 bool cookie; 132 bool cookie_is_64; 133 134 } NetL2TPV3State; 135 136 static void net_l2tpv3_send(void *opaque); 137 static void l2tpv3_writable(void *opaque); 138 139 static void l2tpv3_update_fd_handler(NetL2TPV3State *s) 140 { 141 qemu_set_fd_handler(s->fd, 142 s->read_poll ? net_l2tpv3_send : NULL, 143 s->write_poll ? l2tpv3_writable : NULL, 144 s); 145 } 146 147 static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable) 148 { 149 if (s->read_poll != enable) { 150 s->read_poll = enable; 151 l2tpv3_update_fd_handler(s); 152 } 153 } 154 155 static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable) 156 { 157 if (s->write_poll != enable) { 158 s->write_poll = enable; 159 l2tpv3_update_fd_handler(s); 160 } 161 } 162 163 static void l2tpv3_writable(void *opaque) 164 { 165 NetL2TPV3State *s = opaque; 166 l2tpv3_write_poll(s, false); 167 qemu_flush_queued_packets(&s->nc); 168 } 169 170 static void l2tpv3_send_completed(NetClientState *nc, ssize_t len) 171 { 172 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 173 l2tpv3_read_poll(s, true); 174 } 175 176 static void l2tpv3_poll(NetClientState *nc, bool enable) 177 { 178 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 179 l2tpv3_write_poll(s, enable); 180 l2tpv3_read_poll(s, enable); 181 } 182 183 static void l2tpv3_form_header(NetL2TPV3State *s) 184 { 185 uint32_t *counter; 186 187 if (s->udp) { 188 stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET); 189 } 190 stl_be_p( 191 (uint32_t *) (s->header_buf + s->session_offset), 192 s->tx_session 193 ); 194 if (s->cookie) { 195 if (s->cookie_is_64) { 196 stq_be_p( 197 (uint64_t *)(s->header_buf + s->cookie_offset), 198 s->tx_cookie 199 ); 200 } else { 201 stl_be_p( 202 (uint32_t *) (s->header_buf + s->cookie_offset), 203 s->tx_cookie 204 ); 205 } 206 } 207 if (s->has_counter) { 208 counter = (uint32_t *)(s->header_buf + s->counter_offset); 209 if (s->pin_counter) { 210 *counter = 0; 211 } else { 212 stl_be_p(counter, ++s->counter); 213 } 214 } 215 } 216 217 static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc, 218 const struct iovec *iov, 219 int iovcnt) 220 { 221 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 222 223 struct msghdr message; 224 int ret; 225 226 if (iovcnt > MAX_L2TPV3_IOVCNT - 1) { 227 error_report( 228 "iovec too long %d > %d, change l2tpv3.h", 229 iovcnt, MAX_L2TPV3_IOVCNT 230 ); 231 return -1; 232 } 233 l2tpv3_form_header(s); 234 memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec)); 235 s->vec->iov_base = s->header_buf; 236 s->vec->iov_len = s->offset; 237 message.msg_name = s->dgram_dst; 238 message.msg_namelen = s->dst_size; 239 message.msg_iov = s->vec; 240 message.msg_iovlen = iovcnt + 1; 241 message.msg_control = NULL; 242 message.msg_controllen = 0; 243 message.msg_flags = 0; 244 do { 245 ret = sendmsg(s->fd, &message, 0); 246 } while ((ret == -1) && (errno == EINTR)); 247 if (ret > 0) { 248 ret -= s->offset; 249 } else if (ret == 0) { 250 /* belt and braces - should not occur on DGRAM 251 * we should get an error and never a 0 send 252 */ 253 ret = iov_size(iov, iovcnt); 254 } else { 255 /* signal upper layer that socket buffer is full */ 256 ret = -errno; 257 if (ret == -EAGAIN || ret == -ENOBUFS) { 258 l2tpv3_write_poll(s, true); 259 ret = 0; 260 } 261 } 262 return ret; 263 } 264 265 static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc, 266 const uint8_t *buf, 267 size_t size) 268 { 269 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 270 271 struct iovec *vec; 272 struct msghdr message; 273 ssize_t ret = 0; 274 275 l2tpv3_form_header(s); 276 vec = s->vec; 277 vec->iov_base = s->header_buf; 278 vec->iov_len = s->offset; 279 vec++; 280 vec->iov_base = (void *) buf; 281 vec->iov_len = size; 282 message.msg_name = s->dgram_dst; 283 message.msg_namelen = s->dst_size; 284 message.msg_iov = s->vec; 285 message.msg_iovlen = 2; 286 message.msg_control = NULL; 287 message.msg_controllen = 0; 288 message.msg_flags = 0; 289 do { 290 ret = sendmsg(s->fd, &message, 0); 291 } while ((ret == -1) && (errno == EINTR)); 292 if (ret > 0) { 293 ret -= s->offset; 294 } else if (ret == 0) { 295 /* belt and braces - should not occur on DGRAM 296 * we should get an error and never a 0 send 297 */ 298 ret = size; 299 } else { 300 ret = -errno; 301 if (ret == -EAGAIN || ret == -ENOBUFS) { 302 /* signal upper layer that socket buffer is full */ 303 l2tpv3_write_poll(s, true); 304 ret = 0; 305 } 306 } 307 return ret; 308 } 309 310 static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf) 311 { 312 313 uint32_t *session; 314 uint64_t cookie; 315 316 if ((!s->udp) && (!s->ipv6)) { 317 buf += sizeof(struct iphdr) /* fix for ipv4 raw */; 318 } 319 320 /* we do not do a strict check for "data" packets as per 321 * the RFC spec because the pure IP spec does not have 322 * that anyway. 323 */ 324 325 if (s->cookie) { 326 if (s->cookie_is_64) { 327 cookie = ldq_be_p(buf + s->cookie_offset); 328 } else { 329 cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL; 330 } 331 if (cookie != s->rx_cookie) { 332 if (!s->header_mismatch) { 333 error_report("unknown cookie id"); 334 } 335 return -1; 336 } 337 } 338 session = (uint32_t *) (buf + s->session_offset); 339 if (ldl_be_p(session) != s->rx_session) { 340 if (!s->header_mismatch) { 341 error_report("session mismatch"); 342 } 343 return -1; 344 } 345 return 0; 346 } 347 348 static void net_l2tpv3_process_queue(NetL2TPV3State *s) 349 { 350 int size = 0; 351 struct iovec *vec; 352 bool bad_read; 353 int data_size; 354 struct mmsghdr *msgvec; 355 356 /* go into ring mode only if there is a "pending" tail */ 357 if (s->queue_depth > 0) { 358 do { 359 msgvec = s->msgvec + s->queue_tail; 360 if (msgvec->msg_len > 0) { 361 data_size = msgvec->msg_len - s->header_size; 362 vec = msgvec->msg_hdr.msg_iov; 363 if ((data_size > 0) && 364 (l2tpv3_verify_header(s, vec->iov_base) == 0)) { 365 vec++; 366 /* Use the legacy delivery for now, we will 367 * switch to using our own ring as a queueing mechanism 368 * at a later date 369 */ 370 size = qemu_send_packet_async( 371 &s->nc, 372 vec->iov_base, 373 data_size, 374 l2tpv3_send_completed 375 ); 376 if (size == 0) { 377 l2tpv3_read_poll(s, false); 378 } 379 bad_read = false; 380 } else { 381 bad_read = true; 382 if (!s->header_mismatch) { 383 /* report error only once */ 384 error_report("l2tpv3 header verification failed"); 385 s->header_mismatch = true; 386 } 387 } 388 } else { 389 bad_read = true; 390 } 391 s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT; 392 s->queue_depth--; 393 } while ( 394 (s->queue_depth > 0) && 395 qemu_can_send_packet(&s->nc) && 396 ((size > 0) || bad_read) 397 ); 398 } 399 } 400 401 static void net_l2tpv3_send(void *opaque) 402 { 403 NetL2TPV3State *s = opaque; 404 int target_count, count; 405 struct mmsghdr *msgvec; 406 407 /* go into ring mode only if there is a "pending" tail */ 408 409 if (s->queue_depth) { 410 411 /* The ring buffer we use has variable intake 412 * count of how much we can read varies - adjust accordingly 413 */ 414 415 target_count = MAX_L2TPV3_MSGCNT - s->queue_depth; 416 417 /* Ensure we do not overrun the ring when we have 418 * a lot of enqueued packets 419 */ 420 421 if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) { 422 target_count = MAX_L2TPV3_MSGCNT - s->queue_head; 423 } 424 } else { 425 426 /* we do not have any pending packets - we can use 427 * the whole message vector linearly instead of using 428 * it as a ring 429 */ 430 431 s->queue_head = 0; 432 s->queue_tail = 0; 433 target_count = MAX_L2TPV3_MSGCNT; 434 } 435 436 msgvec = s->msgvec + s->queue_head; 437 if (target_count > 0) { 438 do { 439 count = recvmmsg( 440 s->fd, 441 msgvec, 442 target_count, MSG_DONTWAIT, NULL); 443 } while ((count == -1) && (errno == EINTR)); 444 if (count < 0) { 445 /* Recv error - we still need to flush packets here, 446 * (re)set queue head to current position 447 */ 448 count = 0; 449 } 450 s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT; 451 s->queue_depth += count; 452 } 453 net_l2tpv3_process_queue(s); 454 } 455 456 static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount) 457 { 458 int i, j; 459 struct iovec *iov; 460 struct mmsghdr *cleanup = msgvec; 461 if (cleanup) { 462 for (i = 0; i < count; i++) { 463 if (cleanup->msg_hdr.msg_iov) { 464 iov = cleanup->msg_hdr.msg_iov; 465 for (j = 0; j < iovcount; j++) { 466 g_free(iov->iov_base); 467 iov++; 468 } 469 g_free(cleanup->msg_hdr.msg_iov); 470 } 471 cleanup++; 472 } 473 g_free(msgvec); 474 } 475 } 476 477 static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count) 478 { 479 int i; 480 struct iovec *iov; 481 struct mmsghdr *msgvec, *result; 482 483 msgvec = g_new(struct mmsghdr, count); 484 result = msgvec; 485 for (i = 0; i < count ; i++) { 486 msgvec->msg_hdr.msg_name = NULL; 487 msgvec->msg_hdr.msg_namelen = 0; 488 iov = g_new(struct iovec, IOVSIZE); 489 msgvec->msg_hdr.msg_iov = iov; 490 iov->iov_base = g_malloc(s->header_size); 491 iov->iov_len = s->header_size; 492 iov++ ; 493 iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE); 494 iov->iov_len = BUFFER_SIZE; 495 msgvec->msg_hdr.msg_iovlen = 2; 496 msgvec->msg_hdr.msg_control = NULL; 497 msgvec->msg_hdr.msg_controllen = 0; 498 msgvec->msg_hdr.msg_flags = 0; 499 msgvec++; 500 } 501 return result; 502 } 503 504 static void net_l2tpv3_cleanup(NetClientState *nc) 505 { 506 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 507 qemu_purge_queued_packets(nc); 508 l2tpv3_read_poll(s, false); 509 l2tpv3_write_poll(s, false); 510 if (s->fd >= 0) { 511 close(s->fd); 512 } 513 destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE); 514 g_free(s->vec); 515 g_free(s->header_buf); 516 g_free(s->dgram_dst); 517 } 518 519 static NetClientInfo net_l2tpv3_info = { 520 .type = NET_CLIENT_DRIVER_L2TPV3, 521 .size = sizeof(NetL2TPV3State), 522 .receive = net_l2tpv3_receive_dgram, 523 .receive_iov = net_l2tpv3_receive_dgram_iov, 524 .poll = l2tpv3_poll, 525 .cleanup = net_l2tpv3_cleanup, 526 }; 527 528 int net_init_l2tpv3(const Netdev *netdev, 529 const char *name, 530 NetClientState *peer, Error **errp) 531 { 532 const NetdevL2TPv3Options *l2tpv3; 533 NetL2TPV3State *s; 534 NetClientState *nc; 535 int fd = -1, gairet; 536 struct addrinfo hints; 537 struct addrinfo *result = NULL; 538 char *srcport, *dstport; 539 540 nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name); 541 542 s = DO_UPCAST(NetL2TPV3State, nc, nc); 543 544 s->queue_head = 0; 545 s->queue_tail = 0; 546 s->header_mismatch = false; 547 548 assert(netdev->type == NET_CLIENT_DRIVER_L2TPV3); 549 l2tpv3 = &netdev->u.l2tpv3; 550 551 if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { 552 s->ipv6 = l2tpv3->ipv6; 553 } else { 554 s->ipv6 = false; 555 } 556 557 if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) { 558 error_setg(errp, "offset must be less than 256 bytes"); 559 goto outerr; 560 } 561 562 if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) { 563 if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) { 564 s->cookie = true; 565 } else { 566 error_setg(errp, 567 "require both 'rxcookie' and 'txcookie' or neither"); 568 goto outerr; 569 } 570 } else { 571 s->cookie = false; 572 } 573 574 if (l2tpv3->has_cookie64 || l2tpv3->cookie64) { 575 s->cookie_is_64 = true; 576 } else { 577 s->cookie_is_64 = false; 578 } 579 580 if (l2tpv3->has_udp && l2tpv3->udp) { 581 s->udp = true; 582 if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) { 583 error_setg(errp, "need both src and dst port for udp"); 584 goto outerr; 585 } else { 586 srcport = l2tpv3->srcport; 587 dstport = l2tpv3->dstport; 588 } 589 } else { 590 s->udp = false; 591 srcport = NULL; 592 dstport = NULL; 593 } 594 595 596 s->offset = 4; 597 s->session_offset = 0; 598 s->cookie_offset = 4; 599 s->counter_offset = 4; 600 601 s->tx_session = l2tpv3->txsession; 602 if (l2tpv3->has_rxsession) { 603 s->rx_session = l2tpv3->rxsession; 604 } else { 605 s->rx_session = s->tx_session; 606 } 607 608 if (s->cookie) { 609 s->rx_cookie = l2tpv3->rxcookie; 610 s->tx_cookie = l2tpv3->txcookie; 611 if (s->cookie_is_64 == true) { 612 /* 64 bit cookie */ 613 s->offset += 8; 614 s->counter_offset += 8; 615 } else { 616 /* 32 bit cookie */ 617 s->offset += 4; 618 s->counter_offset += 4; 619 } 620 } 621 622 memset(&hints, 0, sizeof(hints)); 623 624 if (s->ipv6) { 625 hints.ai_family = AF_INET6; 626 } else { 627 hints.ai_family = AF_INET; 628 } 629 if (s->udp) { 630 hints.ai_socktype = SOCK_DGRAM; 631 hints.ai_protocol = 0; 632 s->offset += 4; 633 s->counter_offset += 4; 634 s->session_offset += 4; 635 s->cookie_offset += 4; 636 } else { 637 hints.ai_socktype = SOCK_RAW; 638 hints.ai_protocol = IPPROTO_L2TP; 639 } 640 641 gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result); 642 643 if ((gairet != 0) || (result == NULL)) { 644 error_setg(errp, "could not resolve src, errno = %s", 645 gai_strerror(gairet)); 646 goto outerr; 647 } 648 fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol); 649 if (fd == -1) { 650 fd = -errno; 651 error_setg(errp, "socket creation failed, errno = %d", 652 -fd); 653 goto outerr; 654 } 655 if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) { 656 error_setg(errp, "could not bind socket err=%i", errno); 657 goto outerr; 658 } 659 if (result) { 660 freeaddrinfo(result); 661 } 662 663 memset(&hints, 0, sizeof(hints)); 664 665 if (s->ipv6) { 666 hints.ai_family = AF_INET6; 667 } else { 668 hints.ai_family = AF_INET; 669 } 670 if (s->udp) { 671 hints.ai_socktype = SOCK_DGRAM; 672 hints.ai_protocol = 0; 673 } else { 674 hints.ai_socktype = SOCK_RAW; 675 hints.ai_protocol = IPPROTO_L2TP; 676 } 677 678 result = NULL; 679 gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result); 680 if ((gairet != 0) || (result == NULL)) { 681 error_setg(errp, "could not resolve dst, error = %s", 682 gai_strerror(gairet)); 683 goto outerr; 684 } 685 686 s->dgram_dst = g_new0(struct sockaddr_storage, 1); 687 memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen); 688 s->dst_size = result->ai_addrlen; 689 690 if (result) { 691 freeaddrinfo(result); 692 } 693 694 if (l2tpv3->has_counter && l2tpv3->counter) { 695 s->has_counter = true; 696 s->offset += 4; 697 } else { 698 s->has_counter = false; 699 } 700 701 if (l2tpv3->has_pincounter && l2tpv3->pincounter) { 702 s->has_counter = true; /* pin counter implies that there is counter */ 703 s->pin_counter = true; 704 } else { 705 s->pin_counter = false; 706 } 707 708 if (l2tpv3->has_offset) { 709 /* extra offset */ 710 s->offset += l2tpv3->offset; 711 } 712 713 if ((s->ipv6) || (s->udp)) { 714 s->header_size = s->offset; 715 } else { 716 s->header_size = s->offset + sizeof(struct iphdr); 717 } 718 719 s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT); 720 s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT); 721 s->header_buf = g_malloc(s->header_size); 722 723 qemu_set_nonblock(fd); 724 725 s->fd = fd; 726 s->counter = 0; 727 728 l2tpv3_read_poll(s, true); 729 730 snprintf(s->nc.info_str, sizeof(s->nc.info_str), 731 "l2tpv3: connected"); 732 return 0; 733 outerr: 734 qemu_del_net_client(nc); 735 if (fd >= 0) { 736 close(fd); 737 } 738 if (result) { 739 freeaddrinfo(result); 740 } 741 return -1; 742 } 743 744