1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2012-2014 Cisco Systems 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include <linux/ip.h> 27 #include <netdb.h> 28 #include "config-host.h" 29 #include "net/net.h" 30 #include "clients.h" 31 #include "monitor/monitor.h" 32 #include "qemu-common.h" 33 #include "qemu/error-report.h" 34 #include "qemu/option.h" 35 #include "qemu/sockets.h" 36 #include "qemu/iov.h" 37 #include "qemu/main-loop.h" 38 39 40 /* The buffer size needs to be investigated for optimum numbers and 41 * optimum means of paging in on different systems. This size is 42 * chosen to be sufficient to accommodate one packet with some headers 43 */ 44 45 #define BUFFER_ALIGN sysconf(_SC_PAGESIZE) 46 #define BUFFER_SIZE 2048 47 #define IOVSIZE 2 48 #define MAX_L2TPV3_MSGCNT 64 49 #define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE) 50 51 /* Header set to 0x30000 signifies a data packet */ 52 53 #define L2TPV3_DATA_PACKET 0x30000 54 55 /* IANA-assigned IP protocol ID for L2TPv3 */ 56 57 #ifndef IPPROTO_L2TP 58 #define IPPROTO_L2TP 0x73 59 #endif 60 61 typedef struct NetL2TPV3State { 62 NetClientState nc; 63 int fd; 64 65 /* 66 * these are used for xmit - that happens packet a time 67 * and for first sign of life packet (easier to parse that once) 68 */ 69 70 uint8_t *header_buf; 71 struct iovec *vec; 72 73 /* 74 * these are used for receive - try to "eat" up to 32 packets at a time 75 */ 76 77 struct mmsghdr *msgvec; 78 79 /* 80 * peer address 81 */ 82 83 struct sockaddr_storage *dgram_dst; 84 uint32_t dst_size; 85 86 /* 87 * L2TPv3 parameters 88 */ 89 90 uint64_t rx_cookie; 91 uint64_t tx_cookie; 92 uint32_t rx_session; 93 uint32_t tx_session; 94 uint32_t header_size; 95 uint32_t counter; 96 97 /* 98 * DOS avoidance in error handling 99 */ 100 101 bool header_mismatch; 102 103 /* 104 * Ring buffer handling 105 */ 106 107 int queue_head; 108 int queue_tail; 109 int queue_depth; 110 111 /* 112 * Precomputed offsets 113 */ 114 115 uint32_t offset; 116 uint32_t cookie_offset; 117 uint32_t counter_offset; 118 uint32_t session_offset; 119 120 /* Poll Control */ 121 122 bool read_poll; 123 bool write_poll; 124 125 /* Flags */ 126 127 bool ipv6; 128 bool udp; 129 bool has_counter; 130 bool pin_counter; 131 bool cookie; 132 bool cookie_is_64; 133 134 } NetL2TPV3State; 135 136 static int l2tpv3_can_send(void *opaque); 137 static void net_l2tpv3_send(void *opaque); 138 static void l2tpv3_writable(void *opaque); 139 140 static void l2tpv3_update_fd_handler(NetL2TPV3State *s) 141 { 142 qemu_set_fd_handler2(s->fd, 143 s->read_poll ? l2tpv3_can_send : NULL, 144 s->read_poll ? net_l2tpv3_send : NULL, 145 s->write_poll ? l2tpv3_writable : NULL, 146 s); 147 } 148 149 static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable) 150 { 151 if (s->read_poll != enable) { 152 s->read_poll = enable; 153 l2tpv3_update_fd_handler(s); 154 } 155 } 156 157 static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable) 158 { 159 if (s->write_poll != enable) { 160 s->write_poll = enable; 161 l2tpv3_update_fd_handler(s); 162 } 163 } 164 165 static void l2tpv3_writable(void *opaque) 166 { 167 NetL2TPV3State *s = opaque; 168 l2tpv3_write_poll(s, false); 169 qemu_flush_queued_packets(&s->nc); 170 } 171 172 static int l2tpv3_can_send(void *opaque) 173 { 174 NetL2TPV3State *s = opaque; 175 176 return qemu_can_send_packet(&s->nc); 177 } 178 179 static void l2tpv3_send_completed(NetClientState *nc, ssize_t len) 180 { 181 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 182 l2tpv3_read_poll(s, true); 183 } 184 185 static void l2tpv3_poll(NetClientState *nc, bool enable) 186 { 187 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 188 l2tpv3_write_poll(s, enable); 189 l2tpv3_read_poll(s, enable); 190 } 191 192 static void l2tpv3_form_header(NetL2TPV3State *s) 193 { 194 uint32_t *counter; 195 196 if (s->udp) { 197 stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET); 198 } 199 stl_be_p( 200 (uint32_t *) (s->header_buf + s->session_offset), 201 s->tx_session 202 ); 203 if (s->cookie) { 204 if (s->cookie_is_64) { 205 stq_be_p( 206 (uint64_t *)(s->header_buf + s->cookie_offset), 207 s->tx_cookie 208 ); 209 } else { 210 stl_be_p( 211 (uint32_t *) (s->header_buf + s->cookie_offset), 212 s->tx_cookie 213 ); 214 } 215 } 216 if (s->has_counter) { 217 counter = (uint32_t *)(s->header_buf + s->counter_offset); 218 if (s->pin_counter) { 219 *counter = 0; 220 } else { 221 stl_be_p(counter, ++s->counter); 222 } 223 } 224 } 225 226 static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc, 227 const struct iovec *iov, 228 int iovcnt) 229 { 230 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 231 232 struct msghdr message; 233 int ret; 234 235 if (iovcnt > MAX_L2TPV3_IOVCNT - 1) { 236 error_report( 237 "iovec too long %d > %d, change l2tpv3.h", 238 iovcnt, MAX_L2TPV3_IOVCNT 239 ); 240 return -1; 241 } 242 l2tpv3_form_header(s); 243 memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec)); 244 s->vec->iov_base = s->header_buf; 245 s->vec->iov_len = s->offset; 246 message.msg_name = s->dgram_dst; 247 message.msg_namelen = s->dst_size; 248 message.msg_iov = s->vec; 249 message.msg_iovlen = iovcnt + 1; 250 message.msg_control = NULL; 251 message.msg_controllen = 0; 252 message.msg_flags = 0; 253 do { 254 ret = sendmsg(s->fd, &message, 0); 255 } while ((ret == -1) && (errno == EINTR)); 256 if (ret > 0) { 257 ret -= s->offset; 258 } else if (ret == 0) { 259 /* belt and braces - should not occur on DGRAM 260 * we should get an error and never a 0 send 261 */ 262 ret = iov_size(iov, iovcnt); 263 } else { 264 /* signal upper layer that socket buffer is full */ 265 ret = -errno; 266 if (ret == -EAGAIN || ret == -ENOBUFS) { 267 l2tpv3_write_poll(s, true); 268 ret = 0; 269 } 270 } 271 return ret; 272 } 273 274 static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc, 275 const uint8_t *buf, 276 size_t size) 277 { 278 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 279 280 struct iovec *vec; 281 struct msghdr message; 282 ssize_t ret = 0; 283 284 l2tpv3_form_header(s); 285 vec = s->vec; 286 vec->iov_base = s->header_buf; 287 vec->iov_len = s->offset; 288 vec++; 289 vec->iov_base = (void *) buf; 290 vec->iov_len = size; 291 message.msg_name = s->dgram_dst; 292 message.msg_namelen = s->dst_size; 293 message.msg_iov = s->vec; 294 message.msg_iovlen = 2; 295 message.msg_control = NULL; 296 message.msg_controllen = 0; 297 message.msg_flags = 0; 298 do { 299 ret = sendmsg(s->fd, &message, 0); 300 } while ((ret == -1) && (errno == EINTR)); 301 if (ret > 0) { 302 ret -= s->offset; 303 } else if (ret == 0) { 304 /* belt and braces - should not occur on DGRAM 305 * we should get an error and never a 0 send 306 */ 307 ret = size; 308 } else { 309 ret = -errno; 310 if (ret == -EAGAIN || ret == -ENOBUFS) { 311 /* signal upper layer that socket buffer is full */ 312 l2tpv3_write_poll(s, true); 313 ret = 0; 314 } 315 } 316 return ret; 317 } 318 319 static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf) 320 { 321 322 uint32_t *session; 323 uint64_t cookie; 324 325 if ((!s->udp) && (!s->ipv6)) { 326 buf += sizeof(struct iphdr) /* fix for ipv4 raw */; 327 } 328 329 /* we do not do a strict check for "data" packets as per 330 * the RFC spec because the pure IP spec does not have 331 * that anyway. 332 */ 333 334 if (s->cookie) { 335 if (s->cookie_is_64) { 336 cookie = ldq_be_p(buf + s->cookie_offset); 337 } else { 338 cookie = ldl_be_p(buf + s->cookie_offset); 339 } 340 if (cookie != s->rx_cookie) { 341 if (!s->header_mismatch) { 342 error_report("unknown cookie id"); 343 } 344 return -1; 345 } 346 } 347 session = (uint32_t *) (buf + s->session_offset); 348 if (ldl_be_p(session) != s->rx_session) { 349 if (!s->header_mismatch) { 350 error_report("session mismatch"); 351 } 352 return -1; 353 } 354 return 0; 355 } 356 357 static void net_l2tpv3_process_queue(NetL2TPV3State *s) 358 { 359 int size = 0; 360 struct iovec *vec; 361 bool bad_read; 362 int data_size; 363 struct mmsghdr *msgvec; 364 365 /* go into ring mode only if there is a "pending" tail */ 366 if (s->queue_depth > 0) { 367 do { 368 msgvec = s->msgvec + s->queue_tail; 369 if (msgvec->msg_len > 0) { 370 data_size = msgvec->msg_len - s->header_size; 371 vec = msgvec->msg_hdr.msg_iov; 372 if ((data_size > 0) && 373 (l2tpv3_verify_header(s, vec->iov_base) == 0)) { 374 vec++; 375 /* Use the legacy delivery for now, we will 376 * switch to using our own ring as a queueing mechanism 377 * at a later date 378 */ 379 size = qemu_send_packet_async( 380 &s->nc, 381 vec->iov_base, 382 data_size, 383 l2tpv3_send_completed 384 ); 385 if (size == 0) { 386 l2tpv3_read_poll(s, false); 387 } 388 bad_read = false; 389 } else { 390 bad_read = true; 391 if (!s->header_mismatch) { 392 /* report error only once */ 393 error_report("l2tpv3 header verification failed"); 394 s->header_mismatch = true; 395 } 396 } 397 } else { 398 bad_read = true; 399 } 400 s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT; 401 s->queue_depth--; 402 } while ( 403 (s->queue_depth > 0) && 404 qemu_can_send_packet(&s->nc) && 405 ((size > 0) || bad_read) 406 ); 407 } 408 } 409 410 static void net_l2tpv3_send(void *opaque) 411 { 412 NetL2TPV3State *s = opaque; 413 int target_count, count; 414 struct mmsghdr *msgvec; 415 416 /* go into ring mode only if there is a "pending" tail */ 417 418 if (s->queue_depth) { 419 420 /* The ring buffer we use has variable intake 421 * count of how much we can read varies - adjust accordingly 422 */ 423 424 target_count = MAX_L2TPV3_MSGCNT - s->queue_depth; 425 426 /* Ensure we do not overrun the ring when we have 427 * a lot of enqueued packets 428 */ 429 430 if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) { 431 target_count = MAX_L2TPV3_MSGCNT - s->queue_head; 432 } 433 } else { 434 435 /* we do not have any pending packets - we can use 436 * the whole message vector linearly instead of using 437 * it as a ring 438 */ 439 440 s->queue_head = 0; 441 s->queue_tail = 0; 442 target_count = MAX_L2TPV3_MSGCNT; 443 } 444 445 msgvec = s->msgvec + s->queue_head; 446 if (target_count > 0) { 447 do { 448 count = recvmmsg( 449 s->fd, 450 msgvec, 451 target_count, MSG_DONTWAIT, NULL); 452 } while ((count == -1) && (errno == EINTR)); 453 if (count < 0) { 454 /* Recv error - we still need to flush packets here, 455 * (re)set queue head to current position 456 */ 457 count = 0; 458 } 459 s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT; 460 s->queue_depth += count; 461 } 462 net_l2tpv3_process_queue(s); 463 } 464 465 static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount) 466 { 467 int i, j; 468 struct iovec *iov; 469 struct mmsghdr *cleanup = msgvec; 470 if (cleanup) { 471 for (i = 0; i < count; i++) { 472 if (cleanup->msg_hdr.msg_iov) { 473 iov = cleanup->msg_hdr.msg_iov; 474 for (j = 0; j < iovcount; j++) { 475 g_free(iov->iov_base); 476 iov++; 477 } 478 g_free(cleanup->msg_hdr.msg_iov); 479 } 480 cleanup++; 481 } 482 g_free(msgvec); 483 } 484 } 485 486 static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count) 487 { 488 int i; 489 struct iovec *iov; 490 struct mmsghdr *msgvec, *result; 491 492 msgvec = g_new(struct mmsghdr, count); 493 result = msgvec; 494 for (i = 0; i < count ; i++) { 495 msgvec->msg_hdr.msg_name = NULL; 496 msgvec->msg_hdr.msg_namelen = 0; 497 iov = g_new(struct iovec, IOVSIZE); 498 msgvec->msg_hdr.msg_iov = iov; 499 iov->iov_base = g_malloc(s->header_size); 500 iov->iov_len = s->header_size; 501 iov++ ; 502 iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE); 503 iov->iov_len = BUFFER_SIZE; 504 msgvec->msg_hdr.msg_iovlen = 2; 505 msgvec->msg_hdr.msg_control = NULL; 506 msgvec->msg_hdr.msg_controllen = 0; 507 msgvec->msg_hdr.msg_flags = 0; 508 msgvec++; 509 } 510 return result; 511 } 512 513 static void net_l2tpv3_cleanup(NetClientState *nc) 514 { 515 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); 516 qemu_purge_queued_packets(nc); 517 l2tpv3_read_poll(s, false); 518 l2tpv3_write_poll(s, false); 519 if (s->fd >= 0) { 520 close(s->fd); 521 } 522 destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE); 523 g_free(s->vec); 524 g_free(s->header_buf); 525 g_free(s->dgram_dst); 526 } 527 528 static NetClientInfo net_l2tpv3_info = { 529 .type = NET_CLIENT_OPTIONS_KIND_L2TPV3, 530 .size = sizeof(NetL2TPV3State), 531 .receive = net_l2tpv3_receive_dgram, 532 .receive_iov = net_l2tpv3_receive_dgram_iov, 533 .poll = l2tpv3_poll, 534 .cleanup = net_l2tpv3_cleanup, 535 }; 536 537 int net_init_l2tpv3(const NetClientOptions *opts, 538 const char *name, 539 NetClientState *peer, Error **errp) 540 { 541 /* FIXME error_setg(errp, ...) on failure */ 542 const NetdevL2TPv3Options *l2tpv3; 543 NetL2TPV3State *s; 544 NetClientState *nc; 545 int fd = -1, gairet; 546 struct addrinfo hints; 547 struct addrinfo *result = NULL; 548 char *srcport, *dstport; 549 550 nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name); 551 552 s = DO_UPCAST(NetL2TPV3State, nc, nc); 553 554 s->queue_head = 0; 555 s->queue_tail = 0; 556 s->header_mismatch = false; 557 558 assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3); 559 l2tpv3 = opts->l2tpv3; 560 561 if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { 562 s->ipv6 = l2tpv3->ipv6; 563 } else { 564 s->ipv6 = false; 565 } 566 567 if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) { 568 error_report("l2tpv3_open : offset must be less than 256 bytes"); 569 goto outerr; 570 } 571 572 if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) { 573 if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) { 574 s->cookie = true; 575 } else { 576 goto outerr; 577 } 578 } else { 579 s->cookie = false; 580 } 581 582 if (l2tpv3->has_cookie64 || l2tpv3->cookie64) { 583 s->cookie_is_64 = true; 584 } else { 585 s->cookie_is_64 = false; 586 } 587 588 if (l2tpv3->has_udp && l2tpv3->udp) { 589 s->udp = true; 590 if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) { 591 error_report("l2tpv3_open : need both src and dst port for udp"); 592 goto outerr; 593 } else { 594 srcport = l2tpv3->srcport; 595 dstport = l2tpv3->dstport; 596 } 597 } else { 598 s->udp = false; 599 srcport = NULL; 600 dstport = NULL; 601 } 602 603 604 s->offset = 4; 605 s->session_offset = 0; 606 s->cookie_offset = 4; 607 s->counter_offset = 4; 608 609 s->tx_session = l2tpv3->txsession; 610 if (l2tpv3->has_rxsession) { 611 s->rx_session = l2tpv3->rxsession; 612 } else { 613 s->rx_session = s->tx_session; 614 } 615 616 if (s->cookie) { 617 s->rx_cookie = l2tpv3->rxcookie; 618 s->tx_cookie = l2tpv3->txcookie; 619 if (s->cookie_is_64 == true) { 620 /* 64 bit cookie */ 621 s->offset += 8; 622 s->counter_offset += 8; 623 } else { 624 /* 32 bit cookie */ 625 s->offset += 4; 626 s->counter_offset += 4; 627 } 628 } 629 630 memset(&hints, 0, sizeof(hints)); 631 632 if (s->ipv6) { 633 hints.ai_family = AF_INET6; 634 } else { 635 hints.ai_family = AF_INET; 636 } 637 if (s->udp) { 638 hints.ai_socktype = SOCK_DGRAM; 639 hints.ai_protocol = 0; 640 s->offset += 4; 641 s->counter_offset += 4; 642 s->session_offset += 4; 643 s->cookie_offset += 4; 644 } else { 645 hints.ai_socktype = SOCK_RAW; 646 hints.ai_protocol = IPPROTO_L2TP; 647 } 648 649 gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result); 650 651 if ((gairet != 0) || (result == NULL)) { 652 error_report( 653 "l2tpv3_open : could not resolve src, errno = %s", 654 gai_strerror(gairet) 655 ); 656 goto outerr; 657 } 658 fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol); 659 if (fd == -1) { 660 fd = -errno; 661 error_report("l2tpv3_open : socket creation failed, errno = %d", -fd); 662 goto outerr; 663 } 664 if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) { 665 error_report("l2tpv3_open : could not bind socket err=%i", errno); 666 goto outerr; 667 } 668 if (result) { 669 freeaddrinfo(result); 670 } 671 672 memset(&hints, 0, sizeof(hints)); 673 674 if (s->ipv6) { 675 hints.ai_family = AF_INET6; 676 } else { 677 hints.ai_family = AF_INET; 678 } 679 if (s->udp) { 680 hints.ai_socktype = SOCK_DGRAM; 681 hints.ai_protocol = 0; 682 } else { 683 hints.ai_socktype = SOCK_RAW; 684 hints.ai_protocol = IPPROTO_L2TP; 685 } 686 687 result = NULL; 688 gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result); 689 if ((gairet != 0) || (result == NULL)) { 690 error_report( 691 "l2tpv3_open : could not resolve dst, error = %s", 692 gai_strerror(gairet) 693 ); 694 goto outerr; 695 } 696 697 s->dgram_dst = g_new0(struct sockaddr_storage, 1); 698 memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen); 699 s->dst_size = result->ai_addrlen; 700 701 if (result) { 702 freeaddrinfo(result); 703 } 704 705 if (l2tpv3->has_counter && l2tpv3->counter) { 706 s->has_counter = true; 707 s->offset += 4; 708 } else { 709 s->has_counter = false; 710 } 711 712 if (l2tpv3->has_pincounter && l2tpv3->pincounter) { 713 s->has_counter = true; /* pin counter implies that there is counter */ 714 s->pin_counter = true; 715 } else { 716 s->pin_counter = false; 717 } 718 719 if (l2tpv3->has_offset) { 720 /* extra offset */ 721 s->offset += l2tpv3->offset; 722 } 723 724 if ((s->ipv6) || (s->udp)) { 725 s->header_size = s->offset; 726 } else { 727 s->header_size = s->offset + sizeof(struct iphdr); 728 } 729 730 s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT); 731 s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT); 732 s->header_buf = g_malloc(s->header_size); 733 734 qemu_set_nonblock(fd); 735 736 s->fd = fd; 737 s->counter = 0; 738 739 l2tpv3_read_poll(s, true); 740 741 snprintf(s->nc.info_str, sizeof(s->nc.info_str), 742 "l2tpv3: connected"); 743 return 0; 744 outerr: 745 qemu_del_net_client(nc); 746 if (fd >= 0) { 747 close(fd); 748 } 749 if (result) { 750 freeaddrinfo(result); 751 } 752 return -1; 753 } 754 755