/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2012-2014 Cisco Systems
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include <linux/ip.h>
#include <netdb.h>
#include "net/net.h"
#include "clients.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/sockets.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/memalign.h"

/* The buffer size needs to be investigated for optimum numbers and
 * optimum means of paging in on different systems. This size is
 * chosen to be sufficient to accommodate one packet with some headers.
 */

#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
#define BUFFER_SIZE 2048
#define IOVSIZE 2
#define MAX_L2TPV3_MSGCNT 64
#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)

/* Header set to 0x30000 signifies a data packet */

#define L2TPV3_DATA_PACKET 0x30000

/* IANA-assigned IP protocol ID for L2TPv3 */

#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP 0x73
#endif

typedef struct NetL2TPV3State {
    NetClientState nc;
    int fd;

    /*
     * These are used for xmit - that happens one packet at a time -
     * and for the first sign-of-life packet (easier to parse it once).
     */

    uint8_t *header_buf;
    struct iovec *vec;

    /*
     * These are used for receive - try to "eat" up to
     * MAX_L2TPV3_MSGCNT packets at a time.
     */

    struct mmsghdr *msgvec;

    /*
     * Peer address
     */

    struct sockaddr_storage *dgram_dst;
    uint32_t dst_size;

    /*
     * L2TPv3 parameters
     */

    uint64_t rx_cookie;
    uint64_t tx_cookie;
    uint32_t rx_session;
    uint32_t tx_session;
    uint32_t header_size;
    uint32_t counter;

    /*
     * DoS avoidance in error handling
     */

    bool header_mismatch;

    /*
     * Ring buffer handling
     */

    int queue_head;
    int queue_tail;
    int queue_depth;

    /*
     * Precomputed offsets
     */

    uint32_t offset;
    uint32_t cookie_offset;
    uint32_t counter_offset;
    uint32_t session_offset;

    /* Poll control */

    bool read_poll;
    bool write_poll;

    /* Flags */

    bool ipv6;
    bool udp;
    bool has_counter;
    bool pin_counter;
    bool cookie;
    bool cookie_is_64;

} NetL2TPV3State;

static void net_l2tpv3_send(void *opaque);
static void l2tpv3_writable(void *opaque);
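
/*
 * The two poll flags in the state structure mirror what is currently
 * registered with the main loop: a read handler is kept while the peer
 * can accept packets, and a write handler only while the socket send
 * buffer is full and queued packets are waiting to be flushed.
 */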
static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
{
    qemu_set_fd_handler(s->fd,
                        s->read_poll ? net_l2tpv3_send : NULL,
                        s->write_poll ? l2tpv3_writable : NULL,
                        s);
}

static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
{
    if (s->read_poll != enable) {
        s->read_poll = enable;
        l2tpv3_update_fd_handler(s);
    }
}

static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
{
    if (s->write_poll != enable) {
        s->write_poll = enable;
        l2tpv3_update_fd_handler(s);
    }
}

static void l2tpv3_writable(void *opaque)
{
    NetL2TPV3State *s = opaque;
    l2tpv3_write_poll(s, false);
    qemu_flush_queued_packets(&s->nc);
}

static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
{
    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
    l2tpv3_read_poll(s, true);
}

static void l2tpv3_poll(NetClientState *nc, bool enable)
{
    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
    l2tpv3_write_poll(s, enable);
    l2tpv3_read_poll(s, enable);
}

static void l2tpv3_form_header(NetL2TPV3State *s)
{
    uint32_t *counter;

    if (s->udp) {
        stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
    }
    stl_be_p(
        (uint32_t *) (s->header_buf + s->session_offset),
        s->tx_session
    );
    if (s->cookie) {
        if (s->cookie_is_64) {
            stq_be_p(
                (uint64_t *) (s->header_buf + s->cookie_offset),
                s->tx_cookie
            );
        } else {
            stl_be_p(
                (uint32_t *) (s->header_buf + s->cookie_offset),
                s->tx_cookie
            );
        }
    }
    if (s->has_counter) {
        counter = (uint32_t *)(s->header_buf + s->counter_offset);
        if (s->pin_counter) {
            *counter = 0;
        } else {
            stl_be_p(counter, ++s->counter);
        }
    }
}
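
/*
 * Example of the resulting transmit header with udp=on, a 32-bit
 * cookie and the counter enabled (all fields big-endian):
 *
 *   bytes  0-3   0x00030000 data packet marker (L2TPV3_DATA_PACKET)
 *   bytes  4-7   tx_session
 *   bytes  8-11  tx_cookie
 *   bytes 12-15  counter
 *
 * Over raw IP the marker is omitted and every offset shifts down by
 * four bytes; a 64-bit cookie widens the cookie field to eight bytes.
 */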
static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
                                            const struct iovec *iov,
                                            int iovcnt)
{
    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);

    struct msghdr message;
    int ret;

    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
        error_report(
            "iovec too long %d > %d, increase MAX_L2TPV3_IOVCNT",
            iovcnt, MAX_L2TPV3_IOVCNT - 1
        );
        return -1;
    }
    l2tpv3_form_header(s);
    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
    s->vec->iov_base = s->header_buf;
    s->vec->iov_len = s->offset;
    message.msg_name = s->dgram_dst;
    message.msg_namelen = s->dst_size;
    message.msg_iov = s->vec;
    message.msg_iovlen = iovcnt + 1;
    message.msg_control = NULL;
    message.msg_controllen = 0;
    message.msg_flags = 0;
    ret = RETRY_ON_EINTR(sendmsg(s->fd, &message, 0));
    if (ret > 0) {
        ret -= s->offset;
    } else if (ret == 0) {
        /* belt and braces - should not occur on DGRAM,
         * we should get an error and never a 0 send
         */
        ret = iov_size(iov, iovcnt);
    } else {
        /* signal upper layer that socket buffer is full */
        ret = -errno;
        if (ret == -EAGAIN || ret == -ENOBUFS) {
            l2tpv3_write_poll(s, true);
            ret = 0;
        }
    }
    return ret;
}

static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
                                        const uint8_t *buf,
                                        size_t size)
{
    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);

    struct iovec *vec;
    struct msghdr message;
    ssize_t ret = 0;

    l2tpv3_form_header(s);
    vec = s->vec;
    vec->iov_base = s->header_buf;
    vec->iov_len = s->offset;
    vec++;
    vec->iov_base = (void *) buf;
    vec->iov_len = size;
    message.msg_name = s->dgram_dst;
    message.msg_namelen = s->dst_size;
    message.msg_iov = s->vec;
    message.msg_iovlen = 2;
    message.msg_control = NULL;
    message.msg_controllen = 0;
    message.msg_flags = 0;
    ret = RETRY_ON_EINTR(sendmsg(s->fd, &message, 0));
    if (ret > 0) {
        ret -= s->offset;
    } else if (ret == 0) {
        /* belt and braces - should not occur on DGRAM,
         * we should get an error and never a 0 send
         */
        ret = size;
    } else {
        ret = -errno;
        if (ret == -EAGAIN || ret == -ENOBUFS) {
            /* signal upper layer that socket buffer is full */
            l2tpv3_write_poll(s, true);
            ret = 0;
        }
    }
    return ret;
}

static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
{

    uint32_t *session;
    uint64_t cookie;

    if ((!s->udp) && (!s->ipv6)) {
        buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
    }

    /* We do not do a strict check for "data" packets as per
     * the RFC spec because the pure IP spec does not have
     * that anyway.
     */

    if (s->cookie) {
        if (s->cookie_is_64) {
            cookie = ldq_be_p(buf + s->cookie_offset);
        } else {
            cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
        }
        if (cookie != s->rx_cookie) {
            if (!s->header_mismatch) {
                error_report("unknown cookie id");
            }
            return -1;
        }
    }
    session = (uint32_t *) (buf + s->session_offset);
    if (ldl_be_p(session) != s->rx_session) {
        if (!s->header_mismatch) {
            error_report("session mismatch");
        }
        return -1;
    }
    return 0;
}

static void net_l2tpv3_process_queue(NetL2TPV3State *s)
{
    int size = 0;
    struct iovec *vec;
    bool bad_read;
    int data_size;
    struct mmsghdr *msgvec;

    /* go into ring mode only if there is a "pending" tail */
    if (s->queue_depth > 0) {
        do {
            msgvec = s->msgvec + s->queue_tail;
            if (msgvec->msg_len > 0) {
                data_size = msgvec->msg_len - s->header_size;
                vec = msgvec->msg_hdr.msg_iov;
                if ((data_size > 0) &&
                    (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
                    vec++;
                    /* Use the legacy delivery for now; we will
                     * switch to using our own ring as a queueing
                     * mechanism at a later date.
                     */
                    size = qemu_send_packet_async(
                        &s->nc,
                        vec->iov_base,
                        data_size,
                        l2tpv3_send_completed
                    );
                    if (size == 0) {
                        l2tpv3_read_poll(s, false);
                    }
                    bad_read = false;
                } else {
                    bad_read = true;
                    if (!s->header_mismatch) {
                        /* report error only once */
                        error_report("l2tpv3 header verification failed");
                        s->header_mismatch = true;
                    }
                }
            } else {
                bad_read = true;
            }
            s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
            s->queue_depth--;
        } while (
            (s->queue_depth > 0) &&
            qemu_can_send_packet(&s->nc) &&
            ((size > 0) || bad_read)
        );
    }
}
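
/*
 * Ring invariants: recvmmsg() fills slots starting at queue_head,
 * net_l2tpv3_process_queue() drains them from queue_tail, and
 * queue_depth counts the slots in between.  Both indices wrap at
 * MAX_L2TPV3_MSGCNT.
 */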
static void net_l2tpv3_send(void *opaque)
{
    NetL2TPV3State *s = opaque;
    int target_count, count;
    struct mmsghdr *msgvec;

    /* go into ring mode only if there is a "pending" tail */

    if (s->queue_depth) {

        /* The ring buffer we use has a variable intake -
         * how much we can read varies, so adjust accordingly.
         */

        target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;

        /* Ensure we do not overrun the ring when we have
         * a lot of enqueued packets.
         */

        if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
            target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
        }
    } else {

        /* We do not have any pending packets - we can use
         * the whole message vector linearly instead of using
         * it as a ring.
         */

        s->queue_head = 0;
        s->queue_tail = 0;
        target_count = MAX_L2TPV3_MSGCNT;
    }

    msgvec = s->msgvec + s->queue_head;
    if (target_count > 0) {
        count = RETRY_ON_EINTR(
            recvmmsg(s->fd, msgvec, target_count, MSG_DONTWAIT, NULL)
        );
        if (count < 0) {
            /* Recv error - we still need to flush packets here,
             * (re)set queue head to current position
             */
            count = 0;
        }
        s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
        s->queue_depth += count;
    }
    net_l2tpv3_process_queue(s);
}

static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
{
    int i, j;
    struct iovec *iov;
    struct mmsghdr *cleanup = msgvec;
    if (cleanup) {
        for (i = 0; i < count; i++) {
            if (cleanup->msg_hdr.msg_iov) {
                iov = cleanup->msg_hdr.msg_iov;
                for (j = 0; j < iovcount; j++) {
                    g_free(iov->iov_base);
                    iov++;
                }
                g_free(cleanup->msg_hdr.msg_iov);
            }
            cleanup++;
        }
        g_free(msgvec);
    }
}
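
/*
 * Each receive slot carries a two-element iovec: element 0 receives
 * the L2TPv3 header (header_size bytes), element 1 a page-aligned
 * BUFFER_SIZE payload buffer.  recvmmsg() scatters every datagram
 * across one such pair.
 */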
static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
{
    int i;
    struct iovec *iov;
    struct mmsghdr *msgvec, *result;

    msgvec = g_new(struct mmsghdr, count);
    result = msgvec;
    for (i = 0; i < count; i++) {
        msgvec->msg_hdr.msg_name = NULL;
        msgvec->msg_hdr.msg_namelen = 0;
        iov = g_new(struct iovec, IOVSIZE);
        msgvec->msg_hdr.msg_iov = iov;
        iov->iov_base = g_malloc(s->header_size);
        iov->iov_len = s->header_size;
        iov++;
        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
        iov->iov_len = BUFFER_SIZE;
        msgvec->msg_hdr.msg_iovlen = 2;
        msgvec->msg_hdr.msg_control = NULL;
        msgvec->msg_hdr.msg_controllen = 0;
        msgvec->msg_hdr.msg_flags = 0;
        msgvec++;
    }
    return result;
}

static void net_l2tpv3_cleanup(NetClientState *nc)
{
    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
    qemu_purge_queued_packets(nc);
    l2tpv3_read_poll(s, false);
    l2tpv3_write_poll(s, false);
    if (s->fd >= 0) {
        close(s->fd);
    }
    destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
    g_free(s->vec);
    g_free(s->header_buf);
    g_free(s->dgram_dst);
}

static NetClientInfo net_l2tpv3_info = {
    .type = NET_CLIENT_DRIVER_L2TPV3,
    .size = sizeof(NetL2TPV3State),
    .receive = net_l2tpv3_receive_dgram,
    .receive_iov = net_l2tpv3_receive_dgram_iov,
    .poll = l2tpv3_poll,
    .cleanup = net_l2tpv3_cleanup,
};

int net_init_l2tpv3(const Netdev *netdev,
                    const char *name,
                    NetClientState *peer, Error **errp)
{
    const NetdevL2TPv3Options *l2tpv3;
    NetL2TPV3State *s;
    NetClientState *nc;
    int fd = -1, gairet;
    struct addrinfo hints;
    struct addrinfo *result = NULL;
    char *srcport, *dstport;

    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);

    s = DO_UPCAST(NetL2TPV3State, nc, nc);

    s->queue_head = 0;
    s->queue_tail = 0;
    s->header_mismatch = false;

    assert(netdev->type == NET_CLIENT_DRIVER_L2TPV3);
    l2tpv3 = &netdev->u.l2tpv3;

    if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
        s->ipv6 = l2tpv3->ipv6;
    } else {
        s->ipv6 = false;
    }

    if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
        error_setg(errp, "offset must not exceed 256 bytes");
        goto outerr;
    }

    if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
        if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
            s->cookie = true;
        } else {
            error_setg(errp,
                       "require both 'rxcookie' and 'txcookie' or neither");
            goto outerr;
        }
    } else {
        s->cookie = false;
    }

    if (l2tpv3->has_cookie64 && l2tpv3->cookie64) {
        s->cookie_is_64 = true;
    } else {
        s->cookie_is_64 = false;
    }

    if (l2tpv3->has_udp && l2tpv3->udp) {
        s->udp = true;
        if (!(l2tpv3->srcport && l2tpv3->dstport)) {
            error_setg(errp, "need both 'srcport' and 'dstport' for udp");
            goto outerr;
        } else {
            srcport = l2tpv3->srcport;
            dstport = l2tpv3->dstport;
        }
    } else {
        s->udp = false;
        srcport = NULL;
        dstport = NULL;
    }

    s->offset = 4;
    s->session_offset = 0;
    s->cookie_offset = 4;
    s->counter_offset = 4;

    s->tx_session = l2tpv3->txsession;
    if (l2tpv3->has_rxsession) {
        s->rx_session = l2tpv3->rxsession;
    } else {
        s->rx_session = s->tx_session;
    }

    if (s->cookie) {
        s->rx_cookie = l2tpv3->rxcookie;
        s->tx_cookie = l2tpv3->txcookie;
        if (s->cookie_is_64) {
            /* 64 bit cookie */
            s->offset += 8;
            s->counter_offset += 8;
        } else {
            /* 32 bit cookie */
            s->offset += 4;
            s->counter_offset += 4;
        }
    }

    memset(&hints, 0, sizeof(hints));

    if (s->ipv6) {
        hints.ai_family = AF_INET6;
    } else {
        hints.ai_family = AF_INET;
    }
    if (s->udp) {
        hints.ai_socktype = SOCK_DGRAM;
        hints.ai_protocol = 0;
        s->offset += 4;
        s->counter_offset += 4;
        s->session_offset += 4;
        s->cookie_offset += 4;
    } else {
        hints.ai_socktype = SOCK_RAW;
        hints.ai_protocol = IPPROTO_L2TP;
    }

    gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);

    if ((gairet != 0) || (result == NULL)) {
        error_setg(errp, "could not resolve src, error = %s",
                   gai_strerror(gairet));
        goto outerr;
    }
    fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
    if (fd == -1) {
        error_setg_errno(errp, errno, "socket creation failed");
        goto outerr;
    }
    if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
        error_setg_errno(errp, errno, "could not bind socket");
        goto outerr;
    }

    freeaddrinfo(result);

    memset(&hints, 0, sizeof(hints));

    if (s->ipv6) {
        hints.ai_family = AF_INET6;
    } else {
        hints.ai_family = AF_INET;
    }
    if (s->udp) {
        hints.ai_socktype = SOCK_DGRAM;
        hints.ai_protocol = 0;
    } else {
        hints.ai_socktype = SOCK_RAW;
        hints.ai_protocol = IPPROTO_L2TP;
    }

    result = NULL;
    gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
    if ((gairet != 0) || (result == NULL)) {
        error_setg(errp, "could not resolve dst, error = %s",
                   gai_strerror(gairet));
        goto outerr;
    }

    s->dgram_dst = g_new0(struct sockaddr_storage, 1);
    memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
    s->dst_size = result->ai_addrlen;

    freeaddrinfo(result);

    if (l2tpv3->has_counter && l2tpv3->counter) {
        s->has_counter = true;
        s->offset += 4;
    } else {
        s->has_counter = false;
    }

    if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
        s->has_counter = true; /* pin counter implies that there is a counter */
        s->pin_counter = true;
    } else {
        s->pin_counter = false;
    }

    if (l2tpv3->has_offset) {
        /* extra offset */
        s->offset += l2tpv3->offset;
    }
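
    /*
     * Raw IPv4 sockets deliver the IP header along with the payload
     * (see l2tpv3_verify_header()), so the expected receive header
     * size must account for it; IPv6 and UDP sockets do not.
     */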
    if ((s->ipv6) || (s->udp)) {
        s->header_size = s->offset;
    } else {
        s->header_size = s->offset + sizeof(struct iphdr);
    }

    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
    s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
    s->header_buf = g_malloc(s->header_size);

    qemu_socket_set_nonblock(fd);

    s->fd = fd;
    s->counter = 0;

    l2tpv3_read_poll(s, true);

    qemu_set_info_str(&s->nc, "l2tpv3: connected");
    return 0;
outerr:
    qemu_del_net_client(nc);
    if (fd >= 0) {
        close(fd);
    }
    if (result) {
        freeaddrinfo(result);
    }
    return -1;
}
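
/*
 * Illustrative invocation (addresses, ports and session ids are
 * examples only):
 *
 *   qemu-system-x86_64 ... \
 *       -netdev l2tpv3,id=net0,src=192.0.2.1,dst=192.0.2.2,udp=on, \
 *           srcport=1701,dstport=1701,txsession=10,rxsession=11, \
 *           counter=on \
 *       -device virtio-net-pci,netdev=net0
 */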