xref: /openbmc/qemu/net/l2tpv3.c (revision 5b76dd13)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2012-2014 Cisco Systems
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include <linux/ip.h>
28 #include <netdb.h>
29 #include "net/net.h"
30 #include "clients.h"
31 #include "qapi/error.h"
32 #include "qemu-common.h"
33 #include "qemu/error-report.h"
34 #include "qemu/option.h"
35 #include "qemu/sockets.h"
36 #include "qemu/iov.h"
37 #include "qemu/main-loop.h"
38 
39 
/*
 * Receive buffer geometry.
 *
 * BUFFER_SIZE has not been tuned; it is simply large enough to hold one
 * packet plus some headers. Each receive slot is made of IOVSIZE iovec
 * entries and the receive ring holds MAX_L2TPV3_MSGCNT slots.
 */

#define BUFFER_ALIGN (sysconf(_SC_PAGESIZE))
#define BUFFER_SIZE 2048
#define IOVSIZE 2
#define MAX_L2TPV3_MSGCNT 64
#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)

/* First header word written for UDP encapsulation: marks a data packet */

#define L2TPV3_DATA_PACKET 0x30000

/* IANA-assigned IP protocol number for L2TPv3, if the libc lacks it */

#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP 0x73
#endif
60 
61 typedef struct NetL2TPV3State {
62     NetClientState nc;
63     int fd;
64 
65     /*
66      * these are used for xmit - that happens packet a time
67      * and for first sign of life packet (easier to parse that once)
68      */
69 
70     uint8_t *header_buf;
71     struct iovec *vec;
72 
73     /*
74      * these are used for receive - try to "eat" up to 32 packets at a time
75      */
76 
77     struct mmsghdr *msgvec;
78 
79     /*
80      * peer address
81      */
82 
83     struct sockaddr_storage *dgram_dst;
84     uint32_t dst_size;
85 
86     /*
87      * L2TPv3 parameters
88      */
89 
90     uint64_t rx_cookie;
91     uint64_t tx_cookie;
92     uint32_t rx_session;
93     uint32_t tx_session;
94     uint32_t header_size;
95     uint32_t counter;
96 
97     /*
98     * DOS avoidance in error handling
99     */
100 
101     bool header_mismatch;
102 
103     /*
104      * Ring buffer handling
105      */
106 
107     int queue_head;
108     int queue_tail;
109     int queue_depth;
110 
111     /*
112      * Precomputed offsets
113      */
114 
115     uint32_t offset;
116     uint32_t cookie_offset;
117     uint32_t counter_offset;
118     uint32_t session_offset;
119 
120     /* Poll Control */
121 
122     bool read_poll;
123     bool write_poll;
124 
125     /* Flags */
126 
127     bool ipv6;
128     bool udp;
129     bool has_counter;
130     bool pin_counter;
131     bool cookie;
132     bool cookie_is_64;
133 
134 } NetL2TPV3State;
135 
136 static void net_l2tpv3_send(void *opaque);
137 static void l2tpv3_writable(void *opaque);
138 
139 static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
140 {
141     qemu_set_fd_handler(s->fd,
142                         s->read_poll ? net_l2tpv3_send : NULL,
143                         s->write_poll ? l2tpv3_writable : NULL,
144                         s);
145 }
146 
147 static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
148 {
149     if (s->read_poll != enable) {
150         s->read_poll = enable;
151         l2tpv3_update_fd_handler(s);
152     }
153 }
154 
155 static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
156 {
157     if (s->write_poll != enable) {
158         s->write_poll = enable;
159         l2tpv3_update_fd_handler(s);
160     }
161 }
162 
163 static void l2tpv3_writable(void *opaque)
164 {
165     NetL2TPV3State *s = opaque;
166     l2tpv3_write_poll(s, false);
167     qemu_flush_queued_packets(&s->nc);
168 }
169 
170 static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
171 {
172     NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
173     l2tpv3_read_poll(s, true);
174 }
175 
176 static void l2tpv3_poll(NetClientState *nc, bool enable)
177 {
178     NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
179     l2tpv3_write_poll(s, enable);
180     l2tpv3_read_poll(s, enable);
181 }
182 
183 static void l2tpv3_form_header(NetL2TPV3State *s)
184 {
185     uint32_t *counter;
186 
187     if (s->udp) {
188         stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
189     }
190     stl_be_p(
191             (uint32_t *) (s->header_buf + s->session_offset),
192             s->tx_session
193         );
194     if (s->cookie) {
195         if (s->cookie_is_64) {
196             stq_be_p(
197                 (uint64_t *)(s->header_buf + s->cookie_offset),
198                 s->tx_cookie
199             );
200         } else {
201             stl_be_p(
202                 (uint32_t *) (s->header_buf + s->cookie_offset),
203                 s->tx_cookie
204             );
205         }
206     }
207     if (s->has_counter) {
208         counter = (uint32_t *)(s->header_buf + s->counter_offset);
209         if (s->pin_counter) {
210             *counter = 0;
211         } else {
212             stl_be_p(counter, ++s->counter);
213         }
214     }
215 }
216 
217 static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
218                     const struct iovec *iov,
219                     int iovcnt)
220 {
221     NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
222 
223     struct msghdr message;
224     int ret;
225 
226     if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
227         error_report(
228             "iovec too long %d > %d, change l2tpv3.h",
229             iovcnt, MAX_L2TPV3_IOVCNT
230         );
231         return -1;
232     }
233     l2tpv3_form_header(s);
234     memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
235     s->vec->iov_base = s->header_buf;
236     s->vec->iov_len = s->offset;
237     message.msg_name = s->dgram_dst;
238     message.msg_namelen = s->dst_size;
239     message.msg_iov = s->vec;
240     message.msg_iovlen = iovcnt + 1;
241     message.msg_control = NULL;
242     message.msg_controllen = 0;
243     message.msg_flags = 0;
244     do {
245         ret = sendmsg(s->fd, &message, 0);
246     } while ((ret == -1) && (errno == EINTR));
247     if (ret > 0) {
248         ret -= s->offset;
249     } else if (ret == 0) {
250         /* belt and braces - should not occur on DGRAM
251         * we should get an error and never a 0 send
252         */
253         ret = iov_size(iov, iovcnt);
254     } else {
255         /* signal upper layer that socket buffer is full */
256         ret = -errno;
257         if (ret == -EAGAIN || ret == -ENOBUFS) {
258             l2tpv3_write_poll(s, true);
259             ret = 0;
260         }
261     }
262     return ret;
263 }
264 
265 static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
266                     const uint8_t *buf,
267                     size_t size)
268 {
269     NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
270 
271     struct iovec *vec;
272     struct msghdr message;
273     ssize_t ret = 0;
274 
275     l2tpv3_form_header(s);
276     vec = s->vec;
277     vec->iov_base = s->header_buf;
278     vec->iov_len = s->offset;
279     vec++;
280     vec->iov_base = (void *) buf;
281     vec->iov_len = size;
282     message.msg_name = s->dgram_dst;
283     message.msg_namelen = s->dst_size;
284     message.msg_iov = s->vec;
285     message.msg_iovlen = 2;
286     message.msg_control = NULL;
287     message.msg_controllen = 0;
288     message.msg_flags = 0;
289     do {
290         ret = sendmsg(s->fd, &message, 0);
291     } while ((ret == -1) && (errno == EINTR));
292     if (ret > 0) {
293         ret -= s->offset;
294     } else if (ret == 0) {
295         /* belt and braces - should not occur on DGRAM
296         * we should get an error and never a 0 send
297         */
298         ret = size;
299     } else {
300         ret = -errno;
301         if (ret == -EAGAIN || ret == -ENOBUFS) {
302             /* signal upper layer that socket buffer is full */
303             l2tpv3_write_poll(s, true);
304             ret = 0;
305         }
306     }
307     return ret;
308 }
309 
310 static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
311 {
312 
313     uint32_t *session;
314     uint64_t cookie;
315 
316     if ((!s->udp) && (!s->ipv6)) {
317         buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
318     }
319 
320     /* we do not do a strict check for "data" packets as per
321     * the RFC spec because the pure IP spec does not have
322     * that anyway.
323     */
324 
325     if (s->cookie) {
326         if (s->cookie_is_64) {
327             cookie = ldq_be_p(buf + s->cookie_offset);
328         } else {
329             cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
330         }
331         if (cookie != s->rx_cookie) {
332             if (!s->header_mismatch) {
333                 error_report("unknown cookie id");
334             }
335             return -1;
336         }
337     }
338     session = (uint32_t *) (buf + s->session_offset);
339     if (ldl_be_p(session) != s->rx_session) {
340         if (!s->header_mismatch) {
341             error_report("session mismatch");
342         }
343         return -1;
344     }
345     return 0;
346 }
347 
348 static void net_l2tpv3_process_queue(NetL2TPV3State *s)
349 {
350     int size = 0;
351     struct iovec *vec;
352     bool bad_read;
353     int data_size;
354     struct mmsghdr *msgvec;
355 
356     /* go into ring mode only if there is a "pending" tail */
357     if (s->queue_depth > 0) {
358         do {
359             msgvec = s->msgvec + s->queue_tail;
360             if (msgvec->msg_len > 0) {
361                 data_size = msgvec->msg_len - s->header_size;
362                 vec = msgvec->msg_hdr.msg_iov;
363                 if ((data_size > 0) &&
364                     (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
365                     vec++;
366                     /* Use the legacy delivery for now, we will
367                      * switch to using our own ring as a queueing mechanism
368                      * at a later date
369                      */
370                     size = qemu_send_packet_async(
371                             &s->nc,
372                             vec->iov_base,
373                             data_size,
374                             l2tpv3_send_completed
375                         );
376                     if (size == 0) {
377                         l2tpv3_read_poll(s, false);
378                     }
379                     bad_read = false;
380                 } else {
381                     bad_read = true;
382                     if (!s->header_mismatch) {
383                         /* report error only once */
384                         error_report("l2tpv3 header verification failed");
385                         s->header_mismatch = true;
386                     }
387                 }
388             } else {
389                 bad_read = true;
390             }
391             s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
392             s->queue_depth--;
393         } while (
394                 (s->queue_depth > 0) &&
395                  qemu_can_send_packet(&s->nc) &&
396                 ((size > 0) || bad_read)
397             );
398     }
399 }
400 
401 static void net_l2tpv3_send(void *opaque)
402 {
403     NetL2TPV3State *s = opaque;
404     int target_count, count;
405     struct mmsghdr *msgvec;
406 
407     /* go into ring mode only if there is a "pending" tail */
408 
409     if (s->queue_depth) {
410 
411         /* The ring buffer we use has variable intake
412          * count of how much we can read varies - adjust accordingly
413          */
414 
415         target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
416 
417         /* Ensure we do not overrun the ring when we have
418          * a lot of enqueued packets
419          */
420 
421         if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
422             target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
423         }
424     } else {
425 
426         /* we do not have any pending packets - we can use
427         * the whole message vector linearly instead of using
428         * it as a ring
429         */
430 
431         s->queue_head = 0;
432         s->queue_tail = 0;
433         target_count = MAX_L2TPV3_MSGCNT;
434     }
435 
436     msgvec = s->msgvec + s->queue_head;
437     if (target_count > 0) {
438         do {
439             count = recvmmsg(
440                 s->fd,
441                 msgvec,
442                 target_count, MSG_DONTWAIT, NULL);
443         } while ((count == -1) && (errno == EINTR));
444         if (count < 0) {
445             /* Recv error - we still need to flush packets here,
446              * (re)set queue head to current position
447              */
448             count = 0;
449         }
450         s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
451         s->queue_depth += count;
452     }
453     net_l2tpv3_process_queue(s);
454 }
455 
456 static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
457 {
458     int i, j;
459     struct iovec *iov;
460     struct mmsghdr *cleanup = msgvec;
461     if (cleanup) {
462         for (i = 0; i < count; i++) {
463             if (cleanup->msg_hdr.msg_iov) {
464                 iov = cleanup->msg_hdr.msg_iov;
465                 for (j = 0; j < iovcount; j++) {
466                     g_free(iov->iov_base);
467                     iov++;
468                 }
469                 g_free(cleanup->msg_hdr.msg_iov);
470             }
471             cleanup++;
472         }
473         g_free(msgvec);
474     }
475 }
476 
477 static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
478 {
479     int i;
480     struct iovec *iov;
481     struct mmsghdr *msgvec, *result;
482 
483     msgvec = g_new(struct mmsghdr, count);
484     result = msgvec;
485     for (i = 0; i < count ; i++) {
486         msgvec->msg_hdr.msg_name = NULL;
487         msgvec->msg_hdr.msg_namelen = 0;
488         iov =  g_new(struct iovec, IOVSIZE);
489         msgvec->msg_hdr.msg_iov = iov;
490         iov->iov_base = g_malloc(s->header_size);
491         iov->iov_len = s->header_size;
492         iov++ ;
493         iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
494         iov->iov_len = BUFFER_SIZE;
495         msgvec->msg_hdr.msg_iovlen = 2;
496         msgvec->msg_hdr.msg_control = NULL;
497         msgvec->msg_hdr.msg_controllen = 0;
498         msgvec->msg_hdr.msg_flags = 0;
499         msgvec++;
500     }
501     return result;
502 }
503 
504 static void net_l2tpv3_cleanup(NetClientState *nc)
505 {
506     NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
507     qemu_purge_queued_packets(nc);
508     l2tpv3_read_poll(s, false);
509     l2tpv3_write_poll(s, false);
510     if (s->fd >= 0) {
511         close(s->fd);
512     }
513     destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
514     g_free(s->vec);
515     g_free(s->header_buf);
516     g_free(s->dgram_dst);
517 }
518 
519 static NetClientInfo net_l2tpv3_info = {
520     .type = NET_CLIENT_DRIVER_L2TPV3,
521     .size = sizeof(NetL2TPV3State),
522     .receive = net_l2tpv3_receive_dgram,
523     .receive_iov = net_l2tpv3_receive_dgram_iov,
524     .poll = l2tpv3_poll,
525     .cleanup = net_l2tpv3_cleanup,
526 };
527 
528 int net_init_l2tpv3(const Netdev *netdev,
529                     const char *name,
530                     NetClientState *peer, Error **errp)
531 {
532     const NetdevL2TPv3Options *l2tpv3;
533     NetL2TPV3State *s;
534     NetClientState *nc;
535     int fd = -1, gairet;
536     struct addrinfo hints;
537     struct addrinfo *result = NULL;
538     char *srcport, *dstport;
539 
540     nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
541 
542     s = DO_UPCAST(NetL2TPV3State, nc, nc);
543 
544     s->queue_head = 0;
545     s->queue_tail = 0;
546     s->header_mismatch = false;
547 
548     assert(netdev->type == NET_CLIENT_DRIVER_L2TPV3);
549     l2tpv3 = &netdev->u.l2tpv3;
550 
551     if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
552         s->ipv6 = l2tpv3->ipv6;
553     } else {
554         s->ipv6 = false;
555     }
556 
557     if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
558         error_setg(errp, "offset must be less than 256 bytes");
559         goto outerr;
560     }
561 
562     if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
563         if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
564             s->cookie = true;
565         } else {
566             error_setg(errp,
567                        "require both 'rxcookie' and 'txcookie' or neither");
568             goto outerr;
569         }
570     } else {
571         s->cookie = false;
572     }
573 
574     if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
575         s->cookie_is_64  = true;
576     } else {
577         s->cookie_is_64  = false;
578     }
579 
580     if (l2tpv3->has_udp && l2tpv3->udp) {
581         s->udp = true;
582         if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
583             error_setg(errp, "need both src and dst port for udp");
584             goto outerr;
585         } else {
586             srcport = l2tpv3->srcport;
587             dstport = l2tpv3->dstport;
588         }
589     } else {
590         s->udp = false;
591         srcport = NULL;
592         dstport = NULL;
593     }
594 
595 
596     s->offset = 4;
597     s->session_offset = 0;
598     s->cookie_offset = 4;
599     s->counter_offset = 4;
600 
601     s->tx_session = l2tpv3->txsession;
602     if (l2tpv3->has_rxsession) {
603         s->rx_session = l2tpv3->rxsession;
604     } else {
605         s->rx_session = s->tx_session;
606     }
607 
608     if (s->cookie) {
609         s->rx_cookie = l2tpv3->rxcookie;
610         s->tx_cookie = l2tpv3->txcookie;
611         if (s->cookie_is_64 == true) {
612             /* 64 bit cookie */
613             s->offset += 8;
614             s->counter_offset += 8;
615         } else {
616             /* 32 bit cookie */
617             s->offset += 4;
618             s->counter_offset += 4;
619         }
620     }
621 
622     memset(&hints, 0, sizeof(hints));
623 
624     if (s->ipv6) {
625         hints.ai_family = AF_INET6;
626     } else {
627         hints.ai_family = AF_INET;
628     }
629     if (s->udp) {
630         hints.ai_socktype = SOCK_DGRAM;
631         hints.ai_protocol = 0;
632         s->offset += 4;
633         s->counter_offset += 4;
634         s->session_offset += 4;
635         s->cookie_offset += 4;
636     } else {
637         hints.ai_socktype = SOCK_RAW;
638         hints.ai_protocol = IPPROTO_L2TP;
639     }
640 
641     gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
642 
643     if ((gairet != 0) || (result == NULL)) {
644         error_setg(errp, "could not resolve src, errno = %s",
645                    gai_strerror(gairet));
646         goto outerr;
647     }
648     fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
649     if (fd == -1) {
650         fd = -errno;
651         error_setg(errp, "socket creation failed, errno = %d",
652                    -fd);
653         goto outerr;
654     }
655     if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
656         error_setg(errp, "could not bind socket err=%i", errno);
657         goto outerr;
658     }
659     if (result) {
660         freeaddrinfo(result);
661     }
662 
663     memset(&hints, 0, sizeof(hints));
664 
665     if (s->ipv6) {
666         hints.ai_family = AF_INET6;
667     } else {
668         hints.ai_family = AF_INET;
669     }
670     if (s->udp) {
671         hints.ai_socktype = SOCK_DGRAM;
672         hints.ai_protocol = 0;
673     } else {
674         hints.ai_socktype = SOCK_RAW;
675         hints.ai_protocol = IPPROTO_L2TP;
676     }
677 
678     result = NULL;
679     gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
680     if ((gairet != 0) || (result == NULL)) {
681         error_setg(errp, "could not resolve dst, error = %s",
682                    gai_strerror(gairet));
683         goto outerr;
684     }
685 
686     s->dgram_dst = g_new0(struct sockaddr_storage, 1);
687     memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
688     s->dst_size = result->ai_addrlen;
689 
690     if (result) {
691         freeaddrinfo(result);
692     }
693 
694     if (l2tpv3->has_counter && l2tpv3->counter) {
695         s->has_counter = true;
696         s->offset += 4;
697     } else {
698         s->has_counter = false;
699     }
700 
701     if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
702         s->has_counter = true;  /* pin counter implies that there is counter */
703         s->pin_counter = true;
704     } else {
705         s->pin_counter = false;
706     }
707 
708     if (l2tpv3->has_offset) {
709         /* extra offset */
710         s->offset += l2tpv3->offset;
711     }
712 
713     if ((s->ipv6) || (s->udp)) {
714         s->header_size = s->offset;
715     } else {
716         s->header_size = s->offset + sizeof(struct iphdr);
717     }
718 
719     s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
720     s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
721     s->header_buf = g_malloc(s->header_size);
722 
723     qemu_set_nonblock(fd);
724 
725     s->fd = fd;
726     s->counter = 0;
727 
728     l2tpv3_read_poll(s, true);
729 
730     snprintf(s->nc.info_str, sizeof(s->nc.info_str),
731              "l2tpv3: connected");
732     return 0;
733 outerr:
734     qemu_del_net_client(nc);
735     if (fd >= 0) {
736         close(fd);
737     }
738     if (result) {
739         freeaddrinfo(result);
740     }
741     return -1;
742 }
743 
744