xref: /openbmc/qemu/util/iov.c (revision e7a1d6c5)
1 /*
2  * Helpers for getting linearized buffers from iov / filling buffers into iovs
3  *
4  * Copyright IBM, Corp. 2007, 2008
5  * Copyright (C) 2010 Red Hat, Inc.
6  *
7  * Author(s):
8  *  Anthony Liguori <aliguori@us.ibm.com>
9  *  Amit Shah <amit.shah@redhat.com>
10  *  Michael Tokarev <mjt@tls.msk.ru>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2.  See
13  * the COPYING file in the top-level directory.
14  *
15  * Contributions after 2012-01-13 are licensed under the terms of the
16  * GNU GPL, version 2 or (at your option) any later version.
17  */
18 
19 #include "qemu/iov.h"
20 
21 #ifdef _WIN32
22 # include <windows.h>
23 # include <winsock2.h>
24 #else
25 # include <sys/types.h>
26 # include <sys/socket.h>
27 #endif
28 
/*
 * Scatter the linear buffer `buf' into the vector `iov' (`iov_cnt'
 * elements), starting `offset' bytes into the vector.
 *
 * At most `bytes' bytes are copied; fewer when the vector ends first.
 * Returns the number of bytes copied.  `offset' must not lie beyond the
 * end of the vector (checked by the assertion before returning).
 */
size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
                    size_t offset, const void *buf, size_t bytes)
{
    const char *src = buf;
    size_t copied = 0;
    unsigned int idx = 0;

    while (idx < iov_cnt && (offset != 0 || copied < bytes)) {
        const struct iovec *elem = &iov[idx++];
        size_t chunk;

        if (offset >= elem->iov_len) {
            /* This element lies entirely before the start position. */
            offset -= elem->iov_len;
            continue;
        }

        /* Copy whatever fits: rest of this element or rest of the request. */
        chunk = elem->iov_len - offset;
        if (bytes - copied < chunk) {
            chunk = bytes - copied;
        }
        memcpy((char *)elem->iov_base + offset, src + copied, chunk);
        copied += chunk;
        offset = 0;
    }
    assert(offset == 0);
    return copied;
}
47 
/*
 * Gather data from the vector `iov' (`iov_cnt' elements) into the linear
 * buffer `buf', starting `offset' bytes into the vector.
 *
 * At most `bytes' bytes are copied; fewer when the vector ends first.
 * Returns the number of bytes copied.  `offset' must not lie beyond the
 * end of the vector (checked by the assertion before returning).
 */
size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
                  size_t offset, void *buf, size_t bytes)
{
    char *dst = buf;
    size_t copied = 0;
    unsigned int idx;

    for (idx = 0; idx < iov_cnt; idx++) {
        size_t elem_len = iov[idx].iov_len;
        size_t chunk;

        if (offset == 0 && copied >= bytes) {
            /* Start position reached and request satisfied. */
            break;
        }
        if (offset >= elem_len) {
            /* Still skipping towards the start position. */
            offset -= elem_len;
            continue;
        }

        chunk = elem_len - offset;
        if (bytes - copied < chunk) {
            chunk = bytes - copied;
        }
        memcpy(dst + copied, (const char *)iov[idx].iov_base + offset, chunk);
        copied += chunk;
        offset = 0;
    }
    assert(offset == 0);
    return copied;
}
66 
/*
 * Fill part of the vector `iov' (`iov_cnt' elements) with the byte value
 * `fillc', starting `offset' bytes into the vector.
 *
 * At most `bytes' bytes are set; fewer when the vector ends first.
 * Returns the number of bytes set.  `offset' must not lie beyond the end
 * of the vector (checked by the assertion before returning).
 */
size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
                  size_t offset, int fillc, size_t bytes)
{
    size_t filled = 0;
    unsigned int idx = 0;

    while ((offset != 0 || filled < bytes) && idx < iov_cnt) {
        const struct iovec *elem = &iov[idx];

        if (offset < elem->iov_len) {
            size_t room = elem->iov_len - offset;
            size_t want = bytes - filled;
            size_t chunk = room < want ? room : want;

            memset((char *)elem->iov_base + offset, fillc, chunk);
            filled += chunk;
            offset = 0;
        } else {
            /* Whole element precedes the start position. */
            offset -= elem->iov_len;
        }
        idx++;
    }
    assert(offset == 0);
    return filled;
}
85 
/*
 * Return the sum of the lengths of the first `iov_cnt' elements of `iov'.
 */
size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int remaining = iov_cnt;

    while (remaining-- > 0) {
        total += iov->iov_len;
        iov++;
    }
    return total;
}
97 
/*
 * Helper for iov_send_recv(): transfer the first `iov_cnt' elements of
 * `iov' over `sockfd' -- sending when `do_send' is true, receiving
 * otherwise.
 *
 * With CONFIG_POSIX the whole vector is handed to a single sendmsg() or
 * recvmsg() call, which is retried while it fails with EINTR; its return
 * value is passed through unchanged.
 *
 * Otherwise the elements are transferred one at a time with send()/recv().
 * The return value is then the number of bytes transferred so far, or -1
 * if an error other than EINTR occurred before any byte was transferred.
 * The bytes counted in the return value always form one contiguous run
 * from the start of the vector: a partially transferred element is
 * completed before the next element is touched.
 */
static ssize_t
do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
{
#ifdef CONFIG_POSIX
    ssize_t ret;
    struct msghdr msg;
    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = iov_cnt;
    do {
        ret = do_send
            ? sendmsg(sockfd, &msg, 0)
            : recvmsg(sockfd, &msg, 0);
    } while (ret < 0 && errno == EINTR);
    return ret;
#else
    /* else send piece-by-piece */
    /*XXX Note: windows has WSASend() and WSARecv() */
    unsigned i = 0;
    ssize_t ret = 0;
    size_t off = 0;     /* bytes of iov[i] already transferred */
    while (i < iov_cnt) {
        char *base = (char *)iov[i].iov_base + off;
        size_t len = iov[i].iov_len - off;
        ssize_t r = do_send
            ? send(sockfd, base, len, 0)
            : recv(sockfd, base, len, 0);
        if (r > 0) {
            ret += r;
            off += (size_t)r;
            if (off < iov[i].iov_len) {
                /*
                 * Short transfer: stay on this element.  Moving on would
                 * leave a hole in the data while the caller assumes the
                 * returned byte count is contiguous.
                 */
                continue;
            }
        } else if (!r) {
            break;
        } else if (errno == EINTR) {
            continue;
        } else {
            /* else it is some "other" error,
             * only return if there was no data processed. */
            if (ret == 0) {
                ret = -1;
            }
            break;
        }
        off = 0;
        i++;
    }
    return ret;
#endif
}
142 
/*
 * Send (do_send == true) or receive (do_send == false) `bytes' bytes over
 * `sockfd' using the part of the vector `iov' that starts `offset' bytes
 * into it.
 *
 * The vector is modified temporarily so that it describes exactly the
 * requested window, and is restored after every transfer attempt.  The
 * window must lie inside the vector (enforced by assertions).
 *
 * Returns the number of bytes transferred.  This can be less than `bytes'
 * when a receive reports 0 bytes, or when a transfer fails with EAGAIN
 * after some data was already moved.  Returns -1 on any other failure, or
 * on EAGAIN before any data was moved.
 */
ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
                      size_t offset, size_t bytes,
                      bool do_send)
{
    ssize_t done = 0;

    while (bytes > 0) {
        ssize_t ret;
        size_t remaining;
        size_t saved_len = 0;
        unsigned skip = 0;
        unsigned used = 0;
        bool trimmed;

        /* Drop leading elements that lie entirely before `offset'. */
        while (skip < iov_cnt && offset >= iov[skip].iov_len) {
            offset -= iov[skip].iov_len;
            skip++;
        }

        /* bytes > 0 here, so the window cannot start past the last element. */
        assert(skip < iov_cnt);
        iov += skip;
        iov_cnt -= skip;

        /* Hide the first `offset' bytes of the first element (undone below). */
        if (offset) {
            iov[0].iov_base = (char *)iov[0].iov_base + offset;
            iov[0].iov_len -= offset;
        }

        /* Count the elements that are covered completely by the window. */
        remaining = bytes;
        while (used < iov_cnt && iov[used].iov_len <= remaining) {
            remaining -= iov[used].iov_len;
            used++;
        }

        /* If the window ends inside an element, shorten it (undone below). */
        trimmed = remaining != 0;
        if (trimmed) {
            assert(used < iov_cnt);
            assert(iov[used].iov_len > remaining);
            saved_len = iov[used].iov_len;
            iov[used].iov_len = remaining;
            used++;
        }

        ret = do_send_recv(sockfd, iov, used, do_send);

        /* Restore the vector before looking at the result. */
        if (trimmed) {
            iov[used - 1].iov_len = saved_len;
        }
        if (offset) {
            iov[0].iov_base = (char *)iov[0].iov_base - offset;
            iov[0].iov_len += offset;
        }

        if (ret < 0) {
            assert(errno != EINTR);
            /* Report partial progress on EAGAIN, an error otherwise. */
            return (errno == EAGAIN && done > 0) ? done : -1;
        }

        if (ret == 0 && !do_send) {
            /* recv returns 0 when the peer has performed an orderly
             * shutdown. */
            break;
        }

        /* Advance the window past what was just transferred. */
        offset += ret;
        done += ret;
        bytes -= ret;
    }

    return done;
}
217 
218 
/*
 * Linearize the first `limit' bytes of the vector `iov' (or all of it when
 * it is shorter) into a temporary buffer and pass that buffer, together
 * with `fp' and `prefix', to qemu_hexdump().
 */
void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
                 FILE *fp, const char *prefix, size_t limit)
{
    size_t dump_len = iov_size(iov, iov_cnt);
    char *linear;

    if (dump_len > limit) {
        dump_len = limit;
    }

    linear = g_malloc(dump_len);
    iov_to_buf(iov, iov_cnt, 0, linear, dump_len);
    qemu_hexdump(linear, fp, prefix, dump_len);
    g_free(linear);
}
235 
/*
 * Describe a window of the vector `iov' (`iov_cnt' elements) in `dst_iov'.
 *
 * The window starts `offset' bytes into `iov' and is at most `bytes' bytes
 * long; it is cut short when `iov' ends or when `dst_iov_cnt' destination
 * elements have been filled.  Only base pointers and lengths are copied,
 * so the destination elements point into the source buffers.
 *
 * Returns the number of elements stored in `dst_iov'.  `offset' must not
 * lie beyond the end of `iov' (checked by the assertion before returning).
 *
 * A request with bytes == 0 and a non-zero `offset' is valid: the offset is
 * skipped, and if it falls inside an element a single zero-length
 * destination element pointing at that position is stored.
 */
unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
                 const struct iovec *iov, unsigned int iov_cnt,
                 size_t offset, size_t bytes)
{
    size_t len;
    unsigned int i, j;

    /*
     * Keep walking while there is offset left to skip, even if no bytes
     * were requested; otherwise a valid offset is never consumed and the
     * assertion below fires.
     */
    for (i = 0, j = 0;
         i < iov_cnt && j < dst_iov_cnt && (offset || bytes); i++) {
        if (offset >= iov[i].iov_len) {
            offset -= iov[i].iov_len;
            continue;
        }
        len = iov[i].iov_len - offset;
        if (len > bytes) {
            len = bytes;
        }

        dst_iov[j].iov_base = (char *)iov[i].iov_base + offset;
        dst_iov[j].iov_len = len;
        j++;
        bytes -= len;
        offset = 0;
    }
    assert(offset == 0);
    return j;
}
258 
259 /* io vectors */
260 
261 void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
262 {
263     qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec));
264     qiov->niov = 0;
265     qiov->nalloc = alloc_hint;
266     qiov->size = 0;
267 }
268 
269 void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
270 {
271     int i;
272 
273     qiov->iov = iov;
274     qiov->niov = niov;
275     qiov->nalloc = -1;
276     qiov->size = 0;
277     for (i = 0; i < niov; i++)
278         qiov->size += iov[i].iov_len;
279 }
280 
281 void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
282 {
283     assert(qiov->nalloc != -1);
284 
285     if (qiov->niov == qiov->nalloc) {
286         qiov->nalloc = 2 * qiov->nalloc + 1;
287         qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
288     }
289     qiov->iov[qiov->niov].iov_base = base;
290     qiov->iov[qiov->niov].iov_len = len;
291     qiov->size += len;
292     ++qiov->niov;
293 }
294 
295 /*
296  * Concatenates (partial) iovecs from src_iov to the end of dst.
297  * It starts copying after skipping `soffset' bytes at the
298  * beginning of src and adds individual vectors from src to
299  * dst copies up to `sbytes' bytes total, or up to the end
300  * of src_iov if it comes first.  This way, it is okay to specify
301  * very large value for `sbytes' to indicate "up to the end
302  * of src".
303  * Only vector pointers are processed, not the actual data buffers.
304  */
305 void qemu_iovec_concat_iov(QEMUIOVector *dst,
306                            struct iovec *src_iov, unsigned int src_cnt,
307                            size_t soffset, size_t sbytes)
308 {
309     int i;
310     size_t done;
311 
312     if (!sbytes) {
313         return;
314     }
315     assert(dst->nalloc != -1);
316     for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) {
317         if (soffset < src_iov[i].iov_len) {
318             size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done);
319             qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len);
320             done += len;
321             soffset = 0;
322         } else {
323             soffset -= src_iov[i].iov_len;
324         }
325     }
326     assert(soffset == 0); /* offset beyond end of src */
327 }
328 
329 /*
330  * Concatenates (partial) iovecs from src to the end of dst.
331  * It starts copying after skipping `soffset' bytes at the
332  * beginning of src and adds individual vectors from src to
333  * dst copies up to `sbytes' bytes total, or up to the end
334  * of src if it comes first.  This way, it is okay to specify
335  * very large value for `sbytes' to indicate "up to the end
336  * of src".
337  * Only vector pointers are processed, not the actual data buffers.
338  */
339 void qemu_iovec_concat(QEMUIOVector *dst,
340                        QEMUIOVector *src, size_t soffset, size_t sbytes)
341 {
342     qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
343 }
344 
345 void qemu_iovec_destroy(QEMUIOVector *qiov)
346 {
347     assert(qiov->nalloc != -1);
348 
349     qemu_iovec_reset(qiov);
350     g_free(qiov->iov);
351     qiov->nalloc = 0;
352     qiov->iov = NULL;
353 }
354 
355 void qemu_iovec_reset(QEMUIOVector *qiov)
356 {
357     assert(qiov->nalloc != -1);
358 
359     qiov->niov = 0;
360     qiov->size = 0;
361 }
362 
363 size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
364                          void *buf, size_t bytes)
365 {
366     return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
367 }
368 
369 size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
370                            const void *buf, size_t bytes)
371 {
372     return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
373 }
374 
375 size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
376                          int fillc, size_t bytes)
377 {
378     return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
379 }
380 
381 /**
382  * Check that I/O vector contents are identical
383  *
384  * The IO vectors must have the same structure (same length of all parts).
385  * A typical usage is to compare vectors created with qemu_iovec_clone().
386  *
387  * @a:          I/O vector
388  * @b:          I/O vector
389  * @ret:        Offset to first mismatching byte or -1 if match
390  */
391 ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
392 {
393     int i;
394     ssize_t offset = 0;
395 
396     assert(a->niov == b->niov);
397     for (i = 0; i < a->niov; i++) {
398         size_t len = 0;
399         uint8_t *p = (uint8_t *)a->iov[i].iov_base;
400         uint8_t *q = (uint8_t *)b->iov[i].iov_base;
401 
402         assert(a->iov[i].iov_len == b->iov[i].iov_len);
403         while (len < a->iov[i].iov_len && *p++ == *q++) {
404             len++;
405         }
406 
407         offset += len;
408 
409         if (len != a->iov[i].iov_len) {
410             return offset;
411         }
412     }
413     return -1;
414 }
415 
/* Bookkeeping entry used by qemu_iovec_clone() while sorting iovecs. */
typedef struct IOVectorSortElem {
    int src_index;          /* position of the element in the source vector */
    struct iovec *src_iov;  /* the source element itself */
    void *dest_base;        /* base address chosen for the cloned element */
} IOVectorSortElem;
421 
422 static int sortelem_cmp_src_base(const void *a, const void *b)
423 {
424     const IOVectorSortElem *elem_a = a;
425     const IOVectorSortElem *elem_b = b;
426 
427     /* Don't overflow */
428     if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
429         return -1;
430     } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
431         return 1;
432     } else {
433         return 0;
434     }
435 }
436 
437 static int sortelem_cmp_src_index(const void *a, const void *b)
438 {
439     const IOVectorSortElem *elem_a = a;
440     const IOVectorSortElem *elem_b = b;
441 
442     return elem_a->src_index - elem_b->src_index;
443 }
444 
445 /**
446  * Copy contents of I/O vector
447  *
448  * The relative relationships of overlapping iovecs are preserved.  This is
449  * necessary to ensure identical semantics in the cloned I/O vector.
450  */
451 void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
452 {
453     IOVectorSortElem sortelems[src->niov];
454     void *last_end;
455     int i;
456 
457     /* Sort by source iovecs by base address */
458     for (i = 0; i < src->niov; i++) {
459         sortelems[i].src_index = i;
460         sortelems[i].src_iov = &src->iov[i];
461     }
462     qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
463 
464     /* Allocate buffer space taking into account overlapping iovecs */
465     last_end = NULL;
466     for (i = 0; i < src->niov; i++) {
467         struct iovec *cur = sortelems[i].src_iov;
468         ptrdiff_t rewind = 0;
469 
470         /* Detect overlap */
471         if (last_end && last_end > cur->iov_base) {
472             rewind = last_end - cur->iov_base;
473         }
474 
475         sortelems[i].dest_base = buf - rewind;
476         buf += cur->iov_len - MIN(rewind, cur->iov_len);
477         last_end = MAX(cur->iov_base + cur->iov_len, last_end);
478     }
479 
480     /* Sort by source iovec index and build destination iovec */
481     qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
482     for (i = 0; i < src->niov; i++) {
483         qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
484     }
485 }
486 
/*
 * Remove up to `bytes' bytes from the front of the vector *iov, which
 * holds *iov_cnt elements.
 *
 * Elements that are consumed completely are dropped by advancing *iov and
 * decrementing *iov_cnt.  If the cut ends inside an element, that
 * element's base pointer and length are adjusted in place.
 *
 * Returns the number of bytes removed, which is smaller than `bytes' when
 * the vector is shorter than that.
 */
size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
                         size_t bytes)
{
    struct iovec *elem = *iov;
    size_t dropped = 0;

    while (*iov_cnt > 0) {
        if (bytes < elem->iov_len) {
            /* The cut ends inside this element: trim its head and stop. */
            elem->iov_base = (char *)elem->iov_base + bytes;
            elem->iov_len -= bytes;
            dropped += bytes;
            break;
        }

        /* The whole element goes away. */
        bytes -= elem->iov_len;
        dropped += elem->iov_len;
        --*iov_cnt;
        elem++;
    }

    *iov = elem;
    return dropped;
}
509 
/*
 * Remove up to `bytes' bytes from the end of the vector `iov', which holds
 * *iov_cnt elements.
 *
 * Elements that are consumed completely are dropped by decrementing
 * *iov_cnt (their contents are left untouched).  If the cut ends inside an
 * element, that element's length is reduced in place.
 *
 * Returns the number of bytes removed, which is smaller than `bytes' when
 * the vector is shorter than that.
 */
size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
                        size_t bytes)
{
    size_t dropped = 0;

    while (*iov_cnt > 0) {
        struct iovec *last = &iov[*iov_cnt - 1];

        if (bytes < last->iov_len) {
            /* The cut ends inside this element: trim its tail and stop. */
            last->iov_len -= bytes;
            dropped += bytes;
            break;
        }

        /* The whole element goes away. */
        bytes -= last->iov_len;
        dropped += last->iov_len;
        --*iov_cnt;
    }

    return dropped;
}
537