2 * Helpers for getting linearized buffers from iov / filling buffers into iovs
4 * Copyright IBM, Corp. 2007, 2008
5 * Copyright (C) 2010 Red Hat, Inc.
8 * Anthony Liguori <aliguori@us.ibm.com>
9 * Amit Shah <amit.shah@redhat.com>
10 * Michael Tokarev <mjt@tls.msk.ru>
12 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory.
15 * Contributions after 2012-01-13 are licensed under the terms of the
16 * GNU GPL, version 2 or (at your option) any later version.
19 #include "qemu/osdep.h"
21 #include "qemu/sockets.h"
22 #include "qemu/cutils.h"
/*
 * Copy up to @bytes bytes from the flat buffer @buf into the scattered
 * vector @iov, starting @offset bytes into the vector's logical stream.
 * Returns the number of bytes actually copied (may be short if the
 * vector is smaller than @offset + @bytes).
 */
size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt,
                         size_t offset, const void *buf, size_t bytes)
{
    size_t copied = 0;
    unsigned int i;

    for (i = 0; (offset || copied < bytes) && i < iov_cnt; i++) {
        if (offset >= iov[i].iov_len) {
            /* Still inside the skipped prefix: consume this element whole. */
            offset -= iov[i].iov_len;
            continue;
        }
        size_t chunk = MIN(iov[i].iov_len - offset, bytes - copied);
        memcpy(iov[i].iov_base + offset, buf + copied, chunk);
        copied += chunk;
        offset = 0;     /* prefix fully skipped from now on */
    }
    return copied;
}
/*
 * Gather up to @bytes bytes from the scattered vector @iov into the flat
 * buffer @buf, starting @offset bytes into the vector's logical stream.
 * Returns the number of bytes actually copied.
 */
size_t iov_to_buf_full(const struct iovec *iov, const unsigned int iov_cnt,
                       size_t offset, void *buf, size_t bytes)
{
    size_t copied = 0;
    unsigned int i;

    for (i = 0; (offset || copied < bytes) && i < iov_cnt; i++) {
        if (offset >= iov[i].iov_len) {
            /* Element lies wholly before the requested range. */
            offset -= iov[i].iov_len;
            continue;
        }
        size_t chunk = MIN(iov[i].iov_len - offset, bytes - copied);
        memcpy(buf + copied, iov[i].iov_base + offset, chunk);
        copied += chunk;
        offset = 0;
    }
    return copied;
}
/*
 * Fill up to @bytes bytes of the scattered vector @iov with the byte
 * value @fillc, starting @offset bytes into the vector's logical stream.
 * Returns the number of bytes actually filled.
 */
size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
                  size_t offset, int fillc, size_t bytes)
{
    size_t filled = 0;
    unsigned int i;

    for (i = 0; (offset || filled < bytes) && i < iov_cnt; i++) {
        if (offset >= iov[i].iov_len) {
            offset -= iov[i].iov_len;
            continue;
        }
        size_t chunk = MIN(iov[i].iov_len - offset, bytes - filled);
        memset(iov[i].iov_base + offset, fillc, chunk);
        filled += chunk;
        offset = 0;
    }
    return filled;
}
/* Total number of payload bytes described by the vector. */
size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        total += iov[i].iov_len;
    }
    return total;
}
93 /* helper function for iov_send_recv() */
/*
 * Transfer the whole vector over a socket in the direction selected by
 * do_send (true: transmit, false: receive).  One build variant describes
 * the vector with a struct msghdr and lets sendmsg()/recvmsg() move it in
 * a single call, retrying while the call fails with EINTR; the fallback
 * variant walks the vector and moves it piece-by-piece with send()/recv(),
 * tracking a byte position `off' inside the current element.
 * NOTE(review): the source text here is fragmented and interior lines are
 * missing; comments describe only what the visible fragments establish.
 */
95 do_send_recv(int sockfd
, struct iovec
*iov
, unsigned iov_cnt
, bool do_send
)
/* Scatter/gather path: zero the msghdr, attach the vector, one syscall. */
100 memset(&msg
, 0, sizeof(msg
));
102 msg
.msg_iovlen
= iov_cnt
;
105 ? sendmsg(sockfd
, &msg
, 0)
106 : recvmsg(sockfd
, &msg
, 0);
/* Retry when the syscall was merely interrupted by a signal. */
107 } while (ret
< 0 && errno
== EINTR
);
110 /* else send piece-by-piece */
111 /*XXX Note: windows has WSASend() and WSARecv() */
115 while (i
< iov_cnt
) {
117 ? send(sockfd
, iov
[i
].iov_base
+ off
, iov
[i
].iov_len
- off
, 0)
118 : recv(sockfd
, iov
[i
].iov_base
+ off
, iov
[i
].iov_len
- off
, 0);
/* Partial transfer: stay on the same element, resume at `off'. */
122 if (off
< iov
[i
].iov_len
) {
/* EINTR in the piecewise loop: just retry the same element. */
127 } else if (errno
== EINTR
) {
130 /* else it is some "other" error,
131 * only return if there was no data processed. */
/*
 * Send or receive a sub-range [offset, offset+bytes) of the caller's
 * vector over a socket.  Works on a scratch copy (local_iov, built with
 * iov_copy()) so the caller's iovec array is never left modified.  Inside
 * the loop it repeatedly: skips already-transferred bytes, temporarily
 * shortens the last element so at most `bytes' are passed down, calls
 * do_send_recv(), then undoes the temporary edits before error checking.
 * NOTE(review): the source text is fragmented; interior lines (loop
 * headers, returns, g_free of local_iov) are missing from this view.
 */
144 ssize_t
iov_send_recv(int sockfd
, const struct iovec
*_iov
, unsigned iov_cnt
,
145 size_t offset
, size_t bytes
,
150 size_t orig_len
, tail
;
152 struct iovec
*local_iov
, *iov
;
/* Scratch copy: mutating it cannot corrupt the caller's vector. */
158 local_iov
= g_new0(struct iovec
, iov_cnt
);
159 iov_copy(local_iov
, iov_cnt
, _iov
, iov_cnt
, offset
, bytes
);
164 /* Find the start position, skipping `offset' bytes:
165 * first, skip all full-sized vector elements, */
166 for (niov
= 0; niov
< iov_cnt
&& offset
>= iov
[niov
].iov_len
; ++niov
) {
167 offset
-= iov
[niov
].iov_len
;
170 /* niov == iov_cnt would only be valid if bytes == 0, which
171 * we already ruled out in the loop condition. */
172 assert(niov
< iov_cnt
);
177 /* second, skip `offset' bytes from the (now) first element,
179 iov
[0].iov_base
+= offset
;
180 iov
[0].iov_len
-= offset
;
182 /* Find the end position skipping `bytes' bytes: */
183 /* first, skip all full-sized elements */
185 for (niov
= 0; niov
< iov_cnt
&& iov
[niov
].iov_len
<= tail
; ++niov
) {
186 tail
-= iov
[niov
].iov_len
;
189 /* second, fixup the last element, and remember the original
191 assert(niov
< iov_cnt
);
192 assert(iov
[niov
].iov_len
> tail
);
193 orig_len
= iov
[niov
].iov_len
;
194 iov
[niov
++].iov_len
= tail
;
195 ret
= do_send_recv(sockfd
, iov
, niov
, do_send
);
196 /* Undo the changes above before checking for errors */
197 iov
[niov
-1].iov_len
= orig_len
;
/* No trailing fixup was needed: transfer the subvector as-is. */
199 ret
= do_send_recv(sockfd
, iov
, niov
, do_send
);
/* Undo the `offset' adjustment made to the first element above. */
202 iov
[0].iov_base
-= offset
;
203 iov
[0].iov_len
+= offset
;
/* do_send_recv() already retried EINTR, so it cannot surface here. */
207 assert(errno
!= EINTR
);
/* EAGAIN with data already moved: report the partial byte count. */
209 if (errno
== EAGAIN
&& total
> 0) {
215 if (ret
== 0 && !do_send
) {
216 /* recv returns 0 when the peer has performed an orderly
221 /* Prepare for the next iteration */
/*
 * Hex-dump the contents of @iov to stream @fp with line prefix @prefix,
 * flattening at most @limit bytes into a temporary buffer first.
 */
void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
                 FILE *fp, const char *prefix, size_t limit)
{
    unsigned int v;
    size_t size = 0;
    char *buf;

    for (v = 0; v < iov_cnt; v++) {
        size += iov[v].iov_len;
    }
    /* Clamp the dump to the caller-supplied limit. */
    size = size > limit ? limit : size;
    buf = g_malloc(size);
    iov_to_buf(iov, iov_cnt, 0, buf, size);
    qemu_hexdump(fp, prefix, buf, size);
    g_free(buf);
}
/*
 * Build in @dst_iov a view of the sub-range [offset, offset+bytes) of
 * @iov.  Only the iovec descriptors are copied -- the destination entries
 * point into the source's data buffers.  Returns the number of
 * destination elements used.
 */
unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
                  const struct iovec *iov, unsigned int iov_cnt,
                  size_t offset, size_t bytes)
{
    unsigned int i, j;

    for (i = 0, j = 0;
         i < iov_cnt && j < dst_iov_cnt && (offset || bytes); i++) {
        if (offset >= iov[i].iov_len) {
            /* Source element lies entirely before the range. */
            offset -= iov[i].iov_len;
            continue;
        }
        size_t len = MIN(bytes, iov[i].iov_len - offset);

        dst_iov[j].iov_base = iov[i].iov_base + offset;
        dst_iov[j].iov_len = len;
        j++;
        bytes -= len;
        offset = 0;
    }
    return j;
}
275 void qemu_iovec_init(QEMUIOVector
*qiov
, int alloc_hint
)
277 qiov
->iov
= g_new(struct iovec
, alloc_hint
);
279 qiov
->nalloc
= alloc_hint
;
283 void qemu_iovec_init_external(QEMUIOVector
*qiov
, struct iovec
*iov
, int niov
)
291 for (i
= 0; i
< niov
; i
++)
292 qiov
->size
+= iov
[i
].iov_len
;
295 void qemu_iovec_add(QEMUIOVector
*qiov
, void *base
, size_t len
)
297 assert(qiov
->nalloc
!= -1);
299 if (qiov
->niov
== qiov
->nalloc
) {
300 qiov
->nalloc
= 2 * qiov
->nalloc
+ 1;
301 qiov
->iov
= g_renew(struct iovec
, qiov
->iov
, qiov
->nalloc
);
303 qiov
->iov
[qiov
->niov
].iov_base
= base
;
304 qiov
->iov
[qiov
->niov
].iov_len
= len
;
310 * Concatenates (partial) iovecs from src_iov to the end of dst.
311 * It starts copying after skipping `soffset' bytes at the
312 * beginning of src and adds individual vectors from src to
313 * dst copies up to `sbytes' bytes total, or up to the end
314 * of src_iov if it comes first. This way, it is okay to specify
315 * very large value for `sbytes' to indicate "up to the end
317 * Only vector pointers are processed, not the actual data buffers.
319 size_t qemu_iovec_concat_iov(QEMUIOVector
*dst
,
320 struct iovec
*src_iov
, unsigned int src_cnt
,
321 size_t soffset
, size_t sbytes
)
329 assert(dst
->nalloc
!= -1);
330 for (i
= 0, done
= 0; done
< sbytes
&& i
< src_cnt
; i
++) {
331 if (soffset
< src_iov
[i
].iov_len
) {
332 size_t len
= MIN(src_iov
[i
].iov_len
- soffset
, sbytes
- done
);
333 qemu_iovec_add(dst
, src_iov
[i
].iov_base
+ soffset
, len
);
337 soffset
-= src_iov
[i
].iov_len
;
340 assert(soffset
== 0); /* offset beyond end of src */
346 * Concatenates (partial) iovecs from src to the end of dst.
347 * It starts copying after skipping `soffset' bytes at the
348 * beginning of src and adds individual vectors from src to
349 * dst copies up to `sbytes' bytes total, or up to the end
350 * of src if it comes first. This way, it is okay to specify
351 * very large value for `sbytes' to indicate "up to the end
353 * Only vector pointers are processed, not the actual data buffers.
355 void qemu_iovec_concat(QEMUIOVector
*dst
,
356 QEMUIOVector
*src
, size_t soffset
, size_t sbytes
)
358 qemu_iovec_concat_iov(dst
, src
->iov
, src
->niov
, soffset
, sbytes
);
/*
 * Return pointer to iovec structure, where byte at @offset in original vector
 * is located.
 * Set @remaining_offset to be offset inside that iovec to the same byte.
 */
static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
                                     size_t *remaining_offset)
{
    while (offset > 0 && offset >= iov->iov_len) {
        offset -= iov->iov_len;
        iov++;
    }
    *remaining_offset = offset;

    return iov;
}
383 * Find subarray of iovec's, containing requested range. @head would
384 * be offset in first iov (returned by the function), @tail would be
385 * count of extra bytes in last iovec (returned iov + @niov - 1).
387 static struct iovec
*qiov_slice(QEMUIOVector
*qiov
,
388 size_t offset
, size_t len
,
389 size_t *head
, size_t *tail
, int *niov
)
391 struct iovec
*iov
, *end_iov
;
393 assert(offset
+ len
<= qiov
->size
);
395 iov
= iov_skip_offset(qiov
->iov
, offset
, head
);
396 end_iov
= iov_skip_offset(iov
, *head
+ len
, tail
);
399 assert(*tail
< end_iov
->iov_len
);
400 *tail
= end_iov
->iov_len
- *tail
;
404 *niov
= end_iov
- iov
;
409 int qemu_iovec_subvec_niov(QEMUIOVector
*qiov
, size_t offset
, size_t len
)
414 qiov_slice(qiov
, offset
, len
, &head
, &tail
, &niov
);
420 * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
421 * and @tail_buf buffer into new qiov.
423 int qemu_iovec_init_extended(
425 void *head_buf
, size_t head_len
,
426 QEMUIOVector
*mid_qiov
, size_t mid_offset
, size_t mid_len
,
427 void *tail_buf
, size_t tail_len
)
429 size_t mid_head
, mid_tail
;
430 int total_niov
, mid_niov
= 0;
431 struct iovec
*p
, *mid_iov
= NULL
;
433 assert(mid_qiov
->niov
<= IOV_MAX
);
435 if (SIZE_MAX
- head_len
< mid_len
||
436 SIZE_MAX
- head_len
- mid_len
< tail_len
)
442 mid_iov
= qiov_slice(mid_qiov
, mid_offset
, mid_len
,
443 &mid_head
, &mid_tail
, &mid_niov
);
446 total_niov
= !!head_len
+ mid_niov
+ !!tail_len
;
447 if (total_niov
> IOV_MAX
) {
451 if (total_niov
== 1) {
452 qemu_iovec_init_buf(qiov
, NULL
, 0);
453 p
= &qiov
->local_iov
;
455 qiov
->niov
= qiov
->nalloc
= total_niov
;
456 qiov
->size
= head_len
+ mid_len
+ tail_len
;
457 p
= qiov
->iov
= g_new(struct iovec
, qiov
->niov
);
461 p
->iov_base
= head_buf
;
462 p
->iov_len
= head_len
;
466 assert(!mid_niov
== !mid_len
);
468 memcpy(p
, mid_iov
, mid_niov
* sizeof(*p
));
469 p
[0].iov_base
= (uint8_t *)p
[0].iov_base
+ mid_head
;
470 p
[0].iov_len
-= mid_head
;
471 p
[mid_niov
- 1].iov_len
-= mid_tail
;
476 p
->iov_base
= tail_buf
;
477 p
->iov_len
= tail_len
;
484 * Check if the contents of subrange of qiov data is all zeroes.
486 bool qemu_iovec_is_zero(QEMUIOVector
*qiov
, size_t offset
, size_t bytes
)
489 size_t current_offset
;
491 assert(offset
+ bytes
<= qiov
->size
);
493 iov
= iov_skip_offset(qiov
->iov
, offset
, ¤t_offset
);
496 uint8_t *base
= (uint8_t *)iov
->iov_base
+ current_offset
;
497 size_t len
= MIN(iov
->iov_len
- current_offset
, bytes
);
499 if (!buffer_is_zero(base
, len
)) {
511 void qemu_iovec_init_slice(QEMUIOVector
*qiov
, QEMUIOVector
*source
,
512 size_t offset
, size_t len
)
516 assert(source
->size
>= len
);
517 assert(source
->size
- len
>= offset
);
519 /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */
520 ret
= qemu_iovec_init_extended(qiov
, NULL
, 0, source
, offset
, len
, NULL
, 0);
524 void qemu_iovec_destroy(QEMUIOVector
*qiov
)
526 if (qiov
->nalloc
!= -1) {
530 memset(qiov
, 0, sizeof(*qiov
));
533 void qemu_iovec_reset(QEMUIOVector
*qiov
)
535 assert(qiov
->nalloc
!= -1);
541 size_t qemu_iovec_to_buf(QEMUIOVector
*qiov
, size_t offset
,
542 void *buf
, size_t bytes
)
544 return iov_to_buf(qiov
->iov
, qiov
->niov
, offset
, buf
, bytes
);
547 size_t qemu_iovec_from_buf(QEMUIOVector
*qiov
, size_t offset
,
548 const void *buf
, size_t bytes
)
550 return iov_from_buf(qiov
->iov
, qiov
->niov
, offset
, buf
, bytes
);
553 size_t qemu_iovec_memset(QEMUIOVector
*qiov
, size_t offset
,
554 int fillc
, size_t bytes
)
556 return iov_memset(qiov
->iov
, qiov
->niov
, offset
, fillc
, bytes
);
560 * Check that I/O vector contents are identical
562 * The IO vectors must have the same structure (same length of all parts).
563 * A typical usage is to compare vectors created with qemu_iovec_clone().
567 * @ret: Offset to first mismatching byte or -1 if match
569 ssize_t
qemu_iovec_compare(QEMUIOVector
*a
, QEMUIOVector
*b
)
574 assert(a
->niov
== b
->niov
);
575 for (i
= 0; i
< a
->niov
; i
++) {
577 uint8_t *p
= (uint8_t *)a
->iov
[i
].iov_base
;
578 uint8_t *q
= (uint8_t *)b
->iov
[i
].iov_base
;
580 assert(a
->iov
[i
].iov_len
== b
->iov
[i
].iov_len
);
581 while (len
< a
->iov
[i
].iov_len
&& *p
++ == *q
++) {
587 if (len
!= a
->iov
[i
].iov_len
) {
596 struct iovec
*src_iov
;
600 static int sortelem_cmp_src_base(const void *a
, const void *b
)
602 const IOVectorSortElem
*elem_a
= a
;
603 const IOVectorSortElem
*elem_b
= b
;
606 if (elem_a
->src_iov
->iov_base
< elem_b
->src_iov
->iov_base
) {
608 } else if (elem_a
->src_iov
->iov_base
> elem_b
->src_iov
->iov_base
) {
615 static int sortelem_cmp_src_index(const void *a
, const void *b
)
617 const IOVectorSortElem
*elem_a
= a
;
618 const IOVectorSortElem
*elem_b
= b
;
620 return elem_a
->src_index
- elem_b
->src_index
;
624 * Copy contents of I/O vector
626 * The relative relationships of overlapping iovecs are preserved. This is
627 * necessary to ensure identical semantics in the cloned I/O vector.
629 void qemu_iovec_clone(QEMUIOVector
*dest
, const QEMUIOVector
*src
, void *buf
)
631 IOVectorSortElem sortelems
[src
->niov
];
635 /* Sort by source iovecs by base address */
636 for (i
= 0; i
< src
->niov
; i
++) {
637 sortelems
[i
].src_index
= i
;
638 sortelems
[i
].src_iov
= &src
->iov
[i
];
640 qsort(sortelems
, src
->niov
, sizeof(sortelems
[0]), sortelem_cmp_src_base
);
642 /* Allocate buffer space taking into account overlapping iovecs */
644 for (i
= 0; i
< src
->niov
; i
++) {
645 struct iovec
*cur
= sortelems
[i
].src_iov
;
646 ptrdiff_t rewind
= 0;
649 if (last_end
&& last_end
> cur
->iov_base
) {
650 rewind
= last_end
- cur
->iov_base
;
653 sortelems
[i
].dest_base
= buf
- rewind
;
654 buf
+= cur
->iov_len
- MIN(rewind
, cur
->iov_len
);
655 last_end
= MAX(cur
->iov_base
+ cur
->iov_len
, last_end
);
658 /* Sort by source iovec index and build destination iovec */
659 qsort(sortelems
, src
->niov
, sizeof(sortelems
[0]), sortelem_cmp_src_index
);
660 for (i
= 0; i
< src
->niov
; i
++) {
661 qemu_iovec_add(dest
, sortelems
[i
].dest_base
, src
->iov
[i
].iov_len
);
665 void iov_discard_undo(IOVDiscardUndo
*undo
)
667 /* Restore original iovec if it was modified */
668 if (undo
->modified_iov
) {
669 *undo
->modified_iov
= undo
->orig
;
673 size_t iov_discard_front_undoable(struct iovec
**iov
,
674 unsigned int *iov_cnt
,
676 IOVDiscardUndo
*undo
)
682 undo
->modified_iov
= NULL
;
685 for (cur
= *iov
; *iov_cnt
> 0; cur
++) {
686 if (cur
->iov_len
> bytes
) {
688 undo
->modified_iov
= cur
;
692 cur
->iov_base
+= bytes
;
693 cur
->iov_len
-= bytes
;
698 bytes
-= cur
->iov_len
;
699 total
+= cur
->iov_len
;
/* Front-discard without undo bookkeeping. */
size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
                         size_t bytes)
{
    return iov_discard_front_undoable(iov, iov_cnt, bytes, NULL);
}
713 size_t iov_discard_back_undoable(struct iovec
*iov
,
714 unsigned int *iov_cnt
,
716 IOVDiscardUndo
*undo
)
722 undo
->modified_iov
= NULL
;
729 cur
= iov
+ (*iov_cnt
- 1);
731 while (*iov_cnt
> 0) {
732 if (cur
->iov_len
> bytes
) {
734 undo
->modified_iov
= cur
;
738 cur
->iov_len
-= bytes
;
743 bytes
-= cur
->iov_len
;
744 total
+= cur
->iov_len
;
/* Back-discard without undo bookkeeping. */
size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
                        size_t bytes)
{
    return iov_discard_back_undoable(iov, iov_cnt, bytes, NULL);
}
758 void qemu_iovec_discard_back(QEMUIOVector
*qiov
, size_t bytes
)
761 unsigned int niov
= qiov
->niov
;
763 assert(qiov
->size
>= bytes
);
764 total
= iov_discard_back(qiov
->iov
, &niov
, bytes
);
765 assert(total
== bytes
);