/*-
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_param.h"
#include "opt_mbuf_stress_test.h"
#include "opt_mbuf_profiling.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>

#include <security/mac/mac_framework.h>
#ifdef MBUF_STRESS_TEST
int m_defragrandomfailures;
#endif

/*
 * sysctl(8) exported objects
 */
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD,
    &max_linkhdr, 0, "Size of largest link layer header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD,
    &max_protohdr, 0, "Size of largest protocol layer header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD,
    &max_hdr, 0, "Size of largest link plus protocol header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD,
    &max_datalen, 0, "Minimum space left in mbuf after max_hdr");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
    &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
    &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
    &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
    &m_defragfailure, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
    &m_defragrandomfailures, 0, "");
#endif
/*
 * Allocate a given length worth of mbufs and/or clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain.  If an
 * existing mbuf chain is provided, then we will append the new chain
 * to the existing one but still return the top of the newly allocated
 * chain.
 */
struct mbuf *
m_getm2(struct mbuf *m, int len, int how, short type, int flags)
{
	struct mbuf *mb, *nm = NULL, *mtail = NULL;

	KASSERT(len >= 0, ("%s: len is < 0", __func__));

	/* Validate flags. */
	flags &= (M_PKTHDR | M_EOR);
	/* Packet header mbuf must be first in chain. */
	if ((flags & M_PKTHDR) && m != NULL)
		flags &= ~M_PKTHDR;
	/* Loop and append maximum sized mbufs to the chain tail. */
	while (len > 0) {
		if (len > MCLBYTES)
			mb = m_getjcl(how, type, (flags & M_PKTHDR),
			    MJUMPAGESIZE);
		else if (len >= MINCLSIZE)
			mb = m_getcl(how, type, (flags & M_PKTHDR));
		else if (flags & M_PKTHDR)
			mb = m_gethdr(how, type);
		else
			mb = m_get(how, type);
		/* Fail the whole operation if one mbuf can't be allocated. */
		if (mb == NULL) {
			if (nm != NULL)
				m_freem(nm);
			return (NULL);
		}

		len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size :
		    ((mb->m_flags & M_PKTHDR) ? MHLEN : MLEN);
		if (mtail != NULL)
			mtail->m_next = mb;
		else
			nm = mb;
		mtail = mb;
		flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
	}
	if (flags & M_EOR)
		mtail->m_flags |= M_EOR;	/* Only valid on the last mbuf. */

	/* If mbuf was supplied, append new chain to the end of it. */
	if (m != NULL) {
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
			;
		mtail->m_next = nm;
		mtail->m_flags &= ~M_EOR;
	} else
		m = nm;

	return (m);
}
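/*
 * Illustrative use of m_getm2() (not part of the original file): a caller
 * that needs room for "size" bytes of packet data could request
 * m_getm2(NULL, size, M_DONTWAIT, MT_DATA, M_PKTHDR) and treat a NULL
 * return as an allocation failure; the flag and type names are the
 * standard mbuf ones used elsewhere in this file.
 */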
/*
 * Free an entire chain of mbufs and associated external buffers, if
 * any.
 */
void
m_freem(struct mbuf *mb)
{

	while (mb != NULL)
		mb = m_free(mb);
}
/*-
 * Configure a provided mbuf to refer to the provided external storage
 * buffer and setup a reference count for said buffer.  If the setting
 * up of the reference count fails, the M_EXT bit will not be set.  If
 * successful, the M_EXT bit is set in the mbuf's flags.
 *
 * Arguments:
 *    mb     The existing mbuf to which to attach the provided buffer.
 *    buf    The address of the provided external storage buffer.
 *    size   The size of the provided buffer.
 *    freef  A pointer to a routine that is responsible for freeing the
 *           provided external storage buffer.
 *    args   A pointer to an argument structure (of any type) to be passed
 *           to the provided freef routine (may be NULL).
 *    flags  Any other flags to be passed to the provided mbuf.
 *    type   The type that the external storage buffer should be
 *           labeled with.
 */
void
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
    void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type)
{
	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
	if (type != EXT_EXTREF)
		mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT);
	if (mb->m_ext.ref_cnt != NULL) {
		*(mb->m_ext.ref_cnt) = 1;
		mb->m_flags |= (M_EXT | flags);
		mb->m_ext.ext_buf = buf;
		mb->m_data = mb->m_ext.ext_buf;
		mb->m_ext.ext_size = size;
		mb->m_ext.ext_free = freef;
		mb->m_ext.ext_arg1 = arg1;
		mb->m_ext.ext_arg2 = arg2;
		mb->m_ext.ext_type = type;
	}
}
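/*
 * Illustrative call (not from the original file): a driver that owns a
 * private receive buffer could attach it with something like
 *	m_extadd(m, my_buf, MY_BUFSIZE, my_free, my_arg, NULL, 0, EXT_NET_DRV);
 * where my_buf, my_free, my_arg and MY_BUFSIZE are the caller's own names.
 * The free routine is later invoked as (*freef)(arg1, arg2) from
 * mb_free_ext() below once the last reference to the buffer is dropped.
 */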
/*
 * Non-directly-exported function to clean up after mbufs with M_EXT
 * storage attached to them if the reference count hits 1.
 */
void
mb_free_ext(struct mbuf *m)
{
	int skipmbuf;
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));

	/*
	 * check if the header is embedded in the cluster
	 */
	skipmbuf = (m->m_flags & M_NOFREE);
	/* Free attached storage if this mbuf is the only reference to it. */
	if (*(m->m_ext.ref_cnt) == 1 ||
	    atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
		switch (m->m_ext.ext_type) {
		case EXT_PACKET:	/* The packet zone is special. */
			if (*(m->m_ext.ref_cnt) == 0)
				*(m->m_ext.ref_cnt) = 1;
			uma_zfree(zone_pack, m);
			return;		/* Job done. */
		case EXT_CLUSTER:
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			break;
		case EXT_JUMBOP:
			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO9:
			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO16:
			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
			break;
			*(m->m_ext.ref_cnt) = 0;
			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
			    m->m_ext.ref_cnt));
			/* FALLTHROUGH */
		case EXT_EXTREF:
			KASSERT(m->m_ext.ext_free != NULL,
			    ("%s: ext_free not set", __func__));
			(*(m->m_ext.ext_free))(m->m_ext.ext_arg1,
			    m->m_ext.ext_arg2);
			break;
		default:
			KASSERT(m->m_ext.ext_type == 0,
			    ("%s: unknown ext_type", __func__));
		}
	}
	if (skipmbuf)
		return;

	/*
	 * Free this mbuf back to the mbuf zone with all m_ext
	 * information purged.
	 */
	m->m_ext.ext_buf = NULL;
	m->m_ext.ext_free = NULL;
	m->m_ext.ext_arg1 = NULL;
	m->m_ext.ext_arg2 = NULL;
	m->m_ext.ref_cnt = NULL;
	m->m_ext.ext_size = 0;
	m->m_ext.ext_type = 0;
	m->m_flags &= ~M_EXT;
	uma_zfree(zone_mbuf, m);
}
/*
 * Attach the cluster from *m to *n, set up m_ext in *n
 * and bump the refcount of the cluster.
 */
static void
mb_dupcl(struct mbuf *n, struct mbuf *m)
{
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
	KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
	if (*(m->m_ext.ref_cnt) == 1)
		*(m->m_ext.ref_cnt) += 1;
	else
		atomic_add_int(m->m_ext.ref_cnt, 1);
	n->m_ext.ext_buf = m->m_ext.ext_buf;
	n->m_ext.ext_free = m->m_ext.ext_free;
	n->m_ext.ext_arg1 = m->m_ext.ext_arg1;
	n->m_ext.ext_arg2 = m->m_ext.ext_arg2;
	n->m_ext.ext_size = m->m_ext.ext_size;
	n->m_ext.ref_cnt = m->m_ext.ref_cnt;
	n->m_ext.ext_type = m->m_ext.ext_type;
	n->m_flags |= M_EXT;
}
/*
 * Clean up mbuf (chain) from any tags and packet headers.
 * If "all" is set then the first mbuf in the chain will be
 * cleaned too.
 */
void
m_demote(struct mbuf *m0, int all)
{
	struct mbuf *m;
	for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) {
		if (m->m_flags & M_PKTHDR) {
			m_tag_delete_chain(m, NULL);
			m->m_flags &= ~M_PKTHDR;
			bzero(&m->m_pkthdr, sizeof(struct pkthdr));
		}
		if (m->m_type == MT_HEADER)
			m->m_type = MT_DATA;
		if (m != m0 && m->m_nextpkt != NULL)
			m_freem(m->m_nextpkt);
		m->m_flags = m->m_flags & (M_EXT|M_EOR|M_RDONLY|M_FREELIST);
	}
}
/*
 * Sanity checks on mbuf (chain) for use in KASSERT() and general
 * debugging.
 * Returns 0 or panics when bad and 1 on all tests passed.
 * Sanitize, 0 to run M_SANITY_ACTION, 1 to garble things so they
 * blow up later.
 */
int
m_sanity(struct mbuf *m0, int sanitize)
{
	struct mbuf *m;
	caddr_t a, b;
	int pktlen = 0;

#ifdef INVARIANTS
#define	M_SANITY_ACTION(s)	panic("mbuf %p: " s, m)
#else
#define	M_SANITY_ACTION(s)	printf("mbuf %p: " s, m)
#endif
	for (m = m0; m != NULL; m = m->m_next) {
		/*
		 * Basic pointer checks.  If any of these fails then some
		 * unrelated kernel memory before or after us is trashed.
		 * No way to recover from that.
		 */
		a = ((m->m_flags & M_EXT) ? m->m_ext.ext_buf :
		    ((m->m_flags & M_PKTHDR) ? (caddr_t)(&m->m_pktdat) :
		     (caddr_t)(&m->m_dat)) );
		b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size :
		    ((m->m_flags & M_PKTHDR) ? MHLEN : MLEN)));
		if ((caddr_t)m->m_data < a)
			M_SANITY_ACTION("m_data outside mbuf data range left");
		if ((caddr_t)m->m_data > b)
			M_SANITY_ACTION("m_data outside mbuf data range right");
		if ((caddr_t)m->m_data + m->m_len > b)
			M_SANITY_ACTION("m_data + m_len exceeds mbuf space");
		if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.header) {
			if ((caddr_t)m->m_pkthdr.header < a ||
			    (caddr_t)m->m_pkthdr.header > b)
				M_SANITY_ACTION("m_pkthdr.header outside mbuf data range");
		}
		/* m->m_nextpkt may only be set on first mbuf in chain. */
		if (m != m0 && m->m_nextpkt != NULL) {
			if (sanitize) {
				m_freem(m->m_nextpkt);
				m->m_nextpkt = (struct mbuf *)0xDEADC0DE;
			} else
				M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf");
		}
		/* packet length (not mbuf length!) calculation */
		if (m0->m_flags & M_PKTHDR)
			pktlen += m->m_len;
		/* m_tags may only be attached to first mbuf in chain. */
		if (m != m0 && m->m_flags & M_PKTHDR &&
		    !SLIST_EMPTY(&m->m_pkthdr.tags)) {
			if (sanitize) {
				m_tag_delete_chain(m, NULL);
				/* put in 0xDEADC0DE perhaps? */
			} else
				M_SANITY_ACTION("m_tags on in-chain mbuf");
		}
		/* M_PKTHDR may only be set on first mbuf in chain */
		if (m != m0 && m->m_flags & M_PKTHDR) {
			if (sanitize) {
				bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
				m->m_flags &= ~M_PKTHDR;
				/* put in 0xDEADCODE and leave hdr flag in */
			} else
				M_SANITY_ACTION("M_PKTHDR on in-chain mbuf");
		}
	}
	m = m0;
	if (pktlen && pktlen != m->m_pkthdr.len) {
		if (sanitize)
			m->m_pkthdr.len = 0;
		else
			M_SANITY_ACTION("m_pkthdr.len != mbuf chain length");
	}
	return 1;

#undef	M_SANITY_ACTION
}
/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{
#if 0
	/* see below for why these are not enabled */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
	    ("m_move_pkthdr: to has tags"));
#endif
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}
/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
{
#if 0
	/*
	 * The mbuf allocator only initializes the pkthdr
	 * when the mbuf is allocated with MGETHDR. Many users
	 * (e.g. m_copy*, m_prepend) use MGET and then
	 * smash the pkthdr as needed causing these
	 * assertions to trip.  For now just disable them.
	 */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
#endif
	MBUF_CHECKSLEEP(how);
#ifdef MAC
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, MBTOM(how)));
}
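/*
 * Note the contrast with m_move_pkthdr() above: m_move_pkthdr() transfers
 * the header and tag list and leaves "from" without M_PKTHDR, while
 * m_dup_pkthdr() leaves "from" intact and deep-copies the tag chain, which
 * is why it can fail and returns a value to be checked.
 */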
/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;
	if (m->m_flags & M_PKTHDR)
		MGETHDR(mn, how, m->m_type);
	else
		MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	if (m->m_flags & M_PKTHDR) {
		if (len < MHLEN)
			MH_ALIGN(m, len);
	} else {
		if (len < MLEN)
			M_ALIGN(m, len);
	}
	m->m_len = len;
	return (m);
}
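/*
 * As the comment above notes, this is the slow path behind the M_PREPEND()
 * macro: callers normally use M_PREPEND(), which only falls back to
 * m_prepend() when there is not enough leading space in the first mbuf.
 */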
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;
	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	MBUF_CHECKSLEEP(wait);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			MGETHDR(n, wait, m->m_type);
		else
			MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			mb_dupcl(n, m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL)
		mbstat.m_mcfail++;	/* XXX: No consistency. */

	return (top);
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
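/*
 * Because m_copym() shares clusters instead of copying them, the copy (and
 * the original) must be treated as read-only; check M_WRITABLE() before
 * modifying data in place, or use m_dup() below for a writable deep copy.
 */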
/*
 * Returns mbuf chain with new head for the prepending case.
 * Copies from mbuf (chain) n from off for len to mbuf (chain) m
 * either prepending or appending the data.
 * The resulting mbuf (chain) m is fully writeable.
 * m is destination (is made writeable)
 * n is source, off is offset in source, len is len from offset
 * dir, 0 append, 1 prepend
 * how, wait or nowait
 */
static int
m_bcopyxxx(void *s, void *t, u_int len)
{

	bcopy(s, t, (size_t)len);
	return 0;
}
struct mbuf *
m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len,
    int prep, int how)
{
	struct mbuf *mm, *x, *z, *prev = NULL;
	caddr_t p;
	int i, nlen = 0;
	char buf[MLEN];

	KASSERT(m != NULL && n != NULL, ("m_copymdata, no target or source"));
	KASSERT(off >= 0, ("m_copymdata, negative off %d", off));
	KASSERT(len >= 0, ("m_copymdata, negative len %d", len));
	KASSERT(prep == 0 || prep == 1, ("m_copymdata, unknown direction %d", prep));
	for (z = n; z != NULL; z = z->m_next)
		nlen += z->m_len;
	if (len == M_COPYALL)
		len = nlen - off;
	if (off + len > nlen || len < 1)
		return NULL;

	if (!M_WRITABLE(mm)) {
		/* XXX: Use proper m_xxx function instead. */
		x = m_getcl(how, MT_DATA, mm->m_flags);
		if (x == NULL)
			return NULL;
		bcopy(mm->m_ext.ext_buf, x->m_ext.ext_buf, x->m_ext.ext_size);
		p = x->m_ext.ext_buf + (mm->m_data - mm->m_ext.ext_buf);
	/*
	 * Append/prepend the data.  Allocating mbufs as necessary.
	 */
	/* Shortcut if enough free space in first/last mbuf. */
	if (!prep && M_TRAILINGSPACE(mm) >= len) {
		m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t) +
		    mm->m_len);
		mm->m_len += len;
		mm->m_pkthdr.len += len;
		return m;
	}
	if (prep && M_LEADINGSPACE(mm) >= len) {
		mm->m_data = mtod(mm, caddr_t) - len;
		m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t));
		mm->m_len += len;
		mm->m_pkthdr.len += len;
		return mm;
	}
	/* Expand first/last mbuf to cluster if possible. */
	if (!prep && !(mm->m_flags & M_EXT) && len > M_TRAILINGSPACE(mm)) {
		bcopy(mm->m_data, &buf, mm->m_len);
		m_clget(mm, how);
		if (!(mm->m_flags & M_EXT))
			return NULL;
		bcopy(&buf, mm->m_ext.ext_buf, mm->m_len);
		mm->m_data = mm->m_ext.ext_buf;
		mm->m_pkthdr.header = NULL;
	}
	if (prep && !(mm->m_flags & M_EXT) && len > M_LEADINGSPACE(mm)) {
		bcopy(mm->m_data, &buf, mm->m_len);
		m_clget(mm, how);
		if (!(mm->m_flags & M_EXT))
			return NULL;
		bcopy(&buf, (caddr_t *)mm->m_ext.ext_buf +
		    mm->m_ext.ext_size - mm->m_len, mm->m_len);
		mm->m_data = (caddr_t)mm->m_ext.ext_buf +
		    mm->m_ext.ext_size - mm->m_len;
		mm->m_pkthdr.header = NULL;
	}
	/* Append/prepend as many mbuf (clusters) as necessary to fit len. */
	if (!prep && len > M_TRAILINGSPACE(mm)) {
		if (!m_getm(mm, len - M_TRAILINGSPACE(mm), how, MT_DATA))
			return NULL;
	}
	if (prep && len > M_LEADINGSPACE(mm)) {
		if (!(z = m_getm(NULL, len - M_LEADINGSPACE(mm), how, MT_DATA)))
			return NULL;
		i = 0;
		for (x = z; x != NULL; x = x->m_next) {
			i += x->m_flags & M_EXT ? x->m_ext.ext_size :
			    (x->m_flags & M_PKTHDR ? MHLEN : MLEN);
			if (!x->m_next)
				break;
		}
		z->m_data += i - len;
		m_move_pkthdr(mm, z);
	}
	/* Seek to start position in source mbuf. Optimization for long chains. */

	/* Copy data into target mbuf. */
	KASSERT(z != NULL, ("m_copymdata, falling off target edge"));
	i = M_TRAILINGSPACE(z);
	m_apply(n, off, i, m_bcopyxxx, mtod(z, caddr_t) + z->m_len);

	/* fixup pkthdr.len if necessary */
	if ((prep ? mm : m)->m_flags & M_PKTHDR)
		(prep ? mm : m)->m_pkthdr.len += i;

	return (prep ? mm : m);
}
/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MBUF_CHECKSLEEP(how);
	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		mb_dupcl(n, m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			mb_dupcl(n, m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;
	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
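/*
 * Typical (illustrative) use of m_copydata(): linearize a protocol header
 * into a local buffer when it may be split across mbufs, e.g.
 *	struct ip ip;
 *	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
 * The chain must be long enough, as the KASSERTs above enforce.
 */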
/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	MBUF_CHECKSLEEP(how);
	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		if (remain >= MINCLSIZE) {
			n = m_getcl(how, m->m_type, 0);
			nsize = MCLBYTES;
		} else {
			n = m_get(how, m->m_type);
			nsize = MLEN;
		}
		if (n == NULL)
			goto nospace;

		if (top == NULL) {		/* First one, must be PKTHDR */
			if (!m_dup_pkthdr(n, m, how)) {
				m_free(n);
				goto nospace;
			}
			if ((n->m_flags & M_EXT) == 0)
				nsize = MHLEN;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
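/*
 * As the comment above says, m_cat() does not touch any m_pkthdr, so a
 * caller concatenating onto a packet header chain is expected to update
 * m_pkthdr.len itself, e.g. via m_fixhdr() below.
 */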
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				if (m->m_next != NULL) {
					m_freem(m->m_next);
					m->m_next = NULL;
				}
				break;
			}
			count -= m->m_len;
		}
	}
}
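/*
 * m_adj() trims req_len bytes from the head of the chain when req_len is
 * positive and from the tail when it is negative.  For example (illustrative
 * only), m_adj(m, ETHER_HDR_LEN) strips a link-layer header, while
 * m_adj(m, -ETHER_CRC_LEN) would drop a trailing CRC.
 */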
/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;
	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mbstat.m_mpfail++;	/* XXX: No consistency. */
	return (NULL);
}
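/*
 * Illustrative m_pullup() usage (not from the original file): a protocol
 * input routine that needs a contiguous header typically does
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 * since m_pullup() frees the chain and returns NULL on failure.
 */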
/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;
	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(m, n);
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	return (NULL);
}
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	MBUF_CHECKSLEEP(wait);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		mb_dupcl(n, m);
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
/*
 * Routine to copy from device local memory into mbufs.
 * Note that `off' argument is offset into first mbuf of target chain from
 * which to begin copying the data to.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = NULL, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);
	while (totlen > 0) {
		if (top == NULL) {	/* First one, must be PKTHDR */
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				len = MCLBYTES;
			} else {
				m = m_gethdr(M_DONTWAIT, MT_DATA);
				len = MHLEN;

				/* Place initial small packet/header at end of mbuf */
				if (m && totlen + off + max_linkhdr <= MLEN) {
					m->m_data += max_linkhdr;
					len -= max_linkhdr;
				}
			}
			if (m == NULL)
				return (NULL);
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = totlen;
		} else {
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, 0);
				len = MCLBYTES;
			} else {
				m = m_get(M_DONTWAIT, MT_DATA);
				len = MLEN;
			}
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}
/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;
	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			bzero(mtod(n, caddr_t), MLEN);
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min (m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
/*
 * Append the specified data to the indicated mbuf chain,
 * Extend the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;
	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_DONTWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = min(MLEN, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}
/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, u_int), void *arg)
{
	u_int count;
	int rval;

	KASSERT(off >= 0, ("m_apply, negative off %d", off));
	KASSERT(len >= 0, ("m_apply, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
		count = min(m->m_len - off, len);
		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);
		len -= count;
		off = 0;
		m = m->m_next;
	}
	return (0);
}
/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search. */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		}
		loc -= m->m_len;
		if (m->m_next == NULL) {
			if (loc == 0) {
				/* Point at the end of valid data. */
				*off = m->m_len;
				return (m);
			}
			return (NULL);
		}
		m = m->m_next;
	}
	return (NULL);
}
void
m_print(const struct mbuf *m, int maxlen)
{
	int len;
	int pdata;
	const struct mbuf *m2;

	if (m->m_flags & M_PKTHDR)
		len = m->m_pkthdr.len;
	else
		len = -1;
	m2 = m;
	while (m2 != NULL && (len == -1 || len)) {
		pdata = m2->m_len;
		if (maxlen != -1 && pdata > maxlen)
			pdata = maxlen;
		printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len,
		    m2->m_next, m2->m_flags, "\20\20freelist\17skipfw"
		    "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly"
		    "\3eor\2pkthdr\1ext", pdata ? "" : "\n");
		if (pdata)
			printf(", %*D\n", pdata, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	if (len > 0)
		printf("%d bytes unaccounted for.\n", len);
	return;
}
u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}
u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}
/*
 * Defragment a mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If a non-packet header is passed in, the original
 * mbuf (chain?) will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	MBUF_CHECKSLEEP(how);
	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
#ifdef MBUF_STRESS_TEST
	if (m0->m_next == NULL)
		m_defraguseless++;
#endif
	m_freem(m0);
	m0 = m_final;
#ifdef MBUF_STRESS_TEST
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
#endif
	return (m0);
nospace:
#ifdef MBUF_STRESS_TEST
	m_defragfailure++;
#endif
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
/*
 * Defragment an mbuf chain, returning at most maxfrags separate
 * mbufs+clusters.  If this is not possible NULL is returned and
 * the original mbuf chain is left in its present (potentially
 * modified) state.  We use two techniques: collapsing consecutive
 * mbufs and replacing consecutive mbufs by a cluster.
 *
 * NB: this should really be named m_defrag but that name is taken
 */
struct mbuf *
m_collapse(struct mbuf *m0, int how, int maxfrags)
{
	struct mbuf *m, *n, *n2, **prev;
	int curfrags;
	/*
	 * Calculate the current number of frags.
	 */
	curfrags = 0;
	for (m = m0; m != NULL; m = m->m_next)
		curfrags++;
	/*
	 * First, try to collapse mbufs.  Note that we always collapse
	 * towards the front so we don't need to deal with moving the
	 * pkthdr.  This may be suboptimal if the first mbuf has much
	 * less data than the following.
	 */
	m = m0;
again:
	for (;;) {
		n = m->m_next;
		if (n == NULL)
			break;
		if ((m->m_flags & M_RDONLY) == 0 &&
		    n->m_len < M_TRAILINGSPACE(m)) {
			bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
			    n->m_len);
			m->m_len += n->m_len;
			m->m_next = n->m_next;
			m_free(n);
			if (--curfrags <= maxfrags)
				return m0;
		} else
			m = n;
	}
	KASSERT(maxfrags > 1,
	    ("maxfrags %u, but normal collapse failed", maxfrags));
	/*
	 * Collapse consecutive mbufs to a cluster.
	 */
	prev = &m0->m_next;		/* NB: not the first mbuf */
	while ((n = *prev) != NULL) {
		if ((n2 = n->m_next) != NULL &&
		    n->m_len + n2->m_len < MCLBYTES) {
			m = m_getcl(how, MT_DATA, 0);
			if (m == NULL)
				goto bad;
			bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
			bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
			    n2->m_len);
			m->m_len = n->m_len + n2->m_len;
			m->m_next = n2->m_next;
			*prev = m;
			m_free(n);
			m_free(n2);
			if (--curfrags <= maxfrags)	/* +1 cl -2 mbufs */
				return m0;
			/*
			 * Still not there, try the normal collapse
			 * again before we allocate another cluster.
			 */
			goto again;
		}
		prev = &n->m_next;
	}
	/*
	 * No place where we can collapse to a cluster; punt.
	 * This can occur if, for example, you request 2 frags
	 * but the packet requires that both be clusters (we
	 * never reallocate the first mbuf to avoid moving the
	 * packet header).
	 */
bad:
	return NULL;
}
#ifdef MBUF_STRESS_TEST

/*
 * Fragment an mbuf chain.  There's no reason you'd ever want to do
 * this in normal usage, but it's great for stress testing various
 * mbuf consumers.
 *
 * If fragmentation is not possible, the original chain will be
 * returned.
 *
 * Possible length values:
 * 0	 no fragmentation will occur
 * > 0	each fragment will be of the specified length
 * -1	each fragment will be the same random value in length
 * -2	each fragment's length will be entirely random
 * (Random values range from 1 to 256)
 */
struct mbuf *
m_fragment(struct mbuf *m0, int how, int length)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	if ((length == 0) || (length < -2))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

	m_final = m_getcl(how, MT_DATA, M_PKTHDR);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	if (length == -1)
		length = 1 + (arc4random() & 255);

	while (progress < m0->m_pkthdr.len) {
		int fraglen;

		if (length > 0)
			fraglen = length;
		else
			fraglen = 1 + (arc4random() & 255);
		if (fraglen > m0->m_pkthdr.len - progress)
			fraglen = m0->m_pkthdr.len - progress;

		if (fraglen > MCLBYTES)
			fraglen = MCLBYTES;

		if (m_new == NULL) {
			m_new = m_getcl(how, MT_DATA, 0);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
		progress += fraglen;
		m_new->m_len = fraglen;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_freem(m0);
	m0 = m_final;
	return (m0);

nospace:
	if (m_final)
		m_freem(m_final);
	/* Return the original chain on failure */
	return (m0);
}

#endif
/*
 * Copy the contents of uio into a properly sized mbuf chain.
 */
struct mbuf *
m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
{
	struct mbuf *m, *mb;
	int error, length, total;
	int progress = 0;
	/*
	 * len can be zero or an arbitrary large value bound by
	 * the total data supplied by the uio.
	 */
	if (len > 0)
		total = min(uio->uio_resid, len);
	else
		total = uio->uio_resid;

	/*
	 * The smallest unit returned by m_getm2() is a single mbuf
	 * with pkthdr.  We can't align past it.
	 */
	if (align >= MHLEN)
		return (NULL);

	/*
	 * Give us the full allocation or nothing.
	 * If len is zero return the smallest empty mbuf.
	 */
	m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags);
	if (m == NULL)
		return (NULL);
	m->m_data += align;

	/* Fill all mbufs with uio data and update header information. */
	for (mb = m; mb != NULL; mb = mb->m_next) {
		length = min(M_TRAILINGSPACE(mb), total - progress);

		error = uiomove(mtod(mb, void *), length, uio);
		if (error) {
			m_freem(m);
			return (NULL);
		}

		mb->m_len = length;
		progress += length;
		if (flags & M_PKTHDR)
			m->m_pkthdr.len += length;
	}
	KASSERT(progress == total, ("%s: progress != total", __func__));

	return (m);
}
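/*
 * m_uiotombuf() is the usual way socket code turns user data described by a
 * struct uio into an mbuf chain; a sosend-style path might, for instance,
 * request m_uiotombuf(uio, how, space, max_hdr, M_PKTHDR).  Note that the
 * align argument must stay below MHLEN, as checked above.
 */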
/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int adjust;
	if (m->m_flags & M_EXT)
		adjust = m->m_ext.ext_size - len;
	else if (m->m_flags & M_PKTHDR)
		adjust = MHLEN - len;
	else
		adjust = MLEN - len;
	m->m_data += adjust &~ (sizeof(long)-1);
}
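/*
 * Illustrative m_align() usage (not in the original file): after allocating
 * a fresh mbuf, e.g.
 *	m = m_get(M_DONTWAIT, MT_DATA);
 *	if (m != NULL)
 *		m_align(m, len);
 * the len bytes written next end at the end of the buffer, leaving the
 * maximum leading space for headers prepended later.
 */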
/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 */
struct mbuf *
m_unshare(struct mbuf *m0, int how)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of it that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				    mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
#if 0
				newipsecstat.ips_mbcoalesced++;
#endif
			} else
				mprev = m;
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).
		 */
		if (M_WRITABLE(m)) {
			mprev = m;
			continue;
		}
		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			    mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
#if 0
			newipsecstat.ips_clcoalesced++;
#endif
			continue;
		}
		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, how, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(how, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;
#if 0
			newipsecstat.ips_clcopied++;
#endif
			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			n = m_getcl(how, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		n->m_next = m->m_next;
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}
#ifdef MBUF_PROFILING

#define MP_BUCKETS 32 /* don't just change this as things may overflow.*/
struct mbufprofile {
	uintmax_t wasted[MP_BUCKETS];
	uintmax_t used[MP_BUCKETS];
	uintmax_t segments[MP_BUCKETS];
} mbprof;
#define MP_MAXDIGITS 21	/* strlen("16,000,000,000,000,000,000") == 21 */
#define MP_NUMLINES 6
#define MP_NUMSPERLINE 16
#define MP_EXTRABYTES 64	/* > strlen("used:\nwasted:\nsegments:\n") */
/* work out max space needed and add a bit of spare space too */
#define MP_MAXLINE ((MP_MAXDIGITS+1) * MP_NUMSPERLINE)
#define MP_BUFSIZE ((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES)

char mbprofbuf[MP_BUFSIZE];
void
m_profile(struct mbuf *m)
{
	int segments = 0;
	int used = 0;
	int wasted = 0;

	while (m) {
		segments++;
		used += m->m_len;
		if (m->m_flags & M_EXT) {
			wasted += MHLEN - sizeof(m->m_ext) +
			    m->m_ext.ext_size - m->m_len;
		} else {
			if (m->m_flags & M_PKTHDR)
				wasted += MHLEN - m->m_len;
			else
				wasted += MLEN - m->m_len;
		}
		m = m->m_next;
	}
	/* be paranoid.. it helps */
	if (segments > MP_BUCKETS - 1)
		segments = MP_BUCKETS - 1;
	if (used > 100000)
		used = 100000;
	if (wasted > 100000)
		wasted = 100000;
	/* store in the appropriate bucket */
	/* don't bother locking. if it's slightly off, so what? */
	mbprof.segments[segments]++;
	mbprof.used[fls(used)]++;
	mbprof.wasted[fls(wasted)]++;
}
static void
mbprof_textify(void)
{
	int offset;
	char *c;
	uintmax_t *p;

	p = &mbprof.wasted[0];
	c = mbprofbuf;
	offset = snprintf(c, MP_MAXLINE + 10,
	    "wasted:\n"
	    "%ju %ju %ju %ju %ju %ju %ju %ju "
	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);

	p = &mbprof.wasted[16];
	c += offset;
	offset = snprintf(c, MP_MAXLINE,
	    "%ju %ju %ju %ju %ju %ju %ju %ju "
	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);

	p = &mbprof.used[0];
	c += offset;
	offset = snprintf(c, MP_MAXLINE + 10,
	    "used:\n"
	    "%ju %ju %ju %ju %ju %ju %ju %ju "
	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);

	p = &mbprof.used[16];
	c += offset;
	offset = snprintf(c, MP_MAXLINE,
	    "%ju %ju %ju %ju %ju %ju %ju %ju "
	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);

	p = &mbprof.segments[0];
	c += offset;
	offset = snprintf(c, MP_MAXLINE + 10,
	    "segments:\n"
	    "%ju %ju %ju %ju %ju %ju %ju %ju "
	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);

	p = &mbprof.segments[16];
	c += offset;
	offset = snprintf(c, MP_MAXLINE,
	    "%ju %ju %ju %ju %ju %ju %ju %ju "
	    "%ju %ju %ju %ju %ju %ju %ju %ju",
	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
}
static int
mbprof_handler(SYSCTL_HANDLER_ARGS)
{
	int error;

	mbprof_textify();
	error = SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1);
	return (error);
}
static int
mbprof_clr_handler(SYSCTL_HANDLER_ARGS)
{
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error || !req->newptr)
		return (error);

	if (clear)
		bzero(&mbprof, sizeof(mbprof));

	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, mbprof_handler, "A", "mbuf profiling statistics");

SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW,
    NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics");

#endif