/*-
 * Copyright (c) 2015
 *	Jonathan Looney. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
#include <machine/atomic.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_pcap.h>
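
/*
 * The number of bytes of unused space between the start of the mbuf's
 * storage and its current data pointer. Unlike M_LEADINGSPACE(), this
 * does not check whether the mbuf is writable, so it must not be used
 * to decide whether that space may be written.
 */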
#define M_LEADINGSPACE_NOWRITE(m)	\
	((m)->m_data - M_START(m))

int tcp_pcap_aggressive_free = 1;
static int tcp_pcap_clusters_referenced_cur = 0;
static int tcp_pcap_clusters_referenced_max = 0;

SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_aggressive_free,
	CTLFLAG_RW, &tcp_pcap_aggressive_free, 0,
	"Free saved packets when the memory system comes under pressure");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur,
	CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0,
	"Number of clusters currently referenced on TCP PCAP queues");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max,
	CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0,
	"Maximum number of clusters allowed to be referenced on TCP PCAP "
	"queues");
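
/*
 * Statistics on how the mbufs used to save packets were obtained:
 * by detaching and reusing an mbuf with an external cluster, by
 * reusing an mbuf with internal storage, or by allocating a new one.
 */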
static int tcp_pcap_alloc_reuse_ext = 0;
static int tcp_pcap_alloc_reuse_mbuf = 0;
static int tcp_pcap_alloc_new_mbuf = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext,
	CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0,
	"Number of mbufs with external storage reused for the TCP PCAP "
	"functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf,
	CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0,
	"Number of mbufs with internal storage reused for the TCP PCAP "
	"functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf,
	CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0,
	"Number of new mbufs allocated for the TCP PCAP functionality");

VNET_DEFINE(int, tcp_pcap_packets) = 0;
#define V_tcp_pcap_packets	VNET(tcp_pcap_packets)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets,
	CTLFLAG_RW, &VNET_NAME(tcp_pcap_packets), 0,
	"Default number of packets saved per direction per TCPCB");
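
/*
 * For example, an administrator could make new connections save (up to)
 * the 50 most recent packets in each direction with:
 *
 *	sysctl net.inet.tcp.tcp_pcap_packets=50
 *
 * (The value 50 is arbitrary; any non-negative count works.)
 */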

/* Initialize the values. */
static void
tcp_pcap_max_set(void)
{

	tcp_pcap_clusters_referenced_max = nmbclusters / 4;
}

void
tcp_pcap_init(void)
{

	tcp_pcap_max_set();
	EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set,
	    NULL, EVENTHANDLER_PRI_ANY);
}

/*
 * If we are below the maximum allowed cluster references,
 * increment the reference count and return TRUE. Otherwise,
 * leave the reference count alone and return FALSE.
 */
static __inline bool
tcp_pcap_take_cluster_reference(void)
{
	if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >=
	    tcp_pcap_clusters_referenced_max) {
		atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1);
		return FALSE;
	}
	return TRUE;
}

/*
 * For all the external entries in m, apply the given adjustment.
 * This can be used to adjust the counter when an mbuf chain is
 * copied or freed.
 */
static __inline void
tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj)
{
	while (m) {
		if (m->m_flags & M_EXT)
			atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj);

		m = m->m_next;
	}
}

/*
 * Free all mbufs in a chain, decrementing the reference count as
 * necessary.
 *
 * Functions in this file should use this instead of m_freem() when
 * they are freeing mbuf chains that may contain clusters that were
 * already included in tcp_pcap_clusters_referenced_cur.
 */
static void
tcp_pcap_m_freem(struct mbuf *mb)
{
	while (mb != NULL) {
		if (mb->m_flags & M_EXT)
			atomic_subtract_int(&tcp_pcap_clusters_referenced_cur,
			    1);
		mb = m_free(mb);
	}
}

/*
 * Copy data from m to n, where n cannot fit all the data we might
 * want from m.
 *
 * Prioritize data like this:
 * 1. TCP header
 * 2. IP header
 * 3. Data
 */
static void
tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n)
{
	struct mbuf *m_cur = m;
	int bytes_to_copy = 0, trailing_data, skip = 0, tcp_off;

	/* Below, we assume these will be non-NULL. */
	KASSERT(th, ("%s: called with th == NULL", __func__));
	KASSERT(m, ("%s: called with m == NULL", __func__));
	KASSERT(n, ("%s: called with n == NULL", __func__));

	/* We assume this initialization occurred elsewhere. */
	KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)",
	    __func__, n->m_len));
	KASSERT(n->m_data == M_START(n),
	    ("%s: called with n->m_data != M_START(n)", __func__));

	/*
	 * Calculate the size of the TCP header. We use this often
	 * enough that it is worth just calculating at the start.
	 */
	tcp_off = th->th_off << 2;

	/* Trim off leading empty mbufs. */
	while (m && m->m_len == 0)
		m = m->m_next;

	if (m) {
		m_cur = m;
	}
	else {
		/*
		 * No data? Highly unusual. We would expect to at
		 * least see a TCP header in the mbuf.
		 * As we have a pointer to the TCP header, I guess
		 * we should just copy that. (???)
		 */
		bytes_to_copy = tcp_off;
		if (bytes_to_copy > M_SIZE(n))
			bytes_to_copy = M_SIZE(n);
		bcopy(th, n->m_data, bytes_to_copy);
		n->m_len = bytes_to_copy;
		return;
	}

	/*
	 * Find TCP header. Record the total number of bytes up to,
	 * and including, the TCP header.
	 */
	while (m_cur) {
		if ((caddr_t) th >= (caddr_t) m_cur->m_data &&
		    (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len))
			break;
		bytes_to_copy += m_cur->m_len;
		m_cur = m_cur->m_next;
	}
	KASSERT(m_cur, ("%s: Couldn't find TCP header (th=%p, m=%p)",
	    __func__, th, m));
	bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data;
	bytes_to_copy += tcp_off;

	/*
	 * If we already want to copy more bytes than we can hold
	 * in the destination mbuf, skip leading bytes and copy
	 * what we can.
	 *
	 * Otherwise, consider trailing data.
	 */
	if (bytes_to_copy > M_SIZE(n)) {
		skip = bytes_to_copy - M_SIZE(n);
		bytes_to_copy = M_SIZE(n);
	}
	else {
		/*
		 * Determine how much trailing data is in the chain.
		 * We start with the length of this mbuf (the one
		 * containing th) and subtract the size of the TCP
		 * header (tcp_off) and the size of the data prior
		 * to th (th - m_cur->m_data).
		 *
		 * This *should not* be negative, as the TCP code
		 * should put the whole TCP header in a single
		 * mbuf. But, it isn't a problem if it is. We will
		 * simply work off our negative balance as we look
		 * at subsequent mbufs.
		 */
		trailing_data = m_cur->m_len - tcp_off;
		trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data;
		m_cur = m_cur->m_next;
		while (m_cur) {
			trailing_data += m_cur->m_len;
			m_cur = m_cur->m_next;
		}
		if ((bytes_to_copy + trailing_data) > M_SIZE(n))
			bytes_to_copy = M_SIZE(n);
		else
			bytes_to_copy += trailing_data;
	}

	m_copydata(m, skip, bytes_to_copy, n->m_data);
	n->m_len = bytes_to_copy;
}
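
/*
 * Save a copy of the packet with TCP header th and mbuf chain m on the
 * given queue. If the queue is already full, recycle the storage from
 * the oldest saved packet rather than allocating fresh mbufs whenever
 * possible.
 */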
void
tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue)
{
	struct mbuf *n = NULL, *mhead;

	KASSERT(th, ("%s: called with th == NULL", __func__));
	KASSERT(m, ("%s: called with m == NULL", __func__));
	KASSERT(queue, ("%s: called with queue == NULL", __func__));

	/* We only care about data packets. */
	while (m && m->m_type != MT_DATA)
		m = m->m_next;

	/* We only need to do something if we still have an mbuf. */
	if (!m)
		return;

	/* If we are not saving mbufs, return now. */
	if (queue->mq_maxlen == 0)
		return;

	/*
	 * Check to see if we will need to recycle mbufs.
	 *
	 * If we need to get rid of mbufs to stay below
	 * our packet count, try to reuse the mbuf. Once
	 * we already have a new mbuf (n), then we can
	 * simply free subsequent mbufs.
	 *
	 * Note that most of the logic in here is to deal
	 * with the reuse. If we are fine with constant
	 * mbuf allocs/deallocs, we could ditch this logic.
	 * But, it only seems to make sense to reuse
	 * mbufs we already have.
	 */
	while (mbufq_full(queue)) {
		mhead = mbufq_dequeue(queue);

		/* We already have an mbuf to reuse; just free this one. */
		if (n) {
			tcp_pcap_m_freem(mhead);
			continue;
		}

		/*
		 * If this held an external cluster, try to
		 * detach the cluster. But, if we held the
		 * last reference, go through the normal
		 * free-ing process.
		 */
		if (mhead->m_flags & M_EXT) {
			switch (mhead->m_ext.ext_type) {
			case EXT_SFBUF:
				/* Don't mess around with these. */
				tcp_pcap_m_freem(mhead);
				continue;
			default:
				if (atomic_fetchadd_int(
				    mhead->m_ext.ext_cnt, -1) == 1)
				{
					/*
					 * We held the last reference
					 * on this cluster. Restore
					 * the reference count and put
					 * it back in the pool.
					 */
					*(mhead->m_ext.ext_cnt) = 1;
					tcp_pcap_m_freem(mhead);
					continue;
				}
				/*
				 * We were able to cleanly free the
				 * reference.
				 */
				atomic_subtract_int(
				    &tcp_pcap_clusters_referenced_cur, 1);
				tcp_pcap_alloc_reuse_ext++;
				break;
			}
		}
		else {
			tcp_pcap_alloc_reuse_mbuf++;
		}

		/* Reuse this mbuf, freeing the rest of its chain. */
		n = mhead;
		tcp_pcap_m_freem(n->m_next);
		m_init(n, M_NOWAIT, MT_DATA, 0);
	}

	/* Check to see if we need to get a new mbuf. */
	if (!n) {
		if (!(n = m_get(M_NOWAIT, MT_DATA)))
			return;
		tcp_pcap_alloc_new_mbuf++;
	}

	/*
	 * What are we dealing with? If a cluster, attach it. Otherwise,
	 * try to copy the data from the beginning of the mbuf to the
	 * end of data. (There may be data between the start of the data
	 * area and the current data pointer. We want to get this, because
	 * it may contain header information that is useful.)
	 * In cases where that isn't possible, settle for what we can
	 * get.
	 */
	if ((m->m_flags & M_EXT) && tcp_pcap_take_cluster_reference()) {
		n->m_data = m->m_data;
		n->m_len = m->m_len;
		mb_dupcl(n, m);
	}
	else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) {
		/*
		 * At this point, n is guaranteed to be a normal mbuf
		 * with no cluster and no packet header. Because the
		 * logic in this code block requires this, the assert
		 * is here to catch any instances where someone
		 * changes the logic to invalidate that assumption.
		 */
		KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0,
		    ("%s: Unexpected flags (%#x) for mbuf",
		    __func__, n->m_flags));
		n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
		n->m_len = m->m_len;
		bcopy(M_START(m), n->m_dat,
		    m->m_len + M_LEADINGSPACE_NOWRITE(m));
	}
	else {
		/*
		 * This is the case where we need to "settle for what
		 * we can get". The most probable way to this code
		 * path is that we've already taken references to the
		 * maximum number of mbuf clusters we can, and the data
		 * is too long to fit in an mbuf's internal storage.
		 * Try for a "best fit".
		 */
		tcp_pcap_copy_bestfit(th, m, n);

		/* Don't try to get additional data. */
		goto add_to_queue;
	}

	/* Copy the rest of the chain, adjusting cluster references. */
	if (m->m_next) {
		n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT);
		tcp_pcap_adj_cluster_reference(n->m_next, 1);
	}

add_to_queue:
	/* Add the new mbuf to the list. */
	if (mbufq_enqueue(queue, n)) {
		/* This shouldn't happen. If INVARIANTS is defined, panic. */
		KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__));
		tcp_pcap_m_freem(n);
	}
}
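
/* Free all packets currently saved on the given queue. */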
void
tcp_pcap_drain(struct mbufq *queue)
{
	struct mbuf *m;

	while ((m = mbufq_dequeue(queue)))
		tcp_pcap_m_freem(m);
}
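
/*
 * Initialize the saved-packet queues on a newly created TCPCB, using
 * the current (per-VNET) default packet count for both directions.
 */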
void
tcp_pcap_tcpcb_init(struct tcpcb *tp)
{
	mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets);
	mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets);
}
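
/*
 * Set the maximum number of packets saved on the given queue,
 * discarding the oldest saved packets if the queue is over the new
 * limit.
 */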
void
tcp_pcap_set_sock_max(struct mbufq *queue, int newval)
{
	queue->mq_maxlen = newval;
	while (queue->mq_len > queue->mq_maxlen)
		tcp_pcap_m_freem(mbufq_dequeue(queue));
}
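
/* Return the maximum number of packets saved on the given queue. */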
int
tcp_pcap_get_sock_max(struct mbufq *queue)
{
	return queue->mq_maxlen;
}
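
/*
 * Illustrative usage (a sketch, not code from this file): a TCP input
 * path that wants to record a just-received segment would call, after
 * locating the TCP header th within mbuf chain m,
 *
 *	tcp_pcap_add(th, m, &(tp->t_inpkts));
 *
 * and the output path would do the same with &(tp->t_outpkts).
 */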