3 * Copyright (c) 2008 Michael J. Silbersack.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
/*
 * IP ID generation is a fascinating topic.
 *
 * In order to avoid ID collisions during packet reassembly, common sense
 * dictates that the period between reuse of IDs be as large as possible.
 * This leads to the classic implementation of a system-wide counter, thereby
 * ensuring that IDs repeat only once every 2^16 packets.
 *
 * Subsequent security researchers have pointed out that using a global
 * counter makes ID values predictable.  This predictability allows traffic
 * analysis, idle scanning, and even packet injection in specific cases.
 * These results suggest that IP IDs should be as random as possible.
 *
 * The "searchable queues" algorithm used in this IP ID implementation was
 * proposed by Amit Klein.  It is a compromise between the above two
 * viewpoints that has provable behavior that can be tuned to the user's
 * requirements.
 *
 * The basic concept is that we supplement a standard random number generator
 * with a queue of the last L IDs that we have handed out to ensure that all
 * IDs have a period of at least L.
 *
 * To efficiently implement this idea, we keep two data structures: a
 * circular array of IDs of size L and a bitstring of 65536 bits.
 *
 * To start, we ask the RNG for a new ID.  A quick index into the bitstring
 * is used to determine if this is a recently used value.  The process is
 * repeated until a value is returned that is not in the bitstring.
 *
 * Having found a usable ID, we remove the ID stored at the current position
 * in the queue from the bitstring and replace it with our new ID.  Our new
 * ID is then added to the bitstring and the queue pointer is incremented.
 *
 * The lower limit of 512 was chosen because there doesn't seem to be much
 * point to having a smaller value.  The upper limit of 32768 was chosen for
 * two reasons.  First, every step above 32768 decreases the entropy.  Taken
 * to an extreme, 65533 would offer 1 bit of entropy.  Second, the number of
 * attempts it takes the algorithm to find an unused ID drastically
 * increases, killing performance.  The default value of 8192 was chosen
 * because it provides a good tradeoff between randomness and non-repetition.
 *
 * With L=8192, the queue will use 16K of memory.  The bitstring always
 * uses 8K of memory.  No memory is allocated until the use of random ids is
 * enabled.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/random.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/bitstring.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
96 * By default we generate IP ID only for non-atomic datagrams, as
97 * suggested by RFC6864. We use per-CPU counter for that, or if
98 * user wants to, we can turn on random ID generation.
100 static VNET_DEFINE(int, ip_rfc6864
) = 1;
101 static VNET_DEFINE(int, ip_do_randomid
) = 0;
102 #define V_ip_rfc6864 VNET(ip_rfc6864)
103 #define V_ip_do_randomid VNET(ip_do_randomid)
106 * Random ID state engine.
108 static MALLOC_DEFINE(M_IPID
, "ipid", "randomized ip id state");
109 static VNET_DEFINE(uint16_t *, id_array
);
110 static VNET_DEFINE(bitstr_t
*, id_bits
);
111 static VNET_DEFINE(int, array_ptr
);
112 static VNET_DEFINE(int, array_size
);
113 static VNET_DEFINE(int, random_id_collisions
);
114 static VNET_DEFINE(int, random_id_total
);
115 static VNET_DEFINE(struct mtx
, ip_id_mtx
);
116 #define V_id_array VNET(id_array)
117 #define V_id_bits VNET(id_bits)
118 #define V_array_ptr VNET(array_ptr)
119 #define V_array_size VNET(array_size)
120 #define V_random_id_collisions VNET(random_id_collisions)
121 #define V_random_id_total VNET(random_id_total)
122 #define V_ip_id_mtx VNET(ip_id_mtx)
/*
 * The non-random ID state engine needs nothing more than a per-CPU
 * counter.
 */
static VNET_DEFINE(counter_u64_t, ip_id);
#define	V_ip_id			VNET(ip_id)
/* Sysctl handlers. */
static int	sysctl_ip_randomid(SYSCTL_HANDLER_ARGS);
static int	sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);

/* Random ID engine. */
static void	ip_initid(int);
static uint16_t	ip_randomid(void);

/* Per-vnet setup and teardown. */
static void	ipid_sysinit(void);
static void	ipid_sysuninit(void);
137 SYSCTL_DECL(_net_inet_ip
);
138 SYSCTL_PROC(_net_inet_ip
, OID_AUTO
, random_id
,
139 CTLTYPE_INT
| CTLFLAG_VNET
| CTLFLAG_RW
,
140 &VNET_NAME(ip_do_randomid
), 0, sysctl_ip_randomid
, "IU",
141 "Assign random ip_id values");
142 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, rfc6864
, CTLFLAG_VNET
| CTLFLAG_RW
,
143 &VNET_NAME(ip_rfc6864
), 0,
144 "Use constant IP ID for atomic datagrams");
145 SYSCTL_PROC(_net_inet_ip
, OID_AUTO
, random_id_period
,
146 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_VNET
,
147 &VNET_NAME(array_size
), 0, sysctl_ip_id_change
, "IU", "IP ID Array size");
148 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, random_id_collisions
,
149 CTLFLAG_RD
| CTLFLAG_VNET
,
150 &VNET_NAME(random_id_collisions
), 0, "Count of IP ID collisions");
151 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, random_id_total
, CTLFLAG_RD
| CTLFLAG_VNET
,
152 &VNET_NAME(random_id_total
), 0, "Count of IP IDs created");
155 sysctl_ip_randomid(SYSCTL_HANDLER_ARGS
)
159 new = V_ip_do_randomid
;
160 error
= sysctl_handle_int(oidp
, &new, 0, req
);
161 if (error
|| req
->newptr
== NULL
)
163 if (new != 0 && new != 1)
165 if (new == V_ip_do_randomid
)
167 if (new == 1 && V_ip_do_randomid
== 0)
169 /* We don't free memory when turning random ID off, due to race. */
170 V_ip_do_randomid
= new;
175 sysctl_ip_id_change(SYSCTL_HANDLER_ARGS
)
180 error
= sysctl_handle_int(oidp
, &new, 0, req
);
181 if (error
== 0 && req
->newptr
) {
182 if (new >= 512 && new <= 32768)
191 ip_initid(int new_size
)
196 new_array
= malloc(new_size
* sizeof(uint16_t), M_IPID
,
198 new_bits
= malloc(bitstr_size(65536), M_IPID
, M_WAITOK
| M_ZERO
);
200 mtx_lock(&V_ip_id_mtx
);
201 if (V_id_array
!= NULL
) {
202 free(V_id_array
, M_IPID
);
203 free(V_id_bits
, M_IPID
);
205 V_id_array
= new_array
;
206 V_id_bits
= new_bits
;
207 V_array_size
= new_size
;
209 V_random_id_collisions
= 0;
210 V_random_id_total
= 0;
211 mtx_unlock(&V_ip_id_mtx
);
219 mtx_lock(&V_ip_id_mtx
);
221 * To avoid a conflict with the zeros that the array is initially
222 * filled with, we never hand out an id of zero.
227 V_random_id_collisions
++;
228 arc4rand(&new_id
, sizeof(new_id
), 0);
229 } while (bit_test(V_id_bits
, new_id
) || new_id
== 0);
230 bit_clear(V_id_bits
, V_id_array
[V_array_ptr
]);
231 bit_set(V_id_bits
, new_id
);
232 V_id_array
[V_array_ptr
] = new_id
;
234 if (V_array_ptr
== V_array_size
)
237 mtx_unlock(&V_ip_id_mtx
);
242 ip_fillid(struct ip
*ip
)
246 * Per RFC6864 Section 4
248 * o Atomic datagrams: (DF==1) && (MF==0) && (frag_offset==0)
249 * o Non-atomic datagrams: (DF==0) || (MF==1) || (frag_offset>0)
251 if (V_ip_rfc6864
&& (ip
->ip_off
& htons(IP_DF
)) == htons(IP_DF
))
253 else if (V_ip_do_randomid
)
254 ip
->ip_id
= ip_randomid();
256 counter_u64_add(V_ip_id
, 1);
258 * There are two issues about this trick, to be kept in mind.
259 * 1) We can migrate between counter_u64_add() and next
260 * line, and grab counter from other CPU, resulting in too
261 * quick ID reuse. This is tolerable in our particular case,
262 * since probability of such event is much lower then reuse
263 * of ID due to legitimate overflow, that at modern Internet
264 * speeds happens all the time.
265 * 2) We are relying on the fact that counter(9) is based on
266 * UMA_ZONE_PCPU uma(9) zone. We also take only last
267 * sixteen bits of a counter, so we don't care about the
268 * fact that machines with 32-bit word update their counters
271 ip
->ip_id
= htons((*(uint64_t *)zpcpu_get(V_ip_id
)) & 0xffff);
280 mtx_init(&V_ip_id_mtx
, "ip_id_mtx", NULL
, MTX_DEF
);
281 V_ip_id
= counter_u64_alloc(M_WAITOK
);
284 arc4rand(zpcpu_get_cpu(V_ip_id
, i
), sizeof(uint64_t), 0);
286 VNET_SYSINIT(ip_id
, SI_SUB_PROTO_DOMAIN
, SI_ORDER_ANY
, ipid_sysinit
, NULL
);
292 if (V_id_array
!= NULL
) {
293 free(V_id_array
, M_IPID
);
294 free(V_id_bits
, M_IPID
);
296 counter_u64_free(V_ip_id
);
297 mtx_destroy(&V_ip_id_mtx
);
299 VNET_SYSUNINIT(ip_id
, SI_SUB_PROTO_DOMAIN
, SI_ORDER_THIRD
, ipid_sysuninit
, NULL
);