/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/vmalloc.h>
19 struct hlist_head
*buckets
;
21 u32 count
; /* number of elements in this hashtable */
22 u32 n_buckets
; /* number of hash buckets */
23 u32 elem_size
; /* size of each element in bytes */
26 /* each htab element is struct htab_elem + key + value */
28 struct hlist_node hash_node
;
31 char key
[0] __aligned(8);
34 /* Called from syscall */
35 static struct bpf_map
*htab_map_alloc(union bpf_attr
*attr
)
37 struct bpf_htab
*htab
;
40 htab
= kzalloc(sizeof(*htab
), GFP_USER
);
42 return ERR_PTR(-ENOMEM
);
44 /* mandatory map attributes */
45 htab
->map
.key_size
= attr
->key_size
;
46 htab
->map
.value_size
= attr
->value_size
;
47 htab
->map
.max_entries
= attr
->max_entries
;
49 /* check sanity of attributes.
50 * value_size == 0 may be allowed in the future to use map as a set
53 if (htab
->map
.max_entries
== 0 || htab
->map
.key_size
== 0 ||
54 htab
->map
.value_size
== 0)
57 /* hash table size must be power of 2 */
58 htab
->n_buckets
= roundup_pow_of_two(htab
->map
.max_entries
);
61 if (htab
->map
.key_size
> MAX_BPF_STACK
)
62 /* eBPF programs initialize keys on stack, so they cannot be
63 * larger than max stack size
68 /* prevent zero size kmalloc and check for u32 overflow */
69 if (htab
->n_buckets
== 0 ||
70 htab
->n_buckets
> U32_MAX
/ sizeof(struct hlist_head
))
73 htab
->buckets
= kmalloc_array(htab
->n_buckets
, sizeof(struct hlist_head
),
74 GFP_USER
| __GFP_NOWARN
);
77 htab
->buckets
= vmalloc(htab
->n_buckets
* sizeof(struct hlist_head
));
82 for (i
= 0; i
< htab
->n_buckets
; i
++)
83 INIT_HLIST_HEAD(&htab
->buckets
[i
]);
85 raw_spin_lock_init(&htab
->lock
);
88 htab
->elem_size
= sizeof(struct htab_elem
) +
89 round_up(htab
->map
.key_size
, 8) +
92 htab
->map
.pages
= round_up(htab
->n_buckets
* sizeof(struct hlist_head
) +
93 htab
->elem_size
* htab
->map
.max_entries
,
94 PAGE_SIZE
) >> PAGE_SHIFT
;
102 static inline u32
htab_map_hash(const void *key
, u32 key_len
)
104 return jhash(key
, key_len
, 0);
107 static inline struct hlist_head
*select_bucket(struct bpf_htab
*htab
, u32 hash
)
109 return &htab
->buckets
[hash
& (htab
->n_buckets
- 1)];
112 static struct htab_elem
*lookup_elem_raw(struct hlist_head
*head
, u32 hash
,
113 void *key
, u32 key_size
)
117 hlist_for_each_entry_rcu(l
, head
, hash_node
)
118 if (l
->hash
== hash
&& !memcmp(&l
->key
, key
, key_size
))
124 /* Called from syscall or from eBPF program */
125 static void *htab_map_lookup_elem(struct bpf_map
*map
, void *key
)
127 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
128 struct hlist_head
*head
;
132 /* Must be called with rcu_read_lock. */
133 WARN_ON_ONCE(!rcu_read_lock_held());
135 key_size
= map
->key_size
;
137 hash
= htab_map_hash(key
, key_size
);
139 head
= select_bucket(htab
, hash
);
141 l
= lookup_elem_raw(head
, hash
, key
, key_size
);
144 return l
->key
+ round_up(map
->key_size
, 8);
149 /* Called from syscall */
150 static int htab_map_get_next_key(struct bpf_map
*map
, void *key
, void *next_key
)
152 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
153 struct hlist_head
*head
;
154 struct htab_elem
*l
, *next_l
;
158 WARN_ON_ONCE(!rcu_read_lock_held());
160 key_size
= map
->key_size
;
162 hash
= htab_map_hash(key
, key_size
);
164 head
= select_bucket(htab
, hash
);
167 l
= lookup_elem_raw(head
, hash
, key
, key_size
);
171 goto find_first_elem
;
174 /* key was found, get next key in the same bucket */
175 next_l
= hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l
->hash_node
)),
176 struct htab_elem
, hash_node
);
179 /* if next elem in this hash list is non-zero, just return it */
180 memcpy(next_key
, next_l
->key
, key_size
);
184 /* no more elements in this hash list, go to the next bucket */
185 i
= hash
& (htab
->n_buckets
- 1);
189 /* iterate over buckets */
190 for (; i
< htab
->n_buckets
; i
++) {
191 head
= select_bucket(htab
, i
);
193 /* pick first element in the bucket */
194 next_l
= hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head
)),
195 struct htab_elem
, hash_node
);
197 /* if it's not empty, just return it */
198 memcpy(next_key
, next_l
->key
, key_size
);
203 /* itereated over all buckets and all elements */
207 /* Called from syscall or from eBPF program */
208 static int htab_map_update_elem(struct bpf_map
*map
, void *key
, void *value
,
211 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
212 struct htab_elem
*l_new
, *l_old
;
213 struct hlist_head
*head
;
218 if (map_flags
> BPF_EXIST
)
222 WARN_ON_ONCE(!rcu_read_lock_held());
224 /* allocate new element outside of lock */
225 l_new
= kmalloc(htab
->elem_size
, GFP_ATOMIC
);
229 key_size
= map
->key_size
;
231 memcpy(l_new
->key
, key
, key_size
);
232 memcpy(l_new
->key
+ round_up(key_size
, 8), value
, map
->value_size
);
234 l_new
->hash
= htab_map_hash(l_new
->key
, key_size
);
236 /* bpf_map_update_elem() can be called in_irq() */
237 raw_spin_lock_irqsave(&htab
->lock
, flags
);
239 head
= select_bucket(htab
, l_new
->hash
);
241 l_old
= lookup_elem_raw(head
, l_new
->hash
, key
, key_size
);
243 if (!l_old
&& unlikely(htab
->count
>= map
->max_entries
)) {
244 /* if elem with this 'key' doesn't exist and we've reached
245 * max_entries limit, fail insertion of new elem
251 if (l_old
&& map_flags
== BPF_NOEXIST
) {
252 /* elem already exists */
257 if (!l_old
&& map_flags
== BPF_EXIST
) {
258 /* elem doesn't exist, cannot update it */
263 /* add new element to the head of the list, so that concurrent
264 * search will find it before old elem
266 hlist_add_head_rcu(&l_new
->hash_node
, head
);
268 hlist_del_rcu(&l_old
->hash_node
);
269 kfree_rcu(l_old
, rcu
);
273 raw_spin_unlock_irqrestore(&htab
->lock
, flags
);
277 raw_spin_unlock_irqrestore(&htab
->lock
, flags
);
282 /* Called from syscall or from eBPF program */
283 static int htab_map_delete_elem(struct bpf_map
*map
, void *key
)
285 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
286 struct hlist_head
*head
;
292 WARN_ON_ONCE(!rcu_read_lock_held());
294 key_size
= map
->key_size
;
296 hash
= htab_map_hash(key
, key_size
);
298 raw_spin_lock_irqsave(&htab
->lock
, flags
);
300 head
= select_bucket(htab
, hash
);
302 l
= lookup_elem_raw(head
, hash
, key
, key_size
);
305 hlist_del_rcu(&l
->hash_node
);
311 raw_spin_unlock_irqrestore(&htab
->lock
, flags
);
315 static void delete_all_elements(struct bpf_htab
*htab
)
319 for (i
= 0; i
< htab
->n_buckets
; i
++) {
320 struct hlist_head
*head
= select_bucket(htab
, i
);
321 struct hlist_node
*n
;
324 hlist_for_each_entry_safe(l
, n
, head
, hash_node
) {
325 hlist_del_rcu(&l
->hash_node
);
332 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
333 static void htab_map_free(struct bpf_map
*map
)
335 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
337 /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
338 * so the programs (can be more than one that used this map) were
339 * disconnected from events. Wait for outstanding critical sections in
340 * these programs to complete
344 /* some of kfree_rcu() callbacks for elements of this map may not have
345 * executed. It's ok. Proceed to free residual elements and map itself
347 delete_all_elements(htab
);
348 kvfree(htab
->buckets
);
352 static const struct bpf_map_ops htab_ops
= {
353 .map_alloc
= htab_map_alloc
,
354 .map_free
= htab_map_free
,
355 .map_get_next_key
= htab_map_get_next_key
,
356 .map_lookup_elem
= htab_map_lookup_elem
,
357 .map_update_elem
= htab_map_update_elem
,
358 .map_delete_elem
= htab_map_delete_elem
,
361 static struct bpf_map_type_list htab_type __read_mostly
= {
363 .type
= BPF_MAP_TYPE_HASH
,
366 static int __init
register_htab_map(void)
368 bpf_register_map_type(&htab_type
);
371 late_initcall(register_htab_map
);