mmap: resize arrays dynamically
[systemd_ALT/systemd_imz.git] / src / journal / mmap-cache.c
blob69efb20adea08f95e39098da596eb199b4e25fa8
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
3 /***
4 This file is part of systemd.
6 Copyright 2012 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
22 #include <assert.h>
23 #include <sys/mman.h>
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <string.h>
28 #include "util.h"
30 #include "mmap-cache.h"
32 #define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
34 #define DEFAULT_WINDOWS_MAX 64
35 #define DEFAULT_FDS_MAX 32
36 #define DEFAULT_CONTEXTS_MAX 32
38 typedef struct Window {
39 int fd;
40 void *ptr;
41 uint64_t offset;
42 uint64_t size;
44 unsigned n_ref;
45 unsigned lru_prev;
46 unsigned lru_next;
48 unsigned by_fd_prev;
49 unsigned by_fd_next;
50 } Window;
52 typedef struct FileDescriptor {
53 int fd;
54 unsigned windows;
55 } FileDescriptor;
57 struct MMapCache {
58 unsigned n_ref;
60 unsigned contexts_max;
61 unsigned windows_max;
62 unsigned fds_max;
64 unsigned n_windows;
65 unsigned n_fds;
67 unsigned lru_first, lru_last;
69 Window *windows;
70 unsigned *by_context;
71 FileDescriptor *by_fd;
74 static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index);
76 static void mmap_cache_window_unmap(MMapCache *m, unsigned w) {
77 Window *v;
79 assert(m);
80 assert(w < m->n_windows);
82 v = m->windows + w;
83 if (!v->ptr)
84 return;
86 munmap(v->ptr, v->size);
87 v->ptr = NULL;
90 static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) {
91 Window *v;
93 assert(m);
94 assert(w < m->n_windows);
96 v = m->windows + w;
97 assert(v->n_ref == 0);
99 if (m->lru_last != (unsigned) -1) {
100 assert(m->windows[m->lru_last].lru_next == (unsigned) -1);
101 m->windows[m->lru_last].lru_next = w;
104 v->lru_prev = m->lru_last;
105 v->lru_next = (unsigned) -1;
107 m->lru_last = w;
108 if (m->lru_first == (unsigned) -1)
109 m->lru_first = w;
112 static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) {
113 Window *v;
115 assert(m);
116 assert(w < m->n_windows);
118 v = m->windows + w;
120 if (v->lru_prev == (unsigned) -1) {
121 assert(m->lru_first == w);
122 m->lru_first = v->lru_next;
123 } else {
124 assert(m->windows[v->lru_prev].lru_next == w);
125 m->windows[v->lru_prev].lru_next = v->lru_next;
128 if (v->lru_next == (unsigned) -1) {
129 assert(m->lru_last == w);
130 m->lru_last = v->lru_prev;
131 } else {
132 assert(m->windows[v->lru_next].lru_prev == w);
133 m->windows[v->lru_next].lru_prev = v->lru_prev;
137 static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
138 Window *v;
140 assert(m);
141 assert(fd_index < m->n_fds);
143 v = m->windows + w;
144 assert(m->by_fd[fd_index].fd == v->fd);
146 if (m->by_fd[fd_index].windows != (unsigned) -1) {
147 assert(m->windows[m->by_fd[fd_index].windows].by_fd_prev == (unsigned) -1);
148 m->windows[m->by_fd[fd_index].windows].by_fd_prev = w;
151 v->by_fd_next = m->by_fd[fd_index].windows;
152 v->by_fd_prev = (unsigned) -1;
154 m->by_fd[fd_index].windows = w;
157 static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) {
158 Window *v;
160 assert(m);
161 assert(fd_index < m->n_fds);
163 v = m->windows + w;
164 assert(m->by_fd[fd_index].fd == v->fd);
165 assert(v->by_fd_next == (unsigned) -1 || m->windows[v->by_fd_next].fd == v->fd);
166 assert(v->by_fd_prev == (unsigned) -1 || m->windows[v->by_fd_prev].fd == v->fd);
168 if (v->by_fd_prev == (unsigned) -1) {
169 assert(m->by_fd[fd_index].windows == w);
170 m->by_fd[fd_index].windows = v->by_fd_next;
171 } else {
172 assert(m->windows[v->by_fd_prev].by_fd_next == w);
173 m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next;
176 if (v->by_fd_next != (unsigned) -1) {
177 assert(m->windows[v->by_fd_next].by_fd_prev == w);
178 m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev;
182 static void mmap_cache_context_unset(MMapCache *m, unsigned c) {
183 Window *v;
184 unsigned w;
186 assert(m);
187 assert(c < m->contexts_max);
189 if (m->by_context[c] == (unsigned) -1)
190 return;
192 w = m->by_context[c];
193 m->by_context[c] = (unsigned) -1;
195 v = m->windows + w;
196 assert(v->n_ref > 0);
197 v->n_ref --;
199 if (v->n_ref == 0)
200 mmap_cache_window_add_lru(m, w);
203 static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) {
204 Window *v;
206 assert(m);
207 assert(c < m->contexts_max);
208 assert(w < m->n_windows);
210 if (m->by_context[c] == w)
211 return;
213 mmap_cache_context_unset(m, c);
215 m->by_context[c] = w;
217 v = m->windows + w;
218 v->n_ref ++;
220 if (v->n_ref == 1)
221 mmap_cache_window_remove_lru(m, w);
224 static void mmap_cache_free(MMapCache *m) {
226 assert(m);
228 if (m->windows) {
229 unsigned w;
231 for (w = 0; w < m->n_windows; w++)
232 mmap_cache_window_unmap(m, w);
234 free(m->windows);
237 free(m->by_context);
238 free(m->by_fd);
239 free(m);
242 MMapCache* mmap_cache_new(void) {
243 MMapCache *m;
245 m = new0(MMapCache, 1);
246 if (!m)
247 return NULL;
249 m->contexts_max = DEFAULT_CONTEXTS_MAX;
250 m->fds_max = DEFAULT_FDS_MAX;
251 m->windows_max = DEFAULT_WINDOWS_MAX;
252 m->n_ref = 1;
253 m->lru_first = (unsigned) -1;
254 m->lru_last = (unsigned) -1;
256 m->windows = new(Window, m->windows_max);
257 if (!m->windows) {
258 mmap_cache_free(m);
259 return NULL;
262 m->by_context = new(unsigned, m->contexts_max);
263 if (!m->by_context) {
264 mmap_cache_free(m);
265 return NULL;
267 memset(m->by_context, -1, m->contexts_max * sizeof(unsigned));
269 m->by_fd = new(FileDescriptor, m->fds_max);
270 if (!m->by_fd) {
271 mmap_cache_free(m);
272 return NULL;
275 return m;
278 MMapCache* mmap_cache_ref(MMapCache *m) {
279 assert(m);
280 assert(m->n_ref > 0);
282 m->n_ref++;
283 return m;
286 MMapCache* mmap_cache_unref(MMapCache *m) {
287 assert(m);
288 assert(m->n_ref > 0);
290 if (m->n_ref == 1)
291 mmap_cache_free(m);
292 else
293 m->n_ref--;
295 return NULL;
298 static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
299 Window *v;
300 unsigned fd_index;
302 assert(m);
303 assert(w);
305 if (m->n_windows < m->windows_max) {
306 *w = m->n_windows ++;
307 return 0;
310 if (m->lru_first == (unsigned) -1)
311 return -E2BIG;
313 *w = m->lru_first;
314 v = m->windows + *w;
315 assert(v->n_ref == 0);
317 mmap_cache_window_unmap(m, *w);
319 if (v->fd >= 0) {
320 assert_se(mmap_cache_peek_fd_index(m, v->fd, &fd_index) > 0);
321 mmap_cache_fd_remove(m, fd_index, *w);
324 mmap_cache_window_remove_lru(m, *w);
326 return 0;
329 static int mmap_cache_make_room(MMapCache *m) {
330 unsigned w;
332 assert(m);
334 w = m->lru_first;
335 while (w != (unsigned) -1) {
336 Window *v;
338 v = m->windows + w;
340 if (v->ptr) {
341 mmap_cache_window_unmap(m, w);
342 return 1;
345 w = v->lru_next;
348 return 0;
351 static int mmap_cache_put(
352 MMapCache *m,
353 int fd,
354 unsigned fd_index,
355 int prot,
356 unsigned context,
357 uint64_t offset,
358 uint64_t size,
359 void **ret) {
361 unsigned w;
362 Window *v;
363 void *d;
364 uint64_t woffset, wsize;
365 int r;
367 assert(m);
368 assert(fd >= 0);
369 assert(context < m->contexts_max);
370 assert(size > 0);
371 assert(ret);
373 woffset = offset & ~((uint64_t) page_size() - 1ULL);
374 wsize = size + (offset - woffset);
375 wsize = PAGE_ALIGN(wsize);
377 if (wsize < WINDOW_SIZE) {
378 uint64_t delta;
380 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
382 if (delta > offset)
383 woffset = 0;
384 else
385 woffset -= delta;
387 wsize = WINDOW_SIZE;
390 for (;;) {
391 d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
392 if (d != MAP_FAILED)
393 break;
394 if (errno != ENOMEM)
395 return -errno;
397 r = mmap_cache_make_room(m);
398 if (r < 0)
399 return r;
400 if (r == 0)
401 return -ENOMEM;
404 r = mmap_cache_allocate_window(m, &w);
405 if (r < 0) {
406 munmap(d, wsize);
407 return r;
410 v = m->windows + w;
411 v->fd = fd;
412 v->ptr = d;
413 v->offset = woffset;
414 v->size = wsize;
416 v->n_ref = 0;
417 mmap_cache_window_add_lru(m, w);
418 mmap_cache_fd_add(m, fd_index, w);
419 mmap_cache_context_set(m, context, w);
421 *ret = (uint8_t*) d + (offset - woffset);
422 return 1;
425 static int fd_cmp(const void *_a, const void *_b) {
426 const FileDescriptor *a = _a, *b = _b;
428 if (a->fd < b->fd)
429 return -1;
430 if (a->fd > b->fd)
431 return 1;
433 return 0;
436 static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
437 FileDescriptor *j;
438 unsigned r;
440 assert(m);
441 assert(fd >= 0);
442 assert(fd_index);
444 for (r = 0; r < m->n_fds; r++)
445 assert(m->by_fd[r].windows == (unsigned) -1 ||
446 m->windows[m->by_fd[r].windows].fd == m->by_fd[r].fd);
448 j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
449 if (!j)
450 return 0;
452 *fd_index = (unsigned) (j - m->by_fd);
453 return 1;
456 static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
457 FileDescriptor *j;
458 int r;
460 assert(m);
461 assert(fd >= 0);
462 assert(fd_index);
464 r = mmap_cache_peek_fd_index(m, fd, fd_index);
465 if (r != 0)
466 return r;
468 if (m->n_fds >= m->fds_max) {
469 unsigned k;
470 FileDescriptor *n;
472 k = m->n_fds * 2;
473 n = realloc(m->by_fd, sizeof(FileDescriptor) * k);
474 if (!n)
475 return -ENOMEM;
477 m->fds_max = k;
478 m->by_fd = n;
481 j = m->by_fd + m->n_fds ++;
482 j->fd = fd;
483 j->windows = (unsigned) -1;
485 qsort(m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
487 return mmap_cache_peek_fd_index(m, fd, fd_index);
490 static bool mmap_cache_test_window(
491 MMapCache *m,
492 unsigned w,
493 uint64_t offset,
494 uint64_t size) {
495 Window *v;
497 assert(m);
498 assert(w < m->n_windows);
499 assert(size > 0);
501 v = m->windows + w;
503 return offset >= v->offset &&
504 offset + size <= v->offset + v->size;
507 static int mmap_cache_current(
508 MMapCache *m,
509 int fd,
510 unsigned context,
511 uint64_t offset,
512 uint64_t size,
513 void **ret) {
515 Window *v;
516 unsigned w;
518 assert(m);
519 assert(fd >= 0);
520 assert(context < m->contexts_max);
521 assert(size > 0);
522 assert(ret);
524 if (m->by_context[context] == (unsigned) -1)
525 return 0;
527 w = m->by_context[context];
528 v = m->windows + w;
530 if (v->fd != fd)
531 return 0;
533 if (!mmap_cache_test_window(m, w, offset, size))
534 return 0;
536 *ret = (uint8_t*) v->ptr + (offset - v->offset);
537 return 1;
540 static int mmap_cache_find(
541 MMapCache *m,
542 int fd,
543 unsigned fd_index,
544 unsigned context,
545 uint64_t offset,
546 uint64_t size,
547 void **ret) {
549 Window *v = NULL;
550 unsigned w;
552 assert(m);
553 assert(fd >= 0);
554 assert(fd_index < m->n_fds);
555 assert(context < m->contexts_max);
556 assert(size > 0);
557 assert(ret);
559 w = m->by_fd[fd_index].windows;
560 while (w != (unsigned) -1) {
561 v = m->windows + w;
562 assert(v->fd == fd);
564 if (mmap_cache_test_window(m, w, offset, size))
565 break;
567 w = v->by_fd_next;
570 if (w == (unsigned) -1)
571 return 0;
573 mmap_cache_context_set(m, context, w);
575 *ret = (uint8_t*) v->ptr + (offset - v->offset);
576 return 1;
579 int mmap_cache_get(
580 MMapCache *m,
581 int fd,
582 int prot,
583 unsigned context,
584 uint64_t offset,
585 uint64_t size,
586 void **ret) {
588 unsigned fd_index;
589 int r;
591 assert(m);
592 assert(fd >= 0);
593 assert(size > 0);
594 assert(ret);
596 if (context >= m->contexts_max) {
597 unsigned k, *n;
598 Window *w;
600 /* Increase the number of contexts if necessary, and
601 * make sure we have twice the number of windows */
603 k = context * 2;
604 n = realloc(m->by_context, sizeof(unsigned) * k);
605 if (!n)
606 return -ENOMEM;
607 memset(n + m->contexts_max, -1, (k - m->contexts_max) * sizeof(unsigned));
608 m->contexts_max = k;
609 m->by_context = n;
611 k = MAX(m->windows_max, m->contexts_max*2);
612 w = realloc(m->windows, sizeof(Window) * k);
613 if (!w)
614 return -ENOMEM;
616 m->windows_max = k;
617 m->windows = w;
620 /* Maybe the current pointer for this context is already the
621 * right one? */
622 r = mmap_cache_current(m, fd, context, offset, size, ret);
623 if (r != 0)
624 return r;
626 /* Hmm, drop the reference to the current one, since it wasn't
627 * good enough */
628 mmap_cache_context_unset(m, context);
630 /* OK, let's find the chain for this FD */
631 r = mmap_cache_get_fd_index(m, fd, &fd_index);
632 if (r < 0)
633 return r;
635 /* And let's look through the available mmaps */
636 r = mmap_cache_find(m, fd, fd_index, context, offset, size, ret);
637 if (r != 0)
638 return r;
640 /* Not found? Then, let's add it */
641 return mmap_cache_put(m, fd, fd_index, prot, context, offset, size, ret);
644 void mmap_cache_close_fd(MMapCache *m, int fd) {
645 unsigned fd_index, c, w;
646 int r;
648 assert(m);
649 assert(fd > 0);
651 r = mmap_cache_peek_fd_index(m, fd, &fd_index);
652 if (r <= 0)
653 return;
655 for (c = 0; c < m->contexts_max; c++) {
656 w = m->by_context[c];
657 if (w == (unsigned) -1)
658 continue;
660 if (m->windows[w].fd == fd)
661 mmap_cache_context_unset(m, c);
664 w = m->by_fd[fd_index].windows;
665 while (w != (unsigned) -1) {
666 Window *v;
668 v = m->windows + w;
669 assert(v->fd == fd);
671 mmap_cache_window_unmap(m, w);
672 mmap_cache_fd_remove(m, fd_index, w);
673 v->fd = -1;
675 w = m->by_fd[fd_index].windows;
678 memmove(m->by_fd + fd_index, m->by_fd + fd_index + 1, (m->n_fds - (fd_index + 1)) * sizeof(FileDescriptor));
679 m->n_fds --;
682 void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t p) {
683 unsigned fd_index, c, w;
684 int r;
686 assert(m);
687 assert(fd > 0);
689 /* This drops all windows that include space right of the
690 * specified offset. This is useful to ensure that after the
691 * file size is extended we drop our mappings of the end and
692 * create it anew, since otherwise it is undefined whether
693 * mapping will continue to work as intended. */
695 r = mmap_cache_peek_fd_index(m, fd, &fd_index);
696 if (r <= 0)
697 return;
699 for (c = 0; c < m->contexts_max; c++) {
700 w = m->by_context[c];
702 if (w != (unsigned) -1 && m->windows[w].fd == fd)
703 mmap_cache_context_unset(m, c);
706 w = m->by_fd[fd_index].windows;
707 while (w != (unsigned) -1) {
708 Window *v;
710 v = m->windows + w;
711 assert(v->fd == fd);
712 assert(v->by_fd_next == (unsigned) -1 ||
713 m->windows[v->by_fd_next].fd == fd);
715 if (v->offset + v->size > p) {
717 mmap_cache_window_unmap(m, w);
718 mmap_cache_fd_remove(m, fd_index, w);
719 v->fd = -1;
721 w = m->by_fd[fd_index].windows;
722 } else
723 w = v->by_fd_next;
727 void mmap_cache_close_context(MMapCache *m, unsigned context) {
728 mmap_cache_context_unset(m, context);