2 * Copyright (C) 2012-2020 all contributors <cmogstored-public@yhbt.net>
3 * License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
6 * Uses the mountlist library in gnulib to map system device IDs and
7 * system device names to mount entries.
9 #include "cmogstored.h"
/*
 * NOTE(review): this chunk is extraction-damaged — original line numbers are
 * fused into the text and several source lines are missing (numbering gaps).
 */
/* presumably a member of a startup-synchronization struct (used as
 * ia->cond_lock with a companion "cond" below) — the enclosing struct
 * declaration is not visible in this chunk; TODO confirm */
12 pthread_mutex_t cond_lock
;
/* guards every read and write of the by_dev hash table below */
16 static pthread_mutex_t by_dev_lock
= PTHREAD_MUTEX_INITIALIZER
;
19 * by_dev maps (system) device IDs to a mount_entry; mount_entry structs may
20 * be chained as multiple mount entries may be aliased (e.g. "rootfs" and
21 * "/dev/root") on Linux.
/* device-ID (st_dev) -> mount_entry chain, a gnulib Hash_table */
23 static Hash_table
*by_dev
;
/*
 * Hash_table "freer" callback for by_dev: walks a chain of mount_entry
 * structs sharing one st_dev and frees each of them.
 * NOTE(review): the loop head and the actual free() calls (original lines
 * 26, 29-30, 34-35) are missing from this chunk — only the assertion and
 * the loop tail survive here.
 */
25 static void me_free(void *entry
)
27 struct mount_entry
*next
;
28 struct mount_entry
*me
= entry
;
/* me_type is presumably freed and me_type_malloced cleared during
 * mnt_populate (see original lines 84-88) — confirm against full source */
31 assert(me
->me_type
== NULL
32 && me
->me_type_malloced
== 0
33 && "me_type still malloc-ed in mountlist");
/* advance along the alias chain until every aliased entry is freed */
36 } while ((me
= next
));
39 static size_t me_hash(const void *entry
, size_t tablesize
)
41 const struct mount_entry
*me
= entry
;
43 return me
->me_dev
% tablesize
;
46 static bool me_cmp(const void *a
, const void *b
)
48 const struct mount_entry
*me_a
= a
;
49 const struct mount_entry
*me_b
= b
;
51 return me_a
->me_dev
== me_b
->me_dev
;
/* atexit handler — body is not visible in this chunk (original lines
 * 55-57 missing); presumably frees by_dev at process exit — TODO confirm */
54 static void mnt_atexit(void)
/*
 * Allocates a by_dev-style hash table sized for n entries, wiring in the
 * me_hash/me_cmp/me_free callbacks above.  NOTE(review): the allocation-
 * failure handling and return (original lines 62-66) are missing here.
 */
59 static Hash_table
* mnt_new(size_t n
)
61 Hash_table
*rv
= hash_initialize(n
, NULL
, me_hash
, me_cmp
, me_free
);
68 /* populates a hash table starting with the mount list */
/*
 * Reads the system mount list (gnulib read_file_system_list) and inserts
 * each usable entry into tbl, keyed by st_dev; aliased mounts with the
 * same st_dev are chained onto the existing entry.
 * NOTE(review): many interior lines are missing from this chunk (the
 * skip/continue paths, the free of me_type, the declaration of "exist",
 * the switch-case labels and chain-link statements).
 */
69 static void mnt_populate(Hash_table
*tbl
)
71 struct mount_entry
*head
= read_file_system_list(false);
72 struct mount_entry
*next
;
75 struct mount_entry
*old_me
;
/* walk the singly-linked mount list; "next" is presumably saved from
 * head->me_next at the top of each iteration (line not visible) */
78 for ( ; head
; head
= next
) {
81 /* ensure we can me_free() without side effects when skipping */
84 /* we don't care about FS type at all */
/* drop the malloc-ed FS-type string so me_free's assertion holds */
85 if (head
->me_type_malloced
) {
87 head
->me_type_malloced
= 0;
/* skip mounts we cannot serve from (mog_mnt_usable is project code) */
91 if (!mog_mnt_usable(head
))
94 /* mark the device as something we _might_ track util for */
95 mog_iou_active(head
->me_dev
);
/* 0 => st_dev already present (chain below); 1 => inserted; -1 => OOM
 * (case labels not visible in this chunk — confirm against full source) */
97 switch (hash_insert_if_absent(tbl
, head
, &exist
.ptr
)) {
99 /* chain entries if they have multiple st_dev */
100 struct mount_entry
*me
= exist
.old_me
;
105 assert(me
!= head
&& "circular mount ref");
113 assert(0 && "compiler bug?");
119 /* runs inside a thread, this is called at startup before daemonization */
/*
 * Thread body: builds the initial by_dev table under by_dev_lock, signals
 * the parent via ia->cond, then sleeps until the parent cancels it.
 * NOTE(review): the line assigning by_dev (presumably by_dev = mnt_new(...),
 * original line 128), the "err"/sleep-loop head (lines 138-139), and the
 * function's braces are missing from this chunk.
 */
120 static void * init_once(void *ptr
)
122 struct init_args
*ia
= ptr
;
125 CHECK(int, 0, pthread_mutex_lock(&by_dev_lock
) );
126 assert(by_dev
== NULL
&&
127 "by_dev exists during initialization");
129 mnt_populate(by_dev
);
130 CHECK(int, 0, pthread_mutex_unlock(&by_dev_lock
) );
132 /* wake up parent thread, this tells parent to cancel us */
133 CHECK(int, 0, pthread_mutex_lock(&ia
->cond_lock
));
134 CHECK(int, 0, pthread_cond_signal(&ia
->cond
));
135 CHECK(int, 0, pthread_mutex_unlock(&ia
->cond_lock
));
137 /* wait for cancellation, mog_sleep may return ENOMEM or EINTR */
/* keep sleeping through transient wakeups; cancellation exits the thread */
140 } while (err
== EINTR
|| err
== ENOMEM
);
141 assert(0 && "init_once did not get cancelled");
145 /* once-only initialization */
/*
 * Spawns init_once in a helper thread, retrying pthread_create under
 * EAGAIN-style pressure, then waits (with a timeout) for the thread to
 * signal that by_dev is populated before cancelling and joining it.
 * NOTE(review): this chunk is missing the declarations of thr/rc/tries/ts,
 * the retry/timeout loop structure, and several branch lines (original
 * 147-150, 154-155, 157-158, 160-162, 168-178, 180-183, 185, 187-188, 190).
 */
146 static void timed_init_once(void)
151 struct init_args ia
= {
152 .cond_lock
= PTHREAD_MUTEX_INITIALIZER
,
153 .cond
= PTHREAD_COND_INITIALIZER
/* hold cond_lock before creating the thread so the signal cannot be lost */
156 CHECK(int, 0, pthread_mutex_lock(&ia
.cond_lock
));
159 rc
= pthread_create(&thr
, NULL
, init_once
, &ia
);
163 /* this must succeed, keep looping */
164 if (mog_pthread_create_retryable(rc
)) {
/* warn only every 1024 attempts to avoid log spam */
165 if ((++tries
% 1024) == 0)
166 warn("pthread_create: %s (tries: %lu)",
167 strerror(rc
), tries
);
170 assert(0 && "pthread_create usage error");
/* ts is presumably an absolute deadline computed above (not visible) */
179 rc
= pthread_cond_timedwait(&ia
.cond
, &ia
.cond_lock
, &ts
);
184 warn("still populating mountlist (tries: %lu)",
186 else if (rc
== EINTR
)
189 assert(0 && "unhandled pthread_cond_timedwait failure");
191 CHECK(int, 0, pthread_mutex_unlock(&ia
.cond_lock
));
194 * this will load libgcc_s under glibc, we want to do this early
195 * in process lifetime to prevent load failures if we are under
196 * FD pressure later on.
198 CHECK(int, 0, pthread_cancel(thr
));
200 CHECK(int, 0, pthread_join(thr
, NULL
));
201 CHECK(int, 0, pthread_cond_destroy(&ia
.cond
));
202 CHECK(int, 0, pthread_mutex_destroy(&ia
.cond_lock
));
/*
 * Rebuilds the mount list: allocates a fresh table, repopulates it (slow,
 * may stat devices), then swaps it in for by_dev under by_dev_lock.
 * refresh_lock serializes concurrent refreshers; by_dev_lock is held only
 * briefly for the read of "old" and for the final swap.
 * NOTE(review): the lines declaring n, allocating "new" (presumably
 * mnt_new(n)), assigning by_dev = new, and freeing "old" (original lines
 * 207, 209, 216, 219-220, 229, 237-241) are missing from this chunk.
 */
206 void mog_mnt_refresh(void)
208 Hash_table
*new, *old
;
210 static pthread_mutex_t refresh_lock
= PTHREAD_MUTEX_INITIALIZER
;
212 CHECK(int, 0, pthread_mutex_lock(&refresh_lock
) ); /* protects old */
214 CHECK(int, 0, pthread_mutex_lock(&by_dev_lock
) );
215 old
= by_dev
; /* save early for validation */
/* size the replacement table like the current one */
217 n
= hash_get_n_buckets_used(old
);
218 CHECK(int, 0, pthread_mutex_unlock(&by_dev_lock
) );
221 mog_iou_cleanup_begin();
223 mnt_populate(new); /* slow, can stat all devices */
225 /* quickly swap in the new mount list */
226 CHECK(int, 0, pthread_mutex_lock(&by_dev_lock
) );
/* refresh_lock guarantees no other refresher replaced by_dev meanwhile */
227 assert(old
== by_dev
&&
228 "by_dev hash modified during update");
230 CHECK(int, 0, pthread_mutex_unlock(&by_dev_lock
) );
233 * must cleanup _after_ replacing by_dev, since readers
234 * can still mark devices as active before we wrlock.
236 mog_iou_cleanup_finish();
242 CHECK(int, 0, pthread_mutex_unlock(&refresh_lock
) );
246 * Looks up a mount_entry by st_dev, returns NULL if nothing was found
247 * Users may only acquire one mount entry at a time and MUST release it
/*
 * On a hit this apparently returns while still holding by_dev_lock (the
 * unlock at original line 271 sits on the miss path; mog_mnt_release does
 * the matching unlock) — confirm against full source; branch braces and
 * the NULL-return path (original lines 250, 253, 256, 258, 269-270,
 * 272-273) are missing from this chunk.
 */
249 const struct mount_entry
* mog_mnt_acquire(dev_t st_dev
)
/* stack key: only me_dev matters, per me_hash/me_cmp */
251 struct mount_entry me
= { .me_dev
= st_dev
};
252 struct mount_entry
*rv
;
254 CHECK(int, 0, pthread_mutex_lock(&by_dev_lock
) );
255 rv
= hash_lookup(by_dev
, &me
);
257 /* user must release this via mog_mnt_release if non-NULL */
259 struct mount_entry
*rv_me
= rv
;
262 * if multiple entries match st_dev, favor the one
263 * with a leading slash
/* walk the alias chain for a real "/dev/..." style devname */
265 while (rv_me
&& rv_me
->me_devname
[0] != '/')
266 rv_me
= rv_me
->me_next
;
/* fall back to the chain head if no slash-prefixed alias exists */
268 return rv_me
? rv_me
: rv
;
271 CHECK(int, 0, pthread_mutex_unlock(&by_dev_lock
) );
275 /* releases the mount entry, allowing mog_mnt_acquire to be called again */
/*
 * Validates (in an assertion) that "me" really is an entry of the current
 * by_dev chain for its device, then drops by_dev_lock, which a successful
 * mog_mnt_acquire left held.  NOTE(review): the function braces and the
 * blank/structural lines (original 277, 280, 282, 285, 288) are missing;
 * no lock-acquire appears here, consistent with the lock being inherited
 * from mog_mnt_acquire — confirm against full source.
 */
276 void mog_mnt_release(const struct mount_entry
*me
)
278 struct mount_entry
*check_me
;
/* strip const without a cast expression; hash_lookup wants void* */
279 union { const void *in
; void *out
; } deconst
= { .in
= me
};
281 check_me
= hash_lookup(by_dev
, deconst
.out
);
/* walk the alias chain until we find the exact pointer being released */
283 while (check_me
->me_next
&& check_me
!= me
)
284 check_me
= check_me
->me_next
;
286 assert(check_me
== me
&& "did not release acquired mount_entry");
287 CHECK(int, 0, pthread_mutex_unlock(&by_dev_lock
) );
/* sentinel meaning "no usable st_rdev" for mnt_update below */
290 #define MOG_DEV_T_INVAL ((dev_t)-1)
/* NOTE(review): these look like fields of "struct mnt_update" (used by
 * me_update_match/update_util_each below); the struct header and its
 * st_rdev/prefixlen members (original lines 291-292, 294-295, 297-298)
 * are missing from this chunk. */
/* "/dev/<name>" prefix to match against me_devname */
293 char prefix
[(sizeof("/dev/") - 1) + MOG_IOSTAT_DEVLEN
];
/* utilization string copied verbatim from iostat output */
296 char util
[MOG_IOUTIL_LEN
];
300 * returns true if the mount entry matches the update request
301 * (and thus can be updated). False if no match.
303 static bool me_update_match(struct mount_entry
*me
, struct mnt_update
*update
)
305 if (update
->st_rdev
!= MOG_DEV_T_INVAL
306 && me
->me_dev
== update
->st_rdev
)
309 if (strlen(me
->me_devname
) < update
->prefixlen
)
311 return memcmp(update
->prefix
, me
->me_devname
, update
->prefixlen
) == 0;
/*
 * hash_do_for_each callback: for one by_dev bucket entry (and its alias
 * chain), writes the iostat utilization value if any chained entry matches
 * the update request.  Always returns true so iteration continues.
 * NOTE(review): braces and the loop-exit after a match (original lines
 * 316, 320, 324, 327, 331-335, 337) are missing from this chunk.
 */
315 static bool update_util_each(void *ent
, void *upd
)
317 struct mount_entry
*me
= ent
;
318 struct mnt_update
*update
= upd
;
/* remember the chain's device ID for the consistency assertion below */
319 dev_t this_dev
= me
->me_dev
;
321 /* attempt to resolve multiple mounts mapped to the same mount point */
322 for (; me
; me
= me
->me_next
) {
323 assert(this_dev
== me
->me_dev
&& "me_dev mismatch");
325 if (me_update_match(me
, update
)) {
/* record utilization for this device (project I/O-utilization store) */
326 mog_iou_write(this_dev
, update
->util
);
328 * We could cull mismatched mount entries here.
329 * mount point aliasing is relatively uncommon so
330 * probably not worth the code.
336 return true; /* continue */
340 * takes a line of iostat information and updates entries in our
341 * mountlist which match it. This is O(mountpoints) for now.
/*
 * Builds an mnt_update request ("/dev/" + iostat device name, optional
 * st_rdev from stat(2), utilization string) and applies it to every
 * by_dev entry via update_util_each under by_dev_lock.
 * NOTE(review): the declaration of "st" (struct stat, presumably), the
 * "else" before line 367, braces, and a length assertion (original lines
 * 344, 349-350, 353-354, 366, 368, 371, 375) are missing from this chunk.
 */
343 void mog_mnt_update_util(struct mog_iostat
*iostat
)
345 static const size_t pfx_len
= sizeof("/dev/") - 1;
346 struct mnt_update update
;
347 size_t cpy_len
= strlen(iostat
->dev
);
/* mempcpy returns the byte past "/dev/", where the name is appended */
348 char *dst
= mempcpy(update
.prefix
, "/dev/", pfx_len
);
/* +1 copies the trailing NUL along with the device name */
351 mempcpy(dst
, iostat
->dev
, cpy_len
+ 1);
352 update
.prefixlen
= cpy_len
+ pfx_len
;
355 * st_rdev matching is necessary for cryptmount(8) on Linux, where
356 * /dev/mapper/FOO is NOT a symlink to /dev/dm-N, but /dev/dm-N
357 * and /dev/mapper/FOO both refer to the same device (where
358 * /dev/mapper/FOO is the mounted device name, mountlist never
361 * FIXME: parse /proc/partitions under Linux like mogstored does
362 * may avoid this stat.
364 if (stat(update
.prefix
, &st
) == 0 && S_ISBLK(st
.st_mode
))
365 update
.st_rdev
= st
.st_rdev
;
/* stat failed or not a block device: disable st_rdev matching */
367 update
.st_rdev
= MOG_DEV_T_INVAL
;
/* both buffers are fixed-size char arrays of equal length */
369 assert(sizeof(update
.util
) == sizeof(iostat
->util
));
370 memcpy(&update
.util
, iostat
->util
, sizeof(update
.util
));
372 CHECK(int, 0, pthread_mutex_lock(&by_dev_lock
) );
373 (void)hash_do_for_each(by_dev
, update_util_each
, &update
);
374 CHECK(int, 0, pthread_mutex_unlock(&by_dev_lock
) );