From 770f82792a81e27168fe4ed82905e567944bf0bb Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 18 Mar 2018 19:57:19 -0700 Subject: [PATCH] devfs - Fix a few more issues (2) * The devfs_freep() issues were more extensive then originally determined. There was another race, primarily due to some vnodes (related to unlinked devfs nodes) getting torn down before aliases during umount. * Add another flag and adjust the code to not free the node structure until all link references (nlinks) have gone away. * Also refactor locking and sequencing to further bullet-proof vnode reclaims vs devfs_freep()'s. --- sys/sys/devfs.h | 5 ++- sys/vfs/devfs/devfs_core.c | 95 ++++++++++++++++++++++++++++++++------------- sys/vfs/devfs/devfs_vnops.c | 10 +++-- 3 files changed, 76 insertions(+), 34 deletions(-) diff --git a/sys/sys/devfs.h b/sys/sys/devfs.h index 4ba1091233..0e8b89826b 100644 --- a/sys/sys/devfs.h +++ b/sys/sys/devfs.h @@ -90,7 +90,7 @@ struct devfs_node { struct devfs_node *parent; /* parent of this node */ devfs_nodetype node_type; /* devfs node type */ - u_int64_t refs; /* number of open references */ + u_int64_t refs; /* currently unused */ size_t nchildren; /* number of children of a parent */ u_int64_t cookie_jar; /* cookie pool for children */ u_int64_t cookie; /* directory entry cookie for readdir */ @@ -308,9 +308,10 @@ typedef void* (devfs_iterate_callback_t)(struct devfs_node *, void *); #define DEVFS_HIDDEN 0x010 /* Makes node inaccessible */ #define DEVFS_INVISIBLE 0x020 /* Makes node invisible */ #define DEVFS_PTY 0x040 /* PTY device */ -#define DEVFS_DESTROYED 0x080 /* Sanity check */ +#define DEVFS_DESTROYED 0x080 /* Destroy attempt */ #define DEVFS_RULE_CREATED 0x100 /* Node was rule-created */ #define DEVFS_RULE_HIDDEN 0x200 /* Node was hidden by a rule */ +#define DEVFS_NLINKSWAIT 0x400 /* NLinks final */ /* * Clone helper stuff diff --git a/sys/vfs/devfs/devfs_core.c b/sys/vfs/devfs/devfs_core.c index a3b2e165a7..2d07b19e92 100644 --- a/sys/vfs/devfs/devfs_core.c +++ b/sys/vfs/devfs/devfs_core.c @@ -421,17 +421,34 @@ void devfs_freep(struct devfs_node *node) { struct vnode *vp; - int relock; KKASSERT(node); /* * It is possible for devfs_freep() to race a destruction due * to having to release the lock below. We use DEVFS_DESTROYED - * to interlock the race. + * to interlock the race (mediated by devfs_lock) + * + * We use NLINKSWAIT to indicate that the node couldn't be + * freed due to having pending nlinks. We can free + * the node when nlinks drops to 0. This should never print + * a "(null)" name, if it ever does there are still unresolved + * issues. */ if (node->flags & DEVFS_DESTROYED) { - kprintf("devfs: race avoided node '%s'\n", node->d_dir.d_name); + if ((node->flags & DEVFS_NLINKSWAIT) && + node->nlinks == 0) { + kprintf("devfs: final node '%s' on nlinks\n", + node->d_dir.d_name); + if (node->d_dir.d_name) { + kfree(node->d_dir.d_name, M_DEVFS); + node->d_dir.d_name = NULL; + } + objcache_put(devfs_node_cache, node); + } else { + kprintf("devfs: race avoided node '%s'\n", + node->d_dir.d_name); + } return; } node->flags |= DEVFS_DESTROYED; @@ -440,41 +457,50 @@ devfs_freep(struct devfs_node *node) * Items we have to dispose of before potentially releasing * devfs_lock. * - * Adjust leak_count. * Remove the node from the orphan list if it is still on it. */ atomic_subtract_long(&DEVFS_MNTDATA(node->mp)->leak_count, 1); + atomic_subtract_long(&DEVFS_MNTDATA(node->mp)->file_count, 1); if (node->flags & DEVFS_ORPHANED) devfs_tracer_del_orphan(node); - atomic_subtract_long(&DEVFS_MNTDATA(node->mp)->file_count, 1); /* - * Avoid deadlocks between devfs_lock and the vnode lock when - * disassociating the vnode (stress2 pty vs ls -la /dev/pts or - * du -sh). + * At this point only the vp points to node, and node cannot be + * physically freed because we own DEVFS_DESTROYED. + * + * We must dispose of the vnode without deadlocking or racing + * against e.g. a vnode reclaim. * * This also prevents the vnode reclaim code from double-freeing * the node. The vget() is required to safely modified the vp * and cycle the refs to terminate an inactive vp. */ - if (lockstatus(&devfs_lock, curthread) == LK_EXCLUSIVE) { - lockmgr(&devfs_lock, LK_RELEASE); - relock = 1; - } else { - relock = 0; - } - while ((vp = node->v_node) != NULL) { - if (vget(vp, LK_EXCLUSIVE | LK_RETRY) != 0) - break; - v_release_rdev(vp); - vp->v_data = NULL; - node->v_node = NULL; - vput(vp); - } + int relock; - if (relock) - lockmgr(&devfs_lock, LK_EXCLUSIVE); + vref(vp); + if (lockstatus(&devfs_lock, curthread) == LK_EXCLUSIVE) { + lockmgr(&devfs_lock, LK_RELEASE); + relock = 1; + } else { + relock = 0; + } + if (node->v_node == NULL) { + /* reclaim race, mediated by devfs_lock */ + vrele(vp); + } else if (vget(vp, LK_EXCLUSIVE | LK_RETRY) == 0) { + vrele(vp); + v_release_rdev(vp); + vp->v_data = NULL; + node->v_node = NULL; + vput(vp); + } else { + /* reclaim race, mediated by devfs_lock */ + vrele(vp); + } + if (relock) + lockmgr(&devfs_lock, LK_EXCLUSIVE); + } /* * Remaining cleanup @@ -483,11 +509,20 @@ devfs_freep(struct devfs_node *node) kfree(node->symlink_name, M_DEVFS); node->symlink_name = NULL; } - if (node->d_dir.d_name) { - kfree(node->d_dir.d_name, M_DEVFS); - node->d_dir.d_name = NULL; + + /* + * We cannot actually free the node if it still has + * nlinks. + */ + if (node->nlinks) { + node->flags |= DEVFS_NLINKSWAIT; + } else { + if (node->d_dir.d_name) { + kfree(node->d_dir.d_name, M_DEVFS); + node->d_dir.d_name = NULL; + } + objcache_put(devfs_node_cache, node); } - objcache_put(devfs_node_cache, node); } /* @@ -563,6 +598,10 @@ devfs_unlinkp(struct devfs_node *node) vp = devfs_alias_getvp(node); node->link_target = NULL; target->nlinks--; + if (target->nlinks == 0 && + (target->flags & DEVFS_DESTROYED)) { + devfs_freep(target); + } } else { vp = NULL; } diff --git a/sys/vfs/devfs/devfs_vnops.c b/sys/vfs/devfs/devfs_vnops.c index f8550e1b4f..65b6ddfb52 100644 --- a/sys/vfs/devfs/devfs_vnops.c +++ b/sys/vfs/devfs/devfs_vnops.c @@ -290,8 +290,12 @@ devfs_vop_reclaim(struct vop_reclaim_args *ap) * topology. Interlocked by devfs_lock. */ vp = ap->a_vp; - if ((node = DEVFS_NODE(vp)) != NULL) { - node->v_node = NULL; + node = DEVFS_NODE(vp); + vp->v_data = NULL; + node->v_node = NULL; + v_release_rdev(vp); + + if (node) { if ((node->flags & DEVFS_NODE_LINKED) == 0) devfs_freep(node); } @@ -303,8 +307,6 @@ devfs_vop_reclaim(struct vop_reclaim_args *ap) * v_rdev needs to be properly released using v_release_rdev * Make sure v_data is NULL as well. */ - vp->v_data = NULL; - v_release_rdev(vp); return 0; } -- 2.11.4.GIT