kernfs: allow nodes to be created in the deactivated state

Currently, kernfs_nodes are made visible to userland on creation, which makes it difficult for kernfs users to atomically succeed or fail creation of multiple nodes. In addition, if something fails after creating some nodes, the created nodes might already be in use and their active refs need to be drained for removal, which has the potential to introduce tricky reverse locking dependency on active_ref depending on how the error path is synchronized. This patch introduces per-root flag KERNFS_ROOT_CREATE_DEACTIVATED. If set, all nodes under the root are created in the deactivated state and stay invisible to userland until explicitly enabled by the new kernfs_activate() API. Also, nodes which have never been activated are guaranteed to bypass draining on removal thus allowing error paths to not worry about locking dependency on active_ref draining. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Tejun Heo <tj@kernel.org> 2014-02-03 23:09:12 +0400
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2014-02-08 03:52:48 +0400
commit: d35258ef702cca0c4e66d799f8e38b78c02ce8a5 (patch)
tree: f3d98795cf620ad32edd4df982d015cfeedd1a97 /fs
parent: b9c9dad0c457d32cf8c7d2e413463c8414c7a7a7 (diff)
download: linux-d35258ef702cca0c4e66d799f8e38b78c02ce8a5.tar.xz
2 files changed, 65 insertions, 8 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 89f8462f337e..3cff0a233cd1 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -435,7 +435,7 @@ int kernfs_add_one(struct kernfs_node *kn)
 		goto out_unlock;
 
 	ret = -ENOENT;
-	if (!kernfs_active(parent))
+	if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
 		goto out_unlock;
 
 	kn->hash = kernfs_name_hash(kn->name, kn->ns);
@@ -451,9 +451,19 @@ int kernfs_add_one(struct kernfs_node *kn)
 		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
 	}
 
-	/* Mark the entry added into directory tree */
-	atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
-	ret = 0;
+	mutex_unlock(&kernfs_mutex);
+
+	/*
+	 * Activate the new node unless CREATE_DEACTIVATED is requested.
+	 * If not activated here, the kernfs user is responsible for
+	 * activating the node with kernfs_activate().  A node which hasn't
+	 * been activated is not visible to userland and its removal won't
+	 * trigger deactivation.
+	 */
+	if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
+		kernfs_activate(kn);
+	return 0;
+
 out_unlock:
 	mutex_unlock(&kernfs_mutex);
 	return ret;
@@ -528,13 +538,14 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
 /**
  * kernfs_create_root - create a new kernfs hierarchy
  * @scops: optional syscall operations for the hierarchy
+ * @flags: KERNFS_ROOT_* flags
  * @priv: opaque data associated with the new directory
  *
  * Returns the root of the new hierarchy on success, ERR_PTR() value on
  * failure.
  */
 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
-				       void *priv)
+				       unsigned int flags, void *priv)
 {
 	struct kernfs_root *root;
 	struct kernfs_node *kn;
@@ -553,14 +564,17 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
 	kn->priv = priv;
 	kn->dir.root = root;
 
 	root->syscall_ops = scops;
+	root->flags = flags;
 	root->kn = kn;
 	init_waitqueue_head(&root->deactivate_waitq);
 
+	if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
+		kernfs_activate(kn);
+
 	return root;
 }
 
@@ -783,6 +797,40 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
 	return pos->parent;
 }
 
+/**
+ * kernfs_activate - activate a node which started deactivated
+ * @kn: kernfs_node whose subtree is to be activated
+ *
+ * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
+ * needs to be explicitly activated.  A node which hasn't been activated
+ * isn't visible to userland and deactivation is skipped during its
+ * removal, so creation of multiple nodes can succeed or fail atomically.
+ * Takes kernfs_mutex; nodes already marked KERNFS_ACTIVATED are skipped.
+ *
+ * The caller is responsible for ensuring that this function is not called
+ * after kernfs_remove*() is invoked on @kn.
+ */
+void kernfs_activate(struct kernfs_node *kn)
+{
+	struct kernfs_node *pos;
+
+	mutex_lock(&kernfs_mutex);
+
+	pos = NULL;
+	while ((pos = kernfs_next_descendant_post(pos, kn))) {
+		if (!pos || (pos->flags & KERNFS_ACTIVATED))
+			continue;
+		/* sanity: a node with a parent should sit in an rbtree, bias untouched */
+		WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
+		WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
+		/* drop the deactivation bias, then record the activation */
+		atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
+		pos->flags |= KERNFS_ACTIVATED;
+	}
+
+	mutex_unlock(&kernfs_mutex);
+}
+
 static void __kernfs_remove(struct kernfs_node *kn)
 {
 	struct kernfs_node *pos;
@@ -817,7 +865,16 @@ static void __kernfs_remove(struct kernfs_node *kn)
 		 */
 		kernfs_get(pos);
 
-		kernfs_drain(pos);
+		/*
+		 * Drain iff @kn was activated.  This avoids draining and
+		 * its lockdep annotations for nodes which have never been
+		 * activated and allows embedding kernfs_remove() in create
+		 * error paths without worrying about draining.
+		 */
+		if (kn->flags & KERNFS_ACTIVATED)
+			kernfs_drain(pos);
+		else
+			WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
 
 		/*
 		 * kernfs_unlink_sibling() succeeds once per node.  Use it
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 6211230814fd..5c7fdd9c6811 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -62,7 +62,7 @@ int __init sysfs_init(void)
 {
 	int err;
 
-	sysfs_root = kernfs_create_root(NULL, NULL);
+	sysfs_root = kernfs_create_root(NULL, 0, NULL);
 	if (IS_ERR(sysfs_root))
 		return PTR_ERR(sysfs_root);
author	Tejun Heo <tj@kernel.org>	2014-02-03 23:09:12 +0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2014-02-08 03:52:48 +0400
commit	d35258ef702cca0c4e66d799f8e38b78c02ce8a5 (patch)
tree	f3d98795cf620ad32edd4df982d015cfeedd1a97 /fs
parent	b9c9dad0c457d32cf8c7d2e413463c8414c7a7a7 (diff)
download	linux-d35258ef702cca0c4e66d799f8e38b78c02ce8a5.tar.xz