diff options
Diffstat (limited to 'kernel/audit_tree.c')
-rw-r--r-- | kernel/audit_tree.c | 498 |
1 files changed, 277 insertions, 221 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index ea43181cde4a..d4af4d97f847 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -24,9 +24,9 @@ struct audit_tree { struct audit_chunk { struct list_head hash; - struct fsnotify_mark mark; + unsigned long key; + struct fsnotify_mark *mark; struct list_head trees; /* with root here */ - int dead; int count; atomic_long_t refs; struct rcu_head head; @@ -37,13 +37,25 @@ struct audit_chunk { } owners[]; }; +struct audit_tree_mark { + struct fsnotify_mark mark; + struct audit_chunk *chunk; +}; + static LIST_HEAD(tree_list); static LIST_HEAD(prune_list); static struct task_struct *prune_thread; /* - * One struct chunk is attached to each inode of interest. - * We replace struct chunk on tagging/untagging. + * One struct chunk is attached to each inode of interest through + * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging / + * untagging, the mark is stable as long as there is chunk attached. The + * association between mark and chunk is protected by hash_lock and + * audit_tree_group->mark_mutex. Thus as long as we hold + * audit_tree_group->mark_mutex and check that the mark is alive by + * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to + * the current chunk. + * * Rules have pointer to struct audit_tree. * Rules have struct list_head rlist forming a list of rules over * the same tree. @@ -62,8 +74,12 @@ static struct task_struct *prune_thread; * tree is refcounted; one reference for "some rules on rules_list refer to * it", one for each chunk with pointer to it. * - * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount - * of watch contributes 1 to .refs). + * chunk is refcounted by embedded .refs. Mark associated with the chunk holds + * one chunk reference. This reference is dropped either when a mark is going + * to be freed (corresponding inode goes away) or when chunk attached to the + * mark gets replaced. This reference must be dropped using + * audit_mark_put_chunk() to make sure the reference is dropped only after RCU + * grace period as it protects RCU readers of the hash table. * * node.index allows to get from node.list to containing chunk. * MSB of that sucker is stolen to mark taggings that we might have to @@ -72,6 +88,7 @@ static struct task_struct *prune_thread; */ static struct fsnotify_group *audit_tree_group; +static struct kmem_cache *audit_tree_mark_cachep __read_mostly; static struct audit_tree *alloc_tree(const char *s) { @@ -131,12 +148,43 @@ static void __put_chunk(struct rcu_head *rcu) audit_put_chunk(chunk); } -static void audit_tree_destroy_watch(struct fsnotify_mark *entry) +/* + * Drop reference to the chunk that was held by the mark. This is the reference + * that gets dropped after we've removed the chunk from the hash table and we + * use it to make sure chunk cannot be freed before RCU grace period expires. + */ +static void audit_mark_put_chunk(struct audit_chunk *chunk) { - struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark); call_rcu(&chunk->head, __put_chunk); } +static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *mark) +{ + return container_of(mark, struct audit_tree_mark, mark); +} + +static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark) +{ + return audit_mark(mark)->chunk; +} + +static void audit_tree_destroy_watch(struct fsnotify_mark *mark) +{ + kmem_cache_free(audit_tree_mark_cachep, audit_mark(mark)); +} + +static struct fsnotify_mark *alloc_mark(void) +{ + struct audit_tree_mark *amark; + + amark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL); + if (!amark) + return NULL; + fsnotify_init_mark(&amark->mark, audit_tree_group); + amark->mark.mask = FS_IN_IGNORED; + return &amark->mark; +} + static struct audit_chunk *alloc_chunk(int count) { struct audit_chunk *chunk; @@ -156,8 +204,6 @@ static struct audit_chunk *alloc_chunk(int count) INIT_LIST_HEAD(&chunk->owners[i].list); chunk->owners[i].index = i; } - fsnotify_init_mark(&chunk->mark, audit_tree_group); - chunk->mark.mask = FS_IN_IGNORED; return chunk; } @@ -172,36 +218,25 @@ static unsigned long inode_to_key(const struct inode *inode) return (unsigned long)&inode->i_fsnotify_marks; } -/* - * Function to return search key in our hash from chunk. Key 0 is special and - * should never be present in the hash. - */ -static unsigned long chunk_to_key(struct audit_chunk *chunk) -{ - /* - * We have a reference to the mark so it should be attached to a - * connector. - */ - if (WARN_ON_ONCE(!chunk->mark.connector)) - return 0; - return (unsigned long)chunk->mark.connector->obj; -} - static inline struct list_head *chunk_hash(unsigned long key) { unsigned long n = key / L1_CACHE_BYTES; return chunk_hash_heads + n % HASH_SIZE; } -/* hash_lock & entry->lock is held by caller */ +/* hash_lock & mark->group->mark_mutex is held by caller */ static void insert_hash(struct audit_chunk *chunk) { - unsigned long key = chunk_to_key(chunk); struct list_head *list; - if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED)) - return; - list = chunk_hash(key); + /* + * Make sure chunk is fully initialized before making it visible in the + * hash. Pairs with a data dependency barrier in READ_ONCE() in + * audit_tree_lookup(). + */ + smp_wmb(); + WARN_ON_ONCE(!chunk->key); + list = chunk_hash(chunk->key); list_add_rcu(&chunk->hash, list); } @@ -213,7 +248,11 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode) struct audit_chunk *p; list_for_each_entry_rcu(p, list, hash) { - if (chunk_to_key(p) == key) { + /* + * We use a data dependency barrier in READ_ONCE() to make sure + * the chunk we see is fully initialized. + */ + if (READ_ONCE(p->key) == key) { atomic_long_inc(&p->refs); return p; } @@ -239,137 +278,159 @@ static struct audit_chunk *find_chunk(struct node *p) return container_of(p, struct audit_chunk, owners[0]); } -static void untag_chunk(struct node *p) +static void replace_mark_chunk(struct fsnotify_mark *mark, + struct audit_chunk *chunk) +{ + struct audit_chunk *old; + + assert_spin_locked(&hash_lock); + old = mark_chunk(mark); + audit_mark(mark)->chunk = chunk; + if (chunk) + chunk->mark = mark; + if (old) + old->mark = NULL; +} + +static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old) { - struct audit_chunk *chunk = find_chunk(p); - struct fsnotify_mark *entry = &chunk->mark; - struct audit_chunk *new = NULL; struct audit_tree *owner; - int size = chunk->count - 1; int i, j; - fsnotify_get_mark(entry); + new->key = old->key; + list_splice_init(&old->trees, &new->trees); + list_for_each_entry(owner, &new->trees, same_root) + owner->root = new; + for (i = j = 0; j < old->count; i++, j++) { + if (!old->owners[j].owner) { + i--; + continue; + } + owner = old->owners[j].owner; + new->owners[i].owner = owner; + new->owners[i].index = old->owners[j].index - j + i; + if (!owner) /* result of earlier fallback */ + continue; + get_tree(owner); + list_replace_init(&old->owners[j].list, &new->owners[i].list); + } + replace_mark_chunk(old->mark, new); + /* + * Make sure chunk is fully initialized before making it visible in the + * hash. Pairs with a data dependency barrier in READ_ONCE() in + * audit_tree_lookup(). + */ + smp_wmb(); + list_replace_rcu(&old->hash, &new->hash); +} - spin_unlock(&hash_lock); +static void remove_chunk_node(struct audit_chunk *chunk, struct node *p) +{ + struct audit_tree *owner = p->owner; + + if (owner->root == chunk) { + list_del_init(&owner->same_root); + owner->root = NULL; + } + list_del_init(&p->list); + p->owner = NULL; + put_tree(owner); +} - if (size) - new = alloc_chunk(size); +static int chunk_count_trees(struct audit_chunk *chunk) +{ + int i; + int ret = 0; - mutex_lock(&entry->group->mark_mutex); - spin_lock(&entry->lock); + for (i = 0; i < chunk->count; i++) + if (chunk->owners[i].owner) + ret++; + return ret; +} + +static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark) +{ + struct audit_chunk *new; + int size; + + mutex_lock(&audit_tree_group->mark_mutex); /* - * mark_mutex protects mark from getting detached and thus also from - * mark->connector->obj getting NULL. + * mark_mutex stabilizes chunk attached to the mark so we can check + * whether it didn't change while we've dropped hash_lock. */ - if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { - spin_unlock(&entry->lock); - mutex_unlock(&entry->group->mark_mutex); - if (new) - fsnotify_put_mark(&new->mark); - goto out; - } - - owner = p->owner; + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) || + mark_chunk(mark) != chunk) + goto out_mutex; + size = chunk_count_trees(chunk); if (!size) { - chunk->dead = 1; spin_lock(&hash_lock); list_del_init(&chunk->trees); - if (owner->root == chunk) - owner->root = NULL; - list_del_init(&p->list); list_del_rcu(&chunk->hash); + replace_mark_chunk(mark, NULL); spin_unlock(&hash_lock); - spin_unlock(&entry->lock); - mutex_unlock(&entry->group->mark_mutex); - fsnotify_destroy_mark(entry, audit_tree_group); - goto out; + fsnotify_detach_mark(mark); + mutex_unlock(&audit_tree_group->mark_mutex); + audit_mark_put_chunk(chunk); + fsnotify_free_mark(mark); + return; } + new = alloc_chunk(size); if (!new) - goto Fallback; + goto out_mutex; - if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj, - FSNOTIFY_OBJ_TYPE_INODE, 1)) { - fsnotify_put_mark(&new->mark); - goto Fallback; - } - - chunk->dead = 1; spin_lock(&hash_lock); - list_replace_init(&chunk->trees, &new->trees); - if (owner->root == chunk) { - list_del_init(&owner->same_root); - owner->root = NULL; - } - - for (i = j = 0; j <= size; i++, j++) { - struct audit_tree *s; - if (&chunk->owners[j] == p) { - list_del_init(&p->list); - i--; - continue; - } - s = chunk->owners[j].owner; - new->owners[i].owner = s; - new->owners[i].index = chunk->owners[j].index - j + i; - if (!s) /* result of earlier fallback */ - continue; - get_tree(s); - list_replace_init(&chunk->owners[j].list, &new->owners[i].list); - } - - list_replace_rcu(&chunk->hash, &new->hash); - list_for_each_entry(owner, &new->trees, same_root) - owner->root = new; - spin_unlock(&hash_lock); - spin_unlock(&entry->lock); - mutex_unlock(&entry->group->mark_mutex); - fsnotify_destroy_mark(entry, audit_tree_group); - fsnotify_put_mark(&new->mark); /* drop initial reference */ - goto out; - -Fallback: - // do the best we can - spin_lock(&hash_lock); - if (owner->root == chunk) { - list_del_init(&owner->same_root); - owner->root = NULL; - } - list_del_init(&p->list); - p->owner = NULL; - put_tree(owner); + /* + * This has to go last when updating chunk as once replace_chunk() is + * called, new RCU readers can see the new chunk. + */ + replace_chunk(new, chunk); spin_unlock(&hash_lock); - spin_unlock(&entry->lock); - mutex_unlock(&entry->group->mark_mutex); -out: - fsnotify_put_mark(entry); - spin_lock(&hash_lock); + mutex_unlock(&audit_tree_group->mark_mutex); + audit_mark_put_chunk(chunk); + return; + +out_mutex: + mutex_unlock(&audit_tree_group->mark_mutex); } +/* Call with group->mark_mutex held, releases it */ static int create_chunk(struct inode *inode, struct audit_tree *tree) { - struct fsnotify_mark *entry; + struct fsnotify_mark *mark; struct audit_chunk *chunk = alloc_chunk(1); - if (!chunk) + + if (!chunk) { + mutex_unlock(&audit_tree_group->mark_mutex); return -ENOMEM; + } - entry = &chunk->mark; - if (fsnotify_add_inode_mark(entry, inode, 0)) { - fsnotify_put_mark(entry); + mark = alloc_mark(); + if (!mark) { + mutex_unlock(&audit_tree_group->mark_mutex); + kfree(chunk); + return -ENOMEM; + } + + if (fsnotify_add_inode_mark_locked(mark, inode, 0)) { + mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_put_mark(mark); + kfree(chunk); return -ENOSPC; } - spin_lock(&entry->lock); spin_lock(&hash_lock); if (tree->goner) { spin_unlock(&hash_lock); - chunk->dead = 1; - spin_unlock(&entry->lock); - fsnotify_destroy_mark(entry, audit_tree_group); - fsnotify_put_mark(entry); + fsnotify_detach_mark(mark); + mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_free_mark(mark); + fsnotify_put_mark(mark); + kfree(chunk); return 0; } + replace_mark_chunk(mark, chunk); chunk->owners[0].index = (1U << 31); chunk->owners[0].owner = tree; get_tree(tree); @@ -378,35 +439,49 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) tree->root = chunk; list_add(&tree->same_root, &chunk->trees); } + chunk->key = inode_to_key(inode); + /* + * Inserting into the hash table has to go last as once we do that RCU + * readers can see the chunk. + */ insert_hash(chunk); spin_unlock(&hash_lock); - spin_unlock(&entry->lock); - fsnotify_put_mark(entry); /* drop initial reference */ + mutex_unlock(&audit_tree_group->mark_mutex); + /* + * Drop our initial reference. When mark we point to is getting freed, + * we get notification through ->freeing_mark callback and cleanup + * chunk pointing to this mark. + */ + fsnotify_put_mark(mark); return 0; } /* the first tagged inode becomes root of tree */ static int tag_chunk(struct inode *inode, struct audit_tree *tree) { - struct fsnotify_mark *old_entry, *chunk_entry; - struct audit_tree *owner; + struct fsnotify_mark *mark; struct audit_chunk *chunk, *old; struct node *p; int n; - old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks, - audit_tree_group); - if (!old_entry) + mutex_lock(&audit_tree_group->mark_mutex); + mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group); + if (!mark) return create_chunk(inode, tree); - old = container_of(old_entry, struct audit_chunk, mark); - + /* + * Found mark is guaranteed to be attached and mark_mutex protects mark + * from getting detached and thus it makes sure there is chunk attached + * to the mark. + */ /* are we already there? */ spin_lock(&hash_lock); + old = mark_chunk(mark); for (n = 0; n < old->count; n++) { if (old->owners[n].owner == tree) { spin_unlock(&hash_lock); - fsnotify_put_mark(old_entry); + mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_put_mark(mark); return 0; } } @@ -414,83 +489,38 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) chunk = alloc_chunk(old->count + 1); if (!chunk) { - fsnotify_put_mark(old_entry); + mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_put_mark(mark); return -ENOMEM; } - chunk_entry = &chunk->mark; - - mutex_lock(&old_entry->group->mark_mutex); - spin_lock(&old_entry->lock); - /* - * mark_mutex protects mark from getting detached and thus also from - * mark->connector->obj getting NULL. - */ - if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { - /* old_entry is being shot, lets just lie */ - spin_unlock(&old_entry->lock); - mutex_unlock(&old_entry->group->mark_mutex); - fsnotify_put_mark(old_entry); - fsnotify_put_mark(&chunk->mark); - return -ENOENT; - } - - if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj, - FSNOTIFY_OBJ_TYPE_INODE, 1)) { - spin_unlock(&old_entry->lock); - mutex_unlock(&old_entry->group->mark_mutex); - fsnotify_put_mark(chunk_entry); - fsnotify_put_mark(old_entry); - return -ENOSPC; - } - - /* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */ - spin_lock(&chunk_entry->lock); spin_lock(&hash_lock); - - /* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */ if (tree->goner) { spin_unlock(&hash_lock); - chunk->dead = 1; - spin_unlock(&chunk_entry->lock); - spin_unlock(&old_entry->lock); - mutex_unlock(&old_entry->group->mark_mutex); - - fsnotify_destroy_mark(chunk_entry, audit_tree_group); - - fsnotify_put_mark(chunk_entry); - fsnotify_put_mark(old_entry); + mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_put_mark(mark); + kfree(chunk); return 0; } - list_replace_init(&old->trees, &chunk->trees); - for (n = 0, p = chunk->owners; n < old->count; n++, p++) { - struct audit_tree *s = old->owners[n].owner; - p->owner = s; - p->index = old->owners[n].index; - if (!s) /* result of fallback in untag */ - continue; - get_tree(s); - list_replace_init(&old->owners[n].list, &p->list); - } + p = &chunk->owners[chunk->count - 1]; p->index = (chunk->count - 1) | (1U<<31); p->owner = tree; get_tree(tree); list_add(&p->list, &tree->chunks); - list_replace_rcu(&old->hash, &chunk->hash); - list_for_each_entry(owner, &chunk->trees, same_root) - owner->root = chunk; - old->dead = 1; if (!tree->root) { tree->root = chunk; list_add(&tree->same_root, &chunk->trees); } + /* + * This has to go last when updating chunk as once replace_chunk() is + * called, new RCU readers can see the new chunk. + */ + replace_chunk(chunk, old); spin_unlock(&hash_lock); - spin_unlock(&chunk_entry->lock); - spin_unlock(&old_entry->lock); - mutex_unlock(&old_entry->group->mark_mutex); - fsnotify_destroy_mark(old_entry, audit_tree_group); - fsnotify_put_mark(chunk_entry); /* drop initial reference */ - fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ + mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */ + audit_mark_put_chunk(old); + return 0; } @@ -503,8 +533,7 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule) ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return; - audit_log_format(ab, "op=remove_rule"); - audit_log_format(ab, " dir="); + audit_log_format(ab, "op=remove_rule dir="); audit_log_untrustedstring(ab, rule->tree->pathname); audit_log_key(ab, rule->filterkey); audit_log_format(ab, " list=%d res=1", rule->listnr); @@ -534,22 +563,48 @@ static void kill_rules(struct audit_tree *tree) } /* - * finish killing struct audit_tree + * Remove tree from chunks. If 'tagged' is set, remove tree only from tagged + * chunks. The function expects tagged chunks are all at the beginning of the + * chunks list. */ -static void prune_one(struct audit_tree *victim) +static void prune_tree_chunks(struct audit_tree *victim, bool tagged) { spin_lock(&hash_lock); while (!list_empty(&victim->chunks)) { struct node *p; + struct audit_chunk *chunk; + struct fsnotify_mark *mark; + + p = list_first_entry(&victim->chunks, struct node, list); + /* have we run out of marked? */ + if (tagged && !(p->index & (1U<<31))) + break; + chunk = find_chunk(p); + mark = chunk->mark; + remove_chunk_node(chunk, p); + /* Racing with audit_tree_freeing_mark()? */ + if (!mark) + continue; + fsnotify_get_mark(mark); + spin_unlock(&hash_lock); - p = list_entry(victim->chunks.next, struct node, list); + untag_chunk(chunk, mark); + fsnotify_put_mark(mark); - untag_chunk(p); + spin_lock(&hash_lock); } spin_unlock(&hash_lock); put_tree(victim); } +/* + * finish killing struct audit_tree + */ +static void prune_one(struct audit_tree *victim) +{ + prune_tree_chunks(victim, false); +} + /* trim the uncommitted chunks from tree */ static void trim_marked(struct audit_tree *tree) @@ -569,18 +624,11 @@ static void trim_marked(struct audit_tree *tree) list_add(p, &tree->chunks); } } + spin_unlock(&hash_lock); - while (!list_empty(&tree->chunks)) { - struct node *node; - - node = list_entry(tree->chunks.next, struct node, list); - - /* have we run out of marked? */ - if (!(node->index & (1U<<31))) - break; + prune_tree_chunks(tree, true); - untag_chunk(node); - } + spin_lock(&hash_lock); if (!tree->root && !tree->goner) { tree->goner = 1; spin_unlock(&hash_lock); @@ -661,7 +709,7 @@ void audit_trim_trees(void) /* this could be NULL if the watch is dying else where... */ node->index |= 1U<<31; if (iterate_mounts(compare_root, - (void *)chunk_to_key(chunk), + (void *)(chunk->key), root_mnt)) node->index &= ~(1U<<31); } @@ -959,10 +1007,6 @@ static void evict_chunk(struct audit_chunk *chunk) int need_prune = 0; int n; - if (chunk->dead) - return; - - chunk->dead = 1; mutex_lock(&audit_filter_mutex); spin_lock(&hash_lock); while (!list_empty(&chunk->trees)) { @@ -999,17 +1043,27 @@ static int audit_tree_handle_event(struct fsnotify_group *group, return 0; } -static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group) +static void audit_tree_freeing_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group) { - struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark); + struct audit_chunk *chunk; - evict_chunk(chunk); + mutex_lock(&mark->group->mark_mutex); + spin_lock(&hash_lock); + chunk = mark_chunk(mark); + replace_mark_chunk(mark, NULL); + spin_unlock(&hash_lock); + mutex_unlock(&mark->group->mark_mutex); + if (chunk) { + evict_chunk(chunk); + audit_mark_put_chunk(chunk); + } /* * We are guaranteed to have at least one reference to the mark from * either the inode or the caller of fsnotify_destroy_mark(). */ - BUG_ON(refcount_read(&entry->refcnt) < 1); + BUG_ON(refcount_read(&mark->refcnt) < 1); } static const struct fsnotify_ops audit_tree_ops = { @@ -1022,6 +1076,8 @@ static int __init audit_tree_init(void) { int i; + audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC); + audit_tree_group = fsnotify_alloc_group(&audit_tree_ops); if (IS_ERR(audit_tree_group)) audit_panic("cannot initialize fsnotify group for rectree watches"); |