apparmor: cache buffers on percpu list if there is lock contention

commit df323337e507 ("apparmor: Use a memory pool instead per-CPU caches") changed buffer allocation to use a memory pool, however on a heavily loaded machine there can be lock contention on the global buffers lock. Add a percpu list to cache buffers on when lock contention is encountered. When allocating buffers attempt to use cached buffers first, before taking the global buffers lock. When freeing buffers try to put them back to the global list but if contention is encountered, put the buffer on the percpu list. The length of time a buffer is held on the percpu list is dynamically adjusted based on lock contention. The amount of hold time is increased and decreased linearly. v5: - simplify base patch by removing: improvements can be added later - MAX_LOCAL and must lock - contention scaling. v4: - fix percpu ->count buffer count which had been spliced across a debug patch. - introduce define for MAX_LOCAL_COUNT - rework count check and locking around it. - update commit message to reference commit that introduced the memory. v3: - limit number of buffers that can be pushed onto the percpu list. This avoids a problem on some kernels where one percpu list can inherit buffers from another cpu after a reschedule, causing more kernel memory to used than is necessary. Under normal conditions this should eventually return to normal but under pathelogical conditions the extra memory consumption may have been unbouanded v2: - dynamically adjust buffer hold time on percpu list based on lock contention. v1: - cache buffers on percpu list on lock contention Reported-by: Sergey Senozhatsky <senozhatsky@chromium.org> Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com> Signed-off-by: John Johansen <john.johansen@canonical.com>
author: John Johansen <john.johansen@canonical.com> 2022-10-25 11:18:41 +0300
committer: John Johansen <john.johansen@canonical.com> 2023-10-19 02:00:45 +0300
commit: ea9bae12d02819556db63348db8bd8441eb316f2 (patch)
tree: 8f2a31d41082be914673193fe991a4e8bee809cb /security
parent: c4371d90633b73cf6e86aff43ff2b5d95ad2b9eb (diff)
download: linux-ea9bae12d02819556db63348db8bd8441eb316f2.tar.xz
1 files changed, 62 insertions, 5 deletions
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index c80c1bd3024a..ce4f3e7a784d 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -49,12 +49,19 @@ union aa_buffer {
 	DECLARE_FLEX_ARRAY(char, buffer);
 };
 
+struct aa_local_cache {
+	unsigned int hold;
+	unsigned int count;
+	struct list_head head;
+};
+
 #define RESERVE_COUNT 2
 static int reserve_count = RESERVE_COUNT;
 static int buffer_count;
 
 static LIST_HEAD(aa_global_buffers);
 static DEFINE_SPINLOCK(aa_buffers_lock);
+static DEFINE_PER_CPU(struct aa_local_cache, aa_local_buffers);
 
 /*
  * LSM hook functions
@@ -1789,11 +1796,32 @@ static int param_set_mode(const char *val, const struct kernel_param *kp)
 char *aa_get_buffer(bool in_atomic)
 {
 	union aa_buffer *aa_buf;
+	struct aa_local_cache *cache;
 	bool try_again = true;
 	gfp_t flags = (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 
+	/* use per cpu cached buffers first */
+	cache = get_cpu_ptr(&aa_local_buffers);
+	if (!list_empty(&cache->head)) {
+		aa_buf = list_first_entry(&cache->head, union aa_buffer, list);
+		list_del(&aa_buf->list);
+		cache->hold--;
+		cache->count--;
+		put_cpu_ptr(&aa_local_buffers);
+		return &aa_buf->buffer[0];
+	}
+	put_cpu_ptr(&aa_local_buffers);
+
+	if (!spin_trylock(&aa_buffers_lock)) {
+		cache = get_cpu_ptr(&aa_local_buffers);
+		cache->hold += 1;
+		put_cpu_ptr(&aa_local_buffers);
+		spin_lock(&aa_buffers_lock);
+	} else {
+		cache = get_cpu_ptr(&aa_local_buffers);
+		put_cpu_ptr(&aa_local_buffers);
+	}
 retry:
-	spin_lock(&aa_buffers_lock);
 	if (buffer_count > reserve_count ||
 	    (in_atomic && !list_empty(&aa_global_buffers))) {
 		aa_buf = list_first_entry(&aa_global_buffers, union aa_buffer,
@@ -1819,6 +1847,7 @@ retry:
 	if (!aa_buf) {
 		if (try_again) {
 			try_again = false;
+			spin_lock(&aa_buffers_lock);
 			goto retry;
 		}
 		pr_warn_once("AppArmor: Failed to allocate a memory buffer.\n");
@@ -1830,15 +1859,34 @@ retry:
 void aa_put_buffer(char *buf)
 {
 	union aa_buffer *aa_buf;
+	struct aa_local_cache *cache;
 
 	if (!buf)
 		return;
 	aa_buf = container_of(buf, union aa_buffer, buffer[0]);
 
-	spin_lock(&aa_buffers_lock);
-	list_add(&aa_buf->list, &aa_global_buffers);
-	buffer_count++;
-	spin_unlock(&aa_buffers_lock);
+	cache = get_cpu_ptr(&aa_local_buffers);
+	if (!cache->hold) {
+		put_cpu_ptr(&aa_local_buffers);
+
+		if (spin_trylock(&aa_buffers_lock)) {
+			/* put back on global list */
+			list_add(&aa_buf->list, &aa_global_buffers);
+			buffer_count++;
+			spin_unlock(&aa_buffers_lock);
+			cache = get_cpu_ptr(&aa_local_buffers);
+			put_cpu_ptr(&aa_local_buffers);
+			return;
+		}
+		/* contention on global list, fallback to percpu */
+		cache = get_cpu_ptr(&aa_local_buffers);
+		cache->hold += 1;
+	}
+
+	/* cache in percpu list */
+	list_add(&aa_buf->list, &cache->head);
+	cache->count++;
+	put_cpu_ptr(&aa_local_buffers);
 }
 
 /*
@@ -1881,6 +1929,15 @@ static int __init alloc_buffers(void)
 	int i, num;
 
 	/*
+	 * per cpu set of cached allocated buffers used to help reduce
+	 * lock contention
+	 */
+	for_each_possible_cpu(i) {
+		per_cpu(aa_local_buffers, i).hold = 0;
+		per_cpu(aa_local_buffers, i).count = 0;
+		INIT_LIST_HEAD(&per_cpu(aa_local_buffers, i).head);
+	}
+	/*
 	 * A function may require two buffers at once. Usually the buffers are
 	 * used for a short period of time and are shared. On UP kernel buffers
 	 * two should be enough, with more CPUs it is possible that more
author	John Johansen <john.johansen@canonical.com>	2022-10-25 11:18:41 +0300
committer	John Johansen <john.johansen@canonical.com>	2023-10-19 02:00:45 +0300
commit	ea9bae12d02819556db63348db8bd8441eb316f2 (patch)
tree	8f2a31d41082be914673193fe991a4e8bee809cb /security
parent	c4371d90633b73cf6e86aff43ff2b5d95ad2b9eb (diff)
download	linux-ea9bae12d02819556db63348db8bd8441eb316f2.tar.xz