From 1fd8bc7cd889bd73d07a83cb32d674ac68f99153 Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Sat, 14 Dec 2024 17:30:05 +0800 Subject: maple_tree: reload mas before the second call for mas_empty_area Change the LONG_MAX in simple_offset_add to 1024, and do latter: [root@fedora ~]# mkdir /tmp/dir [root@fedora ~]# for i in {1..1024}; do touch /tmp/dir/$i; done touch: cannot touch '/tmp/dir/1024': Device or resource busy [root@fedora ~]# rm /tmp/dir/123 [root@fedora ~]# touch /tmp/dir/1024 [root@fedora ~]# rm /tmp/dir/100 [root@fedora ~]# touch /tmp/dir/1025 touch: cannot touch '/tmp/dir/1025': Device or resource busy After we delete file 100, actually this is a empty entry, but the latter create failed unexpected. mas_alloc_cyclic has two chance to find empty entry. First find the entry with range range_lo and range_hi, if no empty entry exist, and range_lo > min, retry find with range min and range_hi. However, the first call mas_empty_area may mark mas as EBUSY, and the second call for mas_empty_area will return false directly. Fix this by reload mas before second call for mas_empty_area. [Liam.Howlett@Oracle.com: fix mas_alloc_cyclic() second search] Link: https://lore.kernel.org/all/20241216060600.287B4C4CED0@smtp.kernel.org/ Link: https://lkml.kernel.org/r/20241216190113.1226145-2-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20241214093005.72284-1-yangerkun@huaweicloud.com Fixes: 9b6713cc7522 ("maple_tree: Add mtree_alloc_cyclic()") Signed-off-by: Yang Erkun Signed-off-by: Liam R. Howlett Cc: Christian Brauner Cc: Chuck Lever says: Cc: Liam R. Howlett Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/maple_tree.c') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index d0ae808f3a14..047397136f15 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4354,6 +4354,7 @@ int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, ret = 1; } if (ret < 0 && range_lo > min) { + mas_reset(mas); ret = mas_empty_area(mas, min, range_hi, 1); if (ret == 0) ret = 1; -- cgit v1.2.3 From 002ebb925e2fcf150b0e346a960060ea4789ff01 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 25 Nov 2024 02:41:56 +0000 Subject: maple_tree: use mas_next_slot() directly The loop condition makes sure (mas.last < max), so we can directly use mas_next_slot() here. Since no other use of mas_next_entry(), it is removed. Link: https://lkml.kernel.org/r/20241125024156.26093-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Liam R. Howlett Cc: Sidhartha Kumar Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- lib/maple_tree.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'lib/maple_tree.c') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 047397136f15..36e603645a30 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4745,29 +4745,6 @@ again: return entry; } -/* - * mas_next_entry() - Internal function to get the next entry. - * @mas: The maple state - * @limit: The maximum range start. - * - * Set the @mas->node to the next entry and the range_start to - * the beginning value for the entry. Does not check beyond @limit. - * Sets @mas->index and @mas->last to the range, Does not update @mas->index and - * @mas->last on overflow. - * Restarts on dead nodes. - * - * Return: the next entry or %NULL. - */ -static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) -{ - if (mas->last >= limit) { - mas->status = ma_overflow; - return NULL; - } - - return mas_next_slot(mas, limit, false); -} - /* * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the * highest gap address of a given size in a given node and descend. @@ -6938,7 +6915,7 @@ retry: goto unlock; while (mas_is_active(&mas) && (mas.last < max)) { - entry = mas_next_entry(&mas, max); + entry = mas_next_slot(&mas, max, false); if (likely(entry && !xa_is_zero(entry))) break; } -- cgit v1.2.3 From 5f8db8d428807e52c816610e8acea576f856eb6d Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 16 Nov 2024 01:48:03 +0000 Subject: maple_tree: index has been checked to be smaller than pivot Patch series "mas_anode_descend() related cleanup". Some cleanup related to mas_anode_descend(). This patch (of 3): At the beginning of loop, it has checked the range is in lower bounds. Link: https://lkml.kernel.org/r/20241116014805.11547-1-richard.weiyang@gmail.com Link: https://lkml.kernel.org/r/20241116014805.11547-2-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Sidhartha Kumar Signed-off-by: Andrew Morton --- lib/maple_tree.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'lib/maple_tree.c') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 36e603645a30..57603524e2dd 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4882,13 +4882,12 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) found = true; goto done; } - if (mas->index <= pivot) { - mas->node = mas_slot(mas, slots, offset); - mas->min = min; - mas->max = pivot; - offset = 0; - break; - } + + mas->node = mas_slot(mas, slots, offset); + mas->min = min; + mas->max = pivot; + offset = 0; + break; } next_slot: min = pivot + 1; -- cgit v1.2.3 From f5bd418727856b104d6dcb43b144381c31250383 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 16 Nov 2024 01:48:04 +0000 Subject: maple_tree: not possible to be a root node after loop Empty tree and single entry tree is handled else whether, so the maple tree here must be a tree with nodes. If the height is 1 and we found the gap, it will jump to *done* since it is also a leaf. If the height is more than one, and there may be an available range, we will descend the tree, which is not root anymore. If there is no available range, we will set error and return. This means the check for root node here is not necessary. Link: https://lkml.kernel.org/r/20241116014805.11547-3-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Sidhartha Kumar Signed-off-by: Andrew Morton --- lib/maple_tree.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib/maple_tree.c') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 57603524e2dd..3174234d77cb 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4880,7 +4880,7 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) if (gap >= size) { if (ma_is_leaf(type)) { found = true; - goto done; + break; } mas->node = mas_slot(mas, slots, offset); @@ -4897,9 +4897,6 @@ next_slot: } } - if (mte_is_root(mas->node)) - found = true; -done: mas->offset = offset; return found; } -- cgit v1.2.3 From f2760364add9996b8937ae388c8035ea1f242440 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 16 Nov 2024 01:48:05 +0000 Subject: maple_tree: we don't set offset to MAPLE_NODE_SLOTS on error When mas_anode_descend() not find gap, it sets -EBUSY instead of setting offset to MAPLE_NODE_SLOTS. Link: https://lkml.kernel.org/r/20241116014805.11547-4-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Sidhartha Kumar Signed-off-by: Andrew Morton --- lib/maple_tree.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib/maple_tree.c') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 3174234d77cb..fe7f9e1f5bbb 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5000,8 +5000,8 @@ static inline void mas_awalk(struct ma_state *mas, unsigned long size) * There are 4 options: * go to child (descend) * go back to parent (ascend) - * no gap found. (return, slot == MAPLE_NODE_SLOTS) - * found the gap. (return, slot != MAPLE_NODE_SLOTS) + * no gap found. (return, error == -EBUSY) + * found the gap. (return) */ while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { if (last == mas->node) @@ -5086,9 +5086,6 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, return xa_err(mas->node); offset = mas->offset; - if (unlikely(offset == MAPLE_NODE_SLOTS)) - return -EBUSY; - node = mas_mn(mas); mt = mte_node_type(mas->node); pivots = ma_pivots(node, mt); -- cgit v1.2.3 From 4f6a6bed0bfef4b966f076f33eb4f5547226056a Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 13 Nov 2024 03:16:14 +0000 Subject: maple_tree: simplify split calculation Patch series "simplify split calculation", v3. This patch (of 3): The current calculation for splitting nodes tries to enforce a minimum span on the leaf nodes. This code is complex and never worked correctly to begin with, due to the min value being passed as 0 for all leaves. The calculation should just split the data as equally as possible between the new nodes. Note that b_end will be one more than the data, so the left side is still favoured in the calculation. The current code may also lead to a deficient node by not leaving enough data for the right side of the split. This issue is also addressed with the split calculation change. [Liam.Howlett@Oracle.com: rephrase the change log] Link: https://lkml.kernel.org/r/20241113031616.10530-1-richard.weiyang@gmail.com Link: https://lkml.kernel.org/r/20241113031616.10530-2-richard.weiyang@gmail.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Wei Yang Reviewed-by: Liam R. Howlett Cc: Sidhartha Kumar Cc: Lorenzo Stoakes Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) (limited to 'lib/maple_tree.c') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index fe7f9e1f5bbb..ca8ae1e1cc0a 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1863,11 +1863,11 @@ static inline int mab_no_null_split(struct maple_big_node *b_node, * Return: The first split location. The middle split is set in @mid_split. */ static inline int mab_calc_split(struct ma_state *mas, - struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) + struct maple_big_node *bn, unsigned char *mid_split) { unsigned char b_end = bn->b_end; int split = b_end / 2; /* Assume equal split. */ - unsigned char slot_min, slot_count = mt_slots[bn->type]; + unsigned char slot_count = mt_slots[bn->type]; /* * To support gap tracking, all NULL entries are kept together and a node cannot @@ -1900,18 +1900,7 @@ static inline int mab_calc_split(struct ma_state *mas, split = b_end / 3; *mid_split = split * 2; } else { - slot_min = mt_min_slots[bn->type]; - *mid_split = 0; - /* - * Avoid having a range less than the slot count unless it - * causes one node to be deficient. - * NOTE: mt_min_slots is 1 based, b_end and split are zero. - */ - while ((split < slot_count - 1) && - ((bn->pivot[split] - min) < slot_count - 1) && - (b_end - split > slot_min)) - split++; } /* Avoid ending a node on a NULL entry */ @@ -2377,7 +2366,7 @@ static inline struct maple_enode static inline unsigned char mas_mab_to_node(struct ma_state *mas, struct maple_big_node *b_node, struct maple_enode **left, struct maple_enode **right, struct maple_enode **middle, - unsigned char *mid_split, unsigned long min) + unsigned char *mid_split) { unsigned char split = 0; unsigned char slot_count = mt_slots[b_node->type]; @@ -2390,7 +2379,7 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas, if (b_node->b_end < slot_count) { split = b_node->b_end; } else { - split = mab_calc_split(mas, b_node, mid_split, min); + split = mab_calc_split(mas, b_node, mid_split); *right = mas_new_ma_node(mas, b_node); } @@ -2877,7 +2866,7 @@ static void mas_spanning_rebalance(struct ma_state *mas, mast->bn->b_end--; mast->bn->type = mte_node_type(mast->orig_l->node); split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, - &mid_split, mast->orig_l->min); + &mid_split); mast_set_split_parents(mast, left, middle, right, split, mid_split); mast_cp_to_nodes(mast, left, middle, right, split, mid_split); @@ -3365,7 +3354,7 @@ static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) if (mas_push_data(mas, height, &mast, false)) break; - split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); + split = mab_calc_split(mas, b_node, &mid_split); mast_split_data(&mast, mas, split); /* * Usually correct, mab_mas_cp in the above call overwrites -- cgit v1.2.3 From 7318f95ba40bed454d1e20f27e821198d3def4e5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 13 Nov 2024 03:16:16 +0000 Subject: maple_tree: only root node could be deficient Each level's rightmost node should have (max == ULONG_MAX). This means current validation skips the right most node on each level. Only the root node may be below the minimum data threshold. Link: https://lkml.kernel.org/r/20241113031616.10530-4-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Liam R. Howlett Cc: Sidhartha Kumar Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- lib/maple_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/maple_tree.c') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index ca8ae1e1cc0a..f7153ade1be5 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -7556,7 +7556,7 @@ void mt_validate(struct maple_tree *mt) MAS_WARN_ON(&mas, mte_dead_node(mas.node)); end = mas_data_end(&mas); if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && - (mas.max != ULONG_MAX))) { + (!mte_is_root(mas.node)))) { pr_err("Invalid size %u of " PTR_FMT "\n", end, mas_mn(&mas)); } -- cgit v1.2.3