summaryrefslogtreecommitdiff
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c259
1 files changed, 170 insertions, 89 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 089c958aa2c3..72bfac2d6dce 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -408,6 +408,10 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
ext4_group_t group, int cr);
+static int ext4_try_to_trim_range(struct super_block *sb,
+ struct ext4_buddy *e4b, ext4_grpblk_t start,
+ ext4_grpblk_t max, ext4_grpblk_t minblocks);
+
/*
* The algorithm using this percpu seq counter goes below:
* 1. We sample the percpu discard_pa_seq counter before trying for block
@@ -2474,6 +2478,12 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
* This could return negative error code if something goes wrong
* during ext4_mb_init_group(). This should not be called with
* ext4_lock_group() held.
+ *
+ * Note: because we are conditionally operating with the group lock in
+ * the EXT4_MB_STRICT_CHECK case, we need to fake out sparse in this
+ * function using __acquire and __release. This means we need to be
+ * super careful before messing with the error path handling via "goto
+ * out"!
*/
static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
ext4_group_t group, int cr)
@@ -2487,8 +2497,10 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
- if (should_lock)
+ if (should_lock) {
ext4_lock_group(sb, group);
+ __release(ext4_group_lock_ptr(sb, group));
+ }
free = grp->bb_free;
if (free == 0)
goto out;
@@ -2496,8 +2508,10 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
goto out;
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
goto out;
- if (should_lock)
+ if (should_lock) {
+ __acquire(ext4_group_lock_ptr(sb, group));
ext4_unlock_group(sb, group);
+ }
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
@@ -2524,12 +2538,16 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
return ret;
}
- if (should_lock)
+ if (should_lock) {
ext4_lock_group(sb, group);
+ __release(ext4_group_lock_ptr(sb, group));
+ }
ret = ext4_mb_good_group(ac, group, cr);
out:
- if (should_lock)
+ if (should_lock) {
+ __acquire(ext4_group_lock_ptr(sb, group));
ext4_unlock_group(sb, group);
+ }
return ret;
}
@@ -2965,6 +2983,7 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
}
static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
+__acquires(&EXT4_SB(sb)->s_mb_rb_lock)
{
struct super_block *sb = PDE_DATA(file_inode(seq->file));
unsigned long position;
@@ -3037,6 +3056,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
}
static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
+__releases(&EXT4_SB(sb)->s_mb_rb_lock)
{
struct super_block *sb = PDE_DATA(file_inode(seq->file));
@@ -3308,6 +3328,57 @@ static int ext4_groupinfo_create_slab(size_t size)
return 0;
}
+static void ext4_discard_work(struct work_struct *work)
+{
+ struct ext4_sb_info *sbi = container_of(work,
+ struct ext4_sb_info, s_discard_work);
+ struct super_block *sb = sbi->s_sb;
+ struct ext4_free_data *fd, *nfd;
+ struct ext4_buddy e4b;
+ struct list_head discard_list;
+ ext4_group_t grp, load_grp;
+ int err = 0;
+
+ INIT_LIST_HEAD(&discard_list);
+ spin_lock(&sbi->s_md_lock);
+ list_splice_init(&sbi->s_discard_list, &discard_list);
+ spin_unlock(&sbi->s_md_lock);
+
+ load_grp = UINT_MAX;
+ list_for_each_entry_safe(fd, nfd, &discard_list, efd_list) {
+ /*
+ * If filesystem is umounting or no memory or suffering
+ * from no space, give up the discard
+ */
+ if ((sb->s_flags & SB_ACTIVE) && !err &&
+ !atomic_read(&sbi->s_retry_alloc_pending)) {
+ grp = fd->efd_group;
+ if (grp != load_grp) {
+ if (load_grp != UINT_MAX)
+ ext4_mb_unload_buddy(&e4b);
+
+ err = ext4_mb_load_buddy(sb, grp, &e4b);
+ if (err) {
+ kmem_cache_free(ext4_free_data_cachep, fd);
+ load_grp = UINT_MAX;
+ continue;
+ } else {
+ load_grp = grp;
+ }
+ }
+
+ ext4_lock_group(sb, grp);
+ ext4_try_to_trim_range(sb, &e4b, fd->efd_start_cluster,
+ fd->efd_start_cluster + fd->efd_count - 1, 1);
+ ext4_unlock_group(sb, grp);
+ }
+ kmem_cache_free(ext4_free_data_cachep, fd);
+ }
+
+ if (load_grp != UINT_MAX)
+ ext4_mb_unload_buddy(&e4b);
+}
+
int ext4_mb_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3376,6 +3447,9 @@ int ext4_mb_init(struct super_block *sb)
spin_lock_init(&sbi->s_md_lock);
sbi->s_mb_free_pending = 0;
INIT_LIST_HEAD(&sbi->s_freed_data_list);
+ INIT_LIST_HEAD(&sbi->s_discard_list);
+ INIT_WORK(&sbi->s_discard_work, ext4_discard_work);
+ atomic_set(&sbi->s_retry_alloc_pending, 0);
sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
@@ -3474,6 +3548,14 @@ int ext4_mb_release(struct super_block *sb)
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
int count;
+ if (test_opt(sb, DISCARD)) {
+ /*
+ * wait the discard work to drain all of ext4_free_data
+ */
+ flush_work(&sbi->s_discard_work);
+ WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
+ }
+
if (sbi->s_group_info) {
for (i = 0; i < ngroups; i++) {
cond_resched();
@@ -3596,7 +3678,6 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
put_page(e4b.bd_bitmap_page);
}
ext4_unlock_group(sb, entry->efd_group);
- kmem_cache_free(ext4_free_data_cachep, entry);
ext4_mb_unload_buddy(&e4b);
mb_debug(sb, "freed %d blocks in %d structures\n", count,
@@ -3611,10 +3692,9 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_free_data *entry, *tmp;
- struct bio *discard_bio = NULL;
struct list_head freed_data_list;
struct list_head *cut_pos = NULL;
- int err;
+ bool wake;
INIT_LIST_HEAD(&freed_data_list);
@@ -3629,30 +3709,20 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
cut_pos);
spin_unlock(&sbi->s_md_lock);
- if (test_opt(sb, DISCARD)) {
- list_for_each_entry(entry, &freed_data_list, efd_list) {
- err = ext4_issue_discard(sb, entry->efd_group,
- entry->efd_start_cluster,
- entry->efd_count,
- &discard_bio);
- if (err && err != -EOPNOTSUPP) {
- ext4_msg(sb, KERN_WARNING, "discard request in"
- " group:%d block:%d count:%d failed"
- " with %d", entry->efd_group,
- entry->efd_start_cluster,
- entry->efd_count, err);
- } else if (err == -EOPNOTSUPP)
- break;
- }
+ list_for_each_entry(entry, &freed_data_list, efd_list)
+ ext4_free_data_in_buddy(sb, entry);
- if (discard_bio) {
- submit_bio_wait(discard_bio);
- bio_put(discard_bio);
- }
+ if (test_opt(sb, DISCARD)) {
+ spin_lock(&sbi->s_md_lock);
+ wake = list_empty(&sbi->s_discard_list);
+ list_splice_tail(&freed_data_list, &sbi->s_discard_list);
+ spin_unlock(&sbi->s_md_lock);
+ if (wake)
+ queue_work(system_unbound_wq, &sbi->s_discard_work);
+ } else {
+ list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
+ kmem_cache_free(ext4_free_data_cachep, entry);
}
-
- list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
- ext4_free_data_in_buddy(sb, entry);
}
int __init ext4_init_mballoc(void)
@@ -3726,7 +3796,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
}
BUFFER_TRACE(bitmap_bh, "getting write access");
- err = ext4_journal_get_write_access(handle, bitmap_bh);
+ err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
+ EXT4_JTR_NONE);
if (err)
goto out_err;
@@ -3739,7 +3810,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ext4_free_group_clusters(sb, gdp));
BUFFER_TRACE(gdp_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gdp_bh);
+ err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE);
if (err)
goto out_err;
@@ -5916,7 +5987,8 @@ do_more:
}
BUFFER_TRACE(bitmap_bh, "getting write access");
- err = ext4_journal_get_write_access(handle, bitmap_bh);
+ err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
+ EXT4_JTR_NONE);
if (err)
goto error_return;
@@ -5926,7 +5998,7 @@ do_more:
* using it
*/
BUFFER_TRACE(gd_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gd_bh);
+ err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
if (err)
goto error_return;
#ifdef AGGRESSIVE_CHECK
@@ -6107,7 +6179,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
}
BUFFER_TRACE(bitmap_bh, "getting write access");
- err = ext4_journal_get_write_access(handle, bitmap_bh);
+ err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
+ EXT4_JTR_NONE);
if (err)
goto error_return;
@@ -6117,7 +6190,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
* using it
*/
BUFFER_TRACE(gd_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gd_bh);
+ err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
if (err)
goto error_return;
@@ -6183,19 +6256,19 @@ error_return:
* @sb: super block for the file system
* @start: starting block of the free extent in the alloc. group
* @count: number of blocks to TRIM
- * @group: alloc. group we are working with
* @e4b: ext4 buddy for the group
*
* Trim "count" blocks starting at "start" in the "group". To assure that no
* one will allocate those blocks, mark it as used in buddy bitmap. This must
* be called with under the group lock.
*/
-static int ext4_trim_extent(struct super_block *sb, int start, int count,
- ext4_group_t group, struct ext4_buddy *e4b)
+static int ext4_trim_extent(struct super_block *sb,
+ int start, int count, struct ext4_buddy *e4b)
__releases(bitlock)
__acquires(bitlock)
{
struct ext4_free_extent ex;
+ ext4_group_t group = e4b->bd_group;
int ret = 0;
trace_ext4_trim_extent(sb, group, start, count);
@@ -6218,51 +6291,21 @@ __acquires(bitlock)
return ret;
}
-/**
- * ext4_trim_all_free -- function to trim all free space in alloc. group
- * @sb: super block for file system
- * @group: group to be trimmed
- * @start: first group block to examine
- * @max: last group block to examine
- * @minblocks: minimum extent block count
- *
- * ext4_trim_all_free walks through group's buddy bitmap searching for free
- * extents. When the free block is found, ext4_trim_extent is called to TRIM
- * the extent.
- *
- *
- * ext4_trim_all_free walks through group's block bitmap searching for free
- * extents. When the free extent is found, mark it as used in group buddy
- * bitmap. Then issue a TRIM command on this extent and free the extent in
- * the group buddy bitmap. This is done until whole group is scanned.
- */
-static ext4_grpblk_t
-ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
- ext4_grpblk_t start, ext4_grpblk_t max,
- ext4_grpblk_t minblocks)
+static int ext4_try_to_trim_range(struct super_block *sb,
+ struct ext4_buddy *e4b, ext4_grpblk_t start,
+ ext4_grpblk_t max, ext4_grpblk_t minblocks)
+__acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
+__releases(ext4_group_lock_ptr(sb, e4b->bd_group))
{
+ ext4_grpblk_t next, count, free_count;
void *bitmap;
- ext4_grpblk_t next, count = 0, free_count = 0;
- struct ext4_buddy e4b;
int ret = 0;
- trace_ext4_trim_all_free(sb, group, start, max);
-
- ret = ext4_mb_load_buddy(sb, group, &e4b);
- if (ret) {
- ext4_warning(sb, "Error %d loading buddy information for %u",
- ret, group);
- return ret;
- }
- bitmap = e4b.bd_bitmap;
-
- ext4_lock_group(sb, group);
- if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
- minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
- goto out;
-
- start = (e4b.bd_info->bb_first_free > start) ?
- e4b.bd_info->bb_first_free : start;
+ bitmap = e4b->bd_bitmap;
+ start = (e4b->bd_info->bb_first_free > start) ?
+ e4b->bd_info->bb_first_free : start;
+ count = 0;
+ free_count = 0;
while (start <= max) {
start = mb_find_next_zero_bit(bitmap, max + 1, start);
@@ -6271,8 +6314,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
next = mb_find_next_bit(bitmap, max + 1, start);
if ((next - start) >= minblocks) {
- ret = ext4_trim_extent(sb, start,
- next - start, group, &e4b);
+ ret = ext4_trim_extent(sb, start, next - start, e4b);
if (ret && ret != -EOPNOTSUPP)
break;
ret = 0;
@@ -6287,25 +6329,64 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
}
if (need_resched()) {
- ext4_unlock_group(sb, group);
+ ext4_unlock_group(sb, e4b->bd_group);
cond_resched();
- ext4_lock_group(sb, group);
+ ext4_lock_group(sb, e4b->bd_group);
}
- if ((e4b.bd_info->bb_free - free_count) < minblocks)
+ if ((e4b->bd_info->bb_free - free_count) < minblocks)
break;
}
- if (!ret) {
- ret = count;
- EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+ return count;
+}
+
+/**
+ * ext4_trim_all_free -- function to trim all free space in alloc. group
+ * @sb: super block for file system
+ * @group: group to be trimmed
+ * @start: first group block to examine
+ * @max: last group block to examine
+ * @minblocks: minimum extent block count
+ *
+ * ext4_trim_all_free walks through group's block bitmap searching for free
+ * extents. When the free extent is found, mark it as used in group buddy
+ * bitmap. Then issue a TRIM command on this extent and free the extent in
+ * the group buddy bitmap.
+ */
+static ext4_grpblk_t
+ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+ ext4_grpblk_t start, ext4_grpblk_t max,
+ ext4_grpblk_t minblocks)
+{
+ struct ext4_buddy e4b;
+ int ret;
+
+ trace_ext4_trim_all_free(sb, group, start, max);
+
+ ret = ext4_mb_load_buddy(sb, group, &e4b);
+ if (ret) {
+ ext4_warning(sb, "Error %d loading buddy information for %u",
+ ret, group);
+ return ret;
+ }
+
+ ext4_lock_group(sb, group);
+
+ if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+ minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
+ ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+ if (ret >= 0)
+ EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+ } else {
+ ret = 0;
}
-out:
+
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
ext4_debug("trimmed %d blocks in the group %d\n",
- count, group);
+ ret, group);
return ret;
}