diff options
Diffstat (limited to 'drivers/md/dm-zoned-reclaim.c')
-rw-r--r-- | drivers/md/dm-zoned-reclaim.c | 210 |
1 files changed, 132 insertions, 78 deletions
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index e7ace908a9b7..2261b4dd60b7 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -13,7 +13,6 @@ struct dmz_reclaim { struct dmz_metadata *metadata; - struct dmz_dev *dev; struct delayed_work work; struct workqueue_struct *wq; @@ -22,6 +21,8 @@ struct dmz_reclaim { struct dm_kcopyd_throttle kc_throttle; int kc_err; + int dev_idx; + unsigned long flags; /* Last target access time */ @@ -44,13 +45,13 @@ enum { * Percentage of unmapped (free) random zones below which reclaim starts * even if the target is busy. */ -#define DMZ_RECLAIM_LOW_UNMAP_RND 30 +#define DMZ_RECLAIM_LOW_UNMAP_ZONES 30 /* * Percentage of unmapped (free) random zones above which reclaim will * stop if the target is busy. */ -#define DMZ_RECLAIM_HIGH_UNMAP_RND 50 +#define DMZ_RECLAIM_HIGH_UNMAP_ZONES 50 /* * Align a sequential zone write pointer to chunk_block. @@ -59,6 +60,7 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone, sector_t block) { struct dmz_metadata *zmd = zrc->metadata; + struct dmz_dev *dev = zone->dev; sector_t wp_block = zone->wp_block; unsigned int nr_blocks; int ret; @@ -74,15 +76,15 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone, * pointer and the requested position. */ nr_blocks = block - wp_block; - ret = blkdev_issue_zeroout(zrc->dev->bdev, + ret = blkdev_issue_zeroout(dev->bdev, dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block), dmz_blk2sect(nr_blocks), GFP_NOIO, 0); if (ret) { - dmz_dev_err(zrc->dev, + dmz_dev_err(dev, "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d", - dmz_id(zmd, zone), (unsigned long long)wp_block, + zone->id, (unsigned long long)wp_block, (unsigned long long)block, nr_blocks, ret); - dmz_check_bdev(zrc->dev); + dmz_check_bdev(dev); return ret; } @@ -116,7 +118,6 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, struct dm_zone *src_zone, struct dm_zone *dst_zone) { struct dmz_metadata *zmd = zrc->metadata; - struct dmz_dev *dev = zrc->dev; struct dm_io_region src, dst; sector_t block = 0, end_block; sector_t nr_blocks; @@ -128,7 +129,7 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, if (dmz_is_seq(src_zone)) end_block = src_zone->wp_block; else - end_block = dev->zone_nr_blocks; + end_block = dmz_zone_nr_blocks(zmd); src_zone_block = dmz_start_block(zmd, src_zone); dst_zone_block = dmz_start_block(zmd, dst_zone); @@ -136,9 +137,14 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, set_bit(DM_KCOPYD_WRITE_SEQ, &flags); while (block < end_block) { - if (dev->flags & DMZ_BDEV_DYING) + if (src_zone->dev->flags & DMZ_BDEV_DYING) + return -EIO; + if (dst_zone->dev->flags & DMZ_BDEV_DYING) return -EIO; + if (dmz_reclaim_should_terminate(src_zone)) + return -EINTR; + /* Get a valid region from the source zone */ ret = dmz_first_valid_block(zmd, src_zone, &block); if (ret <= 0) @@ -156,11 +162,11 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, return ret; } - src.bdev = dev->bdev; + src.bdev = src_zone->dev->bdev; src.sector = dmz_blk2sect(src_zone_block + block); src.count = dmz_blk2sect(nr_blocks); - dst.bdev = dev->bdev; + dst.bdev = dst_zone->dev->bdev; dst.sector = dmz_blk2sect(dst_zone_block + block); dst.count = src.count; @@ -194,10 +200,10 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone) struct dmz_metadata *zmd = zrc->metadata; int ret; - dmz_dev_debug(zrc->dev, - "Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)", - dzone->chunk, dmz_id(zmd, bzone), dmz_weight(bzone), - dmz_id(zmd, dzone), dmz_weight(dzone)); + DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)", + dmz_metadata_label(zmd), zrc->dev_idx, + dzone->chunk, bzone->id, dmz_weight(bzone), + dzone->id, dmz_weight(dzone)); /* Flush data zone into the buffer zone */ ret = dmz_reclaim_copy(zrc, bzone, dzone); @@ -210,7 +216,7 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone) ret = dmz_merge_valid_blocks(zmd, bzone, dzone, chunk_block); if (ret == 0) { /* Free the buffer zone */ - dmz_invalidate_blocks(zmd, bzone, 0, zrc->dev->zone_nr_blocks); + dmz_invalidate_blocks(zmd, bzone, 0, dmz_zone_nr_blocks(zmd)); dmz_lock_map(zmd); dmz_unmap_zone(zmd, bzone); dmz_unlock_zone_reclaim(dzone); @@ -233,10 +239,10 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) struct dmz_metadata *zmd = zrc->metadata; int ret = 0; - dmz_dev_debug(zrc->dev, - "Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)", - chunk, dmz_id(zmd, dzone), dmz_weight(dzone), - dmz_id(zmd, bzone), dmz_weight(bzone)); + DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)", + dmz_metadata_label(zmd), zrc->dev_idx, + chunk, dzone->id, dmz_weight(dzone), + bzone->id, dmz_weight(bzone)); /* Flush data zone into the buffer zone */ ret = dmz_reclaim_copy(zrc, dzone, bzone); @@ -252,7 +258,7 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) * Free the data zone and remap the chunk to * the buffer zone. */ - dmz_invalidate_blocks(zmd, dzone, 0, zrc->dev->zone_nr_blocks); + dmz_invalidate_blocks(zmd, dzone, 0, dmz_zone_nr_blocks(zmd)); dmz_lock_map(zmd); dmz_unmap_zone(zmd, bzone); dmz_unmap_zone(zmd, dzone); @@ -277,18 +283,26 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) struct dm_zone *szone = NULL; struct dmz_metadata *zmd = zrc->metadata; int ret; + int alloc_flags = DMZ_ALLOC_SEQ; - /* Get a free sequential zone */ + /* Get a free random or sequential zone */ dmz_lock_map(zmd); - szone = dmz_alloc_zone(zmd, DMZ_ALLOC_RECLAIM); +again: + szone = dmz_alloc_zone(zmd, zrc->dev_idx, + alloc_flags | DMZ_ALLOC_RECLAIM); + if (!szone && alloc_flags == DMZ_ALLOC_SEQ && dmz_nr_cache_zones(zmd)) { + alloc_flags = DMZ_ALLOC_RND; + goto again; + } dmz_unlock_map(zmd); if (!szone) return -ENOSPC; - dmz_dev_debug(zrc->dev, - "Chunk %u, move rnd zone %u (weight %u) to seq zone %u", - chunk, dmz_id(zmd, dzone), dmz_weight(dzone), - dmz_id(zmd, szone)); + DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u", + dmz_metadata_label(zmd), zrc->dev_idx, chunk, + dmz_is_cache(dzone) ? "cache" : "rnd", + dzone->id, dmz_weight(dzone), + dmz_is_rnd(szone) ? "rnd" : "seq", szone->id); /* Flush the random data zone into the sequential zone */ ret = dmz_reclaim_copy(zrc, dzone, szone); @@ -306,7 +320,7 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_map(zmd); } else { /* Free the data zone and remap the chunk */ - dmz_invalidate_blocks(zmd, dzone, 0, zrc->dev->zone_nr_blocks); + dmz_invalidate_blocks(zmd, dzone, 0, dmz_zone_nr_blocks(zmd)); dmz_lock_map(zmd); dmz_unmap_zone(zmd, dzone); dmz_unlock_zone_reclaim(dzone); @@ -337,6 +351,14 @@ static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone) } /* + * Test if the target device is idle. + */ +static inline int dmz_target_idle(struct dmz_reclaim *zrc) +{ + return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD); +} + +/* * Find a candidate zone for reclaim and process it. */ static int dmz_do_reclaim(struct dmz_reclaim *zrc) @@ -348,13 +370,16 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) int ret; /* Get a data zone */ - dzone = dmz_get_zone_for_reclaim(zmd); - if (IS_ERR(dzone)) - return PTR_ERR(dzone); + dzone = dmz_get_zone_for_reclaim(zmd, zrc->dev_idx, + dmz_target_idle(zrc)); + if (!dzone) { + DMDEBUG("(%s/%u): No zone found to reclaim", + dmz_metadata_label(zmd), zrc->dev_idx); + return -EBUSY; + } start = jiffies; - - if (dmz_is_rnd(dzone)) { + if (dmz_is_cache(dzone) || dmz_is_rnd(dzone)) { if (!dmz_weight(dzone)) { /* Empty zone */ dmz_reclaim_empty(zrc, dzone); @@ -395,54 +420,80 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) } out: if (ret) { + if (ret == -EINTR) + DMDEBUG("(%s/%u): reclaim zone %u interrupted", + dmz_metadata_label(zmd), zrc->dev_idx, + rzone->id); + else + DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d", + dmz_metadata_label(zmd), zrc->dev_idx, + rzone->id, ret); dmz_unlock_zone_reclaim(dzone); return ret; } ret = dmz_flush_metadata(zrc->metadata); if (ret) { - dmz_dev_debug(zrc->dev, - "Metadata flush for zone %u failed, err %d\n", - dmz_id(zmd, rzone), ret); + DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d", + dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret); return ret; } - dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms", - dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start)); + DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms", + dmz_metadata_label(zmd), zrc->dev_idx, + rzone->id, jiffies_to_msecs(jiffies - start)); return 0; } -/* - * Test if the target device is idle. - */ -static inline int dmz_target_idle(struct dmz_reclaim *zrc) +static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc) { - return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD); + struct dmz_metadata *zmd = zrc->metadata; + unsigned int nr_cache = dmz_nr_cache_zones(zmd); + unsigned int nr_unmap, nr_zones; + + if (nr_cache) { + nr_zones = nr_cache; + nr_unmap = dmz_nr_unmap_cache_zones(zmd); + } else { + nr_zones = dmz_nr_rnd_zones(zmd, zrc->dev_idx); + nr_unmap = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx); + } + return nr_unmap * 100 / nr_zones; } /* * Test if reclaim is necessary. */ -static bool dmz_should_reclaim(struct dmz_reclaim *zrc) +static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap) { - struct dmz_metadata *zmd = zrc->metadata; - unsigned int nr_rnd = dmz_nr_rnd_zones(zmd); - unsigned int nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd); - unsigned int p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd; + unsigned int nr_reclaim; + + nr_reclaim = dmz_nr_rnd_zones(zrc->metadata, zrc->dev_idx); + + if (dmz_nr_cache_zones(zrc->metadata)) { + /* + * The first device in a multi-device + * setup only contains cache zones, so + * never start reclaim there. + */ + if (zrc->dev_idx == 0) + return false; + nr_reclaim += dmz_nr_cache_zones(zrc->metadata); + } /* Reclaim when idle */ - if (dmz_target_idle(zrc) && nr_unmap_rnd < nr_rnd) + if (dmz_target_idle(zrc) && nr_reclaim) return true; - /* If there are still plenty of random zones, do not reclaim */ - if (p_unmap_rnd >= DMZ_RECLAIM_HIGH_UNMAP_RND) + /* If there are still plenty of cache zones, do not reclaim */ + if (p_unmap >= DMZ_RECLAIM_HIGH_UNMAP_ZONES) return false; /* - * If the percentage of unmapped random zones is low, + * If the percentage of unmapped cache zones is low, * reclaim even if the target is busy. */ - return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND; + return p_unmap <= DMZ_RECLAIM_LOW_UNMAP_ZONES; } /* @@ -452,14 +503,14 @@ static void dmz_reclaim_work(struct work_struct *work) { struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work); struct dmz_metadata *zmd = zrc->metadata; - unsigned int nr_rnd, nr_unmap_rnd; - unsigned int p_unmap_rnd; + unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0; int ret; - if (dmz_bdev_is_dying(zrc->dev)) + if (dmz_dev_is_dying(zmd)) return; - if (!dmz_should_reclaim(zrc)) { + p_unmap = dmz_reclaim_percentage(zrc); + if (!dmz_should_reclaim(zrc, p_unmap)) { mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD); return; } @@ -470,27 +521,29 @@ static void dmz_reclaim_work(struct work_struct *work) * and slower if there are still some free random zones to avoid * as much as possible to negatively impact the user workload. */ - nr_rnd = dmz_nr_rnd_zones(zmd); - nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd); - p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd; - if (dmz_target_idle(zrc) || p_unmap_rnd < DMZ_RECLAIM_LOW_UNMAP_RND / 2) { + if (dmz_target_idle(zrc) || p_unmap < DMZ_RECLAIM_LOW_UNMAP_ZONES / 2) { /* Idle or very low percentage: go fast */ zrc->kc_throttle.throttle = 100; } else { /* Busy but we still have some random zone: throttle */ - zrc->kc_throttle.throttle = min(75U, 100U - p_unmap_rnd / 2); + zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2); } - dmz_dev_debug(zrc->dev, - "Reclaim (%u): %s, %u%% free rnd zones (%u/%u)", - zrc->kc_throttle.throttle, - (dmz_target_idle(zrc) ? "Idle" : "Busy"), - p_unmap_rnd, nr_unmap_rnd, nr_rnd); + nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx); + nr_rnd = dmz_nr_rnd_zones(zmd, zrc->dev_idx); + + DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)", + dmz_metadata_label(zmd), zrc->dev_idx, + zrc->kc_throttle.throttle, + (dmz_target_idle(zrc) ? "Idle" : "Busy"), + p_unmap, dmz_nr_unmap_cache_zones(zmd), + dmz_nr_cache_zones(zmd), + dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx), + dmz_nr_rnd_zones(zmd, zrc->dev_idx)); ret = dmz_do_reclaim(zrc); - if (ret) { - dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); - if (!dmz_check_bdev(zrc->dev)) + if (ret && ret != -EINTR) { + if (!dmz_check_dev(zmd)) return; } @@ -500,8 +553,8 @@ static void dmz_reclaim_work(struct work_struct *work) /* * Initialize reclaim. */ -int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd, - struct dmz_reclaim **reclaim) +int dmz_ctr_reclaim(struct dmz_metadata *zmd, + struct dmz_reclaim **reclaim, int idx) { struct dmz_reclaim *zrc; int ret; @@ -510,9 +563,9 @@ int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd, if (!zrc) return -ENOMEM; - zrc->dev = dev; zrc->metadata = zmd; zrc->atime = jiffies; + zrc->dev_idx = idx; /* Reclaim kcopyd client */ zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle); @@ -524,8 +577,8 @@ int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd, /* Reclaim work */ INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work); - zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s", WQ_MEM_RECLAIM, - dev->name); + zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d", WQ_MEM_RECLAIM, + dmz_metadata_label(zmd), idx); if (!zrc->wq) { ret = -ENOMEM; goto err; @@ -583,7 +636,8 @@ void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc) */ void dmz_schedule_reclaim(struct dmz_reclaim *zrc) { - if (dmz_should_reclaim(zrc)) + unsigned int p_unmap = dmz_reclaim_percentage(zrc); + + if (dmz_should_reclaim(zrc, p_unmap)) mod_delayed_work(zrc->wq, &zrc->work, 0); } - |