diff options
Diffstat (limited to 'drivers/md/dm-writecache.c')
-rw-r--r-- | drivers/md/dm-writecache.c | 138 |
1 files changed, 129 insertions, 9 deletions
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index a09bdc000e64..114927da9cc9 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -26,6 +26,8 @@ #define AUTOCOMMIT_BLOCKS_SSD 65536 #define AUTOCOMMIT_BLOCKS_PMEM 64 #define AUTOCOMMIT_MSEC 1000 +#define MAX_AGE_DIV 16 +#define MAX_AGE_UNSPECIFIED -1UL #define BITMAP_GRANULARITY 65536 #if BITMAP_GRANULARITY < PAGE_SIZE @@ -88,6 +90,7 @@ struct wc_entry { :47 #endif ; + unsigned long age; #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS uint64_t original_sector; uint64_t seq_count; @@ -119,6 +122,7 @@ struct dm_writecache { size_t writeback_size; size_t freelist_high_watermark; size_t freelist_low_watermark; + unsigned long max_age; unsigned uncommitted_blocks; unsigned autocommit_blocks; @@ -130,6 +134,8 @@ struct dm_writecache { struct timer_list autocommit_timer; struct wait_queue_head freelist_wait; + struct timer_list max_age_timer; + atomic_t bio_in_progress[2]; struct wait_queue_head bio_in_progress_wait[2]; @@ -160,6 +166,7 @@ struct dm_writecache { bool autocommit_time_set:1; bool writeback_fua_set:1; bool flush_on_suspend:1; + bool cleaner:1; unsigned writeback_all; struct workqueue_struct *writeback_wq; @@ -502,6 +509,34 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); } +static void ssd_commit_superblock(struct dm_writecache *wc) +{ + int r; + struct dm_io_region region; + struct dm_io_request req; + + region.bdev = wc->ssd_dev->bdev; + region.sector = 0; + region.count = PAGE_SIZE; + + if (unlikely(region.sector + region.count > wc->metadata_sectors)) + region.count = wc->metadata_sectors - region.sector; + + region.sector += wc->start_sector; + + req.bi_op = REQ_OP_WRITE; + req.bi_op_flags = REQ_SYNC | REQ_FUA; + req.mem.type = DM_IO_VMA; + req.mem.ptr.vma = (char *)wc->memory_map; + req.client = wc->dm_io; + req.notify.fn = NULL; + req.notify.context = NULL; + + r = dm_io(&req, 1, ®ion, NULL); + if (unlikely(r)) + writecache_error(wc, r, "error writing superblock"); +} + static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { if (WC_MODE_PMEM(wc)) @@ -596,6 +631,7 @@ static void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *i rb_link_node(&ins->rb_node, parent, node); rb_insert_color(&ins->rb_node, &wc->tree); list_add(&ins->lru, &wc->lru); + ins->age = jiffies; } static void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e) @@ -631,6 +667,16 @@ static inline void writecache_verify_watermark(struct dm_writecache *wc) queue_work(wc->writeback_wq, &wc->writeback_work); } +static void writecache_max_age_timer(struct timer_list *t) +{ + struct dm_writecache *wc = from_timer(wc, t, max_age_timer); + + if (!dm_suspended(wc->ti) && !writecache_has_error(wc)) { + queue_work(wc->writeback_wq, &wc->writeback_work); + mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); + } +} + static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector) { struct wc_entry *e; @@ -741,8 +787,10 @@ static void writecache_flush(struct dm_writecache *wc) wc->seq_count++; pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); - writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count); - writecache_commit_flushed(wc, false); + if (WC_MODE_PMEM(wc)) + writecache_commit_flushed(wc, false); + else + ssd_commit_superblock(wc); wc->overwrote_committed = false; @@ -837,6 +885,7 @@ static void writecache_suspend(struct dm_target *ti) bool flush_on_suspend; del_timer_sync(&wc->autocommit_timer); + del_timer_sync(&wc->max_age_timer); wc_lock(wc); writecache_flush(wc); @@ -876,6 +925,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc) struct wc_entry *e = &wc->entries[b]; e->index = b; e->write_in_progress = false; + cond_resched(); } return 0; @@ -930,6 +980,7 @@ static void writecache_resume(struct dm_target *ti) e->original_sector = le64_to_cpu(wme.original_sector); e->seq_count = le64_to_cpu(wme.seq_count); } + cond_resched(); } #endif for (b = 0; b < wc->n_blocks; b++) { @@ -973,6 +1024,9 @@ erase_this: writecache_verify_watermark(wc); + if (wc->max_age != MAX_AGE_UNSPECIFIED) + mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); + wc_unlock(wc); } @@ -1021,6 +1075,28 @@ static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_w return 0; } +static void activate_cleaner(struct dm_writecache *wc) +{ + wc->flush_on_suspend = true; + wc->cleaner = true; + wc->freelist_high_watermark = wc->n_blocks; + wc->freelist_low_watermark = wc->n_blocks; +} + +static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache *wc) +{ + if (argc != 1) + return -EINVAL; + + wc_lock(wc); + activate_cleaner(wc); + if (!dm_suspended(wc->ti)) + writecache_verify_watermark(wc); + wc_unlock(wc); + + return 0; +} + static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, char *result, unsigned maxlen) { @@ -1031,6 +1107,8 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, r = process_flush_mesg(argc, argv, wc); else if (!strcasecmp(argv[0], "flush_on_suspend")) r = process_flush_on_suspend_mesg(argc, argv, wc); + else if (!strcasecmp(argv[0], "cleaner")) + r = process_cleaner_mesg(argc, argv, wc); else DMERR("unrecognised message received: %s", argv[0]); @@ -1194,6 +1272,7 @@ read_next_block: } } else { do { + bool found_entry = false; if (writecache_has_error(wc)) goto unlock_error; e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); @@ -1204,9 +1283,25 @@ read_next_block: wc->overwrote_committed = true; goto bio_copy; } + found_entry = true; + } else { + if (unlikely(wc->cleaner)) + goto direct_write; } e = writecache_pop_from_freelist(wc, (sector_t)-1); if (unlikely(!e)) { + if (!found_entry) { +direct_write: + e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); + if (e) { + sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector; + BUG_ON(!next_boundary); + if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { + dm_accept_partial_bio(bio, next_boundary); + } + } + goto unlock_remap_origin; + } writecache_wait_on_freelist(wc); continue; } @@ -1619,7 +1714,9 @@ restart: wbl.size = 0; while (!list_empty(&wc->lru) && (wc->writeback_all || - wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark)) { + wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark || + (jiffies - container_of(wc->lru.prev, struct wc_entry, lru)->age >= + wc->max_age - wc->max_age / MAX_AGE_DIV))) { n_walked++; if (unlikely(n_walked > WRITEBACK_LATENCY) && @@ -1791,8 +1888,10 @@ static int init_memory(struct dm_writecache *wc) pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks)); pmem_assign(sb(wc)->seq_count, cpu_to_le64(0)); - for (b = 0; b < wc->n_blocks; b++) + for (b = 0; b < wc->n_blocks; b++) { write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); + cond_resched(); + } writecache_flush_all_metadata(wc); writecache_commit_flushed(wc, false); @@ -1882,9 +1981,11 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) wc->ti = ti; mutex_init(&wc->lock); + wc->max_age = MAX_AGE_UNSPECIFIED; writecache_poison_lists(wc); init_waitqueue_head(&wc->freelist_wait); timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0); + timer_setup(&wc->max_age_timer, writecache_max_age_timer, 0); for (i = 0; i < 2; i++) { atomic_set(&wc->bio_in_progress[i], 0); @@ -2058,6 +2159,16 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto invalid_optional; wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs); wc->autocommit_time_set = true; + } else if (!strcasecmp(string, "max_age") && opt_params >= 1) { + unsigned max_age_msecs; + string = dm_shift_arg(&as), opt_params--; + if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1) + goto invalid_optional; + if (max_age_msecs > 86400000) + goto invalid_optional; + wc->max_age = msecs_to_jiffies(max_age_msecs); + } else if (!strcasecmp(string, "cleaner")) { + wc->cleaner = true; } else if (!strcasecmp(string, "fua")) { if (WC_MODE_PMEM(wc)) { wc->writeback_fua = true; @@ -2235,6 +2346,9 @@ overflow: do_div(x, 100); wc->freelist_low_watermark = x; + if (wc->cleaner) + activate_cleaner(wc); + r = writecache_alloc_entries(wc); if (r) { ti->error = "Cannot allocate memory"; @@ -2278,9 +2392,9 @@ static void writecache_status(struct dm_target *ti, status_type_t type, extra_args = 0; if (wc->start_sector) extra_args += 2; - if (wc->high_wm_percent_set) + if (wc->high_wm_percent_set && !wc->cleaner) extra_args += 2; - if (wc->low_wm_percent_set) + if (wc->low_wm_percent_set && !wc->cleaner) extra_args += 2; if (wc->max_writeback_jobs_set) extra_args += 2; @@ -2288,19 +2402,21 @@ static void writecache_status(struct dm_target *ti, status_type_t type, extra_args += 2; if (wc->autocommit_time_set) extra_args += 2; + if (wc->cleaner) + extra_args++; if (wc->writeback_fua_set) extra_args++; DMEMIT("%u", extra_args); if (wc->start_sector) DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); - if (wc->high_wm_percent_set) { + if (wc->high_wm_percent_set && !wc->cleaner) { x = (uint64_t)wc->freelist_high_watermark * 100; x += wc->n_blocks / 2; do_div(x, (size_t)wc->n_blocks); DMEMIT(" high_watermark %u", 100 - (unsigned)x); } - if (wc->low_wm_percent_set) { + if (wc->low_wm_percent_set && !wc->cleaner) { x = (uint64_t)wc->freelist_low_watermark * 100; x += wc->n_blocks / 2; do_div(x, (size_t)wc->n_blocks); @@ -2312,6 +2428,10 @@ static void writecache_status(struct dm_target *ti, status_type_t type, DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks); if (wc->autocommit_time_set) DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies)); + if (wc->max_age != MAX_AGE_UNSPECIFIED) + DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age)); + if (wc->cleaner) + DMEMIT(" cleaner"); if (wc->writeback_fua_set) DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); break; @@ -2320,7 +2440,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type, static struct target_type writecache_target = { .name = "writecache", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = writecache_ctr, .dtr = writecache_dtr, |