diff options
Diffstat (limited to 'drivers/md/dm-writecache.c')
-rw-r--r-- | drivers/md/dm-writecache.c | 171 |
1 files changed, 98 insertions, 73 deletions
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 96a003eb7323..3aa5874f0aef 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2018 Red Hat. All rights reserved. * @@ -83,16 +83,13 @@ struct wc_entry { struct rb_node rb_node; struct list_head lru; unsigned short wc_list_contiguous; - bool write_in_progress #if BITS_PER_LONG == 64 - :1 -#endif - ; - unsigned long index -#if BITS_PER_LONG == 64 - :47 + bool write_in_progress : 1; + unsigned long index : 47; +#else + bool write_in_progress; + unsigned long index; #endif - ; unsigned long age; #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS uint64_t original_sector; @@ -128,9 +125,9 @@ struct dm_writecache { unsigned long max_age; unsigned long pause; - unsigned uncommitted_blocks; - unsigned autocommit_blocks; - unsigned max_writeback_jobs; + unsigned int uncommitted_blocks; + unsigned int autocommit_blocks; + unsigned int max_writeback_jobs; int error; @@ -155,7 +152,7 @@ struct dm_writecache { sector_t data_device_sectors; void *block_start; struct wc_entry *entries; - unsigned block_size; + unsigned int block_size; unsigned char block_size_bits; bool pmem_mode:1; @@ -178,13 +175,13 @@ struct dm_writecache { bool metadata_only:1; bool pause_set:1; - unsigned high_wm_percent_value; - unsigned low_wm_percent_value; - unsigned autocommit_time_value; - unsigned max_age_value; - unsigned pause_value; + unsigned int high_wm_percent_value; + unsigned int low_wm_percent_value; + unsigned int autocommit_time_value; + unsigned int max_age_value; + unsigned int pause_value; - unsigned writeback_all; + unsigned int writeback_all; struct workqueue_struct *writeback_wq; struct work_struct writeback_work; struct work_struct flush_work; @@ -202,7 +199,7 @@ struct dm_writecache { struct dm_kcopyd_client *dm_kcopyd; unsigned long *dirty_bitmap; - unsigned dirty_bitmap_size; + unsigned int dirty_bitmap_size; struct bio_set bio_set; mempool_t copy_pool; @@ -227,7 +224,7 @@ struct writeback_struct { struct list_head endio_entry; struct dm_writecache *wc; struct wc_entry **wc_list; - unsigned wc_list_n; + unsigned int wc_list_n; struct wc_entry *wc_list_inline[WB_LIST_INLINE]; struct bio bio; }; @@ -236,7 +233,7 @@ struct copy_struct { struct list_head endio_entry; struct dm_writecache *wc; struct wc_entry *e; - unsigned n_entries; + unsigned int n_entries; int error; }; @@ -300,6 +297,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) } if (da != p) { long i; + wc->memory_map = NULL; pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); if (!pages) { @@ -309,6 +307,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) i = 0; do { long daa; + daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i, p - i, DAX_ACCESS, NULL, &pfn); if (daa <= 0) { @@ -369,7 +368,7 @@ static struct page *persistent_memory_page(void *addr) return virt_to_page(addr); } -static unsigned persistent_memory_page_offset(void *addr) +static unsigned int persistent_memory_page_offset(void *addr) { return (unsigned long)addr & (PAGE_SIZE - 1); } @@ -502,11 +501,12 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) COMPLETION_INITIALIZER_ONSTACK(endio.c), ATOMIC_INIT(1), }; - unsigned bitmap_bits = wc->dirty_bitmap_size * 8; - unsigned i = 0; + unsigned int bitmap_bits = wc->dirty_bitmap_size * 8; + unsigned int i = 0; while (1) { - unsigned j; + unsigned int j; + i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i); if (unlikely(i == bitmap_bits)) break; @@ -531,7 +531,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) req.notify.context = &endio; /* writing via async dm-io (implied by notify.fn above) won't return an error */ - (void) dm_io(&req, 1, ®ion, NULL); + (void) dm_io(&req, 1, ®ion, NULL); i = j; } @@ -623,20 +623,21 @@ static struct wc_entry *writecache_find_entry(struct dm_writecache *wc, if (unlikely(!node)) { if (!(flags & WFE_RETURN_FOLLOWING)) return NULL; - if (read_original_sector(wc, e) >= block) { + if (read_original_sector(wc, e) >= block) return e; - } else { - node = rb_next(&e->rb_node); - if (unlikely(!node)) - return NULL; - e = container_of(node, struct wc_entry, rb_node); - return e; - } + + node = rb_next(&e->rb_node); + if (unlikely(!node)) + return NULL; + + e = container_of(node, struct wc_entry, rb_node); + return e; } } while (1) { struct wc_entry *e2; + if (flags & WFE_LOWEST_SEQ) node = rb_prev(&e->rb_node); else @@ -679,6 +680,7 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry { if (WC_MODE_SORT_FREELIST(wc)) { struct rb_node **node = &wc->freetree.rb_node, *parent = NULL; + if (unlikely(!*node)) wc->current_free = e; while (*node) { @@ -718,6 +720,7 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, s if (WC_MODE_SORT_FREELIST(wc)) { struct rb_node *next; + if (unlikely(!wc->current_free)) return NULL; e = wc->current_free; @@ -769,7 +772,7 @@ static void writecache_poison_lists(struct dm_writecache *wc) /* * Catch incorrect access to these values while the device is suspended. */ - memset(&wc->tree, -1, sizeof wc->tree); + memset(&wc->tree, -1, sizeof(wc->tree)); wc->lru.next = LIST_POISON1; wc->lru.prev = LIST_POISON2; wc->freelist.next = LIST_POISON1; @@ -864,6 +867,7 @@ static void writecache_flush_work(struct work_struct *work) static void writecache_autocommit_timer(struct timer_list *t) { struct dm_writecache *wc = from_timer(wc, t, autocommit_timer); + if (!writecache_has_error(wc)) queue_work(wc->writeback_wq, &wc->flush_work); } @@ -941,7 +945,8 @@ static void writecache_suspend(struct dm_target *ti) wc_lock(wc); if (flush_on_suspend) wc->writeback_all--; - while (writecache_wait_for_writeback(wc)); + while (writecache_wait_for_writeback(wc)) + ; if (WC_MODE_PMEM(wc)) persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); @@ -962,6 +967,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc) return -ENOMEM; for (b = 0; b < wc->n_blocks; b++) { struct wc_entry *e = &wc->entries[b]; + e->index = b; e->write_in_progress = false; cond_resched(); @@ -1005,6 +1011,7 @@ static void writecache_resume(struct dm_target *ti) r = writecache_read_metadata(wc, wc->metadata_sectors); if (r) { size_t sb_entries_offset; + writecache_error(wc, r, "unable to read metadata: %d", r); sb_entries_offset = offsetof(struct wc_memory_superblock, entries); memset((char *)wc->memory_map + sb_entries_offset, -1, @@ -1034,6 +1041,7 @@ static void writecache_resume(struct dm_target *ti) for (b = 0; b < wc->n_blocks; b++) { struct wc_entry *e = &wc->entries[b]; struct wc_memory_entry wme; + if (writecache_has_error(wc)) { e->original_sector = -1; e->seq_count = -1; @@ -1055,6 +1063,7 @@ static void writecache_resume(struct dm_target *ti) #endif for (b = 0; b < wc->n_blocks; b++) { struct wc_entry *e = &wc->entries[b]; + if (!writecache_entry_is_committed(wc, e)) { if (read_seq_count(wc, e) != -1) { erase_this: @@ -1100,7 +1109,7 @@ erase_this: wc_unlock(wc); } -static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *wc) +static int process_flush_mesg(unsigned int argc, char **argv, struct dm_writecache *wc) { if (argc != 1) return -EINVAL; @@ -1133,7 +1142,7 @@ static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache * return 0; } -static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_writecache *wc) +static int process_flush_on_suspend_mesg(unsigned int argc, char **argv, struct dm_writecache *wc) { if (argc != 1) return -EINVAL; @@ -1153,7 +1162,7 @@ static void activate_cleaner(struct dm_writecache *wc) wc->freelist_low_watermark = wc->n_blocks; } -static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache *wc) +static int process_cleaner_mesg(unsigned int argc, char **argv, struct dm_writecache *wc) { if (argc != 1) return -EINVAL; @@ -1167,20 +1176,20 @@ static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache return 0; } -static int process_clear_stats_mesg(unsigned argc, char **argv, struct dm_writecache *wc) +static int process_clear_stats_mesg(unsigned int argc, char **argv, struct dm_writecache *wc) { if (argc != 1) return -EINVAL; wc_lock(wc); - memset(&wc->stats, 0, sizeof wc->stats); + memset(&wc->stats, 0, sizeof(wc->stats)); wc_unlock(wc); return 0; } -static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, - char *result, unsigned maxlen) +static int writecache_message(struct dm_target *ti, unsigned int argc, char **argv, + char *result, unsigned int maxlen) { int r = -EINVAL; struct dm_writecache *wc = ti->private; @@ -1238,12 +1247,13 @@ static void memcpy_flushcache_optimized(void *dest, void *source, size_t size) static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data) { void *buf; - unsigned size; + unsigned int size; int rw = bio_data_dir(bio); - unsigned remaining_size = wc->block_size; + unsigned int remaining_size = wc->block_size; do { struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); + buf = bvec_kmap_local(&bv); size = bv.bv_len; if (unlikely(size > remaining_size)) @@ -1251,6 +1261,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data if (rw == READ) { int r; + r = copy_mc_to_kernel(buf, data, size); flush_dcache_page(bio_page(bio)); if (unlikely(r)) { @@ -1371,13 +1382,14 @@ read_next_block: static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, struct wc_entry *e, bool search_used) { - unsigned bio_size = wc->block_size; + unsigned int bio_size = wc->block_size; sector_t start_cache_sec = cache_sector(wc, e); sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); while (bio_size < bio->bi_iter.bi_size) { if (!search_used) { struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); + if (!f) break; write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + @@ -1387,6 +1399,7 @@ static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, } else { struct wc_entry *f; struct rb_node *next = rb_next(&e->rb_node); + if (!next) break; f = container_of(next, struct wc_entry, rb_node); @@ -1427,6 +1440,7 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio do { bool found_entry = false; bool search_used = false; + if (writecache_has_error(wc)) { wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; return WC_MAP_ERROR; @@ -1540,7 +1554,7 @@ static int writecache_map(struct dm_target *ti, struct bio *bio) bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); - if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & + if (unlikely((((unsigned int)bio->bi_iter.bi_sector | bio_sectors(bio)) & (wc->block_size / 512 - 1)) != 0)) { DMERR("I/O is not aligned, sector %llu, size %u, block size %u", (unsigned long long)bio->bi_iter.bi_sector, @@ -1605,6 +1619,7 @@ static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t if (bio->bi_private == (void *)1) { int dir = bio_data_dir(bio); + if (atomic_dec_and_test(&wc->bio_in_progress[dir])) if (unlikely(waitqueue_active(&wc->bio_in_progress_wait[dir]))) wake_up(&wc->bio_in_progress_wait[dir]); @@ -1666,7 +1681,7 @@ static void writecache_copy_endio(int read_err, unsigned long write_err, void *p static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list) { - unsigned i; + unsigned int i; struct writeback_struct *wb; struct wc_entry *e; unsigned long n_walked = 0; @@ -1782,7 +1797,7 @@ pop_from_list: static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e) { struct dm_writecache *wc = wb->wc; - unsigned block_size = wc->block_size; + unsigned int block_size = wc->block_size; void *address = memory_data(wc, e); persistent_memory_flush_cache(address, block_size); @@ -1817,7 +1832,7 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba struct wc_entry *e, *f; struct bio *bio; struct writeback_struct *wb; - unsigned max_pages; + unsigned int max_pages; while (wbl->size) { wbl->size--; @@ -1832,10 +1847,13 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba wb->wc = wc; bio->bi_end_io = writecache_writeback_endio; bio->bi_iter.bi_sector = read_original_sector(wc, e); - if (max_pages <= WB_LIST_INLINE || - unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *), - GFP_NOIO | __GFP_NORETRY | - __GFP_NOMEMALLOC | __GFP_NOWARN)))) { + + if (unlikely(max_pages > WB_LIST_INLINE)) + wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *), + GFP_NOIO | __GFP_NORETRY | + __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (likely(max_pages <= WB_LIST_INLINE) || unlikely(!wb->wc_list)) { wb->wc_list = wb->wc_list_inline; max_pages = WB_LIST_INLINE; } @@ -1880,7 +1898,7 @@ static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writebac struct copy_struct *c; while (wbl->size) { - unsigned n_sectors; + unsigned int n_sectors; wbl->size--; e = container_of(wbl->list.prev, struct wc_entry, lru); @@ -1940,6 +1958,7 @@ static void writecache_writeback(struct work_struct *work) if (likely(wc->pause != 0)) { while (1) { unsigned long idle; + if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) || unlikely(dm_suspended(wc->ti))) break; @@ -1965,9 +1984,8 @@ restart: goto restart; } - if (wc->overwrote_committed) { + if (wc->overwrote_committed) writecache_wait_for_ios(wc, WRITE); - } n_walked = 0; INIT_LIST_HEAD(&skipped); @@ -1996,9 +2014,9 @@ restart: } else e = container_of(wc->lru.prev, struct wc_entry, lru); BUG_ON(e->write_in_progress); - if (unlikely(!writecache_entry_is_committed(wc, e))) { + if (unlikely(!writecache_entry_is_committed(wc, e))) writecache_flush(wc); - } + node = rb_prev(&e->rb_node); if (node) { f = container_of(node, struct wc_entry, rb_node); @@ -2087,12 +2105,13 @@ restart: if (unlikely(wc->writeback_all)) { wc_lock(wc); - while (writecache_wait_for_writeback(wc)); + while (writecache_wait_for_writeback(wc)) + ; wc_unlock(wc); } } -static int calculate_memory_size(uint64_t device_size, unsigned block_size, +static int calculate_memory_size(uint64_t device_size, unsigned int block_size, size_t *n_blocks_p, size_t *n_metadata_blocks_p) { uint64_t n_blocks, offset; @@ -2155,7 +2174,7 @@ static int init_memory(struct dm_writecache *wc) writecache_flush_all_metadata(wc); writecache_commit_flushed(wc, false); pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); - writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); + writecache_flush_region(wc, &sb(wc)->magic, sizeof(sb(wc)->magic)); writecache_commit_flushed(wc, false); return 0; @@ -2207,12 +2226,12 @@ static void writecache_dtr(struct dm_target *ti) kfree(wc); } -static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) +static int writecache_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dm_writecache *wc; struct dm_arg_set as; const char *string; - unsigned opt_params; + unsigned int opt_params; size_t offset, data_size; int i, r; char dummy; @@ -2384,6 +2403,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) string = dm_shift_arg(&as), opt_params--; if (!strcasecmp(string, "start_sector") && opt_params >= 1) { unsigned long long start_sector; + string = dm_shift_arg(&as), opt_params--; if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1) goto invalid_optional; @@ -2419,7 +2439,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto invalid_optional; wc->autocommit_blocks_set = true; } else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) { - unsigned autocommit_msecs; + unsigned int autocommit_msecs; + string = dm_shift_arg(&as), opt_params--; if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1) goto invalid_optional; @@ -2429,7 +2450,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) wc->autocommit_time_value = autocommit_msecs; wc->autocommit_time_set = true; } else if (!strcasecmp(string, "max_age") && opt_params >= 1) { - unsigned max_age_msecs; + unsigned int max_age_msecs; + string = dm_shift_arg(&as), opt_params--; if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1) goto invalid_optional; @@ -2445,16 +2467,19 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) if (WC_MODE_PMEM(wc)) { wc->writeback_fua = true; wc->writeback_fua_set = true; - } else goto invalid_optional; + } else + goto invalid_optional; } else if (!strcasecmp(string, "nofua")) { if (WC_MODE_PMEM(wc)) { wc->writeback_fua = false; wc->writeback_fua_set = true; - } else goto invalid_optional; + } else + goto invalid_optional; } else if (!strcasecmp(string, "metadata_only")) { wc->metadata_only = true; } else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) { - unsigned pause_msecs; + unsigned int pause_msecs; + if (WC_MODE_PMEM(wc)) goto invalid_optional; string = dm_shift_arg(&as), opt_params--; @@ -2653,11 +2678,11 @@ bad: } static void writecache_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) + unsigned int status_flags, char *result, unsigned int maxlen) { struct dm_writecache *wc = ti->private; - unsigned extra_args; - unsigned sz = 0; + unsigned int extra_args; + unsigned int sz = 0; switch (type) { case STATUSTYPE_INFO: |