From 08c1af8f1c13bbf210f1760132f4df24d0ed46d6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Apr 2022 14:38:22 -0400 Subject: dm integrity: fix memory corruption when tag_size is less than digest size It is possible to set up dm-integrity in such a way that the "tag_size" parameter is less than the actual digest size. In this situation, a part of the digest beyond tag_size is ignored. In this case, dm-integrity would write beyond the end of the ic->recalc_tags array and corrupt memory. The corruption happened in integrity_recalc->integrity_sector_checksum->crypto_shash_final. Fix this corruption by increasing the tags array so that it has enough padding at the end to accomodate the loop in integrity_recalc() being able to write a full digest size for the last member of the tags array. Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index ad2d5faa2ebb..36ae30b73a6e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4399,6 +4399,7 @@ try_smaller_buffer: } if (ic->internal_hash) { + size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq ) { ti->error = "Cannot allocate workqueue"; @@ -4412,8 +4413,10 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } - ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, - ic->tag_size, GFP_KERNEL); + recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; + ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for recalculating"; r = -ENOMEM; -- cgit v1.2.3 From ce40426fdc3c92acdba6b5ca74bc7277ffaa6a3d Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Mon, 11 Apr 2022 15:03:35 -0700 Subject: dm mpath: only use ktime_get_ns() in historical selector Mixing sched_clock() and ktime_get_ns() usage will give bad results. Switch hst_select_path() from using sched_clock() to ktime_get_ns(). Also rename path_service_time()'s 'sched_now' variable to 'now'. Fixes: 2613eab11996 ("dm mpath: add Historical Service Time Path Selector") Signed-off-by: Khazhismel Kumykov Signed-off-by: Mike Snitzer --- drivers/md/dm-ps-historical-service-time.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 875bca30a0dd..82f2a06153dc 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -27,7 +27,6 @@ #include #include #include -#include #define DM_MSG_PREFIX "multipath historical-service-time" @@ -433,7 +432,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps, { struct selector *s = ps->context; struct path_info *pi = NULL, *best = NULL; - u64 time_now = sched_clock(); + u64 time_now = ktime_get_ns(); struct dm_path *ret = NULL; unsigned long flags; @@ -474,7 +473,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path, static u64 path_service_time(struct path_info *pi, u64 start_time) { - u64 sched_now = ktime_get_ns(); + u64 now = ktime_get_ns(); /* if a previous disk request has finished after this IO was * sent to the hardware, pretend the submission happened @@ -483,11 +482,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time) if (time_after64(pi->last_finish, start_time)) start_time = pi->last_finish; - pi->last_finish = sched_now; - if (time_before64(sched_now, start_time)) + pi->last_finish = now; + if (time_before64(now, start_time)) return 0; - return sched_now - start_time; + return now - start_time; } static int hst_end_io(struct path_selector *ps, struct dm_path *path, -- cgit v1.2.3 From 73d7b06e902dd294e1f61554f7c403d0f705cf92 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 13 Apr 2022 11:06:19 -0400 Subject: dm zone: fix NULL pointer dereference in dm_zone_map_bio Commit 0fbb4d93b38b ("dm: add dm_submit_bio_remap interface") changed the alloc_io() function to delay the initialization of struct dm_io's orig_bio member, leaving it NULL until after the dm_io and associated user submitted bio is processed by __split_and_process_bio(). This change causes a NULL pointer dereference in dm_zone_map_bio() when the original user bio is inspected to detect the need for zone append command emulation. Fix this NULL pointer by updating dm_zone_map_bio() to not access ->orig_bio when the same info can be accessed from the clone of the ->orig_bio _before_ any ->map processing. Save off the bio_op() and bio_sectors() for the clone and then use the saved orig_bio_details as needed. Fixes: 0fbb4d93b38b ("dm: add dm_submit_bio_remap interface") Reported-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zone.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index c1ca9be4b79e..57daa86c19cf 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -360,16 +360,20 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, return 0; } +struct orig_bio_details { + unsigned int op; + unsigned int nr_sectors; +}; + /* * First phase of BIO mapping for targets with zone append emulation: * check all BIO that change a zone writer pointer and change zone * append operations into regular write operations. */ static bool dm_zone_map_bio_begin(struct mapped_device *md, - struct bio *orig_bio, struct bio *clone) + unsigned int zno, struct bio *clone) { sector_t zsectors = blk_queue_zone_sectors(md->queue); - unsigned int zno = bio_zone_no(orig_bio); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); /* @@ -384,7 +388,7 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, WRITE_ONCE(md->zwp_offset[zno], zwp_offset); } - switch (bio_op(orig_bio)) { + switch (bio_op(clone)) { case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_FINISH: return true; @@ -401,9 +405,8 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, * target zone. */ clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE | - (orig_bio->bi_opf & (~REQ_OP_MASK)); - clone->bi_iter.bi_sector = - orig_bio->bi_iter.bi_sector + zwp_offset; + (clone->bi_opf & (~REQ_OP_MASK)); + clone->bi_iter.bi_sector += zwp_offset; break; default: DMWARN_LIMIT("Invalid BIO operation"); @@ -423,11 +426,10 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, * data written to a zone. Note that at this point, the remapped clone BIO * may already have completed, so we do not touch it. */ -static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, - struct bio *orig_bio, +static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno, + struct orig_bio_details *orig_bio_details, unsigned int nr_sectors) { - unsigned int zno = bio_zone_no(orig_bio); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); /* The clone BIO may already have been completed and failed */ @@ -435,7 +437,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, return BLK_STS_IOERR; /* Update the zone wp offset */ - switch (bio_op(orig_bio)) { + switch (orig_bio_details->op) { case REQ_OP_ZONE_RESET: WRITE_ONCE(md->zwp_offset[zno], 0); return BLK_STS_OK; @@ -452,7 +454,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, * Check that the target did not truncate the write operation * emulating a zone append. */ - if (nr_sectors != bio_sectors(orig_bio)) { + if (nr_sectors != orig_bio_details->nr_sectors) { DMWARN_LIMIT("Truncated write for zone append"); return BLK_STS_IOERR; } @@ -488,7 +490,7 @@ static inline void dm_zone_unlock(struct request_queue *q, bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED); } -static bool dm_need_zone_wp_tracking(struct bio *orig_bio) +static bool dm_need_zone_wp_tracking(struct bio *bio) { /* * Special processing is not needed for operations that do not need the @@ -496,15 +498,15 @@ static bool dm_need_zone_wp_tracking(struct bio *orig_bio) * zones and all operations that do not modify directly a sequential * zone write pointer. */ - if (op_is_flush(orig_bio->bi_opf) && !bio_sectors(orig_bio)) + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return false; - switch (bio_op(orig_bio)) { + switch (bio_op(bio)) { case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_FINISH: case REQ_OP_ZONE_APPEND: - return bio_zone_is_seq(orig_bio); + return bio_zone_is_seq(bio); default: return false; } @@ -519,8 +521,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) struct dm_target *ti = tio->ti; struct mapped_device *md = io->md; struct request_queue *q = md->queue; - struct bio *orig_bio = io->orig_bio; struct bio *clone = &tio->clone; + struct orig_bio_details orig_bio_details; unsigned int zno; blk_status_t sts; int r; @@ -529,18 +531,21 @@ int dm_zone_map_bio(struct dm_target_io *tio) * IOs that do not change a zone write pointer do not need * any additional special processing. */ - if (!dm_need_zone_wp_tracking(orig_bio)) + if (!dm_need_zone_wp_tracking(clone)) return ti->type->map(ti, clone); /* Lock the target zone */ - zno = bio_zone_no(orig_bio); + zno = bio_zone_no(clone); dm_zone_lock(q, zno, clone); + orig_bio_details.nr_sectors = bio_sectors(clone); + orig_bio_details.op = bio_op(clone); + /* * Check that the bio and the target zone write pointer offset are * both valid, and if the bio is a zone append, remap it to a write. */ - if (!dm_zone_map_bio_begin(md, orig_bio, clone)) { + if (!dm_zone_map_bio_begin(md, zno, clone)) { dm_zone_unlock(q, zno, clone); return DM_MAPIO_KILL; } @@ -560,7 +565,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) * The target submitted the clone BIO. The target zone will * be unlocked on completion of the clone. */ - sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr); + sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, + *tio->len_ptr); break; case DM_MAPIO_REMAPPED: /* @@ -568,7 +574,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) * unlock the target zone here as the clone will not be * submitted. */ - sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr); + sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, + *tio->len_ptr); if (sts != BLK_STS_OK) dm_zone_unlock(q, zno, clone); break; -- cgit v1.2.3 From 7dd06a2548b2bf516ef2e79873a9cdd00b354b99 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 14 Apr 2022 11:52:54 -0400 Subject: dm: allow dm_accept_partial_bio() for dm_io without duplicate bios The intent behind commit e6fc9f62ce6e ("dm: flag clones created by __send_duplicate_bios") was to formally disallow the use of dm_accept_partial_bio() where it simply isn't possible -- due to constraint that multiple bios cannot meaningfully update a shared tio->len_ptr. But that commit went too far and disallowed the case where "abormal" IO (e.g. WRITE_ZEROES) is only using a single bio. Fix this by not marking a dm_io with a single dm_target_io (and bio), that happens to be created by __send_duplicate_bios, as DM_TIO_IS_DUPLICATE_BIO. Also remove 'unsigned *len' parameter from alloc_multiple_bios(). This commit fixes a dm_accept_partial_bio() BUG_ON() with dm-zoned when a WRITE_ZEROES bio is issued. Fixes: 655f3aad7aa4 ("dm: switch dm_target_io booleans over to proper flags") Reported-by: Shinichiro Kawasaki Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3c5fad7c4ee6..fc1f9583a271 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1323,8 +1323,7 @@ static void __map_bio(struct bio *clone) } static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, - struct dm_target *ti, unsigned num_bios, - unsigned *len) + struct dm_target *ti, unsigned num_bios) { struct bio *bio; int try; @@ -1335,7 +1334,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, if (try) mutex_lock(&ci->io->md->table_devices_lock); for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { - bio = alloc_tio(ci, ti, bio_nr, len, + bio = alloc_tio(ci, ti, bio_nr, NULL, try ? GFP_NOIO : GFP_NOWAIT); if (!bio) break; @@ -1363,11 +1362,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, break; case 1: clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); break; default: - alloc_multiple_bios(&blist, ci, ti, num_bios, len); + /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ + alloc_multiple_bios(&blist, ci, ti, num_bios); while ((clone = bio_list_pop(&blist))) { dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); @@ -1407,14 +1406,10 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target len = min_t(sector_t, ci->sector_count, max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector))); - /* - * dm_accept_partial_bio cannot be used with duplicate bios, - * so update clone_info cursor before __send_duplicate_bios(). - */ + __send_duplicate_bios(ci, ti, num_bios, &len); + ci->sector += len; ci->sector_count -= len; - - __send_duplicate_bios(ci, ti, num_bios, &len); } static bool is_abnormal_io(struct bio *bio) -- cgit v1.2.3 From 92b914e29af3e99589f2d2876616c0b534892ed4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 15 Apr 2022 17:45:13 +0900 Subject: dm: fix bio length of empty flush The commit 92986f6b4c8a ("dm: use bio_clone_fast in alloc_io/alloc_tio") removed bio_clone_fast() call from alloc_tio() when ci->io->tio is available. In this case, ci->bio is not copied to ci->io->tio.clone. This is fine since init_clone_info() sets same values to ci->bio and ci->io->tio.clone. However, when incoming bios have REQ_PREFLUSH flag, __send_empty_flush() prepares a zero length bio on stack and set it to ci->bio. At this time, ci->io->tio.clone still keeps non-zero length. When alloc_tio() chooses this ci->io->tio.clone as the bio to map, it is passed to targets as non-empty flush bio. It causes bio length check failure in dm-zoned and unexpected operation such as dm_accept_partial_bio() call. To avoid the non-empty flush bio, set zero length to ci->io->tio.clone in __send_empty_flush(). Fixes: 92986f6b4c8a ("dm: use bio_clone_fast in alloc_io/alloc_tio") Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fc1f9583a271..82957bd460e8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1391,6 +1391,7 @@ static void __send_empty_flush(struct clone_info *ci) ci->bio = &flush_bio; ci->sector_count = 0; + ci->io->tio.clone.bi_iter.bi_size = 0; while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); -- cgit v1.2.3