summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Damien Le Moal <damien.lemoal@wdc.com> 2018-10-17 12:05:08 +0300
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2018-11-13 22:15:10 +0300
commit 37531246704e708a5b3ee4a9498493cc4ff0c4bb (patch)
tree 11de7755be4447baff1a84930f68500b476f1f2c
parent 229cbc6123b4b1d2d46956a869e6dbf6905265a4 (diff)
download linux-37531246704e708a5b3ee4a9498493cc4ff0c4bb.tar.xz
dm zoned: fix various dmz_get_mblock() issues
commit 3d4e738311327bc4ba1d55fbe2f1da3de4a475f9 upstream.

dmz_fetch_mblock() called from dmz_get_mblock() has a race since the allocation of the new metadata block descriptor and its insertion in the cache rbtree with the READING state is not atomic. Two different contexts requesting the same block may each end up adding a different descriptor of the same block to the cache.

Another problem for this function is that the BIO for processing the block read is allocated after the metadata block descriptor is inserted in the cache rbtree. If the BIO allocation fails, the metadata block descriptor is freed without first being removed from the rbtree.

Fix the first problem by checking again if the requested block is not in the cache right before inserting the newly allocated descriptor, atomically under the mblk_lock spinlock. The second problem is fixed by simply allocating the BIO before inserting the new block in the cache.

Finally, since dmz_fetch_mblock() also increments a block reference counter, rename the function to dmz_get_mblock_slow(). To be symmetric and clear, also rename dmz_lookup_mblock() to dmz_get_mblock_fast() and increment the block reference counter directly in that function rather than in dmz_get_mblock().

Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target")
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- drivers/md/dm-zoned-metadata.c 66
1 files changed, 42 insertions, 24 deletions
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index e1dce7f8f4fd..34968ca6b84a 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -339,10 +339,11 @@ static void dmz_insert_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
}
/*
- * Lookup a metadata block in the rbtree.
+ * Lookup a metadata block in the rbtree. If the block is found, increment
+ * its reference count.
*/
-static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
- sector_t mblk_no)
+static struct dmz_mblock *dmz_get_mblock_fast(struct dmz_metadata *zmd,
+ sector_t mblk_no)
{
struct rb_root *root = &zmd->mblk_rbtree;
struct rb_node *node = root->rb_node;
@@ -350,8 +351,17 @@ static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
while (node) {
mblk = container_of(node, struct dmz_mblock, node);
- if (mblk->no == mblk_no)
+ if (mblk->no == mblk_no) {
+ /*
+ * If this is the first reference to the block,
+ * remove it from the LRU list.
+ */
+ mblk->ref++;
+ if (mblk->ref == 1 &&
+ !test_bit(DMZ_META_DIRTY, &mblk->state))
+ list_del_init(&mblk->link);
return mblk;
+ }
node = (mblk->no < mblk_no) ? node->rb_left : node->rb_right;
}
@@ -382,32 +392,47 @@ static void dmz_mblock_bio_end_io(struct bio *bio)
}
/*
- * Read a metadata block from disk.
+ * Read an uncached metadata block from disk and add it to the cache.
*/
-static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd,
- sector_t mblk_no)
+static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
+ sector_t mblk_no)
{
- struct dmz_mblock *mblk;
+ struct dmz_mblock *mblk, *m;
sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
struct bio *bio;
- /* Get block and insert it */
+ /* Get a new block and a BIO to read it */
mblk = dmz_alloc_mblock(zmd, mblk_no);
if (!mblk)
return NULL;
- spin_lock(&zmd->mblk_lock);
- mblk->ref++;
- set_bit(DMZ_META_READING, &mblk->state);
- dmz_insert_mblock(zmd, mblk);
- spin_unlock(&zmd->mblk_lock);
-
bio = bio_alloc(GFP_NOIO, 1);
if (!bio) {
dmz_free_mblock(zmd, mblk);
return NULL;
}
+ spin_lock(&zmd->mblk_lock);
+
+ /*
+ * Make sure that another context did not start reading
+ * the block already.
+ */
+ m = dmz_get_mblock_fast(zmd, mblk_no);
+ if (m) {
+ spin_unlock(&zmd->mblk_lock);
+ dmz_free_mblock(zmd, mblk);
+ bio_put(bio);
+ return m;
+ }
+
+ mblk->ref++;
+ set_bit(DMZ_META_READING, &mblk->state);
+ dmz_insert_mblock(zmd, mblk);
+
+ spin_unlock(&zmd->mblk_lock);
+
+ /* Submit read BIO */
bio->bi_iter.bi_sector = dmz_blk2sect(block);
bio_set_dev(bio, zmd->dev->bdev);
bio->bi_private = mblk;
@@ -509,19 +534,12 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
/* Check rbtree */
spin_lock(&zmd->mblk_lock);
- mblk = dmz_lookup_mblock(zmd, mblk_no);
- if (mblk) {
- /* Cache hit: remove block from LRU list */
- mblk->ref++;
- if (mblk->ref == 1 &&
- !test_bit(DMZ_META_DIRTY, &mblk->state))
- list_del_init(&mblk->link);
- }
+ mblk = dmz_get_mblock_fast(zmd, mblk_no);
spin_unlock(&zmd->mblk_lock);
if (!mblk) {
/* Cache miss: read the block from disk */
- mblk = dmz_fetch_mblock(zmd, mblk_no);
+ mblk = dmz_get_mblock_slow(zmd, mblk_no);
if (!mblk)
return ERR_PTR(-ENOMEM);
}