diff options
Diffstat (limited to 'drivers/lightnvm')
-rw-r--r-- | drivers/lightnvm/Kconfig | 3 | ||||
-rw-r--r-- | drivers/lightnvm/core.c | 334 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-cache.c | 1 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-core.c | 587 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-gc.c | 11 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-init.c | 321 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-map.c | 13 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-rb.c | 110 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-read.c | 86 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-recovery.c | 471 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-rl.c | 5 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-sysfs.c | 12 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-trace.h | 145 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-write.c | 90 | ||||
-rw-r--r-- | drivers/lightnvm/pblk.h | 221 |
15 files changed, 1252 insertions, 1158 deletions
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 439bf90d084d..a872cd720967 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -4,8 +4,7 @@ menuconfig NVM bool "Open-Channel SSD target support" - depends on BLOCK && PCI - select BLK_DEV_NVME + depends on BLOCK help Say Y here to get to enable Open-channel SSDs. diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 60aa7bc5a630..efb976a863d2 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -355,6 +355,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return -EINVAL; } + if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) { + pr_err("nvm: device is incompatible with target L2P type.\n"); + return -EINVAL; + } + if (nvm_target_exists(create->tgtname)) { pr_err("nvm: target name already exists (%s)\n", create->tgtname); @@ -598,22 +603,16 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { - if (rqd->nr_ppas == 1) { - nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1); - return; - } + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas); + nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas); } static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { - if (rqd->nr_ppas == 1) { - nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1); - return; - } + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas); + nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas); } int nvm_register_tgt_type(struct nvm_tgt_type *tt) @@ -712,45 +711,23 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); } -int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta, - struct ppa_addr ppa, int nchks) +static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd) { - struct nvm_dev *dev = tgt_dev->parent; - - nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - - return dev->ops->get_chk_meta(tgt_dev->parent, meta, - (sector_t)ppa.ppa, nchks); -} -EXPORT_SYMBOL(nvm_get_chunk_meta); - -int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_rq rqd; - int ret; + int flags = 0; - if (nr_ppas > NVM_MAX_VLBA) { - pr_err("nvm: unable to update all blocks atomically\n"); - return -EINVAL; - } + if (geo->version == NVM_OCSSD_SPEC_20) + return 0; - memset(&rqd, 0, sizeof(struct nvm_rq)); + if (rqd->is_seq) + flags |= geo->pln_mode >> 1; - nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); - nvm_rq_tgt_to_dev(tgt_dev, &rqd); + if (rqd->opcode == NVM_OP_PREAD) + flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND); + else if (rqd->opcode == NVM_OP_PWRITE) + flags |= NVM_IO_SCRAMBLE_ENABLE; - ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(tgt_dev, &rqd); - if (ret) { - pr_err("nvm: failed bb mark\n"); - return -EINVAL; - } - - return 0; + return flags; } -EXPORT_SYMBOL(nvm_set_tgt_bb_tbl); int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { @@ -763,6 +740,7 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) nvm_rq_tgt_to_dev(tgt_dev, rqd); rqd->dev = tgt_dev; + rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); /* In case of error, fail with right address format */ ret = dev->ops->submit_io(dev, rqd); @@ -783,6 +761,7 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) nvm_rq_tgt_to_dev(tgt_dev, rqd); rqd->dev = tgt_dev; + rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); /* In case of error, fail with right address format */ ret = dev->ops->submit_io_sync(dev, rqd); @@ -805,27 +784,159 @@ void nvm_end_io(struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_end_io); +static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd) +{ + if (!dev->ops->submit_io_sync) + return -ENODEV; + + rqd->flags = nvm_set_flags(&dev->geo, rqd); + + return dev->ops->submit_io_sync(dev, rqd); +} + +static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa) +{ + struct nvm_rq rqd = { NULL }; + struct bio bio; + struct bio_vec bio_vec; + struct page *page; + int ret; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + bio_init(&bio, &bio_vec, 1); + bio_add_page(&bio, page, PAGE_SIZE, 0); + bio_set_op_attrs(&bio, REQ_OP_READ, 0); + + rqd.bio = &bio; + rqd.opcode = NVM_OP_PREAD; + rqd.is_seq = 1; + rqd.nr_ppas = 1; + rqd.ppa_addr = generic_to_dev_addr(dev, ppa); + + ret = nvm_submit_io_sync_raw(dev, &rqd); + if (ret) + return ret; + + __free_page(page); + + return rqd.error; +} + /* - * folds a bad block list from its plane representation to its virtual - * block representation. The fold is done in place and reduced size is - * returned. - * - * If any of the planes status are bad or grown bad block, the virtual block - * is marked bad. If not bad, the first plane state acts as the block state. + * Scans a 1.2 chunk first and last page to determine if its state. + * If the chunk is found to be open, also scan it to update the write + * pointer. */ -int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) +static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa, + struct nvm_chk_meta *meta) { struct nvm_geo *geo = &dev->geo; - int blk, offset, pl, blktype; + int ret, pg, pl; - if (nr_blks != geo->num_chk * geo->pln_mode) - return -EINVAL; + /* sense first page */ + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) /* valid data */ + meta->state = NVM_CHK_ST_OPEN; + else if (ret > 0) { + /* + * If empty page, the chunk is free, else it is an + * actual io error. In that case, mark it offline. + */ + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + meta->state = NVM_CHK_ST_FREE; + return 0; + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->state = NVM_CHK_ST_OPEN; + goto scan; + default: + return -ret; /* other io error */ + } + } + + /* sense last page */ + ppa.g.pg = geo->num_pg - 1; + ppa.g.pl = geo->num_pln - 1; + + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) { /* Chunk fully written */ + meta->state = NVM_CHK_ST_CLOSED; + meta->wp = geo->clba; + return 0; + } else if (ret > 0) { + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->state = NVM_CHK_ST_OPEN; + break; + default: + return -ret; /* other io error */ + } + } + +scan: + /* + * chunk is open, we scan sequentially to update the write pointer. + * We make the assumption that targets write data across all planes + * before moving to the next page. + */ + for (pg = 0; pg < geo->num_pg; pg++) { + for (pl = 0; pl < geo->num_pln; pl++) { + ppa.g.pg = pg; + ppa.g.pl = pl; + + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) { + meta->wp += geo->ws_min; + } else if (ret > 0) { + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + return 0; + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->wp += geo->ws_min; + break; + default: + return -ret; /* other io error */ + } + } + } + } + + return 0; +} + +/* + * folds a bad block list from its plane representation to its + * chunk representation. + * + * If any of the planes status are bad or grown bad, the chunk is marked + * offline. If not bad, the first plane state acts as the chunk state. + */ +static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa, + u8 *blks, int nr_blks, struct nvm_chk_meta *meta) +{ + struct nvm_geo *geo = &dev->geo; + int ret, blk, pl, offset, blktype; for (blk = 0; blk < geo->num_chk; blk++) { offset = blk * geo->pln_mode; blktype = blks[offset]; - /* Bad blocks on any planes take precedence over other types */ for (pl = 0; pl < geo->pln_mode; pl++) { if (blks[offset + pl] & (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { @@ -834,23 +945,124 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) } } - blks[blk] = blktype; + ppa.g.blk = blk; + + meta->wp = 0; + meta->type = NVM_CHK_TP_W_SEQ; + meta->wi = 0; + meta->slba = generic_to_dev_addr(dev, ppa).ppa; + meta->cnlb = dev->geo.clba; + + if (blktype == NVM_BLK_T_FREE) { + ret = nvm_bb_chunk_scan(dev, ppa, meta); + if (ret) + return ret; + } else { + meta->state = NVM_CHK_ST_OFFLINE; + } + + meta++; } - return geo->num_chk; + return 0; +} + +static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba, + int nchks, struct nvm_chk_meta *meta) +{ + struct nvm_geo *geo = &dev->geo; + struct ppa_addr ppa; + u8 *blks; + int ch, lun, nr_blks; + int ret; + + ppa.ppa = slba; + ppa = dev_to_generic_addr(dev, ppa); + + if (ppa.g.blk != 0) + return -EINVAL; + + if ((nchks % geo->num_chk) != 0) + return -EINVAL; + + nr_blks = geo->num_chk * geo->pln_mode; + + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; + + for (ch = ppa.g.ch; ch < geo->num_ch; ch++) { + for (lun = ppa.g.lun; lun < geo->num_lun; lun++) { + struct ppa_addr ppa_gen, ppa_dev; + + if (!nchks) + goto done; + + ppa_gen.ppa = 0; + ppa_gen.g.ch = ch; + ppa_gen.g.lun = lun; + ppa_dev = generic_to_dev_addr(dev, ppa_gen); + + ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks); + if (ret) + goto done; + + ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks, + meta); + if (ret) + goto done; + + meta += geo->num_chk; + nchks -= geo->num_chk; + } + } +done: + kfree(blks); + return ret; } -EXPORT_SYMBOL(nvm_bb_tbl_fold); -int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, - u8 *blks) +int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, + int nchks, struct nvm_chk_meta *meta) { struct nvm_dev *dev = tgt_dev->parent; nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - return dev->ops->get_bb_tbl(dev, ppa, blks); + if (dev->geo.version == NVM_OCSSD_SPEC_12) + return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta); + + return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta); +} +EXPORT_SYMBOL_GPL(nvm_get_chunk_meta); + +int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, + int nr_ppas, int type) +{ + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_rq rqd; + int ret; + + if (dev->geo.version == NVM_OCSSD_SPEC_20) + return 0; + + if (nr_ppas > NVM_MAX_VLBA) { + pr_err("nvm: unable to update all blocks atomically\n"); + return -EINVAL; + } + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); + nvm_rq_tgt_to_dev(tgt_dev, &rqd); + + ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); + nvm_free_rqd_ppalist(tgt_dev, &rqd); + if (ret) + return -EINVAL; + + return 0; } -EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); +EXPORT_SYMBOL_GPL(nvm_set_chunk_meta); static int nvm_core_init(struct nvm_dev *dev) { diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index f565a56b898a..c9fa26f95659 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 00984b486fea..6944aac43b01 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -16,7 +17,10 @@ * */ +#define CREATE_TRACE_POINTS + #include "pblk.h" +#include "pblk-trace.h" static void pblk_line_mark_bb(struct work_struct *work) { @@ -27,12 +31,12 @@ static void pblk_line_mark_bb(struct work_struct *work) struct ppa_addr *ppa = line_ws->priv; int ret; - ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); + ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); if (ret) { struct pblk_line *line; int pos; - line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + line = pblk_ppa_to_line(pblk, *ppa); pos = pblk_ppa_to_pos(&dev->geo, *ppa); pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n", @@ -80,19 +84,28 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) struct pblk_line *line; int pos; - line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)]; + line = pblk_ppa_to_line(pblk, rqd->ppa_addr); pos = pblk_ppa_to_pos(geo, rqd->ppa_addr); chunk = &line->chks[pos]; atomic_dec(&line->left_seblks); if (rqd->error) { + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED); + chunk->state = NVM_CHK_ST_OFFLINE; pblk_mark_bb(pblk, line, rqd->ppa_addr); } else { + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &rqd->ppa_addr, PBLK_CHUNK_RESET_DONE); + chunk->state = NVM_CHK_ST_FREE; } + trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr, + chunk->state); + atomic_dec(&pblk->inflight_io); } @@ -108,9 +121,9 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) /* * Get information for all chunks from the device. * - * The caller is responsible for freeing the returned structure + * The caller is responsible for freeing (vmalloc) the returned structure */ -struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) +struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -122,11 +135,11 @@ struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) ppa.ppa = 0; len = geo->all_chunks * sizeof(*meta); - meta = kzalloc(len, GFP_KERNEL); + meta = vzalloc(len); if (!meta) return ERR_PTR(-ENOMEM); - ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks); + ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta); if (ret) { kfree(meta); return ERR_PTR(-EIO); @@ -192,7 +205,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) { struct pblk_line *line; u64 paddr; - int line_id; #ifdef CONFIG_NVM_PBLK_DEBUG /* Callers must ensure that the ppa points to a device address */ @@ -200,8 +212,7 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) BUG_ON(pblk_ppa_empty(ppa)); #endif - line_id = pblk_ppa_to_line(ppa); - line = &pblk->lines[line_id]; + line = pblk_ppa_to_line(pblk, ppa); paddr = pblk_dev_ppa_to_line_addr(pblk, ppa); __pblk_map_invalidate(pblk, line, paddr); @@ -227,6 +238,33 @@ static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, spin_unlock(&pblk->trans_lock); } +int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd->dma_meta_list); + if (!rqd->meta_list) + return -ENOMEM; + + if (rqd->nr_ppas == 1) + return 0; + + rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; + rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + + return 0; +} + +void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + if (rqd->meta_list) + nvm_dev_dma_free(dev->parent, rqd->meta_list, + rqd->dma_meta_list); +} + /* Caller must guarantee that the request is a valid type */ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) { @@ -258,7 +296,6 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) /* Typically used on completion path. Cannot guarantee request consistency */ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) { - struct nvm_tgt_dev *dev = pblk->dev; mempool_t *pool; switch (type) { @@ -279,9 +316,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) return; } - if (rqd->meta_list) - nvm_dev_dma_free(dev->parent, rqd->meta_list, - rqd->dma_meta_list); + pblk_free_rqd_meta(pblk, rqd); mempool_free(rqd, pool); } @@ -409,6 +444,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) } } else { line->state = PBLK_LINESTATE_CORRUPT; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); + line->gc_group = PBLK_LINEGC_NONE; move_list = &l_mg->corrupt_list; pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", @@ -479,9 +517,30 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) return nvm_submit_io(dev, rqd); } +void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); + + int i; + + for (i = 0; i < rqd->nr_ppas; i++) { + struct ppa_addr *ppa = &ppa_list[i]; + struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa); + u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa); + + if (caddr == 0) + trace_pblk_chunk_state(pblk_disk_name(pblk), + ppa, NVM_CHK_ST_OPEN); + else if (caddr == chunk->cnlb) + trace_pblk_chunk_state(pblk_disk_name(pblk), + ppa, NVM_CHK_ST_CLOSED); + } +} + int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; + int ret; atomic_inc(&pblk->inflight_io); @@ -490,7 +549,27 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) return NVM_IO_ERR; #endif - return nvm_submit_io_sync(dev, rqd); + ret = nvm_submit_io_sync(dev, rqd); + + if (trace_pblk_chunk_state_enabled() && !ret && + rqd->opcode == NVM_OP_PWRITE) + pblk_check_chunk_state_update(pblk, rqd); + + return ret; +} + +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list; + int ret; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + pblk_down_chunk(pblk, ppa_list[0]); + ret = pblk_submit_io_sync(pblk, rqd); + pblk_up_chunk(pblk, ppa_list[0]); + + return ret; } static void pblk_bio_map_addr_endio(struct bio *bio) @@ -621,262 +700,227 @@ u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line) return paddr; } -/* - * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when - * taking the per LUN semaphore. - */ -static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf, u64 paddr, int dir) +u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; - void *ppa_list, *meta_list; - struct bio *bio; - struct nvm_rq rqd; - dma_addr_t dma_ppa_list, dma_meta_list; - int min = pblk->min_write_pgs; - int left_ppas = lm->emeta_sec[0]; - int id = line->id; - int rq_ppas, rq_len; - int cmd_op, bio_op; - int i, j; - int ret; + int bit; - if (dir == PBLK_WRITE) { - bio_op = REQ_OP_WRITE; - cmd_op = NVM_OP_PWRITE; - } else if (dir == PBLK_READ) { - bio_op = REQ_OP_READ; - cmd_op = NVM_OP_PREAD; - } else - return -EINVAL; + /* This usually only happens on bad lines */ + bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); + if (bit >= lm->blk_per_line) + return -1; - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &dma_meta_list); - if (!meta_list) - return -ENOMEM; + return bit * geo->ws_opt; +} - ppa_list = meta_list + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_meta *lm = &pblk->lm; + struct bio *bio; + struct nvm_rq rqd; + u64 paddr = pblk_line_smeta_start(pblk, line); + int i, ret; -next_rq: memset(&rqd, 0, sizeof(struct nvm_rq)); - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - rq_len = rq_ppas * geo->csecs; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; - bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, - l_mg->emeta_alloc_type, GFP_KERNEL); + bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto free_rqd_dma; + goto clear_rqd; } bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, bio_op, 0); + bio_set_op_attrs(bio, REQ_OP_READ, 0); rqd.bio = bio; - rqd.meta_list = meta_list; - rqd.ppa_list = ppa_list; - rqd.dma_meta_list = dma_meta_list; - rqd.dma_ppa_list = dma_ppa_list; - rqd.opcode = cmd_op; - rqd.nr_ppas = rq_ppas; - - if (dir == PBLK_WRITE) { - struct pblk_sec_meta *meta_list = rqd.meta_list; - - rqd.flags = pblk_set_progr_mode(pblk, PBLK_WRITE); - for (i = 0; i < rqd.nr_ppas; ) { - spin_lock(&line->lock); - paddr = __pblk_alloc_page(pblk, line, min); - spin_unlock(&line->lock); - for (j = 0; j < min; j++, i++, paddr++) { - meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); - rqd.ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, id); - } - } - } else { - for (i = 0; i < rqd.nr_ppas; ) { - struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id); - int pos = pblk_ppa_to_pos(geo, ppa); - int read_type = PBLK_READ_RANDOM; - - if (pblk_io_aligned(pblk, rq_ppas)) - read_type = PBLK_READ_SEQUENTIAL; - rqd.flags = pblk_set_read_mode(pblk, read_type); - - while (test_bit(pos, line->blk_bitmap)) { - paddr += min; - if (pblk_boundary_paddr_checks(pblk, paddr)) { - pblk_err(pblk, "corrupt emeta line:%d\n", - line->id); - bio_put(bio); - ret = -EINTR; - goto free_rqd_dma; - } - - ppa = addr_to_gen_ppa(pblk, paddr, id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - if (pblk_boundary_paddr_checks(pblk, paddr + min)) { - pblk_err(pblk, "corrupt emeta line:%d\n", - line->id); - bio_put(bio); - ret = -EINTR; - goto free_rqd_dma; - } + rqd.opcode = NVM_OP_PREAD; + rqd.nr_ppas = lm->smeta_sec; + rqd.is_seq = 1; - for (j = 0; j < min; j++, i++, paddr++) - rqd.ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, line->id); - } - } + for (i = 0; i < lm->smeta_sec; i++, paddr++) + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { - pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); + pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); bio_put(bio); - goto free_rqd_dma; + goto clear_rqd; } atomic_dec(&pblk->inflight_io); - if (rqd.error) { - if (dir == PBLK_WRITE) - pblk_log_write_err(pblk, &rqd); - else - pblk_log_read_err(pblk, &rqd); - } + if (rqd.error) + pblk_log_read_err(pblk, &rqd); - emeta_buf += rq_len; - left_ppas -= rq_ppas; - if (left_ppas) - goto next_rq; -free_rqd_dma: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); +clear_rqd: + pblk_free_rqd_meta(pblk, &rqd); return ret; } -u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int bit; - - /* This usually only happens on bad lines */ - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (bit >= lm->blk_per_line) - return -1; - - return bit * geo->ws_opt; -} - -static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, - u64 paddr, int dir) +static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, + u64 paddr) { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_meta *lm = &pblk->lm; struct bio *bio; struct nvm_rq rqd; - __le64 *lba_list = NULL; + __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); int i, ret; - int cmd_op, bio_op; - int flags; - - if (dir == PBLK_WRITE) { - bio_op = REQ_OP_WRITE; - cmd_op = NVM_OP_PWRITE; - flags = pblk_set_progr_mode(pblk, PBLK_WRITE); - lba_list = emeta_to_lbas(pblk, line->emeta->buf); - } else if (dir == PBLK_READ_RECOV || dir == PBLK_READ) { - bio_op = REQ_OP_READ; - cmd_op = NVM_OP_PREAD; - flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - } else - return -EINVAL; memset(&rqd, 0, sizeof(struct nvm_rq)); - rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd.dma_meta_list); - if (!rqd.meta_list) - return -ENOMEM; - - rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; - rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto free_ppa_list; + goto clear_rqd; } bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, bio_op, 0); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); rqd.bio = bio; - rqd.opcode = cmd_op; - rqd.flags = flags; + rqd.opcode = NVM_OP_PWRITE; rqd.nr_ppas = lm->smeta_sec; + rqd.is_seq = 1; for (i = 0; i < lm->smeta_sec; i++, paddr++) { struct pblk_sec_meta *meta_list = rqd.meta_list; rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - - if (dir == PBLK_WRITE) { - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - meta_list[i].lba = lba_list[paddr] = addr_empty; - } + meta_list[i].lba = lba_list[paddr] = addr_empty; } - /* - * This I/O is sent by the write thread when a line is replace. Since - * the write thread is the only one sending write and erase commands, - * there is no need to take the LUN semaphore. - */ - ret = pblk_submit_io_sync(pblk, &rqd); + ret = pblk_submit_io_sync_sem(pblk, &rqd); if (ret) { pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); bio_put(bio); - goto free_ppa_list; + goto clear_rqd; } atomic_dec(&pblk->inflight_io); if (rqd.error) { - if (dir == PBLK_WRITE) { - pblk_log_write_err(pblk, &rqd); - ret = 1; - } else if (dir == PBLK_READ) - pblk_log_read_err(pblk, &rqd); + pblk_log_write_err(pblk, &rqd); + ret = -EIO; } -free_ppa_list: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - +clear_rqd: + pblk_free_rqd_meta(pblk, &rqd); return ret; } -int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line) +int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, + void *emeta_buf) { - u64 bpaddr = pblk_line_smeta_start(pblk, line); + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + void *ppa_list, *meta_list; + struct bio *bio; + struct nvm_rq rqd; + u64 paddr = line->emeta_ssec; + dma_addr_t dma_ppa_list, dma_meta_list; + int min = pblk->min_write_pgs; + int left_ppas = lm->emeta_sec[0]; + int line_id = line->id; + int rq_ppas, rq_len; + int i, j; + int ret; - return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ_RECOV); -} + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &dma_meta_list); + if (!meta_list) + return -ENOMEM; -int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf) -{ - return pblk_line_submit_emeta_io(pblk, line, emeta_buf, - line->emeta_ssec, PBLK_READ); + ppa_list = meta_list + pblk_dma_meta_size; + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + +next_rq: + memset(&rqd, 0, sizeof(struct nvm_rq)); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_len = rq_ppas * geo->csecs; + + bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, + l_mg->emeta_alloc_type, GFP_KERNEL); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + goto free_rqd_dma; + } + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd.bio = bio; + rqd.meta_list = meta_list; + rqd.ppa_list = ppa_list; + rqd.dma_meta_list = dma_meta_list; + rqd.dma_ppa_list = dma_ppa_list; + rqd.opcode = NVM_OP_PREAD; + rqd.nr_ppas = rq_ppas; + + for (i = 0; i < rqd.nr_ppas; ) { + struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id); + int pos = pblk_ppa_to_pos(geo, ppa); + + if (pblk_io_aligned(pblk, rq_ppas)) + rqd.is_seq = 1; + + while (test_bit(pos, line->blk_bitmap)) { + paddr += min; + if (pblk_boundary_paddr_checks(pblk, paddr)) { + bio_put(bio); + ret = -EINTR; + goto free_rqd_dma; + } + + ppa = addr_to_gen_ppa(pblk, paddr, line_id); + pos = pblk_ppa_to_pos(geo, ppa); + } + + if (pblk_boundary_paddr_checks(pblk, paddr + min)) { + bio_put(bio); + ret = -EINTR; + goto free_rqd_dma; + } + + for (j = 0; j < min; j++, i++, paddr++) + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); + } + + ret = pblk_submit_io_sync(pblk, &rqd); + if (ret) { + pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); + bio_put(bio); + goto free_rqd_dma; + } + + atomic_dec(&pblk->inflight_io); + + if (rqd.error) + pblk_log_read_err(pblk, &rqd); + + emeta_buf += rq_len; + left_ppas -= rq_ppas; + if (left_ppas) + goto next_rq; + +free_rqd_dma: + nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + return ret; } static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, @@ -885,16 +929,17 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, rqd->opcode = NVM_OP_ERASE; rqd->ppa_addr = ppa; rqd->nr_ppas = 1; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_ERASE); + rqd->is_seq = 1; rqd->bio = NULL; } static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) { - struct nvm_rq rqd; - int ret = 0; + struct nvm_rq rqd = {NULL}; + int ret; - memset(&rqd, 0, sizeof(struct nvm_rq)); + trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa, + PBLK_CHUNK_RESET_START); pblk_setup_e_rq(pblk, &rqd, ppa); @@ -902,19 +947,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) * with writes. Thus, there is no need to take the LUN semaphore. */ ret = pblk_submit_io_sync(pblk, &rqd); - if (ret) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - pblk_err(pblk, "could not sync erase line:%d,blk:%d\n", - pblk_ppa_to_line(ppa), - pblk_ppa_to_pos(geo, ppa)); - - rqd.error = ret; - goto out; - } - -out: rqd.private = pblk; __pblk_end_io_erase(pblk, &rqd); @@ -1008,6 +1040,8 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, spin_lock(&l_mg->free_lock); spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1071,15 +1105,18 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; - line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); if (!line->map_bitmap) return -ENOMEM; + memset(line->map_bitmap, 0, lm->sec_bitmap_len); + /* will be initialized using bb info from map_bitmap */ - line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); + line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); if (!line->invalid_bitmap) { - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; return -ENOMEM; } @@ -1122,7 +1159,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, line->smeta_ssec = off; line->cur_sec = off + lm->smeta_sec; - if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) { + if (init && pblk_line_smeta_write(pblk, line, off)) { pblk_debug(pblk, "line smeta I/O failed. Retry\n"); return 0; } @@ -1152,6 +1189,8 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) { spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1204,6 +1243,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) if (line->state == PBLK_LINESTATE_NEW) { blk_to_erase = pblk_prepare_new_line(pblk, line); line->state = PBLK_LINESTATE_FREE; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } else { blk_to_erase = blk_in_line; } @@ -1221,6 +1262,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) } line->state = PBLK_LINESTATE_OPEN; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); atomic_set(&line->left_eblks, blk_to_erase); atomic_set(&line->left_seblks, blk_to_erase); @@ -1265,7 +1308,9 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) { - kfree(line->map_bitmap); + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; @@ -1283,8 +1328,11 @@ static void pblk_line_reinit(struct pblk_line *line) void pblk_line_free(struct pblk_line *line) { - kfree(line->map_bitmap); - kfree(line->invalid_bitmap); + struct pblk *pblk = line->pblk; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + + mempool_free(line->map_bitmap, l_mg->bitmap_pool); + mempool_free(line->invalid_bitmap, l_mg->bitmap_pool); pblk_line_reinit(line); } @@ -1312,6 +1360,8 @@ retry: if (unlikely(bit >= lm->blk_per_line)) { spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1446,12 +1496,32 @@ retry_setup: return line; } +void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa) +{ + struct pblk_line *line; + + line = pblk_ppa_to_line(pblk, ppa); + kref_put(&line->ref, pblk_line_put_wq); +} + +void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list; + int i; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + for (i = 0; i < rqd->nr_ppas; i++) + pblk_ppa_to_line_put(pblk, ppa_list[i]); +} + static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) { lockdep_assert_held(&pblk->l_mg.free_lock); pblk_set_space_limit(pblk); pblk->state = PBLK_STATE_STOPPING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); } static void pblk_line_close_meta_sync(struct pblk *pblk) @@ -1501,6 +1571,7 @@ void __pblk_pipeline_flush(struct pblk *pblk) return; } pblk->state = PBLK_STATE_RECOVERING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); spin_unlock(&l_mg->free_lock); pblk_flush_writer(pblk); @@ -1522,6 +1593,7 @@ void __pblk_pipeline_stop(struct pblk *pblk) spin_lock(&l_mg->free_lock); pblk->state = PBLK_STATE_STOPPED; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); l_mg->data_line = NULL; l_mg->data_next = NULL; spin_unlock(&l_mg->free_lock); @@ -1539,13 +1611,14 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk) struct pblk_line *cur, *new = NULL; unsigned int left_seblks; - cur = l_mg->data_line; new = l_mg->data_next; if (!new) goto out; - l_mg->data_line = new; spin_lock(&l_mg->free_lock); + cur = l_mg->data_line; + l_mg->data_line = new; + pblk_line_setup_metadata(new, l_mg, &pblk->lm); spin_unlock(&l_mg->free_lock); @@ -1612,6 +1685,8 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_FREE; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); line->gc_group = PBLK_LINEGC_NONE; pblk_line_free(line); @@ -1680,6 +1755,9 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) rqd->end_io = pblk_end_io_erase; rqd->private = pblk; + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &ppa, PBLK_CHUNK_RESET_START); + /* The write thread schedules erases so that it minimizes disturbances * with writes. Thus, there is no need to take the LUN semaphore. */ @@ -1689,7 +1767,7 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) struct nvm_geo *geo = &dev->geo; pblk_err(pblk, "could not async erase line:%d,blk:%d\n", - pblk_ppa_to_line(ppa), + pblk_ppa_to_line_id(ppa), pblk_ppa_to_pos(geo, ppa)); } @@ -1741,10 +1819,9 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) WARN_ON(line->state != PBLK_LINESTATE_OPEN); line->state = PBLK_LINESTATE_CLOSED; move_list = pblk_line_gc_list(pblk, line); - list_add_tail(&line->list, move_list); - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; @@ -1760,6 +1837,9 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) spin_unlock(&line->lock); spin_unlock(&l_mg->gc_lock); + + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) @@ -1778,6 +1858,17 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa)); wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa)); + if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) { + emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC); + memcpy(emeta_buf->header.uuid, pblk->instance_uuid, 16); + emeta_buf->header.id = cpu_to_le32(line->id); + emeta_buf->header.type = cpu_to_le16(line->type); + emeta_buf->header.version_major = EMETA_VERSION_MAJOR; + emeta_buf->header.version_minor = EMETA_VERSION_MINOR; + emeta_buf->header.crc = cpu_to_le32( + pblk_calc_meta_header_crc(pblk, &emeta_buf->header)); + } + emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas); emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf)); @@ -1795,8 +1886,6 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) spin_unlock(&l_mg->close_lock); pblk_line_should_sync_meta(pblk); - - } static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line) @@ -1847,8 +1936,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, queue_work(wq, &line_ws->ws); } -static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, - int nr_ppas, int pos) +static void __pblk_down_chunk(struct pblk *pblk, int pos) { struct pblk_lun *rlun = &pblk->luns[pos]; int ret; @@ -1857,13 +1945,6 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, * Only send one inflight I/O per LUN. Since we map at a page * granurality, all ppas in the I/O will map to the same LUN */ -#ifdef CONFIG_NVM_PBLK_DEBUG - int i; - - for (i = 1; i < nr_ppas; i++) - WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun || - ppa_list[0].a.ch != ppa_list[i].a.ch); -#endif ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); if (ret == -ETIME || ret == -EINTR) @@ -1871,21 +1952,21 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, -ret); } -void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) +void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); + int pos = pblk_ppa_to_pos(geo, ppa); - __pblk_down_page(pblk, ppa_list, nr_ppas, pos); + __pblk_down_chunk(pblk, pos); } -void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, +void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, unsigned long *lun_bitmap) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); + int pos = pblk_ppa_to_pos(geo, ppa); /* If the LUN has been locked for this same request, do no attempt to * lock it again @@ -1893,30 +1974,21 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, if (test_and_set_bit(pos, lun_bitmap)) return; - __pblk_down_page(pblk, ppa_list, nr_ppas, pos); + __pblk_down_chunk(pblk, pos); } -void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) +void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_lun *rlun; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); - -#ifdef CONFIG_NVM_PBLK_DEBUG - int i; - - for (i = 1; i < nr_ppas; i++) - WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun || - ppa_list[0].a.ch != ppa_list[i].a.ch); -#endif + int pos = pblk_ppa_to_pos(geo, ppa); rlun = &pblk->luns[pos]; up(&rlun->wr_sem); } -void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, - unsigned long *lun_bitmap) +void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -2060,8 +2132,7 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, /* If the L2P entry maps to a line, the reference is valid */ if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { - int line_id = pblk_ppa_to_line(ppa); - struct pblk_line *line = &pblk->lines[line_id]; + struct pblk_line *line = pblk_ppa_to_line(pblk, ppa); kref_get(&line->ref); } diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 157c2567c9e8..2fa118c8eb71 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -16,8 +17,10 @@ */ #include "pblk.h" +#include "pblk-trace.h" #include <linux/delay.h> + static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) { if (gc_rq->data) @@ -64,6 +67,8 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_CLOSED; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); move_list = pblk_line_gc_list(pblk, line); spin_unlock(&line->lock); @@ -144,7 +149,7 @@ static __le64 *get_lba_list_from_emeta(struct pblk *pblk, if (!emeta_buf) return NULL; - ret = pblk_line_read_emeta(pblk, line, emeta_buf); + ret = pblk_line_emeta_read(pblk, line, emeta_buf); if (ret) { pblk_err(pblk, "line %d read emeta failed (%d)\n", line->id, ret); @@ -405,6 +410,8 @@ void pblk_gc_free_full_lines(struct pblk *pblk) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_CLOSED); line->state = PBLK_LINESTATE_GC; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_del(&line->list); @@ -451,6 +458,8 @@ next_gc_group: spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_CLOSED); line->state = PBLK_LINESTATE_GC; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_del(&line->list); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 537e98f2b24a..13822594647c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) * Copyright (C) 2016 CNEX Labs @@ -19,15 +20,31 @@ */ #include "pblk.h" +#include "pblk-trace.h" static unsigned int write_buffer_size; module_param(write_buffer_size, uint, 0644); MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); -static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, - *pblk_w_rq_cache; -static DECLARE_RWSEM(pblk_lock); +struct pblk_global_caches { + struct kmem_cache *ws; + struct kmem_cache *rec; + struct kmem_cache *g_rq; + struct kmem_cache *w_rq; + + struct kref kref; + + struct mutex mutex; /* Ensures consistency between + * caches and kref + */ +}; + +static struct pblk_global_caches pblk_caches = { + .mutex = __MUTEX_INITIALIZER(pblk_caches.mutex), + .kref = KREF_INIT(0), +}; + struct bio_set pblk_bio_set; static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, @@ -168,36 +185,26 @@ static void pblk_rwb_free(struct pblk *pblk) if (pblk_rb_tear_down_check(&pblk->rwb)) pblk_err(pblk, "write buffer error on tear down\n"); - pblk_rb_data_free(&pblk->rwb); - vfree(pblk_rb_entries_ref(&pblk->rwb)); + pblk_rb_free(&pblk->rwb); } static int pblk_rwb_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_rb_entry *entries; - unsigned long nr_entries, buffer_size; - unsigned int power_size, power_seg_sz; - int pgs_in_buffer; + unsigned long buffer_size; + int pgs_in_buffer, threshold; - pgs_in_buffer = max(geo->mw_cunits, geo->ws_opt) * geo->all_luns; + threshold = geo->mw_cunits * geo->all_luns; + pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt) + * geo->all_luns; if (write_buffer_size && (write_buffer_size > pgs_in_buffer)) buffer_size = write_buffer_size; else buffer_size = pgs_in_buffer; - nr_entries = pblk_rb_calculate_size(buffer_size); - - entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); - if (!entries) - return -ENOMEM; - - power_size = get_count_order(nr_entries); - power_seg_sz = get_count_order(geo->csecs); - - return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); + return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); } /* Minimum pages needed within a lun */ @@ -306,53 +313,80 @@ static int pblk_set_addrf(struct pblk *pblk) return 0; } -static int pblk_init_global_caches(struct pblk *pblk) +static int pblk_create_global_caches(void) { - down_write(&pblk_lock); - pblk_ws_cache = kmem_cache_create("pblk_blk_ws", + + pblk_caches.ws = kmem_cache_create("pblk_blk_ws", sizeof(struct pblk_line_ws), 0, 0, NULL); - if (!pblk_ws_cache) { - up_write(&pblk_lock); + if (!pblk_caches.ws) return -ENOMEM; - } - pblk_rec_cache = kmem_cache_create("pblk_rec", + pblk_caches.rec = kmem_cache_create("pblk_rec", sizeof(struct pblk_rec_ctx), 0, 0, NULL); - if (!pblk_rec_cache) { - kmem_cache_destroy(pblk_ws_cache); - up_write(&pblk_lock); - return -ENOMEM; - } + if (!pblk_caches.rec) + goto fail_destroy_ws; - pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, + pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, 0, 0, NULL); - if (!pblk_g_rq_cache) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - up_write(&pblk_lock); - return -ENOMEM; - } + if (!pblk_caches.g_rq) + goto fail_destroy_rec; - pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, + pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, 0, 0, NULL); - if (!pblk_w_rq_cache) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - up_write(&pblk_lock); - return -ENOMEM; - } - up_write(&pblk_lock); + if (!pblk_caches.w_rq) + goto fail_destroy_g_rq; return 0; + +fail_destroy_g_rq: + kmem_cache_destroy(pblk_caches.g_rq); +fail_destroy_rec: + kmem_cache_destroy(pblk_caches.rec); +fail_destroy_ws: + kmem_cache_destroy(pblk_caches.ws); + + return -ENOMEM; } -static void pblk_free_global_caches(struct pblk *pblk) +static int pblk_get_global_caches(void) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); + int ret; + + mutex_lock(&pblk_caches.mutex); + + if (kref_read(&pblk_caches.kref) > 0) { + kref_get(&pblk_caches.kref); + mutex_unlock(&pblk_caches.mutex); + return 0; + } + + ret = pblk_create_global_caches(); + + if (!ret) + kref_get(&pblk_caches.kref); + + mutex_unlock(&pblk_caches.mutex); + + return ret; +} + +static void pblk_destroy_global_caches(struct kref *ref) +{ + struct pblk_global_caches *c; + + c = container_of(ref, struct pblk_global_caches, kref); + + kmem_cache_destroy(c->ws); + kmem_cache_destroy(c->rec); + kmem_cache_destroy(c->g_rq); + kmem_cache_destroy(c->w_rq); +} + +static void pblk_put_global_caches(void) +{ + mutex_lock(&pblk_caches.mutex); + kref_put(&pblk_caches.kref, pblk_destroy_global_caches); + mutex_unlock(&pblk_caches.mutex); } static int pblk_core_init(struct pblk *pblk) @@ -371,23 +405,19 @@ static int pblk_core_init(struct pblk *pblk) atomic64_set(&pblk->nr_flush, 0); pblk->nr_flush_rst = 0; - pblk->min_write_pgs = geo->ws_opt * (geo->csecs / PAGE_SIZE); + pblk->min_write_pgs = geo->ws_opt; max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); + pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, + queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); - if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { - pblk_err(pblk, "vector list too big(%u > %u)\n", - pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS); - return -EINVAL; - } - pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), GFP_KERNEL); if (!pblk->pad_dist) return -ENOMEM; - if (pblk_init_global_caches(pblk)) + if (pblk_get_global_caches()) goto fail_free_pad_dist; /* Internal bios can be at most the sectors signaled by the device. */ @@ -396,27 +426,27 @@ static int pblk_core_init(struct pblk *pblk) goto free_global_caches; ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE, - pblk_ws_cache); + pblk_caches.ws); if (ret) goto free_page_bio_pool; ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns, - pblk_rec_cache); + pblk_caches.rec); if (ret) goto free_gen_ws_pool; ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns, - pblk_g_rq_cache); + pblk_caches.g_rq); if (ret) goto free_rec_pool; ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns, - pblk_g_rq_cache); + pblk_caches.g_rq); if (ret) goto free_r_rq_pool; ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns, - pblk_w_rq_cache); + pblk_caches.w_rq); if (ret) goto free_e_rq_pool; @@ -462,7 +492,7 @@ free_gen_ws_pool: free_page_bio_pool: mempool_exit(&pblk->page_bio_pool); free_global_caches: - pblk_free_global_caches(pblk); + pblk_put_global_caches(); fail_free_pad_dist: kfree(pblk->pad_dist); return -ENOMEM; @@ -486,7 +516,7 @@ static void pblk_core_free(struct pblk *pblk) mempool_exit(&pblk->e_rq_pool); mempool_exit(&pblk->w_rq_pool); - pblk_free_global_caches(pblk); + pblk_put_global_caches(); kfree(pblk->pad_dist); } @@ -504,6 +534,9 @@ static void pblk_line_mg_free(struct pblk *pblk) pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); kfree(l_mg->eline_meta[i]); } + + mempool_destroy(l_mg->bitmap_pool); + kmem_cache_destroy(l_mg->bitmap_cache); } static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg, @@ -540,67 +573,6 @@ static void pblk_lines_free(struct pblk *pblk) kfree(pblk->lines); } -static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun, - u8 *blks, int nr_blks) -{ - struct ppa_addr ppa; - int ret; - - ppa.ppa = 0; - ppa.g.ch = rlun->bppa.g.ch; - ppa.g.lun = rlun->bppa.g.lun; - - ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); - if (ret) - return ret; - - nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); - if (nr_blks < 0) - return -EIO; - - return 0; -} - -static void *pblk_bb_get_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - u8 *meta; - int i, nr_blks, blk_per_lun; - int ret; - - blk_per_lun = geo->num_chk * geo->pln_mode; - nr_blks = blk_per_lun * geo->all_luns; - - meta = kmalloc(nr_blks, GFP_KERNEL); - if (!meta) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < geo->all_luns; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - u8 *meta_pos = meta + i * blk_per_lun; - - ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun); - if (ret) { - kfree(meta); - return ERR_PTR(-EIO); - } - } - - return meta; -} - -static void *pblk_chunk_get_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) - return pblk_bb_get_meta(pblk); - else - return pblk_chunk_get_info(pblk); -} - static int pblk_luns_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; @@ -699,51 +671,7 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); } -static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line, - void *chunk_meta) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int i, chk_per_lun, nr_bad_chks = 0; - - chk_per_lun = geo->num_chk * geo->pln_mode; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - struct nvm_chk_meta *chunk; - int pos = pblk_ppa_to_pos(geo, rlun->bppa); - u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun; - - chunk = &line->chks[pos]; - - /* - * In 1.2 spec. chunk state is not persisted by the device. Thus - * some of the values are reset each time pblk is instantiated, - * so we have to assume that the block is closed. - */ - if (lun_bb_meta[line->id] == NVM_BLK_T_FREE) - chunk->state = NVM_CHK_ST_CLOSED; - else - chunk->state = NVM_CHK_ST_OFFLINE; - - chunk->type = NVM_CHK_TP_W_SEQ; - chunk->wi = 0; - chunk->slba = -1; - chunk->cnlb = geo->clba; - chunk->wp = 0; - - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) - continue; - - set_bit(pos, line->blk_bitmap); - nr_bad_chks++; - } - - return nr_bad_chks; -} - -static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, +static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, struct nvm_chk_meta *meta) { struct nvm_tgt_dev *dev = pblk->dev; @@ -772,6 +700,9 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, chunk->cnlb = chunk_meta->cnlb; chunk->wp = chunk_meta->wp; + trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa, + chunk->state); + if (chunk->type & NVM_CHK_TP_SZ_SPEC) { WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); continue; @@ -790,8 +721,6 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, void *chunk_meta, int line_id) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; long nr_bad_chks, chk_in_line; @@ -804,10 +733,7 @@ static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, line->vsc = &l_mg->vsc_list[line_id]; spin_lock_init(&line->lock); - if (geo->version == NVM_OCSSD_SPEC_12) - nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta); - else - nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta); + nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta); chk_in_line = lm->blk_per_line - nr_bad_chks; if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line || @@ -913,6 +839,17 @@ static int pblk_line_mg_init(struct pblk *pblk) goto fail_free_smeta; } + l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap", + lm->sec_bitmap_len, 0, 0, NULL); + if (!l_mg->bitmap_cache) + goto fail_free_smeta; + + /* the bitmap pool is used for both valid and map bitmaps */ + l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2, + l_mg->bitmap_cache); + if (!l_mg->bitmap_pool) + goto fail_destroy_bitmap_cache; + /* emeta allocates three different buffers for managing metadata with * in-memory and in-media layouts */ @@ -965,6 +902,10 @@ fail_free_emeta: kfree(l_mg->eline_meta[i]->buf); kfree(l_mg->eline_meta[i]); } + + mempool_destroy(l_mg->bitmap_pool); +fail_destroy_bitmap_cache: + kmem_cache_destroy(l_mg->bitmap_cache); fail_free_smeta: for (i = 0; i < PBLK_DATA_LINES; i++) kfree(l_mg->sline_meta[i]); @@ -1058,7 +999,7 @@ static int pblk_lines_init(struct pblk *pblk) if (ret) goto fail_free_meta; - chunk_meta = pblk_chunk_get_meta(pblk); + chunk_meta = pblk_get_chunk_meta(pblk); if (IS_ERR(chunk_meta)) { ret = PTR_ERR(chunk_meta); goto fail_free_luns; @@ -1079,16 +1020,20 @@ static int pblk_lines_init(struct pblk *pblk) goto fail_free_lines; nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); + + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } if (!nr_free_chks) { pblk_err(pblk, "too many bad blocks prevent for sane instance\n"); - return -EINTR; + ret = -EINTR; + goto fail_free_lines; } pblk_set_provision(pblk, nr_free_chks); - kfree(chunk_meta); + vfree(chunk_meta); return 0; fail_free_lines: @@ -1165,7 +1110,6 @@ static void pblk_exit(void *private, bool graceful) { struct pblk *pblk = private; - down_write(&pblk_lock); pblk_gc_exit(pblk, graceful); pblk_tear_down(pblk, graceful); @@ -1174,7 +1118,6 @@ static void pblk_exit(void *private, bool graceful) #endif pblk_free(pblk); - up_write(&pblk_lock); } static sector_t pblk_capacity(void *private) @@ -1200,6 +1143,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->dev = dev; pblk->disk = tdisk; pblk->state = PBLK_STATE_RUNNING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); pblk->gc.gc_enabled = 0; if (!(geo->version == NVM_OCSSD_SPEC_12 || @@ -1210,13 +1154,6 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, return ERR_PTR(-EINVAL); } - if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) { - pblk_err(pblk, "host-side L2P table not supported. (%x)\n", - geo->dom); - kfree(pblk); - return ERR_PTR(-EINVAL); - } - spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 953ca31dda68..6dcbd44e3acb 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -79,7 +80,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, } } - pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); + pblk_down_rq(pblk, ppa_list[0], lun_bitmap); return 0; } @@ -88,13 +89,14 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned int off) { struct pblk_sec_meta *meta_list = rqd->meta_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); unsigned int map_secs; int min = pblk->min_write_pgs; int i; for (i = off; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], lun_bitmap, &meta_list[i], map_secs)) { bio_put(rqd->bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); @@ -112,6 +114,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; struct pblk_sec_meta *meta_list = rqd->meta_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); struct pblk_line *e_line, *d_line; unsigned int map_secs; int min = pblk->min_write_pgs; @@ -119,14 +122,14 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, for (i = 0; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], lun_bitmap, &meta_list[i], map_secs)) { bio_put(rqd->bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); pblk_pipeline_stop(pblk); } - erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]); + erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]); /* line can change after page map. We might also be writing the * last line. @@ -141,7 +144,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, set_bit(erase_lun, e_line->erase_bitmap); atomic_dec(&e_line->left_eblks); - *erase_ppa = rqd->ppa_list[i]; + *erase_ppa = ppa_list[i]; erase_ppa->a.blk = e_line->id; spin_unlock(&e_line->lock); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index f6eec0212dfc..b1f4b51783f4 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -22,7 +23,7 @@ static DECLARE_RWSEM(pblk_rb_lock); -void pblk_rb_data_free(struct pblk_rb *rb) +static void pblk_rb_data_free(struct pblk_rb *rb) { struct pblk_rb_pages *p, *t; @@ -35,25 +36,51 @@ void pblk_rb_data_free(struct pblk_rb *rb) up_write(&pblk_rb_lock); } +void pblk_rb_free(struct pblk_rb *rb) +{ + pblk_rb_data_free(rb); + vfree(rb->entries); +} + +/* + * pblk_rb_calculate_size -- calculate the size of the write buffer + */ +static unsigned int pblk_rb_calculate_size(unsigned int nr_entries) +{ + /* Alloc a write buffer that can at least fit 128 entries */ + return (1 << max(get_count_order(nr_entries), 7)); +} + /* * Initialize ring buffer. The data and metadata buffers must be previously * allocated and their size must be a power of two * (Documentation/core-api/circular-buffers.rst) */ -int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, - unsigned int power_size, unsigned int power_seg_sz) +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, + unsigned int seg_size) { struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entries; unsigned int init_entry = 0; - unsigned int alloc_order = power_size; unsigned int max_order = MAX_ORDER - 1; - unsigned int order, iter; + unsigned int power_size, power_seg_sz; + unsigned int alloc_order, order, iter; + unsigned int nr_entries; + + nr_entries = pblk_rb_calculate_size(size); + entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); + if (!entries) + return -ENOMEM; + + power_size = get_count_order(size); + power_seg_sz = get_count_order(seg_size); down_write(&pblk_rb_lock); - rb->entries = rb_entry_base; + rb->entries = entries; rb->seg_size = (1 << power_seg_sz); rb->nr_entries = (1 << power_size); rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; + rb->back_thres = threshold; rb->flush_point = EMPTY_ENTRY; spin_lock_init(&rb->w_lock); @@ -61,6 +88,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, INIT_LIST_HEAD(&rb->pages); + alloc_order = power_size; if (alloc_order >= max_order) { order = max_order; iter = (1 << (alloc_order - max_order)); @@ -79,6 +107,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL); if (!page_set) { up_write(&pblk_rb_lock); + vfree(entries); return -ENOMEM; } @@ -88,6 +117,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, kfree(page_set); pblk_rb_data_free(rb); up_write(&pblk_rb_lock); + vfree(entries); return -ENOMEM; } kaddr = page_address(page_set->pages); @@ -124,20 +154,6 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, return 0; } -/* - * pblk_rb_calculate_size -- calculate the size of the write buffer - */ -unsigned int pblk_rb_calculate_size(unsigned int nr_entries) -{ - /* Alloc a write buffer that can at least fit 128 entries */ - return (1 << max(get_count_order(nr_entries), 7)); -} - -void *pblk_rb_entries_ref(struct pblk_rb *rb) -{ - return rb->entries; -} - static void clean_wctx(struct pblk_w_ctx *w_ctx) { int flags; @@ -168,6 +184,12 @@ static unsigned int pblk_rb_space(struct pblk_rb *rb) return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries); } +unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, + unsigned int nr_entries) +{ + return (p + nr_entries) & (rb->nr_entries - 1); +} + /* * Buffer count is calculated with respect to the submission entry signaling the * entries that are available to send to the media @@ -194,8 +216,7 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) subm = READ_ONCE(rb->subm); /* Commit read means updating submission pointer */ - smp_store_release(&rb->subm, - (subm + nr_entries) & (rb->nr_entries - 1)); + smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries)); return subm; } @@ -225,10 +246,10 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, entry->cacheline); - line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; + line = pblk_ppa_to_line(pblk, w_ctx->ppa); kref_put(&line->ref, pblk_line_put); clean_wctx(w_ctx); - rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1); + rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1); } pblk_rl_out(&pblk->rl, user_io, gc_io); @@ -385,11 +406,14 @@ static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, { unsigned int mem; unsigned int sync; + unsigned int threshold; sync = READ_ONCE(rb->sync); mem = READ_ONCE(rb->mem); - if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries) + threshold = nr_entries + rb->back_thres; + + if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold) return 0; if (pblk_rb_update_l2p(rb, nr_entries, mem, sync)) @@ -407,7 +431,7 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, return 0; /* Protect from read count */ - smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1)); + smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries)); return 1; } @@ -431,7 +455,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, if (!__pblk_rb_may_write(rb, nr_entries, pos)) return 0; - mem = (*pos + nr_entries) & (rb->nr_entries - 1); + mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries); *io_ret = NVM_IO_DONE; if (bio->bi_opf & REQ_PREFLUSH) { @@ -571,7 +595,7 @@ try: /* Release flags on context. Protect from writes */ smp_store_release(&entry->w_ctx.flags, flags); - pos = (pos + 1) & (rb->nr_entries - 1); + pos = pblk_rb_ptr_wrap(rb, pos, 1); } if (pad) { @@ -651,7 +675,7 @@ out: struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos) { - unsigned int entry = pos & (rb->nr_entries - 1); + unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0); return &rb->entries[entry].w_ctx; } @@ -697,7 +721,7 @@ unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) } } - sync = (sync + nr_entries) & (rb->nr_entries - 1); + sync = pblk_rb_ptr_wrap(rb, sync, nr_entries); /* Protect from counts */ smp_store_release(&rb->sync, sync); @@ -728,32 +752,6 @@ unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb) return (submitted < to_flush) ? (to_flush - submitted) : 0; } -/* - * Scan from the current position of the sync pointer to find the entry that - * corresponds to the given ppa. This is necessary since write requests can be - * completed out of order. The assumption is that the ppa is close to the sync - * pointer thus the search will not take long. - * - * The caller of this function must guarantee that the sync pointer will no - * reach the entry while it is using the metadata associated with it. With this - * assumption in mind, there is no need to take the sync lock. - */ -struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, - struct ppa_addr *ppa) -{ - unsigned int sync, subm, count; - unsigned int i; - - sync = READ_ONCE(rb->sync); - subm = READ_ONCE(rb->subm); - count = pblk_rb_ring_count(subm, sync, rb->nr_entries); - - for (i = 0; i < count; i++) - sync = (sync + 1) & (rb->nr_entries - 1); - - return NULL; -} - int pblk_rb_tear_down_check(struct pblk_rb *rb) { struct pblk_rb_entry *entry; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 5a46d7f9302f..9fba614adeeb 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -43,7 +44,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned long *read_bitmap) { struct pblk_sec_meta *meta_list = rqd->meta_list; - struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppas[NVM_MAX_VLBA]; int nr_secs = rqd->nr_ppas; bool advanced_bio = false; int i, j = 0; @@ -93,9 +94,7 @@ next: } if (pblk_io_aligned(pblk, nr_secs)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rqd->is_seq = 1; #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_add(nr_secs, &pblk->inflight_reads); @@ -118,10 +117,9 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, if (lba != blba + i) { #ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *p; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - p = (nr_lbas == 1) ? &rqd->ppa_list[i] : &rqd->ppa_addr; - print_ppa(pblk, p, "seq", i); + print_ppa(pblk, &ppa_list[i], "seq", i); #endif pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", lba, (u64)blba + i); @@ -150,14 +148,12 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, if (lba != meta_lba) { #ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *p; - int nr_ppas = rqd->nr_ppas; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - p = (nr_ppas == 1) ? &rqd->ppa_list[j] : &rqd->ppa_addr; - print_ppa(pblk, p, "seq", j); + print_ppa(pblk, &ppa_list[j], "rnd", j); #endif pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", - lba, meta_lba); + meta_lba, lba); WARN_ON(1); } @@ -167,22 +163,6 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); } -static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list; - int i; - - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; - - for (i = 0; i < rqd->nr_ppas; i++) { - struct ppa_addr ppa = ppa_list[i]; - struct pblk_line *line; - - line = &pblk->lines[pblk_ppa_to_line(ppa)]; - kref_put(&line->ref, pblk_line_put_wq); - } -} - static void pblk_end_user_read(struct bio *bio) { #ifdef CONFIG_NVM_PBLK_DEBUG @@ -210,7 +190,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, bio_put(int_bio); if (put_line) - pblk_read_put_rqd_kref(pblk, rqd); + pblk_rq_to_line_put(pblk, rqd); #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); @@ -270,9 +250,9 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) i = 0; hole = find_first_zero_bit(read_bitmap, nr_secs); do { - int line_id = pblk_ppa_to_line(rqd->ppa_list[i]); - struct pblk_line *line = &pblk->lines[line_id]; + struct pblk_line *line; + line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); kref_put(&line->ref, pblk_line_put); meta_list[hole].lba = lba_list_media[i]; @@ -344,7 +324,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, rqd->bio = new_bio; rqd->nr_ppas = nr_holes; - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); pr_ctx->ppa_ptr = NULL; pr_ctx->orig_bio = bio; @@ -438,8 +417,6 @@ retry: } else { rqd->ppa_addr = ppa; } - - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); } int pblk_submit_read(struct pblk *pblk, struct bio *bio) @@ -454,13 +431,6 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA); int ret = NVM_IO_ERR; - /* logic error: lba out-of-bounds. Ignore read request */ - if (blba >= pblk->rl.nr_secs || nr_secs > PBLK_MAX_REQ_ADDRS) { - WARN(1, "pblk: read lba out of bounds (lba:%llu, nr:%d)\n", - (unsigned long long)blba, nr_secs); - return NVM_IO_ERR; - } - generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio), &pblk->disk->part0); @@ -484,21 +454,13 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) */ bio_init_idx = pblk_get_bi_idx(bio); - rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd->dma_meta_list); - if (!rqd->meta_list) { - pblk_err(pblk, "not able to allocate ppa list\n"); + if (pblk_alloc_rqd_meta(pblk, rqd)) goto fail_rqd_free; - } - - if (nr_secs > 1) { - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + if (nr_secs > 1) pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap); - } else { + else pblk_read_rq(pblk, rqd, bio, blba, read_bitmap); - } if (bitmap_full(read_bitmap, nr_secs)) { atomic_inc(&pblk->inflight_io); @@ -552,7 +514,7 @@ static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_line *line, u64 *lba_list, u64 *paddr_list_gc, unsigned int nr_secs) { - struct ppa_addr ppa_list_l2p[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA]; struct ppa_addr ppa_gc; int valid_secs = 0; int i; @@ -625,15 +587,11 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) memset(&rqd, 0, sizeof(struct nvm_rq)); - rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd.dma_meta_list); - if (!rqd.meta_list) - return -ENOMEM; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; if (gc_rq->nr_secs > 1) { - rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; - rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; - gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line, gc_rq->lba_list, gc_rq->paddr_list, @@ -654,7 +612,8 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { pblk_err(pblk, "could not allocate GC bio (%lu)\n", - PTR_ERR(bio)); + PTR_ERR(bio)); + ret = PTR_ERR(bio); goto err_free_dma; } @@ -663,7 +622,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) rqd.opcode = NVM_OP_PREAD; rqd.nr_ppas = gc_rq->secs_to_gc; - rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); rqd.bio = bio; if (pblk_submit_io_sync(pblk, &rqd)) { @@ -690,12 +648,12 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) #endif out: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + pblk_free_rqd_meta(pblk, &rqd); return ret; err_free_bio: bio_put(bio); err_free_dma: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + pblk_free_rqd_meta(pblk, &rqd); return ret; } diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index e232e47e1353..5740b7509bd8 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial: Javier Gonzalez <javier@cnexlabs.com> @@ -15,6 +16,7 @@ */ #include "pblk.h" +#include "pblk-trace.h" int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf) { @@ -85,15 +87,39 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) return 0; } -static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line) +static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line, + u64 written_secs) +{ + int i; + + for (i = 0; i < written_secs; i += pblk->min_write_pgs) + pblk_alloc_page(pblk, line, pblk->min_write_pgs); +} + +static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); + u64 written_secs = 0; + int valid_chunks = 0; + int i; + + for (i = 0; i < lm->blk_per_line; i++) { + struct nvm_chk_meta *chunk = &line->chks[i]; + + if (chunk->state & NVM_CHK_ST_OFFLINE) + continue; + + written_secs += chunk->wp; + valid_chunks++; + } + + if (lm->blk_per_line - nr_bb != valid_chunks) + pblk_err(pblk, "recovery line %d is bad\n", line->id); - return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] - - nr_bb * geo->clba; + pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec); + + return written_secs; } struct pblk_recov_alloc { @@ -105,115 +131,6 @@ struct pblk_recov_alloc { dma_addr_t dma_meta_list; }; -static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p, u64 r_ptr) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; - struct nvm_rq *rqd; - struct bio *bio; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - u64 r_ptr_int; - int left_ppas; - int rq_ppas, rq_len; - int i, j; - int ret = 0; - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; - - left_ppas = line->cur_sec - r_ptr; - if (!left_ppas) - return 0; - - r_ptr_int = r_ptr; - -next_read_rq: - memset(rqd, 0, pblk_g_rq_size); - - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->csecs; - - bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); - if (IS_ERR(bio)) - return PTR_ERR(bio); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_READ, 0); - - rqd->bio = bio; - rqd->opcode = NVM_OP_PREAD; - rqd->meta_list = meta_list; - rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; - - if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); - - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; - - ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - while (test_bit(pos, line->blk_bitmap)) { - r_ptr_int += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++) - rqd->ppa_list[i] = - addr_to_gen_ppa(pblk, r_ptr_int, line->id); - } - - /* If read fails, more padding is needed */ - ret = pblk_submit_io_sync(pblk, rqd); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - return ret; - } - - atomic_dec(&pblk->inflight_io); - - /* At this point, the read should not fail. If it does, it is a problem - * we cannot recover from here. Need FTL log. - */ - if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) { - pblk_err(pblk, "L2P recovery failed (%d)\n", rqd->error); - return -EINTR; - } - - for (i = 0; i < rqd->nr_ppas; i++) { - u64 lba = le64_to_cpu(meta_list[i].lba); - - if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) - continue; - - pblk_update_map(pblk, lba, rqd->ppa_list[i]); - } - - left_ppas -= rq_ppas; - if (left_ppas > 0) - goto next_read_rq; - - return 0; -} - static void pblk_recov_complete(struct kref *ref) { struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref); @@ -223,10 +140,11 @@ static void pblk_recov_complete(struct kref *ref) static void pblk_end_io_recov(struct nvm_rq *rqd) { + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); struct pblk_pad_rq *pad_rq = rqd->private; struct pblk *pblk = pad_rq->pblk; - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); @@ -234,18 +152,17 @@ static void pblk_end_io_recov(struct nvm_rq *rqd) kref_put(&pad_rq->ref, pblk_recov_complete); } -static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, - int left_ppas) +/* pad line using line bitmap. */ +static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, + int left_ppas) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; struct pblk_sec_meta *meta_list; struct pblk_pad_rq *pad_rq; struct nvm_rq *rqd; struct bio *bio; void *data; - dma_addr_t dma_ppa_list, dma_meta_list; __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); u64 w_ptr = line->cur_sec; int left_line_ppas, rq_ppas, rq_len; @@ -279,20 +196,11 @@ next_pad_rq: rq_len = rq_ppas * geo->csecs; - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); - if (!meta_list) { - ret = -ENOMEM; - goto fail_free_pad; - } - - ppa_list = (void *)(meta_list) + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; - bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto fail_free_meta; + goto fail_free_pad; } bio->bi_iter.bi_sector = 0; /* internal bio */ @@ -300,17 +208,19 @@ next_pad_rq: rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); + ret = pblk_alloc_rqd_meta(pblk, rqd); + if (ret) + goto fail_free_rqd; + rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); - rqd->meta_list = meta_list; + rqd->is_seq = 1; rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; rqd->end_io = pblk_end_io_recov; rqd->private = pad_rq; + meta_list = rqd->meta_list; + for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; @@ -338,13 +248,13 @@ next_pad_rq: } kref_get(&pad_rq->ref); - pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_down_chunk(pblk, rqd->ppa_list[0]); ret = pblk_submit_io(pblk, rqd); if (ret) { pblk_err(pblk, "I/O submission failed: %d\n", ret); - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); - goto fail_free_bio; + pblk_up_chunk(pblk, rqd->ppa_list[0]); + goto fail_free_rqd; } left_line_ppas -= rq_ppas; @@ -368,157 +278,60 @@ free_rq: kfree(pad_rq); return ret; -fail_free_bio: +fail_free_rqd: + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); bio_put(bio); -fail_free_meta: - nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); fail_free_pad: kfree(pad_rq); vfree(data); return ret; } -/* When this function is called, it means that not all upper pages have been - * written in a page that contains valid data. In order to recover this data, we - * first find the write pointer on the device, then we pad all necessary - * sectors, and finally attempt to read the valid data - */ -static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p) +static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; - struct nvm_rq *rqd; - struct bio *bio; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - u64 w_ptr = 0, r_ptr; - int rq_ppas, rq_len; - int i, j; - int ret = 0; - int rec_round; - int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec; - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; - - /* we could recover up until the line write pointer */ - r_ptr = line->cur_sec; - rec_round = 0; - -next_rq: - memset(rqd, 0, pblk_g_rq_size); + int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt; - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->csecs; - - bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); - if (IS_ERR(bio)) - return PTR_ERR(bio); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_READ, 0); + return (distance > line->left_msecs) ? line->left_msecs : distance; +} - rqd->bio = bio; - rqd->opcode = NVM_OP_PREAD; - rqd->meta_list = meta_list; - rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; +static int pblk_line_wp_is_unbalanced(struct pblk *pblk, + struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_lun *rlun; + struct nvm_chk_meta *chunk; + struct ppa_addr ppa; + u64 line_wp; + int pos, i; - if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rlun = &pblk->luns[0]; + ppa = rlun->bppa; + pos = pblk_ppa_to_pos(geo, ppa); + chunk = &line->chks[pos]; - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; + line_wp = chunk->wp; - w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + for (i = 1; i < lm->blk_per_line; i++) { + rlun = &pblk->luns[i]; + ppa = rlun->bppa; pos = pblk_ppa_to_pos(geo, ppa); + chunk = &line->chks[pos]; - while (test_bit(pos, line->blk_bitmap)) { - w_ptr += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) - rqd->ppa_list[i] = - addr_to_gen_ppa(pblk, w_ptr, line->id); - } - - ret = pblk_submit_io_sync(pblk, rqd); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - return ret; - } - - atomic_dec(&pblk->inflight_io); - - /* This should not happen since the read failed during normal recovery, - * but the media works funny sometimes... - */ - if (!rec_round++ && !rqd->error) { - rec_round = 0; - for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) { - u64 lba = le64_to_cpu(meta_list[i].lba); - - if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) - continue; - - pblk_update_map(pblk, lba, rqd->ppa_list[i]); - } - } - - /* Reached the end of the written line */ - if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { - int pad_secs, nr_error_bits, bit; - int ret; - - bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); - nr_error_bits = rqd->nr_ppas - bit; - - /* Roll back failed sectors */ - line->cur_sec -= nr_error_bits; - line->left_msecs += nr_error_bits; - bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); - - pad_secs = pblk_pad_distance(pblk); - if (pad_secs > line->left_msecs) - pad_secs = line->left_msecs; - - ret = pblk_recov_pad_oob(pblk, line, pad_secs); - if (ret) - pblk_err(pblk, "OOB padding failed (err:%d)\n", ret); - - ret = pblk_recov_read_oob(pblk, line, p, r_ptr); - if (ret) - pblk_err(pblk, "OOB read failed (err:%d)\n", ret); - - left_ppas = 0; + if (chunk->wp > line_wp) + return 1; + else if (chunk->wp < line_wp) + line_wp = chunk->wp; } - left_ppas -= rq_ppas; - if (left_ppas > 0) - goto next_rq; - - return ret; + return 0; } static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p, int *done) + struct pblk_recov_alloc p) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -528,11 +341,16 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, struct bio *bio; void *data; dma_addr_t dma_ppa_list, dma_meta_list; - u64 paddr; + __le64 *lba_list; + u64 paddr = 0; + bool padded = false; int rq_ppas, rq_len; int i, j; - int ret = 0; - int left_ppas = pblk_calc_sec_in_line(pblk, line); + int ret; + u64 left_ppas = pblk_sec_in_open_line(pblk, line); + + if (pblk_line_wp_is_unbalanced(pblk, line)) + pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -541,7 +359,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, dma_ppa_list = p.dma_ppa_list; dma_meta_list = p.dma_meta_list; - *done = 1; + lba_list = emeta_to_lbas(pblk, line->emeta->buf); next_rq: memset(rqd, 0, pblk_g_rq_size); @@ -567,15 +385,13 @@ next_rq: rqd->dma_meta_list = dma_meta_list; if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rqd->is_seq = 1; +retry_rq: for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; - paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); ppa = addr_to_gen_ppa(pblk, paddr, line->id); pos = pblk_ppa_to_pos(geo, ppa); @@ -585,9 +401,9 @@ next_rq: pos = pblk_ppa_to_pos(geo, ppa); } - for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++) + for (j = 0; j < pblk->min_write_pgs; j++, i++) rqd->ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, line->id); + addr_to_gen_ppa(pblk, paddr + j, line->id); } ret = pblk_submit_io_sync(pblk, rqd); @@ -599,31 +415,33 @@ next_rq: atomic_dec(&pblk->inflight_io); - /* Reached the end of the written line */ + /* If a read fails, do a best effort by padding the line and retrying */ if (rqd->error) { - int nr_error_bits, bit; + int pad_distance, ret; - bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); - nr_error_bits = rqd->nr_ppas - bit; - - /* Roll back failed sectors */ - line->cur_sec -= nr_error_bits; - line->left_msecs += nr_error_bits; - bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); + if (padded) { + pblk_log_read_err(pblk, rqd); + return -EINTR; + } - left_ppas = 0; - rqd->nr_ppas = bit; + pad_distance = pblk_pad_distance(pblk, line); + ret = pblk_recov_pad_line(pblk, line, pad_distance); + if (ret) + return ret; - if (rqd->error != NVM_RSP_ERR_EMPTYPAGE) - *done = 0; + padded = true; + goto retry_rq; } for (i = 0; i < rqd->nr_ppas; i++) { u64 lba = le64_to_cpu(meta_list[i].lba); + lba_list[paddr++] = cpu_to_le64(lba); + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) continue; + line->nr_valid_lbas++; pblk_update_map(pblk, lba, rqd->ppa_list[i]); } @@ -631,7 +449,11 @@ next_rq: if (left_ppas > 0) goto next_rq; - return ret; +#ifdef CONFIG_NVM_PBLK_DEBUG + WARN_ON(padded && !pblk_line_is_full(line)); +#endif + + return 0; } /* Scan line for lbas on out of bound area */ @@ -645,7 +467,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) struct pblk_recov_alloc p; void *data; dma_addr_t dma_ppa_list, dma_meta_list; - int done, ret = 0; + int ret = 0; meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); if (!meta_list) @@ -660,7 +482,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) goto free_meta_list; } - rqd = pblk_alloc_rqd(pblk, PBLK_READ); + rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL); + memset(rqd, 0, pblk_g_rq_size); p.ppa_list = ppa_list; p.meta_list = meta_list; @@ -669,24 +492,17 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) p.dma_ppa_list = dma_ppa_list; p.dma_meta_list = dma_meta_list; - ret = pblk_recov_scan_oob(pblk, line, p, &done); + ret = pblk_recov_scan_oob(pblk, line, p); if (ret) { - pblk_err(pblk, "could not recover L2P from OOB\n"); + pblk_err(pblk, "could not recover L2P form OOB\n"); goto out; } - if (!done) { - ret = pblk_recov_scan_all_oob(pblk, line, p); - if (ret) { - pblk_err(pblk, "could not recover L2P from OOB\n"); - goto out; - } - } - if (pblk_line_is_full(line)) pblk_line_recov_close(pblk, line); out: + mempool_free(rqd, &pblk->r_rq_pool); kfree(data); free_meta_list: nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); @@ -775,7 +591,7 @@ static void pblk_recov_wa_counters(struct pblk *pblk, } static int pblk_line_was_written(struct pblk_line *line, - struct pblk *pblk) + struct pblk *pblk) { struct pblk_line_meta *lm = &pblk->lm; @@ -801,6 +617,18 @@ static int pblk_line_was_written(struct pblk_line *line, return 1; } +static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + int i; + + for (i = 0; i < lm->blk_per_line; i++) + if (line->chks[i].state & NVM_CHK_ST_OPEN) + return true; + + return false; +} + struct pblk_line *pblk_recov_l2p(struct pblk *pblk) { struct pblk_line_meta *lm = &pblk->lm; @@ -841,7 +669,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) continue; /* Lines that cannot be read are assumed as not written here */ - if (pblk_line_read_smeta(pblk, line)) + if (pblk_line_smeta_read(pblk, line)) continue; crc = pblk_calc_smeta_crc(pblk, smeta_buf); @@ -911,7 +739,12 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) line->emeta = emeta; memset(line->emeta->buf, 0, lm->emeta_len[0]); - if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) { + if (pblk_line_is_open(pblk, line)) { + pblk_recov_l2p_from_oob(pblk, line); + goto next; + } + + if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) { pblk_recov_l2p_from_oob(pblk, line); goto next; } @@ -935,6 +768,8 @@ next: spin_lock(&line->lock); line->state = PBLK_LINESTATE_CLOSED; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); move_list = pblk_line_gc_list(pblk, line); spin_unlock(&line->lock); @@ -942,26 +777,36 @@ next: list_move_tail(&line->list, move_list); spin_unlock(&l_mg->gc_lock); - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; } else { - if (open_lines > 1) - pblk_err(pblk, "failed to recover L2P\n"); + spin_lock(&line->lock); + line->state = PBLK_LINESTATE_OPEN; + spin_unlock(&line->lock); + + line->emeta->mem = 0; + atomic_set(&line->emeta->sync, 0); + + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); - open_lines++; - line->meta_line = meta_line; data_line = line; + line->meta_line = meta_line; + + open_lines++; } } - spin_lock(&l_mg->free_lock); if (!open_lines) { + spin_lock(&l_mg->free_lock); WARN_ON_ONCE(!test_and_clear_bit(meta_line, &l_mg->meta_bitmap)); + spin_unlock(&l_mg->free_lock); pblk_line_replace_data(pblk); } else { + spin_lock(&l_mg->free_lock); /* Allocate next line for preparation */ l_mg->data_next = pblk_line_get(pblk); if (l_mg->data_next) { @@ -969,8 +814,8 @@ next: l_mg->data_next->type = PBLK_LINETYPE_DATA; is_next = 1; } + spin_unlock(&l_mg->free_lock); } - spin_unlock(&l_mg->free_lock); if (is_next) pblk_line_erase(pblk, l_mg->data_next); @@ -998,7 +843,7 @@ int pblk_recov_pad(struct pblk *pblk) left_msecs = line->left_msecs; spin_unlock(&l_mg->free_lock); - ret = pblk_recov_pad_oob(pblk, line, left_msecs); + ret = pblk_recov_pad_line(pblk, line, left_msecs); if (ret) { pblk_err(pblk, "tear down padding failed (%d)\n", ret); return ret; diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 6a0616a6fcaf..db55a1c89997 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -127,7 +128,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, } else if (free_blocks < rl->high) { int shift = rl->high_pw - rl->rb_windows_pw; int user_windows = free_blocks >> shift; - int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW; + int user_max = user_windows << ilog2(NVM_MAX_VLBA); rl->rb_user_max = user_max; rl->rb_gc_max = max - user_max; @@ -228,7 +229,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) rl->rsv_blocks = min_blocks; /* This will always be a power-of-2 */ - rb_windows = budget / PBLK_MAX_REQ_ADDRS; + rb_windows = budget / NVM_MAX_VLBA; rl->rb_windows_pw = get_count_order(rb_windows); /* To start with, all buffer is available to user I/O writers */ diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 9fc3dfa168b4..2d2818155aa8 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -262,8 +263,14 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) sec_in_line = l_mg->data_line->sec_in_line; meta_weight = bitmap_weight(&l_mg->meta_bitmap, PBLK_DATA_LINES); - map_weight = bitmap_weight(l_mg->data_line->map_bitmap, + + spin_lock(&l_mg->data_line->lock); + if (l_mg->data_line->map_bitmap) + map_weight = bitmap_weight(l_mg->data_line->map_bitmap, lm->sec_per_line); + else + map_weight = 0; + spin_unlock(&l_mg->data_line->lock); } spin_unlock(&l_mg->free_lock); @@ -337,7 +344,6 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, { int sz; - sz = snprintf(page, PAGE_SIZE, "user:%lld gc:%lld pad:%lld WA:", user, gc, pad); @@ -349,7 +355,7 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, u32 wa_frac; wa_int = (user + gc + pad) * 100000; - wa_int = div_u64(wa_int, user); + wa_int = div64_u64(wa_int, user); wa_int = div_u64_rem(wa_int, 100000, &wa_frac); sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h new file mode 100644 index 000000000000..679e5c458ca6 --- /dev/null +++ b/drivers/lightnvm/pblk-trace.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM pblk + +#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PBLK_H + +#include <linux/tracepoint.h> + +struct ppa_addr; + +#define show_chunk_flags(state) __print_flags(state, "", \ + { NVM_CHK_ST_FREE, "FREE", }, \ + { NVM_CHK_ST_CLOSED, "CLOSED", }, \ + { NVM_CHK_ST_OPEN, "OPEN", }, \ + { NVM_CHK_ST_OFFLINE, "OFFLINE", }) + +#define show_line_state(state) __print_symbolic(state, \ + { PBLK_LINESTATE_NEW, "NEW", }, \ + { PBLK_LINESTATE_FREE, "FREE", }, \ + { PBLK_LINESTATE_OPEN, "OPEN", }, \ + { PBLK_LINESTATE_CLOSED, "CLOSED", }, \ + { PBLK_LINESTATE_GC, "GC", }, \ + { PBLK_LINESTATE_BAD, "BAD", }, \ + { PBLK_LINESTATE_CORRUPT, "CORRUPT" }) + + +#define show_pblk_state(state) __print_symbolic(state, \ + { PBLK_STATE_RUNNING, "RUNNING", }, \ + { PBLK_STATE_STOPPING, "STOPPING", }, \ + { PBLK_STATE_RECOVERING, "RECOVERING", }, \ + { PBLK_STATE_STOPPED, "STOPPED" }) + +#define show_chunk_erase_state(state) __print_symbolic(state, \ + { PBLK_CHUNK_RESET_START, "START", }, \ + { PBLK_CHUNK_RESET_DONE, "OK", }, \ + { PBLK_CHUNK_RESET_FAILED, "FAILED" }) + + +TRACE_EVENT(pblk_chunk_reset, + + TP_PROTO(const char *name, struct ppa_addr *ppa, int state), + + TP_ARGS(name, ppa, state), + + TP_STRUCT__entry( + __string(name, name) + __field(u64, ppa) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->ppa = ppa->ppa; + __entry->state = state; + ), + + TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk), + show_chunk_erase_state((int)__entry->state)) + +); + +TRACE_EVENT(pblk_chunk_state, + + TP_PROTO(const char *name, struct ppa_addr *ppa, int state), + + TP_ARGS(name, ppa, state), + + TP_STRUCT__entry( + __string(name, name) + __field(u64, ppa) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->ppa = ppa->ppa; + __entry->state = state; + ), + + TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk), + show_chunk_flags((int)__entry->state)) + +); + +TRACE_EVENT(pblk_line_state, + + TP_PROTO(const char *name, int line, int state), + + TP_ARGS(name, line, state), + + TP_STRUCT__entry( + __string(name, name) + __field(int, line) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->line = line; + __entry->state = state; + ), + + TP_printk("dev=%s line=%d state=%s", __get_str(name), + (int)__entry->line, + show_line_state((int)__entry->state)) + +); + +TRACE_EVENT(pblk_state, + + TP_PROTO(const char *name, int state), + + TP_ARGS(name, state), + + TP_STRUCT__entry( + __string(name, name) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + ), + + TP_printk("dev=%s state=%s", __get_str(name), + show_pblk_state((int)__entry->state)) + +); + +#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */ + +/* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../../drivers/lightnvm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE pblk-trace +#include <trace/define_trace.h> diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index ee774a86cf1e..fa8726493b39 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -16,6 +17,7 @@ */ #include "pblk.h" +#include "pblk-trace.h" static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx) @@ -81,8 +83,7 @@ static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd, #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes); #endif - - pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + pblk_up_rq(pblk, c_ctx->lun_bitmap); pos = pblk_rb_sync_init(&pblk->rwb, &flags); if (pos == c_ctx->sentry) { @@ -106,14 +107,12 @@ retry: /* Map remaining sectors in chunk, starting from ppa */ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line *line; struct ppa_addr map_ppa = *ppa; u64 paddr; int done = 0; - line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + line = pblk_ppa_to_line(pblk, *ppa); spin_lock(&line->lock); while (!done) { @@ -125,15 +124,7 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) if (!test_and_set_bit(paddr, line->invalid_bitmap)) le32_add_cpu(line->vsc, -1); - if (geo->version == NVM_OCSSD_SPEC_12) { - map_ppa.ppa++; - if (map_ppa.g.pg == geo->num_pg) - done = 1; - } else { - map_ppa.m.sec++; - if (map_ppa.m.sec == geo->clba) - done = 1; - } + done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa); } line->w_err_gc->has_write_err = 1; @@ -149,12 +140,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, struct pblk_w_ctx *w_ctx; struct ppa_addr ppa_l2p; int flags; - unsigned int pos, i; + unsigned int i; spin_lock(&pblk->trans_lock); - pos = sentry; for (i = 0; i < nr_entries; i++) { - entry = &rb->entries[pos]; + entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)]; w_ctx = &entry->w_ctx; /* Check if the lba has been overwritten */ @@ -168,13 +158,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, /* Release flags on write context. Protect from writes */ smp_store_release(&w_ctx->flags, flags); - /* Decrese the reference count to the line as we will + /* Decrease the reference count to the line as we will * re-map these entries */ - line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; + line = pblk_ppa_to_line(pblk, w_ctx->ppa); kref_put(&line->ref, pblk_line_put); - - pos = (pos + 1) & (rb->nr_entries - 1); } spin_unlock(&pblk->trans_lock); } @@ -208,19 +196,14 @@ static void pblk_submit_rec(struct work_struct *work) struct pblk *pblk = recovery->pblk; struct nvm_rq *rqd = recovery->rqd; struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct ppa_addr *ppa_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); pblk_log_write_err(pblk, rqd); - if (rqd->nr_ppas == 1) - ppa_list = &rqd->ppa_addr; - else - ppa_list = rqd->ppa_list; - pblk_map_remaining(pblk, ppa_list); pblk_queue_resubmit(pblk, c_ctx); - pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + pblk_up_rq(pblk, c_ctx->lun_bitmap); if (c_ctx->nr_padded) pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, c_ctx->nr_padded); @@ -257,11 +240,13 @@ static void pblk_end_io_write(struct nvm_rq *rqd) if (rqd->error) { pblk_end_w_fail(pblk, rqd); return; - } + } else { + if (trace_pblk_chunk_state_enabled()) + pblk_check_chunk_state_update(pblk, rqd); #ifdef CONFIG_NVM_PBLK_DEBUG - else WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n"); #endif + } pblk_complete_write(pblk, rqd, c_ctx); atomic_dec(&pblk->inflight_io); @@ -273,14 +258,18 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd); struct pblk_line *line = m_ctx->private; struct pblk_emeta *emeta = line->emeta; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); int sync; - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); if (rqd->error) { pblk_log_write_err(pblk, rqd); pblk_err(pblk, "metadata I/O failed. Line %d\n", line->id); line->w_err_gc->has_write_err = 1; + } else { + if (trace_pblk_chunk_state_enabled()) + pblk_check_chunk_state_update(pblk, rqd); } sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); @@ -294,27 +283,16 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) } static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int nr_secs, - nvm_end_io_fn(*end_io)) + unsigned int nr_secs, nvm_end_io_fn(*end_io)) { - struct nvm_tgt_dev *dev = pblk->dev; - /* Setup write request */ rqd->opcode = NVM_OP_PWRITE; rqd->nr_ppas = nr_secs; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); + rqd->is_seq = 1; rqd->private = pblk; rqd->end_io = end_io; - rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd->dma_meta_list); - if (!rqd->meta_list) - return -ENOMEM; - - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; - - return 0; + return pblk_alloc_rqd_meta(pblk, rqd); } static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, @@ -375,6 +353,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; struct pblk_emeta *emeta = meta_line->emeta; + struct ppa_addr *ppa_list; struct pblk_g_ctx *m_ctx; struct bio *bio; struct nvm_rq *rqd; @@ -409,22 +388,22 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) if (ret) goto fail_free_bio; + ppa_list = nvm_rq_to_ppa_list(rqd); for (i = 0; i < rqd->nr_ppas; ) { spin_lock(&meta_line->lock); paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas); spin_unlock(&meta_line->lock); for (j = 0; j < rq_ppas; j++, i++, paddr++) - rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); } + spin_lock(&l_mg->close_lock); emeta->mem += rq_len; - if (emeta->mem >= lm->emeta_len[0]) { - spin_lock(&l_mg->close_lock); + if (emeta->mem >= lm->emeta_len[0]) list_del(&meta_line->list); - spin_unlock(&l_mg->close_lock); - } + spin_unlock(&l_mg->close_lock); - pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_down_chunk(pblk, ppa_list[0]); ret = pblk_submit_io(pblk, rqd); if (ret) { @@ -435,7 +414,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) return NVM_IO_OK; fail_rollback: - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); spin_lock(&l_mg->close_lock); pblk_dealloc_page(pblk, meta_line, rq_ppas); list_add(&meta_line->list, &meta_line->list); @@ -491,14 +470,15 @@ static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line; spin_lock(&l_mg->close_lock); -retry: if (list_empty(&l_mg->emeta_list)) { spin_unlock(&l_mg->close_lock); return NULL; } meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list); - if (meta_line->emeta->mem >= lm->emeta_len[0]) - goto retry; + if (meta_line->emeta->mem >= lm->emeta_len[0]) { + spin_unlock(&l_mg->close_lock); + return NULL; + } spin_unlock(&l_mg->close_lock); if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd)) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 4760af7b6499..02bb2e98f8a9 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2015 IT University of Copenhagen (rrpc.h) * Copyright (C) 2016 CNEX Labs @@ -37,8 +38,6 @@ #define PBLK_SECTOR (512) #define PBLK_EXPOSED_PAGE_SIZE (4096) -#define PBLK_MAX_REQ_ADDRS (64) -#define PBLK_MAX_REQ_ADDRS_PW (6) #define PBLK_NR_CLOSE_JOBS (4) @@ -81,6 +80,12 @@ enum { PBLK_BLK_ST_CLOSED = 0x2, }; +enum { + PBLK_CHUNK_RESET_START, + PBLK_CHUNK_RESET_DONE, + PBLK_CHUNK_RESET_FAILED, +}; + struct pblk_sec_meta { u64 reserved; __le64 lba; @@ -99,8 +104,8 @@ enum { PBLK_RL_LOW = 4 }; -#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) -#define pblk_dma_ppa_size (sizeof(u64) * PBLK_MAX_REQ_ADDRS) +#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA) +#define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA) /* write buffer completion context */ struct pblk_c_ctx { @@ -198,6 +203,11 @@ struct pblk_rb { * will be 4KB */ + unsigned int back_thres; /* Threshold that shall be maintained by + * the backpointer in order to respect + * geo->mw_cunits on a per chunk basis + */ + struct list_head pages; /* List of data pages */ spinlock_t w_lock; /* Write lock */ @@ -218,8 +228,8 @@ struct pblk_lun { struct pblk_gc_rq { struct pblk_line *line; void *data; - u64 paddr_list[PBLK_MAX_REQ_ADDRS]; - u64 lba_list[PBLK_MAX_REQ_ADDRS]; + u64 paddr_list[NVM_MAX_VLBA]; + u64 lba_list[NVM_MAX_VLBA]; int nr_secs; int secs_to_gc; struct list_head list; @@ -532,6 +542,10 @@ struct pblk_line_mgmt { struct pblk_emeta *eline_meta[PBLK_DATA_LINES]; unsigned long meta_bitmap; + /* Cache and mempool for map/invalid bitmaps */ + struct kmem_cache *bitmap_cache; + mempool_t *bitmap_pool; + /* Helpers for fast bitmap calculations */ unsigned long *bb_template; unsigned long *bb_aux; @@ -725,10 +739,8 @@ struct pblk_line_ws { /* * pblk ring buffer operations */ -int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, - unsigned int power_size, unsigned int power_seg_sz); -unsigned int pblk_rb_calculate_size(unsigned int nr_entries); -void *pblk_rb_entries_ref(struct pblk_rb *rb); +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, + unsigned int seg_sz); int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, unsigned int nr_entries, unsigned int *pos); int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, @@ -751,8 +763,8 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); -struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, - struct ppa_addr *ppa); +unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, + unsigned int nr_entries); void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb); @@ -762,7 +774,7 @@ unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); int pblk_rb_tear_down_check(struct pblk_rb *rb); int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos); -void pblk_rb_data_free(struct pblk_rb *rb); +void pblk_rb_free(struct pblk_rb *rb); ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); /* @@ -770,11 +782,13 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); */ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type); void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type); +int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); +void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); void pblk_discard(struct pblk *pblk, struct bio *bio); -struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk); +struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, struct nvm_chk_meta *lp, struct ppa_addr ppa); @@ -782,13 +796,17 @@ void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd); +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line); +void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd); struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, unsigned int nr_secs, unsigned int len, int alloc_type, gfp_t gfp_mask); struct pblk_line *pblk_line_get(struct pblk *pblk); struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); struct pblk_line *pblk_line_replace_data(struct pblk *pblk); +void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa); +void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd); int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); struct pblk_line *pblk_line_get_data(struct pblk *pblk); @@ -806,8 +824,8 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq); u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); -int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); -int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line); +int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, void *emeta_buf); int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); void pblk_line_put(struct kref *ref); @@ -819,12 +837,11 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, unsigned long secs_to_flush); -void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); -void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, +void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, unsigned long *lun_bitmap); -void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); -void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, - unsigned long *lun_bitmap); +void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa); +void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa); +void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap); int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int nr_pages); void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, @@ -976,17 +993,15 @@ static inline int pblk_line_vsc(struct pblk_line *line) return le32_to_cpu(*line->vsc); } -static inline int pblk_pad_distance(struct pblk *pblk) +static inline int pblk_ppa_to_line_id(struct ppa_addr p) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - return geo->mw_cunits * geo->all_luns * geo->ws_opt; + return p.a.blk; } -static inline int pblk_ppa_to_line(struct ppa_addr p) +static inline struct pblk_line *pblk_ppa_to_line(struct pblk *pblk, + struct ppa_addr p) { - return p.a.blk; + return &pblk->lines[pblk_ppa_to_line_id(p)]; } static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) @@ -1034,6 +1049,25 @@ static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, return ppa; } +static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk, + struct ppa_addr p) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *line = pblk_ppa_to_line(pblk, p); + int pos = pblk_ppa_to_pos(geo, p); + + return &line->chks[pos]; +} + +static inline u64 pblk_dev_ppa_to_chunk_addr(struct pblk *pblk, + struct ppa_addr p) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + return dev_to_chunk_addr(dev->parent, &pblk->addrf, p); +} + static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, struct ppa_addr p) { @@ -1067,86 +1101,16 @@ static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) { - struct ppa_addr ppa64; - - ppa64.ppa = 0; - - if (ppa32 == -1) { - ppa64.ppa = ADDR_EMPTY; - } else if (ppa32 & (1U << 31)) { - ppa64.c.line = ppa32 & ((~0U) >> 1); - ppa64.c.is_cached = 1; - } else { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = - (struct nvm_addrf_12 *)&pblk->addrf; - - ppa64.g.ch = (ppa32 & ppaf->ch_mask) >> - ppaf->ch_offset; - ppa64.g.lun = (ppa32 & ppaf->lun_mask) >> - ppaf->lun_offset; - ppa64.g.blk = (ppa32 & ppaf->blk_mask) >> - ppaf->blk_offset; - ppa64.g.pg = (ppa32 & ppaf->pg_mask) >> - ppaf->pg_offset; - ppa64.g.pl = (ppa32 & ppaf->pln_mask) >> - ppaf->pln_offset; - ppa64.g.sec = (ppa32 & ppaf->sec_mask) >> - ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = &pblk->addrf; - - ppa64.m.grp = (ppa32 & lbaf->ch_mask) >> - lbaf->ch_offset; - ppa64.m.pu = (ppa32 & lbaf->lun_mask) >> - lbaf->lun_offset; - ppa64.m.chk = (ppa32 & lbaf->chk_mask) >> - lbaf->chk_offset; - ppa64.m.sec = (ppa32 & lbaf->sec_mask) >> - lbaf->sec_offset; - } - } + struct nvm_tgt_dev *dev = pblk->dev; - return ppa64; + return nvm_ppa32_to_ppa64(dev->parent, &pblk->addrf, ppa32); } static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) { - u32 ppa32 = 0; - - if (ppa64.ppa == ADDR_EMPTY) { - ppa32 = ~0U; - } else if (ppa64.c.is_cached) { - ppa32 |= ppa64.c.line; - ppa32 |= 1U << 31; - } else { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = - (struct nvm_addrf_12 *)&pblk->addrf; - - ppa32 |= ppa64.g.ch << ppaf->ch_offset; - ppa32 |= ppa64.g.lun << ppaf->lun_offset; - ppa32 |= ppa64.g.blk << ppaf->blk_offset; - ppa32 |= ppa64.g.pg << ppaf->pg_offset; - ppa32 |= ppa64.g.pl << ppaf->pln_offset; - ppa32 |= ppa64.g.sec << ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = &pblk->addrf; - - ppa32 |= ppa64.m.grp << lbaf->ch_offset; - ppa32 |= ppa64.m.pu << lbaf->lun_offset; - ppa32 |= ppa64.m.chk << lbaf->chk_offset; - ppa32 |= ppa64.m.sec << lbaf->sec_offset; - } - } + struct nvm_tgt_dev *dev = pblk->dev; - return ppa32; + return nvm_ppa64_to_ppa32(dev->parent, &pblk->addrf, ppa64); } static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, @@ -1255,44 +1219,6 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk, return crc; } -static inline int pblk_set_progr_mode(struct pblk *pblk, int type) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int flags; - - if (geo->version == NVM_OCSSD_SPEC_20) - return 0; - - flags = geo->pln_mode >> 1; - - if (type == PBLK_WRITE) - flags |= NVM_IO_SCRAMBLE_ENABLE; - - return flags; -} - -enum { - PBLK_READ_RANDOM = 0, - PBLK_READ_SEQUENTIAL = 1, -}; - -static inline int pblk_set_read_mode(struct pblk *pblk, int type) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int flags; - - if (geo->version == NVM_OCSSD_SPEC_20) - return 0; - - flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; - if (type == PBLK_READ_SEQUENTIAL) - flags |= geo->pln_mode >> 1; - - return flags; -} - static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs) { return !(nr_secs % pblk->min_write_pgs); @@ -1375,9 +1301,7 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa_list; - - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { WARN_ON(1); @@ -1386,12 +1310,10 @@ static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) if (rqd->opcode == NVM_OP_PWRITE) { struct pblk_line *line; - struct ppa_addr ppa; int i; for (i = 0; i < rqd->nr_ppas; i++) { - ppa = ppa_list[i]; - line = &pblk->lines[pblk_ppa_to_line(ppa)]; + line = pblk_ppa_to_line(pblk, ppa_list[i]); spin_lock(&line->lock); if (line->state != PBLK_LINESTATE_OPEN) { @@ -1441,4 +1363,11 @@ static inline void pblk_setup_uuid(struct pblk *pblk) uuid_le_gen(&uuid); memcpy(pblk->instance_uuid, uuid.b, 16); } + +static inline char *pblk_disk_name(struct pblk *pblk) +{ + struct gendisk *disk = pblk->disk; + + return disk->disk_name; +} #endif /* PBLK_H_ */ |