summaryrefslogtreecommitdiff
path: root/drivers/lightnvm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/lightnvm')
-rw-r--r--drivers/lightnvm/core.c13
-rw-r--r--drivers/lightnvm/pblk-cache.c8
-rw-r--r--drivers/lightnvm/pblk-core.c672
-rw-r--r--drivers/lightnvm/pblk-gc.c475
-rw-r--r--drivers/lightnvm/pblk-init.c389
-rw-r--r--drivers/lightnvm/pblk-map.c75
-rw-r--r--drivers/lightnvm/pblk-rb.c106
-rw-r--r--drivers/lightnvm/pblk-read.c93
-rw-r--r--drivers/lightnvm/pblk-recovery.c290
-rw-r--r--drivers/lightnvm/pblk-rl.c90
-rw-r--r--drivers/lightnvm/pblk-sysfs.c94
-rw-r--r--drivers/lightnvm/pblk-write.c355
-rw-r--r--drivers/lightnvm/pblk.h298
-rw-r--r--drivers/lightnvm/rrpc.c10
14 files changed, 1981 insertions, 987 deletions
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 6a4aa608ad95..ddae430b6eae 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
}
mutex_unlock(&dev->mlock);
- if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
- return -ENOMEM;
+ ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
+ if (ret)
+ return ret;
t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
struct nvm_dev *dev = tgt_dev->parent;
+ int ret;
if (!dev->ops->submit_io)
return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
nvm_rq_tgt_to_dev(tgt_dev, rqd);
rqd->dev = tgt_dev;
- return dev->ops->submit_io(dev, rqd);
+
+ /* In case of error, fail with right address format */
+ ret = dev->ops->submit_io(dev, rqd);
+ if (ret)
+ nvm_rq_dev_to_tgt(tgt_dev, rqd);
+ return ret;
}
EXPORT_SYMBOL(nvm_submit_io);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 59bcea88db84..024a8fc93069 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
- if (ret == NVM_IO_REQUEUE) {
+ switch (ret) {
+ case NVM_IO_REQUEUE:
io_schedule();
goto retry;
+ case NVM_IO_ERR:
+ pblk_pipeline_stop(pblk);
+ goto out;
}
if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ retry:
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
+ pblk_rl_inserted(&pblk->rl, nr_entries);
+
out:
pblk_write_should_kick(pblk);
return ret;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768ccffa..81501644fb15 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -17,7 +17,6 @@
*/
#include "pblk.h"
-#include <linux/time.h>
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
line->id, pos);
- pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+ pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
}
static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
*ppa = rqd->ppa_addr;
pblk_mark_bb(pblk, line, ppa);
}
+
+ atomic_dec(&pblk->inflight_io);
}
/* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
- up(&pblk->erase_sem);
__pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, pblk->r_rq_pool);
+ mempool_free(rqd, pblk->g_rq_pool);
}
-static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
spin_unlock(&line->lock);
return;
}
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
if (line->state == PBLK_LINESTATE_CLOSED)
move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
__pblk_map_invalidate(pblk, line, paddr);
}
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- __pblk_map_invalidate(pblk, line, paddr);
-
- pblk_rb_sync_init(&pblk->rwb, NULL);
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
- pblk_rb_sync_end(&pblk->rwb, NULL);
-}
-
static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
unsigned int nr_secs)
{
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
pool = pblk->w_rq_pool;
rq_size = pblk_w_rq_size;
} else {
- pool = pblk->r_rq_pool;
- rq_size = pblk_r_rq_size;
+ pool = pblk->g_rq_pool;
+ rq_size = pblk_g_rq_size;
}
rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
if (rw == WRITE)
pool = pblk->w_rq_pool;
else
- pool = pblk->r_rq_pool;
+ pool = pblk->g_rq_pool;
mempool_free(rqd, pool);
}
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
complete(waiting);
}
-void pblk_flush_writer(struct pblk *pblk)
+void pblk_wait_for_meta(struct pblk *pblk)
{
- struct bio *bio;
- int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
-
- bio = bio_alloc(GFP_KERNEL, 1);
- if (!bio)
- return;
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
- bio->bi_private = &wait;
- bio->bi_end_io = pblk_end_bio_sync;
+ do {
+ if (!atomic_read(&pblk->inflight_io))
+ break;
- ret = pblk_write_to_cache(pblk, bio, 0);
- if (ret == NVM_IO_OK) {
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: flush cache timed out\n");
- }
- } else if (ret != NVM_IO_DONE) {
- pr_err("pblk: tear down bio failed\n");
- }
+ schedule();
+ } while (1);
+}
- if (bio->bi_error)
- pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+static void pblk_flush_writer(struct pblk *pblk)
+{
+ pblk_rb_flush(&pblk->rwb);
+ do {
+ if (!pblk_rb_sync_count(&pblk->rwb))
+ break;
- bio_put(bio);
+ pblk_write_kick(pblk);
+ schedule();
+ } while (1);
}
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
+ int vsc = le32_to_cpu(*line->vsc);
- if (!line->vsc) {
+ lockdep_assert_held(&line->lock);
+
+ if (!vsc) {
if (line->gc_group != PBLK_LINEGC_FULL) {
line->gc_group = PBLK_LINEGC_FULL;
move_list = &l_mg->gc_full_list;
}
- } else if (line->vsc < lm->mid_thrs) {
+ } else if (vsc < lm->high_thrs) {
if (line->gc_group != PBLK_LINEGC_HIGH) {
line->gc_group = PBLK_LINEGC_HIGH;
move_list = &l_mg->gc_high_list;
}
- } else if (line->vsc < lm->high_thrs) {
+ } else if (vsc < lm->mid_thrs) {
if (line->gc_group != PBLK_LINEGC_MID) {
line->gc_group = PBLK_LINEGC_MID;
move_list = &l_mg->gc_mid_list;
}
- } else if (line->vsc < line->sec_in_line) {
+ } else if (vsc < line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_LOW) {
line->gc_group = PBLK_LINEGC_LOW;
move_list = &l_mg->gc_low_list;
}
- } else if (line->vsc == line->sec_in_line) {
+ } else if (vsc == line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_EMPTY) {
line->gc_group = PBLK_LINEGC_EMPTY;
move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
line->gc_group = PBLK_LINEGC_NONE;
move_list = &l_mg->corrupt_list;
pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
- line->id, line->vsc,
+ line->id, vsc,
line->sec_in_line,
lm->high_thrs, lm->mid_thrs);
}
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
#endif
}
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
+{
+ pblk->sec_per_write = sec_per_write;
+}
+
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
}
}
#endif
+
+ atomic_inc(&pblk->inflight_io);
+
return nvm_submit_io(dev, rqd);
}
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask)
+ int alloc_type, gfp_t gfp_mask)
{
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
void *kaddr = data;
struct page *page;
struct bio *bio;
int i, ret;
- if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+ if (alloc_type == PBLK_KMALLOC_META)
return bio_map_kern(dev->q, kaddr, len, gfp_mask);
bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ out:
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush)
{
- int max = pblk->max_write_pgs;
+ int max = pblk->sec_per_write;
int min = pblk->min_write_pgs;
int secs_to_sync = 0;
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
return secs_to_sync;
}
-static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
- int nr_secs)
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+ u64 addr;
+ int i;
+
+ addr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ line->cur_sec = addr - nr_secs;
+
+ for (i = 0; i < nr_secs; i++, line->cur_sec--)
+ WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+}
+
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
u64 addr;
int i;
+ lockdep_assert_held(&line->lock);
+
/* logic error: ppa out-of-bounds. Prevent generating bad address */
if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
return addr;
}
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
+{
+ u64 paddr;
+
+ spin_lock(&line->lock);
+ paddr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ spin_unlock(&line->lock);
+
+ return paddr;
+}
+
/*
* Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
* taking the per LUN semaphore.
*/
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
- u64 paddr, int dir)
+ void *emeta_buf, u64 paddr, int dir)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
+ void *ppa_list, *meta_list;
struct bio *bio;
struct nvm_rq rqd;
- struct ppa_addr *ppa_list;
- dma_addr_t dma_ppa_list;
- void *emeta = line->emeta;
+ dma_addr_t dma_ppa_list, dma_meta_list;
int min = pblk->min_write_pgs;
- int left_ppas = lm->emeta_sec;
+ int left_ppas = lm->emeta_sec[0];
int id = line->id;
int rq_ppas, rq_len;
int cmd_op, bio_op;
- int flags;
int i, j;
int ret;
DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
if (dir == WRITE) {
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
- flags = pblk_set_progr_mode(pblk, WRITE);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
} else
return -EINVAL;
- ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
- if (!ppa_list)
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &dma_meta_list);
+ if (!meta_list)
return -ENOMEM;
+ ppa_list = meta_list + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
rq_len = rq_ppas * geo->sec_size;
- bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
goto free_rqd_dma;
@@ -584,27 +601,38 @@ next_rq:
bio_set_op_attrs(bio, bio_op, 0);
rqd.bio = bio;
- rqd.opcode = cmd_op;
- rqd.flags = flags;
- rqd.nr_ppas = rq_ppas;
+ rqd.meta_list = meta_list;
rqd.ppa_list = ppa_list;
+ rqd.dma_meta_list = dma_meta_list;
rqd.dma_ppa_list = dma_ppa_list;
+ rqd.opcode = cmd_op;
+ rqd.nr_ppas = rq_ppas;
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
if (dir == WRITE) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
+ rqd.flags = pblk_set_progr_mode(pblk, WRITE);
for (i = 0; i < rqd.nr_ppas; ) {
spin_lock(&line->lock);
paddr = __pblk_alloc_page(pblk, line, min);
spin_unlock(&line->lock);
- for (j = 0; j < min; j++, i++, paddr++)
+ for (j = 0; j < min; j++, i++, paddr++) {
+ meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
rqd.ppa_list[i] =
addr_to_gen_ppa(pblk, paddr, id);
+ }
}
} else {
for (i = 0; i < rqd.nr_ppas; ) {
struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
int pos = pblk_dev_ppa_to_pos(geo, ppa);
+ int read_type = PBLK_READ_RANDOM;
+
+ if (pblk_io_aligned(pblk, rq_ppas))
+ read_type = PBLK_READ_SEQUENTIAL;
+ rqd.flags = pblk_set_read_mode(pblk, read_type);
while (test_bit(pos, line->blk_bitmap)) {
paddr += min;
@@ -645,9 +673,11 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: emeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
- bio_put(bio);
+ if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
if (rqd.error) {
if (dir == WRITE)
@@ -656,12 +686,12 @@ next_rq:
pblk_log_read_err(pblk, &rqd);
}
- emeta += rq_len;
+ emeta_buf += rq_len;
left_ppas -= rq_ppas;
if (left_ppas)
goto next_rq;
free_rqd_dma:
- nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
flags = pblk_set_progr_mode(pblk, WRITE);
- lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ lba_list = emeta_to_lbas(pblk, line->emeta->buf);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
+ flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
} else
return -EINVAL;
memset(&rqd, 0, sizeof(struct nvm_rq));
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
return -ENOMEM;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
+
bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
rqd.private = &wait;
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- if (dir == WRITE)
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+
+ if (dir == WRITE) {
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ meta_list[i].lba = lba_list[paddr] = addr_empty;
+ }
}
/*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: smeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
}
free_ppa_list:
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
}
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf)
{
- return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+ return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
+ line->emeta_ssec, READ);
}
static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
struct nvm_rq rqd;
- int ret;
+ int ret = 0;
DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ out:
rqd.private = pblk;
__pblk_end_io_erase(pblk, &rqd);
- return 0;
+ return ret;
}
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_meta *lm = &pblk->lm;
struct ppa_addr ppa;
- int bit = -1;
+ int ret, bit = -1;
/* Erase only good blocks, one at a time */
do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
spin_unlock(&line->lock);
- if (pblk_blk_erase_sync(pblk, ppa)) {
+ ret = pblk_blk_erase_sync(pblk, ppa);
+ if (ret) {
pr_err("pblk: failed to erase line %d\n", line->id);
- return -ENOMEM;
+ return ret;
}
} while (1);
return 0;
}
+static void pblk_line_setup_metadata(struct pblk_line *line,
+ struct pblk_line_mgmt *l_mg,
+ struct pblk_line_meta *lm)
+{
+ int meta_line;
+
+ lockdep_assert_held(&l_mg->free_lock);
+
+retry_meta:
+ meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+ if (meta_line == PBLK_DATA_LINES) {
+ spin_unlock(&l_mg->free_lock);
+ io_schedule();
+ spin_lock(&l_mg->free_lock);
+ goto retry_meta;
+ }
+
+ set_bit(meta_line, &l_mg->meta_bitmap);
+ line->meta_line = meta_line;
+
+ line->smeta = l_mg->sline_meta[meta_line];
+ line->emeta = l_mg->eline_meta[meta_line];
+
+ memset(line->smeta, 0, lm->smeta_len);
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
+
+ line->emeta->mem = 0;
+ atomic_set(&line->emeta->sync, 0);
+}
+
/* For now lines are always assumed full lines. Thus, smeta former and current
* lun bitmaps are omitted.
*/
-static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
struct pblk_line *cur)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct line_smeta *smeta = line->smeta;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+ struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
int nr_blk_line;
/* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
}
/* Run-time metadata */
- line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+ line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
/* Mark LUNs allocated in this line (all for now) */
bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
- smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
- memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
- smeta->header.id = cpu_to_le32(line->id);
- smeta->header.type = cpu_to_le16(line->type);
- smeta->header.version = cpu_to_le16(1);
+ smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
+ memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
+ smeta_buf->header.id = cpu_to_le32(line->id);
+ smeta_buf->header.type = cpu_to_le16(line->type);
+ smeta_buf->header.version = cpu_to_le16(1);
/* Start metadata */
- smeta->seq_nr = cpu_to_le64(line->seq_nr);
- smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
/* Fill metadata among lines */
if (cur) {
memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
- smeta->prev_id = cpu_to_le32(cur->id);
- cur->emeta->next_id = cpu_to_le32(line->id);
+ smeta_buf->prev_id = cpu_to_le32(cur->id);
+ cur->emeta->buf->next_id = cpu_to_le32(line->id);
} else {
- smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
}
/* All smeta must be set at this point */
- smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
- smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+ smeta_buf->header.crc = cpu_to_le32(
+ pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
+ smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
/* End metadata */
- memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
- emeta->seq_nr = cpu_to_le64(line->seq_nr);
- emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
- emeta->nr_valid_lbas = cpu_to_le64(0);
- emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
- emeta->crc = cpu_to_le32(0);
- emeta->prev_id = smeta->prev_id;
+ memcpy(&emeta_buf->header, &smeta_buf->header,
+ sizeof(struct line_header));
+ emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
+ emeta_buf->nr_valid_lbas = cpu_to_le64(0);
+ emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ emeta_buf->crc = cpu_to_le32(0);
+ emeta_buf->prev_id = smeta_buf->prev_id;
return 1;
}
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
/* Mark smeta metadata sectors as bad sectors */
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
off = bit * geo->sec_per_pl;
-retry_smeta:
bitmap_set(line->map_bitmap, off, lm->smeta_sec);
line->sec_in_line -= lm->smeta_sec;
line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ retry_smeta:
if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
pr_debug("pblk: line smeta I/O failed. Retry\n");
- off += geo->sec_per_pl;
- goto retry_smeta;
+ return 1;
}
bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ retry_smeta:
* blocks to make sure that there are enough sectors to store emeta
*/
bit = lm->sec_per_line;
- off = lm->sec_per_line - lm->emeta_sec;
- bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+ off = lm->sec_per_line - lm->emeta_sec[0];
+ bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
while (nr_bb) {
off -= geo->sec_per_pl;
if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ retry_smeta:
}
}
- line->sec_in_line -= lm->emeta_sec;
+ line->sec_in_line -= lm->emeta_sec[0];
line->emeta_ssec = off;
- line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+ line->nr_valid_lbas = 0;
+ line->left_msecs = line->sec_in_line;
+ *line->vsc = cpu_to_le32(line->sec_in_line);
if (lm->sec_per_line - line->sec_in_line !=
bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
spin_lock(&line->lock);
if (line->state != PBLK_LINESTATE_FREE) {
+ mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ mempool_free(line->map_bitmap, pblk->line_meta_pool);
spin_unlock(&line->lock);
- WARN(1, "pblk: corrupted line state\n");
- return -EINTR;
+ WARN(1, "pblk: corrupted line %d, state %d\n",
+ line->id, line->state);
+ return -EAGAIN;
}
+
line->state = PBLK_LINESTATE_OPEN;
atomic_set(&line->left_eblks, blk_in_line);
atomic_set(&line->left_seblks, blk_in_line);
+
+ line->meta_distance = lm->meta_distance;
spin_unlock(&line->lock);
/* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line = NULL;
- int bit;
+ struct pblk_line *line;
+ int ret, bit;
lockdep_assert_held(&l_mg->free_lock);
-retry_get:
+retry:
if (list_empty(&l_mg->free_list)) {
pr_err("pblk: no free lines\n");
- goto out;
+ return NULL;
}
line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ retry_get:
list_add_tail(&line->list, &l_mg->bad_list);
pr_debug("pblk: line %d is bad\n", line->id);
- goto retry_get;
+ goto retry;
}
- if (pblk_line_prepare(pblk, line)) {
- pr_err("pblk: failed to prepare line %d\n", line->id);
- list_add(&line->list, &l_mg->free_list);
- return NULL;
+ ret = pblk_line_prepare(pblk, line);
+ if (ret) {
+ if (ret == -EAGAIN) {
+ list_add(&line->list, &l_mg->corrupt_list);
+ goto retry;
+ } else {
+ pr_err("pblk: failed to prepare line %d\n", line->id);
+ list_add(&line->list, &l_mg->free_list);
+ l_mg->nr_free_lines++;
+ return NULL;
+ }
}
-out:
return line;
}
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *retry_line;
+retry:
spin_lock(&l_mg->free_lock);
retry_line = pblk_line_get(pblk);
if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
l_mg->data_line = retry_line;
spin_unlock(&l_mg->free_lock);
- if (pblk_line_erase(pblk, retry_line)) {
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = NULL;
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+ if (pblk_line_erase(pblk, retry_line))
+ goto retry;
+
return retry_line;
}
+static void pblk_set_space_limit(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+
+ atomic_set(&rl->rb_space, 0);
+}
+
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line;
- int meta_line;
int is_next = 0;
spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
line->type = PBLK_LINETYPE_DATA;
l_mg->data_line = line;
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- set_bit(meta_line, &l_mg->meta_bitmap);
- line->smeta = l_mg->sline_meta[meta_line].meta;
- line->emeta = l_mg->eline_meta[meta_line].meta;
- line->meta_line = meta_line;
+ pblk_line_setup_metadata(line, l_mg, &pblk->lm);
/* Allocate next line for preparation */
l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_set_space_limit(pblk);
+
+ l_mg->data_next = NULL;
+ } else {
l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
l_mg->data_next->type = PBLK_LINETYPE_DATA;
is_next = 1;
}
spin_unlock(&l_mg->free_lock);
+ if (pblk_line_erase(pblk, line)) {
+ line = pblk_line_retry(pblk, line);
+ if (!line)
+ return NULL;
+ }
+
pblk_rl_free_lines_dec(&pblk->rl, line);
if (is_next)
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
- if (pblk_line_erase(pblk, line))
- return NULL;
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, line, NULL)) {
+ if (!pblk_line_init_metadata(pblk, line, NULL)) {
line = pblk_line_retry(pblk, line);
if (!line)
return NULL;
@@ -1222,69 +1320,89 @@ retry_setup:
return line;
}
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
+{
+ lockdep_assert_held(&pblk->l_mg.free_lock);
+
+ pblk_set_space_limit(pblk);
+ pblk->state = PBLK_STATE_STOPPING;
+}
+
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ int ret;
+
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state == PBLK_STATE_RECOVERING ||
+ pblk->state == PBLK_STATE_STOPPED) {
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+ pblk->state = PBLK_STATE_RECOVERING;
+ spin_unlock(&l_mg->free_lock);
+
+ pblk_flush_writer(pblk);
+ pblk_wait_for_meta(pblk);
+
+ ret = pblk_recov_pad(pblk);
+ if (ret) {
+ pr_err("pblk: could not close data on teardown(%d)\n", ret);
+ return;
+ }
+
+ flush_workqueue(pblk->bb_wq);
+ pblk_line_close_meta_sync(pblk);
+
+ spin_lock(&l_mg->free_lock);
+ pblk->state = PBLK_STATE_STOPPED;
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+}
+
+void pblk_line_replace_data(struct pblk *pblk)
{
- struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *cur, *new;
unsigned int left_seblks;
- int meta_line;
int is_next = 0;
cur = l_mg->data_line;
new = l_mg->data_next;
if (!new)
- return NULL;
+ return;
l_mg->data_line = new;
-retry_line:
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state != PBLK_STATE_RUNNING) {
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+
+ pblk_line_setup_metadata(new, l_mg, &pblk->lm);
+ spin_unlock(&l_mg->free_lock);
+
+retry_erase:
left_seblks = atomic_read(&new->left_seblks);
if (left_seblks) {
/* If line is not fully erased, erase it */
if (atomic_read(&new->left_eblks)) {
if (pblk_line_erase(pblk, new))
- return NULL;
+ return;
} else {
io_schedule();
}
- goto retry_line;
- }
-
- spin_lock(&l_mg->free_lock);
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- is_next = 1;
- }
-
-retry_meta:
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- if (meta_line == PBLK_DATA_LINES) {
- spin_unlock(&l_mg->free_lock);
- io_schedule();
- spin_lock(&l_mg->free_lock);
- goto retry_meta;
+ goto retry_erase;
}
- set_bit(meta_line, &l_mg->meta_bitmap);
- new->smeta = l_mg->sline_meta[meta_line].meta;
- new->emeta = l_mg->eline_meta[meta_line].meta;
- new->meta_line = meta_line;
-
- memset(new->smeta, 0, lm->smeta_len);
- memset(new->emeta, 0, lm->emeta_len);
- spin_unlock(&l_mg->free_lock);
-
- if (is_next)
- pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, new, cur)) {
+ if (!pblk_line_init_metadata(pblk, new, cur)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
@@ -1292,12 +1410,30 @@ retry_setup:
if (!pblk_line_init_bb(pblk, new, 1)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
- return new;
+ /* Allocate next line for preparation */
+ spin_lock(&l_mg->free_lock);
+ l_mg->data_next = pblk_line_get(pblk);
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_stop_writes(pblk, new);
+ l_mg->data_next = NULL;
+ } else {
+ l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+ l_mg->data_next->type = PBLK_LINETYPE_DATA;
+ is_next = 1;
+ }
+ spin_unlock(&l_mg->free_lock);
+
+ if (is_next)
+ pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
}
void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
if (line->invalid_bitmap)
mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ *line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
line->map_bitmap = NULL;
line->invalid_bitmap = NULL;
line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
struct nvm_rq *rqd;
int err;
- rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
- memset(rqd, 0, pblk_r_rq_size);
+ rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
+ memset(rqd, 0, pblk_g_rq_size);
pblk_setup_e_rq(pblk, rqd, ppa);
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
return pblk->l_mg.data_line;
}
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
{
return pblk->l_mg.data_next;
}
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
return (line->left_msecs == 0);
}
+void pblk_line_close_meta_sync(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line *line, *tline;
+ LIST_HEAD(list);
+
+ spin_lock(&l_mg->close_lock);
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return;
+ }
+
+ list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
+ spin_unlock(&l_mg->close_lock);
+
+ list_for_each_entry_safe(line, tline, &list, list) {
+ struct pblk_emeta *emeta = line->emeta;
+
+ while (emeta->mem < lm->emeta_len[0]) {
+ int ret;
+
+ ret = pblk_submit_meta_io(pblk, line);
+ if (ret) {
+ pr_err("pblk: sync meta line %d failed (%d)\n",
+ line->id, ret);
+ return;
+ }
+ }
+ }
+
+ pblk_wait_for_meta(pblk);
+ flush_workqueue(pblk->close_wq);
+}
+
+static void pblk_line_should_sync_meta(struct pblk *pblk)
+{
+ if (pblk_rl_is_limit(&pblk->rl))
+ pblk_line_close_meta_sync(pblk);
+}
+
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
- line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
-
- if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
- pr_err("pblk: line %d close I/O failed\n", line->id);
+#ifdef CONFIG_NVM_DEBUG
+ struct pblk_line_meta *lm = &pblk->lm;
- WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+ WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
"pblk: corrupt closed line %d\n", line->id);
+#endif
spin_lock(&l_mg->free_lock);
WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
spin_unlock(&line->lock);
spin_unlock(&l_mg->gc_lock);
+
+ pblk_gc_should_kick(pblk);
+}
+
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+
+ /* No need for exact vsc value; avoid a big line lock and take aprox. */
+ memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
+ memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+
+ emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
+ emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
+
+ spin_lock(&l_mg->close_lock);
+ spin_lock(&line->lock);
+ list_add_tail(&line->list, &l_mg->emeta_list);
+ spin_unlock(&line->lock);
+ spin_unlock(&l_mg->close_lock);
+
+ pblk_line_should_sync_meta(pblk);
}
void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
}
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *))
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq)
{
struct pblk_line_ws *line_ws;
@@ -1462,16 +1667,13 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
line_ws->priv = priv;
INIT_WORK(&line_ws->ws, work);
- queue_work(pblk->kw_wq, &line_ws->ws);
+ queue_work(wq, &line_ws->ws);
}
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
- unsigned long *lun_bitmap)
+static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
+ int nr_ppas, int pos)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+ struct pblk_lun *rlun = &pblk->luns[pos];
int ret;
/*
@@ -1485,14 +1687,8 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
ppa_list[0].g.ch != ppa_list[i].g.ch);
#endif
- /* If the LUN has been locked for this same request, do no attempt to
- * lock it again
- */
- if (test_and_set_bit(lun_id, lun_bitmap))
- return;
- rlun = &pblk->luns[lun_id];
- ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+ ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
if (ret) {
switch (ret) {
case -ETIME:
@@ -1505,6 +1701,50 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
}
}
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+ __pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+ unsigned long *lun_bitmap)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+ /* If the LUN has been locked for this same request, do no attempt to
+ * lock it again
+ */
+ if (test_and_set_bit(pos, lun_bitmap))
+ return;
+
+ __pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_lun *rlun;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+#ifdef CONFIG_NVM_DEBUG
+ int i;
+
+ for (i = 1; i < nr_ppas; i++)
+ WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
+ ppa_list[0].g.ch != ppa_list[i].g.ch);
+#endif
+
+ rlun = &pblk->luns[pos];
+ up(&rlun->wr_sem);
+}
+
void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
unsigned long *lun_bitmap)
{
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index eaf479c6b63c..6090d28f7995 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -20,8 +20,7 @@
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
- kfree(gc_rq->data);
- kfree(gc_rq->lba_list);
+ vfree(gc_rq->data);
kfree(gc_rq);
}
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
return 1;
}
- list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
- list_move_tail(&gc_rq->list, &w_list);
- gc->w_entries--;
- }
+ list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+ gc->w_entries = 0;
spin_unlock(&gc->w_lock);
list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
gc_rq->nr_secs, gc_rq->secs_to_gc,
gc_rq->line, PBLK_IOTYPE_GC);
- kref_put(&gc_rq->line->ref, pblk_line_put);
-
list_del(&gc_rq->list);
+ kref_put(&gc_rq->line->ref, pblk_line_put);
pblk_gc_free_gc_rq(gc_rq);
}
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
* Responsible for managing all memory related to a gc request. Also in case of
* failure
*/
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
- u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_gc_rq *gc_rq;
+ struct pblk_line *line = gc_rq->line;
void *data;
unsigned int secs_to_gc;
- int ret = NVM_IO_OK;
+ int ret = 0;
- data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+ data = vmalloc(gc_rq->nr_secs * geo->sec_size);
if (!data) {
- ret = NVM_IO_ERR;
- goto free_lba_list;
+ ret = -ENOMEM;
+ goto out;
}
/* Read from GC victim block */
- if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+ if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
&secs_to_gc, line)) {
- ret = NVM_IO_ERR;
+ ret = -EFAULT;
goto free_data;
}
if (!secs_to_gc)
- goto free_data;
-
- gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
- if (!gc_rq) {
- ret = NVM_IO_ERR;
- goto free_data;
- }
+ goto free_rq;
- gc_rq->line = line;
gc_rq->data = data;
- gc_rq->lba_list = lba_list;
- gc_rq->nr_secs = nr_secs;
gc_rq->secs_to_gc = secs_to_gc;
- kref_get(&line->ref);
-
retry:
spin_lock(&gc->w_lock);
- if (gc->w_entries > 256) {
+ if (gc->w_entries >= PBLK_GC_W_QD) {
spin_unlock(&gc->w_lock);
- usleep_range(256, 1024);
+ pblk_gc_writer_kick(&pblk->gc);
+ usleep_range(128, 256);
goto retry;
}
gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
pblk_gc_writer_kick(&pblk->gc);
- return NVM_IO_OK;
+ return 0;
+free_rq:
+ kfree(gc_rq);
free_data:
- kfree(data);
-free_lba_list:
- kfree(lba_list);
-
+ vfree(data);
+out:
+ kref_put(&line->ref, pblk_line_put);
return ret;
}
@@ -150,140 +136,206 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
static void pblk_gc_line_ws(struct work_struct *work)
{
+ struct pblk_line_ws *line_rq_ws = container_of(work,
+ struct pblk_line_ws, ws);
+ struct pblk *pblk = line_rq_ws->pblk;
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line = line_rq_ws->line;
+ struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+ up(&gc->gc_sem);
+
+ if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+ pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+ line->id, *line->vsc,
+ gc_rq->nr_secs);
+ }
+
+ mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
ws);
struct pblk *pblk = line_ws->pblk;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line = line_ws->line;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- __le64 *lba_list = line_ws->priv;
- u64 *gc_list;
- int sec_left;
- int nr_ppas, bit;
- int put_line = 1;
+ struct pblk_gc *gc = &pblk->gc;
+ struct line_emeta *emeta_buf;
+ struct pblk_line_ws *line_rq_ws;
+ struct pblk_gc_rq *gc_rq;
+ __le64 *lba_list;
+ int sec_left, nr_secs, bit;
+ int ret;
- pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+ emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+ GFP_KERNEL);
+ if (!emeta_buf) {
+ pr_err("pblk: cannot use GC emeta\n");
+ return;
+ }
- spin_lock(&line->lock);
- sec_left = line->vsc;
- if (!sec_left) {
- /* Lines are erased before being used (l_mg->data_/log_next) */
- spin_unlock(&line->lock);
- goto out;
+ ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+ if (ret) {
+ pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+ goto fail_free_emeta;
+ }
+
+ /* If this read fails, it means that emeta is corrupted. For now, leave
+ * the line untouched. TODO: Implement a recovery routine that scans and
+ * moves all sectors on the line.
+ */
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
+ if (!lba_list) {
+ pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+ goto fail_free_emeta;
}
- spin_unlock(&line->lock);
+ sec_left = pblk_line_vsc(line);
if (sec_left < 0) {
pr_err("pblk: corrupted GC line (%d)\n", line->id);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
+ goto fail_free_emeta;
}
bit = -1;
next_rq:
- gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
- if (!gc_list) {
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+ if (!gc_rq)
+ goto fail_free_emeta;
- nr_ppas = 0;
+ nr_secs = 0;
do {
bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
bit + 1);
if (bit > line->emeta_ssec)
break;
- gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
- } while (nr_ppas < pblk->max_write_pgs);
+ gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+ } while (nr_secs < pblk->max_write_pgs);
- if (unlikely(!nr_ppas)) {
- kfree(gc_list);
+ if (unlikely(!nr_secs)) {
+ kfree(gc_rq);
goto out;
}
- if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
- pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
- line->id, line->vsc,
- nr_ppas, nr_ppas);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq->nr_secs = nr_secs;
+ gc_rq->line = line;
+
+ line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_rq_ws)
+ goto fail_free_gc_rq;
- sec_left -= nr_ppas;
+ line_rq_ws->pblk = pblk;
+ line_rq_ws->line = line;
+ line_rq_ws->priv = gc_rq;
+
+ down(&gc->gc_sem);
+ kref_get(&line->ref);
+
+ INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+ queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
+
+ sec_left -= nr_secs;
if (sec_left > 0)
goto next_rq;
out:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
mempool_free(line_ws, pblk->line_ws_pool);
- atomic_dec(&pblk->gc.inflight_gc);
- if (put_line)
- kref_put(&line->ref, pblk_line_put);
+
+ kref_put(&line->ref, pblk_line_put);
+ atomic_dec(&gc->inflight_gc);
+
+ return;
+
+fail_free_gc_rq:
+ kfree(gc_rq);
+fail_free_emeta:
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+ pblk_put_line_back(pblk, line);
+ kref_put(&line->ref, pblk_line_put);
+ mempool_free(line_ws, pblk->line_ws_pool);
+ atomic_dec(&gc->inflight_gc);
+
+ pr_err("pblk: Failed to GC line %d\n", line->id);
}
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *line_ws;
- __le64 *lba_list;
- int ret;
- line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
- line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
- GFP_KERNEL);
- if (!line->emeta) {
- pr_err("pblk: cannot use GC emeta\n");
- goto fail_free_ws;
- }
-
- ret = pblk_line_read_emeta(pblk, line);
- if (ret) {
- pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
- goto fail_free_emeta;
- }
+ pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
- /* If this read fails, it means that emeta is corrupted. For now, leave
- * the line untouched. TODO: Implement a recovery routine that scans and
- * moves all sectors on the line.
- */
- lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
- if (!lba_list) {
- pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
- goto fail_free_emeta;
- }
+ line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_ws)
+ return -ENOMEM;
line_ws->pblk = pblk;
line_ws->line = line;
- line_ws->priv = lba_list;
- INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
- queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+ INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+ queue_work(gc->gc_reader_wq, &line_ws->ws);
return 0;
+}
-fail_free_emeta:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-fail_free_ws:
- mempool_free(line_ws, pblk->line_ws_pool);
- pblk_put_line_back(pblk, line);
+static int pblk_gc_read(struct pblk *pblk)
+{
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line;
+
+ spin_lock(&gc->r_lock);
+ if (list_empty(&gc->r_list)) {
+ spin_unlock(&gc->r_lock);
+ return 1;
+ }
+
+ line = list_first_entry(&gc->r_list, struct pblk_line, list);
+ list_del(&line->list);
+ spin_unlock(&gc->r_lock);
+
+ pblk_gc_kick(pblk);
- return 1;
+ if (pblk_gc_line(pblk, line))
+ pr_err("pblk: failed to GC line %d\n", line->id);
+
+ return 0;
}
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
- struct pblk_line *line, *tline;
+ wake_up_process(gc->gc_reader_ts);
+}
- list_for_each_entry_safe(line, tline, gc_list, list) {
- if (pblk_gc_line(pblk, line))
- pr_err("pblk: failed to GC line %d\n", line->id);
- list_del(&line->list);
+static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
+ struct list_head *group_list)
+{
+ struct pblk_line *line, *victim;
+ int line_vsc, victim_vsc;
+
+ victim = list_first_entry(group_list, struct pblk_line, list);
+ list_for_each_entry(line, group_list, list) {
+ line_vsc = le32_to_cpu(*line->vsc);
+ victim_vsc = le32_to_cpu(*victim->vsc);
+ if (line_vsc < victim_vsc)
+ victim = line;
}
+
+ return victim;
+}
+
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+ unsigned int nr_blocks_free, nr_blocks_need;
+
+ nr_blocks_need = pblk_rl_high_thrs(rl);
+ nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+ /* This is not critical, no need to take lock here */
+ return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}
/*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line, *tline;
- unsigned int nr_blocks_free, nr_blocks_need;
+ struct pblk_line *line;
struct list_head *group_list;
- int run_gc, gc_group = 0;
- int prev_gc = 0;
- int inflight_gc = atomic_read(&gc->inflight_gc);
- LIST_HEAD(gc_list);
+ bool run_gc;
+ int inflight_gc, gc_group = 0, prev_group = 0;
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(&l_mg->gc_full_list)) {
+ spin_unlock(&l_mg->gc_lock);
+ break;
+ }
+
+ line = list_first_entry(&l_mg->gc_full_list,
+ struct pblk_line, list);
- spin_lock(&l_mg->gc_lock);
- list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
spin_unlock(&line->lock);
list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
kref_put(&line->ref, pblk_line_put);
- }
- spin_unlock(&l_mg->gc_lock);
+ } while (1);
- nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
- nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+ return;
next_gc_group:
group_list = l_mg->gc_lists[gc_group++];
- spin_lock(&l_mg->gc_lock);
- while (run_gc && !list_empty(group_list)) {
- /* No need to queue up more GC lines than we can handle */
- if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(group_list)) {
spin_unlock(&l_mg->gc_lock);
- pblk_gc_lines(pblk, &gc_list);
- return;
+ break;
}
- line = list_first_entry(group_list, struct pblk_line, list);
- nr_blocks_free += atomic_read(&line->blk_in_line);
+ line = pblk_gc_get_victim_line(pblk, group_list);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
- list_move_tail(&line->list, &gc_list);
- atomic_inc(&gc->inflight_gc);
- inflight_gc++;
spin_unlock(&line->lock);
- prev_gc = 1;
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
- }
- spin_unlock(&l_mg->gc_lock);
+ list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
+ spin_lock(&gc->r_lock);
+ list_add_tail(&line->list, &gc->r_list);
+ spin_unlock(&gc->r_lock);
- pblk_gc_lines(pblk, &gc_list);
+ inflight_gc = atomic_inc_return(&gc->inflight_gc);
+ pblk_gc_reader_kick(gc);
- if (!prev_gc && pblk->rl.rb_state > gc_group &&
- gc_group < PBLK_NR_GC_LISTS)
+ prev_group = 1;
+
+ /* No need to queue up more GC lines than we can handle */
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+ break;
+ } while (1);
+
+ if (!prev_group && pblk->rl.rb_state > gc_group &&
+ gc_group < PBLK_GC_NR_LISTS)
goto next_gc_group;
}
-
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
wake_up_process(gc->gc_ts);
pblk_gc_writer_kick(gc);
+ pblk_gc_reader_kick(gc);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
return 0;
}
-static void pblk_gc_start(struct pblk *pblk)
+static int pblk_gc_reader_ts(void *data)
{
- pblk->gc.gc_active = 1;
+ struct pblk *pblk = data;
- pr_debug("pblk: gc start\n");
+ while (!kthread_should_stop()) {
+ if (!pblk_gc_read(pblk))
+ continue;
+ set_current_state(TASK_INTERRUPTIBLE);
+ io_schedule();
+ }
+
+ return 0;
}
-int pblk_gc_status(struct pblk *pblk)
+static void pblk_gc_start(struct pblk *pblk)
{
- struct pblk_gc *gc = &pblk->gc;
- int ret;
-
- spin_lock(&gc->lock);
- ret = gc->gc_active;
- spin_unlock(&gc->lock);
-
- return ret;
+ pblk->gc.gc_active = 1;
+ pr_debug("pblk: gc start\n");
}
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
- lockdep_assert_held(&gc->lock);
-
if (gc->gc_enabled && !gc->gc_active)
pblk_gc_start(pblk);
-}
-void pblk_gc_should_start(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&gc->lock);
- __pblk_gc_should_start(pblk);
- spin_unlock(&gc->lock);
+ pblk_gc_kick(pblk);
}
/*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
*/
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
- spin_lock(&pblk->gc.lock);
pblk->gc.gc_active = 0;
- spin_unlock(&pblk->gc.lock);
-
pr_debug("pblk: gc stop\n");
}
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
spin_unlock(&gc->lock);
}
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
struct pblk_gc *gc = &pblk->gc;
- int rsv = 0;
+
+ if (force < 0 || force > 1)
+ return -EINVAL;
spin_lock(&gc->lock);
- if (force) {
- gc->gc_enabled = 1;
- rsv = 64;
- }
- pblk_rl_set_gc_rsc(&pblk->rl, rsv);
gc->gc_forced = force;
- __pblk_gc_should_start(pblk);
+
+ if (force)
+ gc->gc_enabled = 1;
+ else
+ gc->gc_enabled = 0;
spin_unlock(&gc->lock);
+
+ pblk_gc_should_start(pblk);
+
+ return 0;
}
int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
goto fail_free_main_kthread;
}
+ gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+ "pblk-gc-reader-ts");
+ if (IS_ERR(gc->gc_reader_ts)) {
+ pr_err("pblk: could not allocate GC reader kthread\n");
+ ret = PTR_ERR(gc->gc_reader_ts);
+ goto fail_free_writer_kthread;
+ }
+
setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
gc->gc_active = 0;
gc->gc_forced = 0;
gc->gc_enabled = 1;
- gc->gc_jobs_active = 8;
gc->w_entries = 0;
atomic_set(&gc->inflight_gc, 0);
- gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+ /* Workqueue that reads valid sectors from a line and submit them to the
+ * GC writer to be recycled.
+ */
+ gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+ if (!gc->gc_line_reader_wq) {
+ pr_err("pblk: could not allocate GC line reader workqueue\n");
+ ret = -ENOMEM;
+ goto fail_free_reader_kthread;
+ }
+
+ /* Workqueue that prepare lines for GC */
+ gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!gc->gc_reader_wq) {
pr_err("pblk: could not allocate GC reader workqueue\n");
ret = -ENOMEM;
- goto fail_free_writer_kthread;
+ goto fail_free_reader_line_wq;
}
spin_lock_init(&gc->lock);
spin_lock_init(&gc->w_lock);
+ spin_lock_init(&gc->r_lock);
+
+ sema_init(&gc->gc_sem, 128);
+
INIT_LIST_HEAD(&gc->w_list);
+ INIT_LIST_HEAD(&gc->r_list);
return 0;
+fail_free_reader_line_wq:
+ destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+ kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
struct pblk_gc *gc = &pblk->gc;
flush_workqueue(gc->gc_reader_wq);
+ flush_workqueue(gc->gc_line_reader_wq);
del_timer(&gc->gc_timer);
pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
if (gc->gc_ts)
kthread_stop(gc->gc_ts);
- if (pblk->gc.gc_reader_wq)
- destroy_workqueue(pblk->gc.gc_reader_wq);
+ if (gc->gc_reader_wq)
+ destroy_workqueue(gc->gc_reader_wq);
+
+ if (gc->gc_line_reader_wq)
+ destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
kthread_stop(gc->gc_writer_ts);
+
+ if (gc->gc_reader_ts)
+ kthread_stop(gc->gc_reader_ts);
}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d5af8b..1b0f61233c21 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,9 +20,10 @@
#include "pblk.h"
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
- *pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+ *pblk_w_rq_cache, *pblk_line_meta_cache;
static DECLARE_RWSEM(pblk_lock);
+struct bio_set *pblk_bio_set;
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
* constraint. Writes can be of arbitrary size.
*/
if (bio_data_dir(bio) == READ) {
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
ret = pblk_submit_read(pblk, bio);
if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
* available for user I/O.
*/
if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
return -ENOMEM;
}
- pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+ pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
0, 0, NULL);
- if (!pblk_r_rq_cache) {
+ if (!pblk_g_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
if (!pblk_w_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
}
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
if (!pblk_line_meta_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- int max_write_ppas;
- int mod;
- pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
- max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
- pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
- max_write_ppas : nvm_max_phys_sects(dev);
pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
geo->nr_planes * geo->nr_luns;
- if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
- pr_err("pblk: cannot support device max_phys_sect\n");
- return -EINVAL;
- }
-
- div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
- if (mod) {
- pr_err("pblk: bad configuration of sectors/pages\n");
- return -EINVAL;
- }
-
if (pblk_init_global_caches(pblk))
return -ENOMEM;
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->page_pool)
return -ENOMEM;
- pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+ pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
pblk_blk_ws_cache);
if (!pblk->line_ws_pool)
goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->rec_pool)
goto free_blk_ws_pool;
- pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
- if (!pblk->r_rq_pool)
+ pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+ pblk_g_rq_cache);
+ if (!pblk->g_rq_pool)
goto free_rec_pool;
- pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+ pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+ pblk_w_rq_cache);
if (!pblk->w_rq_pool)
- goto free_r_rq_pool;
+ goto free_g_rq_pool;
pblk->line_meta_pool =
- mempool_create_slab_pool(16, pblk_line_meta_cache);
+ mempool_create_slab_pool(PBLK_META_POOL_SIZE,
+ pblk_line_meta_cache);
if (!pblk->line_meta_pool)
goto free_w_rq_pool;
- pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!pblk->kw_wq)
+ pblk->close_wq = alloc_workqueue("pblk-close-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
+ if (!pblk->close_wq)
goto free_line_meta_pool;
+ pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (!pblk->bb_wq)
+ goto free_close_wq;
+
if (pblk_set_ppaf(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
if (pblk_rwb_init(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
INIT_LIST_HEAD(&pblk->compl_list);
return 0;
-free_kw_wq:
- destroy_workqueue(pblk->kw_wq);
+free_bb_wq:
+ destroy_workqueue(pblk->bb_wq);
+free_close_wq:
+ destroy_workqueue(pblk->close_wq);
free_line_meta_pool:
mempool_destroy(pblk->line_meta_pool);
free_w_rq_pool:
mempool_destroy(pblk->w_rq_pool);
-free_r_rq_pool:
- mempool_destroy(pblk->r_rq_pool);
+free_g_rq_pool:
+ mempool_destroy(pblk->g_rq_pool);
free_rec_pool:
mempool_destroy(pblk->rec_pool);
free_blk_ws_pool:
@@ -322,19 +316,22 @@ free_page_pool:
static void pblk_core_free(struct pblk *pblk)
{
- if (pblk->kw_wq)
- destroy_workqueue(pblk->kw_wq);
+ if (pblk->close_wq)
+ destroy_workqueue(pblk->close_wq);
+
+ if (pblk->bb_wq)
+ destroy_workqueue(pblk->bb_wq);
mempool_destroy(pblk->page_pool);
mempool_destroy(pblk->line_ws_pool);
mempool_destroy(pblk->rec_pool);
- mempool_destroy(pblk->r_rq_pool);
+ mempool_destroy(pblk->g_rq_pool);
mempool_destroy(pblk->w_rq_pool);
mempool_destroy(pblk->line_meta_pool);
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
kmem_cache_destroy(pblk_line_meta_cache);
}
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
kfree(pblk->luns);
}
+static void pblk_free_line_bitmaps(struct pblk_line *line)
+{
+ kfree(line->blk_bitmap);
+ kfree(line->erase_bitmap);
+}
+
static void pblk_lines_free(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
line = &pblk->lines[i];
pblk_line_free(pblk, line);
- kfree(line->blk_bitmap);
- kfree(line->erase_bitmap);
+ pblk_free_line_bitmaps(line);
}
spin_unlock(&l_mg->free_lock);
}
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
kfree(l_mg->bb_template);
kfree(l_mg->bb_aux);
+ kfree(l_mg->vsc_list);
+ spin_lock(&l_mg->free_lock);
for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+ kfree(l_mg->sline_meta[i]);
+ pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+ kfree(l_mg->eline_meta[i]);
}
+ spin_unlock(&l_mg->free_lock);
kfree(pblk->lines);
}
@@ -411,13 +417,31 @@ out:
return ret;
}
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
+ int blk_per_line)
{
- struct pblk_line_meta *lm = &pblk->lm;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
int bb_cnt = 0;
int i;
+ for (i = 0; i < blk_per_line; i++) {
+ rlun = &pblk->luns[i];
+ if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+ continue;
+
+ set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
+ bb_cnt++;
+ }
+
+ return bb_cnt;
+}
+
+static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+
line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
if (!line->blk_bitmap)
return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
return -ENOMEM;
}
- for (i = 0; i < lm->blk_per_line; i++) {
- rlun = &pblk->luns[i];
- if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
- continue;
-
- set_bit(i, line->blk_bitmap);
- bb_cnt++;
- }
-
- return bb_cnt;
+ return 0;
}
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
}
/* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+static unsigned int calc_emeta_len(struct pblk *pblk)
{
- return (sizeof(struct line_emeta) +
- ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
- (pblk->l_mg.nr_lines * sizeof(u32)) +
- lm->blk_bitmap_len);
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+
+ /* Round to sector size so that lba_list starts on its own sector */
+ lm->emeta_sec[1] = DIV_ROUND_UP(
+ sizeof(struct line_emeta) + lm->blk_bitmap_len,
+ geo->sec_size);
+ lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+
+ /* Round to sector size so that vsc_list starts on its own sector */
+ lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
+ lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
+ geo->sec_size);
+ lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+
+ lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
+ geo->sec_size);
+ lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+
+ lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
+
+ return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
atomic_set(&pblk->rl.free_blocks, nr_free_blks);
}
+static int pblk_lines_alloc_metadata(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i;
+
+ /* smeta is always small enough to fit on a kmalloc memory allocation,
+ * emeta depends on the number of LUNs allocated to the pblk instance
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
+ if (!l_mg->sline_meta[i])
+ goto fail_free_smeta;
+ }
+
+ /* emeta allocates three different buffers for managing metadata with
+ * in-memory and in-media layouts
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ struct pblk_emeta *emeta;
+
+ emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
+ if (!emeta)
+ goto fail_free_emeta;
+
+ if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
+ l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+ emeta->buf = vmalloc(lm->emeta_len[0]);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ } else {
+ l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+ emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ }
+ }
+
+ l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+ if (!l_mg->vsc_list)
+ goto fail_free_emeta;
+
+ for (i = 0; i < l_mg->nr_lines; i++)
+ l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+
+ return 0;
+
+fail_free_emeta:
+ while (--i >= 0) {
+ vfree(l_mg->eline_meta[i]->buf);
+ kfree(l_mg->eline_meta[i]);
+ }
+
+fail_free_smeta:
+ for (i = 0; i < PBLK_DATA_LINES; i++)
+ kfree(l_mg->sline_meta[i]);
+
+ return -ENOMEM;
+}
+
static int pblk_lines_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line *line;
unsigned int smeta_len, emeta_len;
- long nr_bad_blks, nr_meta_blks, nr_free_blks;
- int bb_distance;
- int i;
- int ret;
+ long nr_bad_blks, nr_free_blks;
+ int bb_distance, max_write_ppas, mod;
+ int i, ret;
+
+ pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+ max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+ pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+ max_write_ppas : nvm_max_phys_sects(dev);
+ pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+ if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+ pr_err("pblk: cannot support device max_phys_sect\n");
+ return -EINVAL;
+ }
+
+ div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+ if (mod) {
+ pr_err("pblk: bad configuration of sectors/pages\n");
+ return -EINVAL;
+ }
+
+ l_mg->nr_lines = geo->blks_per_lun;
+ l_mg->log_line = l_mg->data_line = NULL;
+ l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+ l_mg->nr_free_lines = 0;
+ bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
lm->high_thrs = lm->sec_per_line / 2;
lm->mid_thrs = lm->sec_per_line / 4;
+ lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
/* Calculate necessary pages for smeta. See comment over struct
* line_smeta definition
*/
- lm->smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
-
i = 1;
add_smeta_page:
lm->smeta_sec = i * geo->sec_per_pl;
lm->smeta_len = lm->smeta_sec * geo->sec_size;
- smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+ smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
if (smeta_len > lm->smeta_len) {
i++;
goto add_smeta_page;
@@ -578,66 +704,28 @@ add_smeta_page:
*/
i = 1;
add_emeta_page:
- lm->emeta_sec = i * geo->sec_per_pl;
- lm->emeta_len = lm->emeta_sec * geo->sec_size;
+ lm->emeta_sec[0] = i * geo->sec_per_pl;
+ lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
- emeta_len = calc_emeta_len(pblk, lm);
- if (emeta_len > lm->emeta_len) {
+ emeta_len = calc_emeta_len(pblk);
+ if (emeta_len > lm->emeta_len[0]) {
i++;
goto add_emeta_page;
}
- lm->emeta_bb = geo->nr_luns - i;
-
- nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
- (geo->sec_per_blk / 2)) / geo->sec_per_blk;
- lm->min_blk_line = nr_meta_blks + 1;
-
- l_mg->nr_lines = geo->blks_per_lun;
- l_mg->log_line = l_mg->data_line = NULL;
- l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
- l_mg->nr_free_lines = 0;
- bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- /* smeta is always small enough to fit on a kmalloc memory allocation,
- * emeta depends on the number of LUNs allocated to the pblk instance
- */
- l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
- if (!l_mg->sline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->sline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
+ lm->emeta_bb = geo->nr_luns - i;
+ lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
+ geo->sec_per_blk);
+ if (lm->min_blk_line > lm->blk_per_line) {
+ pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
+ lm->blk_per_line);
+ ret = -EINVAL;
+ goto fail;
}
- if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
- l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- vfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- } else {
- l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta =
- kmalloc(lm->emeta_len, GFP_KERNEL);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- }
+ ret = pblk_lines_alloc_metadata(pblk);
+ if (ret)
+ goto fail;
l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ add_emeta_page:
INIT_LIST_HEAD(&l_mg->gc_low_list);
INIT_LIST_HEAD(&l_mg->gc_empty_list);
+ INIT_LIST_HEAD(&l_mg->emeta_list);
+
l_mg->gc_lists[0] = &l_mg->gc_high_list;
l_mg->gc_lists[1] = &l_mg->gc_mid_list;
l_mg->gc_lists[2] = &l_mg->gc_low_list;
spin_lock_init(&l_mg->free_lock);
+ spin_lock_init(&l_mg->close_lock);
spin_lock_init(&l_mg->gc_lock);
pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ add_emeta_page:
line->type = PBLK_LINETYPE_FREE;
line->state = PBLK_LINESTATE_FREE;
line->gc_group = PBLK_LINEGC_NONE;
+ line->vsc = &l_mg->vsc_list[i];
spin_lock_init(&line->lock);
- nr_bad_blks = pblk_bb_line(pblk, line);
+ ret = pblk_alloc_line_bitmaps(pblk, line);
+ if (ret)
+ goto fail_free_lines;
+
+ nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+ pblk_free_line_bitmaps(line);
ret = -EINVAL;
goto fail_free_lines;
}
@@ -713,24 +810,20 @@ add_emeta_page:
pblk_set_provision(pblk, nr_free_blks);
- sema_init(&pblk->erase_sem, 1);
-
/* Cleanup per-LUN bad block lists - managed within lines on run-time */
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
return 0;
fail_free_lines:
- kfree(pblk->lines);
+ while (--i >= 0)
+ pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_aux:
kfree(l_mg->bb_aux);
fail_free_bb_template:
kfree(l_mg->bb_template);
fail_free_meta:
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
- }
+ pblk_line_meta_free(pblk);
fail:
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
static void pblk_writer_stop(struct pblk *pblk)
{
+ /* The pipeline must be stopped and the write buffer emptied before the
+ * write thread is stopped
+ */
+ WARN(pblk_rb_read_count(&pblk->rwb),
+ "Stopping not fully persisted write buffer\n");
+
+ WARN(pblk_rb_sync_count(&pblk->rwb),
+ "Stopping not fully synced write buffer\n");
+
if (pblk->writer_ts)
kthread_stop(pblk->writer_ts);
del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
static void pblk_tear_down(struct pblk *pblk)
{
- pblk_flush_writer(pblk);
+ pblk_pipeline_stop(pblk);
pblk_writer_stop(pblk);
pblk_rb_sync_l2p(&pblk->rwb);
- pblk_recov_pad(pblk);
pblk_rwb_free(pblk);
pblk_rl_free(&pblk->rl);
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
pblk->dev = dev;
pblk->disk = tdisk;
+ pblk->state = PBLK_STATE_RUNNING;
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
atomic_long_set(&pblk->req_writes, 0);
atomic_long_set(&pblk->sub_writes, 0);
atomic_long_set(&pblk->sync_writes, 0);
- atomic_long_set(&pblk->compl_writes, 0);
atomic_long_set(&pblk->inflight_reads, 0);
+ atomic_long_set(&pblk->cache_reads, 0);
atomic_long_set(&pblk->sync_reads, 0);
atomic_long_set(&pblk->recov_writes, 0);
atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
static int __init pblk_module_init(void)
{
- return nvm_register_tgt_type(&tt_pblk);
+ int ret;
+
+ pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!pblk_bio_set)
+ return -ENOMEM;
+ ret = nvm_register_tgt_type(&tt_pblk);
+ if (ret)
+ bioset_free(pblk_bio_set);
+ return ret;
}
static void pblk_module_exit(void)
{
+ bioset_free(pblk_bio_set);
nvm_unregister_tgt_type(&tt_pblk);
}
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 17c16955284d..fddb924f6dde 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
struct pblk_w_ctx *w_ctx;
- __le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
w_ctx->ppa = ppa_list[i];
meta_list[i].lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
- le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+ line->nr_valid_lbas++;
} else {
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
- pblk_map_pad_invalidate(pblk, line, paddr);
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ lba_list[paddr] = meta_list[i].lba = addr_empty;
+ __pblk_map_invalidate(pblk, line, paddr);
}
}
if (pblk_line_is_full(line)) {
- line = pblk_line_replace_data(pblk);
- if (!line)
- return;
+ struct pblk_line *prev_line = line;
+
+ pblk_line_replace_data(pblk);
+ pblk_line_close_meta(pblk, prev_line);
}
pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+ struct pblk_line_meta *lm = &pblk->lm;
struct pblk_sec_meta *meta_list = rqd->meta_list;
+ struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
lun_bitmap, &meta_list[i], map_secs);
- erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
- rqd->ppa_list[i].g.ch;
+ erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
- if (!test_bit(erase_lun, e_line->erase_bitmap)) {
- if (down_trylock(&pblk->erase_sem))
- continue;
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+ valid_secs, i + min);
+ spin_lock(&e_line->lock);
+ if (!test_bit(erase_lun, e_line->erase_bitmap)) {
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
+
*erase_ppa = rqd->ppa_list[i];
erase_ppa->g.blk = e_line->id;
+ spin_unlock(&e_line->lock);
+
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
+ spin_unlock(&e_line->lock);
}
- /* Erase blocks that are bad in this line but might not be in next */
- if (unlikely(ppa_empty(*erase_ppa))) {
- struct pblk_line_meta *lm = &pblk->lm;
+ d_line = pblk_line_get_data(pblk);
+
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return;
- i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
- if (i == lm->blk_per_line)
+ /* Erase blocks that are bad in this line but might not be in next */
+ if (unlikely(ppa_empty(*erase_ppa)) &&
+ bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+ int bit = -1;
+
+retry:
+ bit = find_next_bit(d_line->blk_bitmap,
+ lm->blk_per_line, bit + 1);
+ if (bit >= lm->blk_per_line)
return;
- set_bit(i, e_line->erase_bitmap);
+ spin_lock(&e_line->lock);
+ if (test_bit(bit, e_line->erase_bitmap)) {
+ spin_unlock(&e_line->lock);
+ goto retry;
+ }
+ spin_unlock(&e_line->lock);
+
+ set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
- *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+ *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->g.blk = e_line->id;
}
}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 045384ddc1f9..5ecc154f6831 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -150,6 +150,7 @@ try:
/* Release flags on context. Protect from writes and reads */
smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
pblk_ppa_set_empty(&w_ctx->ppa);
+ w_ctx->lba = ADDR_EMPTY;
}
#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+ unsigned int mem = READ_ONCE(rb->mem);
+ unsigned int sync = READ_ONCE(rb->sync);
+
+ return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
struct pblk_line *line;
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ unsigned int user_io = 0, gc_io = 0;
unsigned int i;
+ int flags;
for (i = 0; i < to_update; i++) {
entry = &rb->entries[*l2p_upd];
w_ctx = &entry->w_ctx;
+ flags = READ_ONCE(entry->w_ctx.flags);
+ if (flags & PBLK_IOTYPE_USER)
+ user_io++;
+ else if (flags & PBLK_IOTYPE_GC)
+ gc_io++;
+ else
+ WARN(1, "pblk: unknown IO type\n");
+
pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
entry->cacheline);
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
}
+ pblk_rl_out(&pblk->rl, user_io, gc_io);
+
return 0;
}
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
/* Protect syncs */
smp_store_release(&rb->sync_point, sync_point);
+ if (!bio)
+ return 0;
+
spin_lock_irq(&rb->s_lock);
bio_list_add(&entry->w_ctx.bios, bio);
spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
return 1;
}
+void pblk_rb_flush(struct pblk_rb *rb)
+{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ unsigned int mem = READ_ONCE(rb->mem);
+
+ if (pblk_rb_sync_point_set(rb, NULL, mem))
+ return;
+
+ pblk_write_should_kick(pblk);
+}
+
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
unsigned int *pos, struct bio *bio,
int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
unsigned int nr_entries, unsigned int *pos)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
- int flush_done;
+ int io_ret;
spin_lock(&rb->w_lock);
- if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+ io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
+ if (io_ret) {
spin_unlock(&rb->w_lock);
- return NVM_IO_REQUEUE;
+ return io_ret;
}
- if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+ if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
spin_unlock(&rb->w_lock);
return NVM_IO_REQUEUE;
}
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
pblk_rl_user_in(&pblk->rl, nr_entries);
spin_unlock(&rb->w_lock);
- return flush_done;
+ return io_ret;
}
/*
@@ -521,20 +557,18 @@ out:
* This function is used by the write thread to form the write bio that will
* persist data on the write buffer to the media.
*/
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ struct request_queue *q = pblk->dev->q;
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
struct pblk_rb_entry *entry;
struct page *page;
- unsigned int pad = 0, read = 0, to_read = nr_entries;
- unsigned int user_io = 0, gc_io = 0;
+ unsigned int pad = 0, to_read = nr_entries;
unsigned int i;
int flags;
- int ret;
if (count < nr_entries) {
pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
*/
try:
flags = READ_ONCE(entry->w_ctx.flags);
- if (!(flags & PBLK_WRITTEN_DATA))
+ if (!(flags & PBLK_WRITTEN_DATA)) {
+ io_schedule();
goto try;
-
- if (flags & PBLK_IOTYPE_USER)
- user_io++;
- else if (flags & PBLK_IOTYPE_GC)
- gc_io++;
- else
- WARN(1, "pblk: unknown IO type\n");
+ }
page = virt_to_page(entry->data);
if (!page) {
@@ -570,17 +599,17 @@ try:
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
- ret = bio_add_page(bio, page, rb->seg_size, 0);
- if (ret != rb->seg_size) {
+ if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+ rb->seg_size) {
pr_err("pblk: could not add page to write bio\n");
flags &= ~PBLK_WRITTEN_DATA;
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ try:
pos = (pos + 1) & (rb->nr_entries - 1);
}
- read = to_read;
- pblk_rl_out(&pblk->rl, user_io, gc_io);
+ if (pad) {
+ if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+ pr_err("pblk: could not pad page in write bio\n");
+ return NVM_IO_ERR;
+ }
+ }
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(pad, &((struct pblk *)
(container_of(rb, struct pblk, rwb)))->padded_writes);
#endif
-out:
- return read;
+
+ return NVM_IO_OK;
}
/*
@@ -623,15 +657,17 @@ out:
* be directed to disk.
*/
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter)
+ struct ppa_addr ppa, int bio_iter)
{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ struct ppa_addr l2p_ppa;
+ u64 pos = pblk_addr_to_cacheline(ppa);
void *data;
int flags;
int ret = 1;
- spin_lock(&rb->w_lock);
#ifdef CONFIG_NVM_DEBUG
/* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
w_ctx = &entry->w_ctx;
flags = READ_ONCE(w_ctx->flags);
+ spin_lock(&rb->w_lock);
+ spin_lock(&pblk->trans_lock);
+ l2p_ppa = pblk_trans_map_get(pblk, lba);
+ spin_unlock(&pblk->trans_lock);
+
/* Check if the entry has been overwritten or is scheduled to be */
- if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+ if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
+ flags & PBLK_WRITABLE_ENTRY) {
ret = 0;
goto out;
}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14d78c6..4e5c48f3de62 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
BUG_ON(!pblk_addr_in_cache(ppa));
#endif
- return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
- pblk_addr_to_cacheline(ppa), bio_iter);
+ return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
}
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ retry:
}
WARN_ON(test_and_set_bit(i, read_bitmap));
advanced_bio = 1;
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
/* Read from media non-cached sectors */
rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ retry:
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
+ if (pblk_io_aligned(pblk, nr_secs))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
{
int err;
- rqd->flags = pblk_set_read_mode(pblk);
-
err = pblk_submit_io(pblk, rqd);
if (err)
return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = rqd->bio;
if (rqd->error)
pblk_log_read_err(pblk, rqd);
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+ WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
#endif
- if (rqd->nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
bio_put(bio);
- if (r_ctx->orig_bio) {
+ if (r_ctx->private) {
+ struct bio *orig_bio = r_ctx->private;
+
#ifdef CONFIG_NVM_DEBUG
- WARN_ONCE(r_ctx->orig_bio->bi_error,
- "pblk: corrupted read bio\n");
+ WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
#endif
- bio_endio(r_ctx->orig_bio);
- bio_put(r_ctx->orig_bio);
+ bio_endio(orig_bio);
+ bio_put(orig_bio);
}
#ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
#endif
pblk_free_rqd(pblk, rqd, READ);
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
rqd->bio = new_bio;
rqd->nr_ppas = nr_holes;
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd->end_io = NULL;
if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ retry:
goto retry;
}
WARN_ON(test_and_set_bit(0, read_bitmap));
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
rqd->ppa_addr = ppa;
}
+
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
}
int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
*/
bio_init_idx = pblk_get_bi_idx(bio);
+ rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd->dma_meta_list);
+ if (!rqd->meta_list) {
+ pr_err("pblk: not able to allocate ppa list\n");
+ goto fail_rqd_free;
+ }
+
if (nr_secs > 1) {
- rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd->dma_ppa_list);
- if (!rqd->ppa_list) {
- pr_err("pblk: not able to allocate ppa list\n");
- goto fail_rqd_free;
- }
+ rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+ rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
} else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
bio_get(bio);
if (bitmap_full(&read_bitmap, nr_secs)) {
bio_endio(bio);
+ atomic_inc(&pblk->inflight_io);
pblk_end_io_read(rqd);
return NVM_IO_OK;
}
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
/* All sectors are to be read from the device */
if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
struct bio *int_bio = NULL;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
/* Clone read bio to deal with read errors internally */
- int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+ int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
if (!int_bio) {
pr_err("pblk: could not clone read bio\n");
return NVM_IO_ERR;
}
rqd->bio = int_bio;
- r_ctx->orig_bio = bio;
+ r_ctx->private = bio;
ret = pblk_submit_read_io(pblk, rqd);
if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct request_queue *q = dev->q;
struct bio *bio;
struct nvm_rq rqd;
int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
memset(&rqd, 0, sizeof(struct nvm_rq));
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
+ return NVM_IO_ERR;
+
if (nr_secs > 1) {
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
- return NVM_IO_ERR;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
nr_secs);
- if (*secs_to_gc == 1) {
- struct ppa_addr ppa;
-
- ppa = rqd.ppa_list[0];
- nvm_dev_dma_free(dev->parent, rqd.ppa_list,
- rqd.dma_ppa_list);
- rqd.ppa_addr = ppa;
- }
+ if (*secs_to_gc == 1)
+ rqd.ppa_addr = rqd.ppa_list[0];
} else {
*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
}
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
goto out;
data_len = (*secs_to_gc) * geo->sec_size;
- bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
+ PBLK_KMALLOC_META, GFP_KERNEL);
if (IS_ERR(bio)) {
pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
rqd.nr_ppas = *secs_to_gc;
+ rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd.bio = bio;
ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: GC read I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
#endif
out:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_OK;
err_free_dma:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_ERR;
}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index f8f85087cd3c..cb556e06673e 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
return 0;
}
-__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
{
u32 crc;
- crc = pblk_calc_emeta_crc(pblk, emeta);
- if (le32_to_cpu(emeta->crc) != crc)
+ crc = pblk_calc_emeta_crc(pblk, emeta_buf);
+ if (le32_to_cpu(emeta_buf->crc) != crc)
return NULL;
- if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
return NULL;
- return pblk_line_emeta_to_lbas(emeta);
+ return emeta_to_lbas(pblk, emeta_buf);
}
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
__le64 *lba_list;
int data_start;
int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
int i;
- lba_list = pblk_recov_get_lba_list(pblk, emeta);
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
if (!lba_list)
return 1;
data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
- nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+ nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
+ nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
if (test_and_set_bit(i, line->invalid_bitmap))
WARN_ONCE(1, "pblk: rec. double invalidate:\n");
else
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
spin_unlock(&line->lock);
continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
if (nr_valid_lbas != nr_lbas)
pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
- line->id, line->emeta->nr_valid_lbas, nr_lbas);
+ line->id, emeta_buf->nr_valid_lbas, nr_lbas);
line->left_msecs = 0;
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
- return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+ return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
nr_bb * geo->sec_per_blk;
}
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
r_ptr_int = r_ptr;
next_read_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -256,7 +257,6 @@ next_read_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ next_read_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -295,7 +300,7 @@ next_read_rq:
pr_err("pblk: L2P recovery read timed out\n");
return -EINTR;
}
-
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,99 @@ next_read_rq:
return 0;
}
+/*
+ * kref release callback for a pad request: recovers the struct pblk_pad_rq
+ * that embeds @ref and signals its completion so the waiter can proceed.
+ */
+static void pblk_recov_complete(struct kref *ref)
+{
+	struct pblk_pad_rq *pad_rq;
+
+	pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+	complete(&pad_rq->wait);
+}
+
+/*
+ * End-of-I/O handler installed on padding writes by pblk_recov_pad_oob().
+ *
+ * Calls pblk_up_page() on the request's ppa list, releases the bio, the DMA
+ * metadata buffer and the request itself, decrements the in-flight I/O
+ * counter and finally drops the reference that was taken on the shared pad
+ * request before this I/O was submitted.
+ */
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+	struct pblk_pad_rq *pad_rq = rqd->private;
+	struct pblk *pblk = pad_rq->pblk;
+
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
+	/* rqd is freed last: the two releases before it still read its fields */
+	bio_put(rqd->bio);
+	nvm_dev_dma_free(pblk->dev->parent, rqd->meta_list,
+			 rqd->dma_meta_list);
+	pblk_free_rqd(pblk, rqd, WRITE);
+
+	atomic_dec(&pblk->inflight_io);
+	kref_put(&pad_rq->ref, pblk_recov_complete);
+}
+
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p, int left_ppas)
+ int left_ppas)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct ppa_addr *ppa_list;
struct pblk_sec_meta *meta_list;
+ struct pblk_pad_rq *pad_rq;
struct nvm_rq *rqd;
struct bio *bio;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
- __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
u64 w_ptr = line->cur_sec;
- int left_line_ppas = line->left_msecs;
- int rq_ppas, rq_len;
+ int left_line_ppas, rq_ppas, rq_len;
int i, j;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
+ spin_lock(&line->lock);
+ left_line_ppas = line->left_msecs;
+ spin_unlock(&line->lock);
+
+ pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+ if (!pad_rq)
+ return -ENOMEM;
+
+ data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+ if (!data) {
+ ret = -ENOMEM;
+ goto free_rq;
+ }
+
+ pad_rq->pblk = pblk;
+ init_completion(&pad_rq->wait);
+ kref_init(&pad_rq->ref);
next_pad_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
+ if (rq_ppas < pblk->min_write_pgs) {
+ pr_err("pblk: corrupted pad line %d\n", line->id);
+ goto fail_free_pad;
+ }
+
rq_len = rq_ppas * geo->sec_size;
- bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+ if (!meta_list) {
+ ret = -ENOMEM;
+ goto fail_free_pad;
+ }
+
+ ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+ rqd = pblk_alloc_rqd(pblk, WRITE);
+ if (IS_ERR(rqd)) {
+ ret = PTR_ERR(rqd);
+ goto fail_free_meta;
+ }
+
+ bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+ PBLK_VMALLOC_META, GFP_KERNEL);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- memset(rqd, 0, pblk_r_rq_size);
-
rqd->bio = bio;
rqd->opcode = NVM_OP_PWRITE;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +428,8 @@ next_pad_rq:
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
+ rqd->end_io = pblk_end_io_recov;
+ rqd->private = pad_rq;
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
@@ -390,34 +447,57 @@ next_pad_rq:
for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
struct ppa_addr dev_ppa;
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pblk_map_invalidate(pblk, dev_ppa);
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+ lba_list[w_ptr] = meta_list[i].lba = addr_empty;
rqd->ppa_list[i] = dev_ppa;
}
}
+ kref_get(&pad_rq->ref);
+ pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
ret = pblk_submit_io(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
- return ret;
+ pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+ goto fail_free_bio;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery write timed out\n");
- }
- reinit_completion(&wait);
-
left_line_ppas -= rq_ppas;
left_ppas -= rq_ppas;
- if (left_ppas > 0 && left_line_ppas)
+ if (left_ppas && left_line_ppas)
goto next_pad_rq;
- return 0;
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+
+ if (!wait_for_completion_io_timeout(&pad_rq->wait,
+ msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+ pr_err("pblk: pad write timed out\n");
+ ret = -ETIME;
+ }
+
+ if (!pblk_line_is_full(line))
+ pr_err("pblk: corrupted padded line: %d\n", line->id);
+
+ vfree(data);
+free_rq:
+ kfree(pad_rq);
+ return ret;
+
+fail_free_bio:
+ bio_put(bio);
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+ nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+fail_free_pad:
+ kfree(pad_rq);
+ vfree(data);
+ return ret;
}
/* When this function is called, it means that not all upper pages have been
@@ -456,7 +536,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
rec_round = 0;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -472,7 +552,6 @@ next_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -481,6 +560,11 @@ next_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -510,6 +594,7 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* This should not happen since the read failed during normal recovery,
@@ -544,7 +629,7 @@ next_rq:
if (pad_secs > line->left_msecs)
pad_secs = line->left_msecs;
- ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+ ret = pblk_recov_pad_oob(pblk, line, pad_secs);
if (ret)
pr_err("pblk: OOB padding failed (err:%d)\n", ret);
@@ -552,7 +637,6 @@ next_rq:
if (ret)
pr_err("pblk: OOB read failed (err:%d)\n", ret);
- line->left_ssecs = line->left_msecs;
left_ppas = 0;
}
@@ -591,7 +675,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
*done = 1;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -607,7 +691,6 @@ next_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -616,6 +699,11 @@ next_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -646,6 +734,7 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* Reached the end of the written line */
@@ -658,7 +747,6 @@ next_rq:
/* Roll back failed sectors */
line->cur_sec -= nr_error_bits;
line->left_msecs += nr_error_bits;
- line->left_ssecs = line->left_msecs;
bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
left_ppas = 0;
@@ -770,8 +858,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line, *tline, *data_line = NULL;
- struct line_smeta *smeta;
- struct line_emeta *emeta;
+ struct pblk_smeta *smeta;
+ struct pblk_emeta *emeta;
+ struct line_smeta *smeta_buf;
int found_lines = 0, recovered_lines = 0, open_lines = 0;
int is_next = 0;
int meta_line;
@@ -784,8 +873,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
spin_lock(&l_mg->free_lock);
meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
set_bit(meta_line, &l_mg->meta_bitmap);
- smeta = l_mg->sline_meta[meta_line].meta;
- emeta = l_mg->eline_meta[meta_line].meta;
+ smeta = l_mg->sline_meta[meta_line];
+ emeta = l_mg->eline_meta[meta_line];
+ smeta_buf = (struct line_smeta *)smeta;
spin_unlock(&l_mg->free_lock);
/* Order data lines using their sequence number */
@@ -796,33 +886,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
memset(smeta, 0, lm->smeta_len);
line->smeta = smeta;
- line->lun_bitmap = ((void *)(smeta)) +
+ line->lun_bitmap = ((void *)(smeta_buf)) +
sizeof(struct line_smeta);
/* Lines that cannot be read are assumed as not written here */
if (pblk_line_read_smeta(pblk, line))
continue;
- crc = pblk_calc_smeta_crc(pblk, smeta);
- if (le32_to_cpu(smeta->crc) != crc)
+ crc = pblk_calc_smeta_crc(pblk, smeta_buf);
+ if (le32_to_cpu(smeta_buf->crc) != crc)
continue;
- if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
continue;
- if (le16_to_cpu(smeta->header.version) != 1) {
+ if (le16_to_cpu(smeta_buf->header.version) != 1) {
pr_err("pblk: found incompatible line version %u\n",
- smeta->header.version);
+ smeta_buf->header.version);
return ERR_PTR(-EINVAL);
}
/* The first valid instance uuid is used for initialization */
if (!valid_uuid) {
- memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+ memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
valid_uuid = 1;
}
- if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+ if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
pr_debug("pblk: ignore line %u due to uuid mismatch\n",
i);
continue;
@@ -830,9 +920,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
/* Update line metadata */
spin_lock(&line->lock);
- line->id = le32_to_cpu(line->smeta->header.id);
- line->type = le16_to_cpu(line->smeta->header.type);
- line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+ line->id = le32_to_cpu(smeta_buf->header.id);
+ line->type = le16_to_cpu(smeta_buf->header.type);
+ line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
spin_unlock(&line->lock);
/* Update general metadata */
@@ -848,7 +938,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
pblk_recov_line_add_ordered(&recov_list, line);
found_lines++;
pr_debug("pblk: recovering data line %d, seq:%llu\n",
- line->id, smeta->seq_nr);
+ line->id, smeta_buf->seq_nr);
}
if (!found_lines) {
@@ -868,15 +958,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
recovered_lines++;
/* Calculate where emeta starts based on the line bb */
- off = lm->sec_per_line - lm->emeta_sec;
+ off = lm->sec_per_line - lm->emeta_sec[0];
nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
off -= nr_bb * geo->sec_per_pl;
- memset(emeta, 0, lm->emeta_len);
- line->emeta = emeta;
line->emeta_ssec = off;
+ line->emeta = emeta;
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
- if (pblk_line_read_emeta(pblk, line)) {
+ if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
}
@@ -941,58 +1031,26 @@ out:
}
/*
- * Pad until smeta can be read on current data line
+ * Pad current line
*/
-void pblk_recov_pad(struct pblk *pblk)
+int pblk_recov_pad(struct pblk *pblk)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_line *line;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct nvm_rq *rqd;
- struct pblk_recov_alloc p;
- struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
+ int left_msecs;
+ int ret = 0;
spin_lock(&l_mg->free_lock);
line = l_mg->data_line;
+ left_msecs = line->left_msecs;
spin_unlock(&l_mg->free_lock);
- rqd = pblk_alloc_rqd(pblk, READ);
- if (IS_ERR(rqd))
- return;
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
- if (!meta_list)
- goto free_rqd;
-
- ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
- data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
- if (!data)
- goto free_meta_list;
-
- p.ppa_list = ppa_list;
- p.meta_list = meta_list;
- p.rqd = rqd;
- p.data = data;
- p.dma_ppa_list = dma_ppa_list;
- p.dma_meta_list = dma_meta_list;
-
- if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
- pr_err("pblk: Tear down padding failed\n");
- goto free_data;
+ ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+ if (ret) {
+ pr_err("pblk: Tear down padding failed (%d)\n", ret);
+ return ret;
}
- pblk_line_close(pblk, line);
-
-free_data:
- kfree(data);
-free_meta_list:
- nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
- pblk_free_rqd(pblk, rqd, READ);
+ pblk_line_close_meta(pblk, line);
+ return ret;
}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb144f3f..2e6a5361baf0 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
}
+/* Returns non-zero when the rb_space counter currently reads exactly zero. */
+int pblk_rl_is_limit(struct pblk_rl *rl)
+{
+	return atomic_read(&rl->rb_space) == 0;
+}
+
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+ int rb_space = atomic_read(&rl->rb_space);
- return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+ if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
+ return NVM_IO_ERR;
+
+ if (rb_user_cnt >= rl->rb_user_max)
+ return NVM_IO_REQUEUE;
+
+ return NVM_IO_OK;
+}
+
+/*
+ * Account for @nr_entries inserted entries by subtracting them from the
+ * rb_space counter. A negative counter is left untouched.
+ */
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
+{
+	if (likely(atomic_read(&rl->rb_space) < 0))
+		return;
+
+	atomic_sub(nr_entries, &rl->rb_space);
}
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
/* If there is no user I/O let GC take over space on the write buffer */
rb_user_active = READ_ONCE(rl->rb_user_active);
- return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+ return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
}
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
if (free_blocks >= rl->high) {
- rl->rb_user_max = max - rl->rb_gc_rsv;
- rl->rb_gc_max = rl->rb_gc_rsv;
+ rl->rb_user_max = max;
+ rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
} else if (free_blocks < rl->high) {
int shift = rl->high_pw - rl->rb_windows_pw;
int user_windows = free_blocks >> shift;
int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
- int gc_max;
rl->rb_user_max = user_max;
- gc_max = max - rl->rb_user_max;
- rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
-
- if (free_blocks > rl->low)
- rl->rb_state = PBLK_RL_MID;
- else
- rl->rb_state = PBLK_RL_LOW;
+ rl->rb_gc_max = max - user_max;
+
+ if (free_blocks <= rl->rsv_blocks) {
+ rl->rb_user_max = 0;
+ rl->rb_gc_max = max;
+ }
+
+ /* In the worst case, we will need to GC lines in the low list
+ * (high valid sector count). If there are lines to GC on high
+ * or mid lists, these will be prioritized
+ */
+ rl->rb_state = PBLK_RL_LOW;
}
return rl->rb_state;
}
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
- rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
int blk_in_line = atomic_read(&line->blk_in_line);
- int ret;
atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+ int ret;
/* Rates will not change that often - no need to lock update */
ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
pblk_gc_should_stop(pblk);
}
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
{
return rl->high;
}
+/* Accessor for the rate limiter's low threshold (rl->low). */
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+	int low_thrs = rl->low;
+
+	return low_thrs;
+}
+
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
{
return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
void pblk_rl_init(struct pblk_rl *rl, int budget)
{
+ struct pblk *pblk = container_of(rl, struct pblk, rl);
+ struct pblk_line_meta *lm = &pblk->lm;
+ int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
unsigned int rb_windows;
rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
- rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
rl->high_pw = get_count_order(rl->high);
+ rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+ if (rl->low < min_blocks)
+ rl->low = min_blocks;
+
+ rl->rsv_blocks = min_blocks;
+
/* This will always be a power-of-2 */
rb_windows = budget / PBLK_MAX_REQ_ADDRS;
- rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+ rl->rb_windows_pw = get_count_order(rb_windows);
/* To start with, all buffer is available to user I/O writers */
rl->rb_budget = budget;
rl->rb_user_max = budget;
- atomic_set(&rl->rb_user_cnt, 0);
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
+
+ atomic_set(&rl->rb_user_cnt, 0);
atomic_set(&rl->rb_gc_cnt, 0);
+ atomic_set(&rl->rb_space, -1);
setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+
rl->rb_user_active = 0;
+ rl->rb_gc_active = 0;
}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index f0af1d1ceeff..95fb434e2f01 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
int free_blocks, total_blocks;
int rb_user_max, rb_user_cnt;
- int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+ int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
free_blocks = atomic_read(&pblk->rl.free_blocks);
rb_user_max = pblk->rl.rb_user_max;
rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
rb_gc_max = pblk->rl.rb_gc_max;
- rb_gc_rsv = pblk->rl.rb_gc_rsv;
rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
rb_budget = pblk->rl.rb_budget;
rb_state = pblk->rl.rb_state;
- total_blocks = geo->blks_per_lun * geo->nr_luns;
+ total_blocks = pblk->rl.total_blocks;
return snprintf(page, PAGE_SIZE,
- "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+ "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
rb_user_cnt,
rb_user_max,
rb_gc_cnt,
rb_gc_max,
- rb_gc_rsv,
rb_state,
rb_budget,
pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
ssize_t sz = 0;
int nr_free_lines;
int cur_data, cur_log;
- int free_line_cnt = 0, closed_line_cnt = 0;
+ int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
int d_line_cnt = 0, l_line_cnt = 0;
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
- int free = 0, bad = 0, cor = 0;
- int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+ int bad = 0, cor = 0;
+ int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
int map_weight = 0, meta_weight = 0;
spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
free_line_cnt++;
spin_unlock(&l_mg->free_lock);
+ spin_lock(&l_mg->close_lock);
+ list_for_each_entry(line, &l_mg->emeta_list, list)
+ emeta_line_cnt++;
+ spin_unlock(&l_mg->close_lock);
+
spin_lock(&l_mg->gc_lock);
list_for_each_entry(line, &l_mg->gc_full_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
gc_empty++;
}
- list_for_each_entry(line, &l_mg->free_list, list)
- free++;
list_for_each_entry(line, &l_mg->bad_list, list)
bad++;
list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
if (l_mg->data_line) {
cur_sec = l_mg->data_line->cur_sec;
msecs = l_mg->data_line->left_msecs;
- ssecs = l_mg->data_line->left_ssecs;
- vsc = l_mg->data_line->vsc;
+ vsc = le32_to_cpu(*l_mg->data_line->vsc);
sec_in_line = l_mg->data_line->sec_in_line;
meta_weight = bitmap_weight(&l_mg->meta_bitmap,
PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
spin_unlock(&l_mg->free_lock);
if (nr_free_lines != free_line_cnt)
- pr_err("pblk: corrupted free line list\n");
+ pr_err("pblk: corrupted free line list:%d/%d\n",
+ nr_free_lines, free_line_cnt);
sz = snprintf(page, PAGE_SIZE - sz,
"line: nluns:%d, nblks:%d, nsecs:%d\n",
geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+ "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
cur_data, cur_log,
- free, nr_free_lines, bad, cor,
+ nr_free_lines,
+ emeta_line_cnt, meta_weight,
closed_line_cnt,
+ bad, cor,
d_line_cnt, l_line_cnt,
l_mg->nr_lines);
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
atomic_read(&pblk->gc.inflight_gc));
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
- cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
- map_weight, lm->sec_per_line, meta_weight);
+ "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+ cur_data, cur_sec, msecs, vsc, sec_in_line,
+ map_weight, lm->sec_per_line,
+ atomic_read(&pblk->inflight_io));
return sz;
}
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
lm->smeta_len, lm->smeta_sec);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"emeta - len:%d, sec:%d, bb_start:%d\n",
- lm->emeta_len, lm->emeta_sec,
+ lm->emeta_len[0], lm->emeta_sec[0],
lm->emeta_bb);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
return sz;
}
+/*
+ * Show helper for the "max_sec_per_write" attribute: formats
+ * pblk->sec_per_write as a decimal number followed by a newline into @page
+ * and returns snprintf()'s result.
+ */
+static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
+{
+	ssize_t sz;
+
+	sz = snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
+
+	return sz;
+}
+
#ifdef CONFIG_NVM_DEBUG
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
{
@@ -303,52 +310,51 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
atomic_long_read(&pblk->padded_wb),
atomic_long_read(&pblk->sub_writes),
atomic_long_read(&pblk->sync_writes),
- atomic_long_read(&pblk->compl_writes),
atomic_long_read(&pblk->recov_writes),
atomic_long_read(&pblk->recov_gc_writes),
atomic_long_read(&pblk->recov_gc_reads),
+ atomic_long_read(&pblk->cache_reads),
atomic_long_read(&pblk->sync_reads));
}
#endif
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+ size_t len)
{
- struct pblk_gc *gc = &pblk->gc;
size_t c_len;
- int value;
+ int force;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &value))
+ if (kstrtouint(page, 0, &force))
return -EINVAL;
- spin_lock(&gc->lock);
- pblk_rl_set_gc_rsc(&pblk->rl, value);
- spin_unlock(&gc->lock);
+ pblk_gc_sysfs_force(pblk, force);
return len;
}
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
- size_t len)
+/*
+ * Store handler for the "max_sec_per_write" attribute.
+ *
+ * @page must contain a newline within its first @len bytes and parse as an
+ * integer (base auto-detected). The value is accepted only if it lies in
+ * [min_write_pgs, max_write_pgs] and is a multiple of min_write_pgs; it is
+ * then handed to pblk_set_sec_per_write().
+ *
+ * Returns @len on success, -EINVAL on any parse or range failure.
+ *
+ * The destination is an int, so the signed kstrtoint() parser is used rather
+ * than kstrtouint(), which expects an unsigned int pointer. Negative and
+ * over-range inputs are still rejected, either by the parser or by the lower
+ * bound check (assumes min_write_pgs is positive -- confirm).
+ */
+static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
+ const char *page, size_t len)
{
size_t c_len;
- int force;
+ int sec_per_write;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &force))
+ if (kstrtoint(page, 0, &sec_per_write))
return -EINVAL;
- if (force < 0 || force > 1)
+ if (sec_per_write < pblk->min_write_pgs
+ || sec_per_write > pblk->max_write_pgs
+ || sec_per_write % pblk->min_write_pgs != 0)
return -EINVAL;
- pblk_gc_sysfs_force(pblk, force);
+ pblk_set_sec_per_write(pblk, sec_per_write);
return len;
}
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
.mode = 0200,
};
-static struct attribute sys_gc_rl_max = {
- .name = "gc_rl_max",
- .mode = 0200,
+static struct attribute sys_max_sec_per_write = {
+ .name = "max_sec_per_write",
+ .mode = 0644,
};
#ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
&sys_errors_attr,
&sys_gc_state,
&sys_gc_force,
- &sys_gc_rl_max,
+ &sys_max_sec_per_write,
&sys_rb_attr,
&sys_stats_ppaf_attr,
&sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
return pblk_sysfs_lines(pblk, buf);
else if (strcmp(attr->name, "lines_info") == 0)
return pblk_sysfs_lines_info(pblk, buf);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_get_sec_per_write(pblk, buf);
#ifdef CONFIG_NVM_DEBUG
else if (strcmp(attr->name, "stats") == 0)
return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
{
struct pblk *pblk = container_of(kobj, struct pblk, kobj);
- if (strcmp(attr->name, "gc_rl_max") == 0)
- return pblk_sysfs_rate_store(pblk, buf, len);
- else if (strcmp(attr->name, "gc_force") == 0)
+ if (strcmp(attr->name, "gc_force") == 0)
return pblk_sysfs_gc_force(pblk, buf, len);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_set_sec_per_write(pblk, buf, len);
return 0;
}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7c4a0c..3ad9e56d2473 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -17,18 +17,6 @@
#include "pblk.h"
-static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
-{
-#ifdef CONFIG_NVM_DEBUG
- atomic_long_inc(&pblk->sync_writes);
-#endif
-
- /* Counter protected by rb sync lock */
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-}
-
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx)
{
@@ -39,28 +27,19 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
for (i = 0; i < c_ctx->nr_valid; i++) {
struct pblk_w_ctx *w_ctx;
- struct ppa_addr p;
- struct pblk_line *line;
w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
-
- p = rqd->ppa_list[i];
- line = &pblk->lines[pblk_dev_ppa_to_line(p)];
- pblk_sync_line(pblk, line);
-
while ((original_bio = bio_list_pop(&w_ctx->bios)))
bio_endio(original_bio);
}
#ifdef CONFIG_NVM_DEBUG
- atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+ atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
#endif
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
- if (rqd->meta_list)
- nvm_dev_dma_free(dev->parent, rqd->meta_list,
- rqd->dma_meta_list);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
bio_put(rqd->bio);
pblk_free_rqd(pblk, rqd, WRITE);
@@ -169,7 +148,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
}
INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
- queue_work(pblk->kw_wq, &recovery->ws_rec);
+ queue_work(pblk->close_wq, &recovery->ws_rec);
out:
pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +165,48 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
}
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
#endif
pblk_complete_write(pblk, rqd, c_ctx);
+ atomic_dec(&pblk->inflight_io);
+}
+
+/*
+ * Completion handler for line metadata write requests.
+ *
+ * The pblk instance comes from rqd->private and the line from the request's
+ * pdu context. After calling pblk_up_page() on the request's ppa list and
+ * logging any write error, the number of sectors in this request is added to
+ * emeta->sync; the request that brings the total to emeta->nr_entries
+ * schedules pblk_line_close_ws on the close workqueue. Request resources are
+ * then released and the in-flight I/O counter is decremented.
+ */
+static void pblk_end_io_write_meta(struct nvm_rq *rqd)
+{
+	struct pblk *pblk = rqd->private;
+	struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
+	struct pblk_line *line = m_ctx->private;
+	struct pblk_emeta *emeta = line->emeta;
+
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
+	if (rqd->error) {
+		pblk_log_write_err(pblk, rqd);
+		pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+	} else {
+#ifdef CONFIG_NVM_DEBUG
+		WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
+#endif
+	}
+
+	/* Only the request completing the last outstanding sectors closes */
+	if (atomic_add_return(rqd->nr_ppas, &emeta->sync) == emeta->nr_entries)
+		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
+				 pblk->close_wq);
+
+	/* rqd goes last: bio and meta_list are reached through it */
+	bio_put(rqd->bio);
+	nvm_dev_dma_free(pblk->dev->parent, rqd->meta_list,
+			 rqd->dma_meta_list);
+	pblk_free_rqd(pblk, rqd, READ);
+
+	atomic_dec(&pblk->inflight_io);
}
static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int nr_secs)
+ unsigned int nr_secs,
+ nvm_end_io_fn(*end_io))
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -202,16 +215,13 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
rqd->nr_ppas = nr_secs;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
rqd->private = pblk;
- rqd->end_io = pblk_end_io_write;
+ rqd->end_io = end_io;
rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
&rqd->dma_meta_list);
if (!rqd->meta_list)
return -ENOMEM;
- if (unlikely(nr_secs == 1))
- return 0;
-
rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
@@ -219,11 +229,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
}
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
+ struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
{
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
- struct ppa_addr erase_ppa;
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
unsigned int valid = c_ctx->nr_valid;
unsigned int padded = c_ctx->nr_padded;
unsigned int nr_secs = valid + padded;
@@ -231,40 +240,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
int ret = 0;
lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
- if (!lun_bitmap) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!lun_bitmap)
+ return -ENOMEM;
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+ ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
if (ret) {
kfree(lun_bitmap);
- goto out;
+ return ret;
}
- ppa_set_empty(&erase_ppa);
if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
else
pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, &erase_ppa);
-
-out:
- if (unlikely(e_line && !ppa_empty(erase_ppa))) {
- if (pblk_blk_erase_async(pblk, erase_ppa)) {
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int bit;
-
- atomic_inc(&e_line->left_eblks);
- bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
- WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
- up(&pblk->erase_sem);
- }
- }
+ valid, erase_ppa);
- return ret;
+ return 0;
}
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +272,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+ ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
if (ret)
return ret;
@@ -311,16 +303,234 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
return secs_to_sync;
}
+/*
+ * Decide whether the next emeta write for @meta_line may be issued together
+ * with the data write whose addresses are given in @ppa_list/@nr_ppas.
+ *
+ * Returns 1 when the caller should submit the metadata I/O now and 0 when it
+ * should be postponed (pblk_sched_meta_io() skips the submission on 0). As a
+ * side effect, data_line->meta_distance is decremented when the optimal
+ * position coincides with the current metadata position.
+ */
+static inline int pblk_valid_meta_ppa(struct pblk *pblk,
+ struct pblk_line *meta_line,
+ struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line *data_line;
+ struct ppa_addr ppa, ppa_opt;
+ u64 paddr;
+ int i;
+
+ /* The data line is derived from the first address of the data I/O */
+ data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
+ paddr = pblk_lookup_page(pblk, meta_line);
+ /* NOTE(review): line id 0 is passed; presumably only the position
+ * (channel/LUN) of this address is used below - confirm.
+ */
+ ppa = addr_to_gen_ppa(pblk, paddr, 0);
+
+ /* The metadata position is flagged in the data line's block bitmap.
+ * NOTE(review): presumably this marks a block the data line does not
+ * use, so no conflict with the data I/O is possible - confirm.
+ */
+ if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
+ return 1;
+
+ /* Schedule a metadata I/O that is half the distance from the data I/O
+ * with regards to the number of LUNs forming the pblk instance. This
+ * balances LUN conflicts across every I/O.
+ *
+ * When the LUN configuration changes (e.g., due to GC), this distance
+ * can align, which would result in a LUN deadlock. In this case, modify
+ * the distance to not be optimal, but allow metadata I/Os to succeed.
+ */
+ ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+ if (unlikely(ppa_opt.ppa == ppa.ppa)) {
+ data_line->meta_distance--;
+ return 0;
+ }
+
+ /* The data I/O touches the channel/LUN of the optimal position. Only
+ * the first address of each min_write_pgs group is inspected.
+ */
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa_opt.g.ch &&
+ ppa_list[i].g.lun == ppa_opt.g.lun)
+ return 1;
+
+ /* The optimal position is flagged in the data line's block bitmap, so
+ * the check above can never match it: submit unless the data I/O
+ * touches the channel/LUN the metadata itself is written to.
+ */
+ if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa.g.ch &&
+ ppa_list[i].g.lun == ppa.g.lun)
+ return 0;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Build and submit one write request carrying the next min_write_pgs sectors
+ * of @meta_line's emeta buffer.
+ *
+ * Returns NVM_IO_OK once the request has been submitted. Otherwise returns
+ * the error code of the step that failed (request allocation, bio mapping,
+ * DMA metadata allocation or I/O submission). When the submission itself
+ * fails, the sector allocation, the emeta write offset and the line's
+ * membership in the emeta list are rolled back so that the same chunk can be
+ * submitted again.
+ */
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = meta_line->emeta;
+ struct pblk_g_ctx *m_ctx;
+ struct bio *bio;
+ struct nvm_rq *rqd;
+ void *data;
+ u64 paddr;
+ int rq_ppas = pblk->min_write_pgs;
+ int id = meta_line->id;
+ int rq_len;
+ int dequeued = 0;
+ int i, j;
+ int ret;
+
+ /* NOTE(review): the request is allocated and freed with READ although
+ * it carries a write; presumably READ selects the generic request pool
+ * (the pdu is used as a struct pblk_g_ctx) - confirm.
+ */
+ rqd = pblk_alloc_rqd(pblk, READ);
+ if (IS_ERR(rqd)) {
+ pr_err("pblk: cannot allocate write req.\n");
+ return PTR_ERR(rqd);
+ }
+ m_ctx = nvm_rq_to_pdu(rqd);
+ m_ctx->private = meta_line;
+
+ /* Next chunk of the emeta buffer that has not been submitted yet */
+ rq_len = rq_ppas * geo->sec_size;
+ data = ((void *)emeta->buf) + emeta->mem;
+
+ bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
+ bio->bi_iter.bi_sector = 0; /* internal bio */
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ rqd->bio = bio;
+
+ ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
+ if (ret)
+ goto fail_free_bio;
+
+ /* Allocate the sectors in the meta line and fill the address list */
+ for (i = 0; i < rqd->nr_ppas; ) {
+ spin_lock(&meta_line->lock);
+ paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
+ spin_unlock(&meta_line->lock);
+ for (j = 0; j < rq_ppas; j++, i++, paddr++)
+ rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+ }
+
+ /* Once the whole emeta buffer has been scheduled the line leaves the
+ * emeta list. Remember it so that a failed submission can undo it.
+ */
+ emeta->mem += rq_len;
+ if (emeta->mem >= lm->emeta_len[0]) {
+ spin_lock(&l_mg->close_lock);
+ list_del(&meta_line->list);
+ dequeued = 1;
+ WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
+ "pblk: corrupt meta line %d\n", meta_line->id);
+ spin_unlock(&l_mg->close_lock);
+ }
+
+ pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
+ ret = pblk_submit_io(pblk, rqd);
+ if (ret) {
+ pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+ goto fail_rollback;
+ }
+
+ return NVM_IO_OK;
+
+fail_rollback:
+ pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+ spin_lock(&l_mg->close_lock);
+ pblk_dealloc_page(pblk, meta_line, rq_ppas);
+ /* The sectors were given back, so the chunk must be submitted again:
+ * rewind the write offset to match the deallocation.
+ */
+ emeta->mem -= rq_len;
+ /* Re-queue on the emeta list head (not on the node itself, which
+ * corrupts the list) and only if the line was actually removed above.
+ */
+ if (dequeued)
+ list_add(&meta_line->list, &l_mg->emeta_list);
+ spin_unlock(&l_mg->close_lock);
+
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+fail_free_bio:
+ if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, READ);
+ return ret;
+}
+
+/*
+ * Submit the next emeta write of the first line queued on the emeta list, if
+ * there is one and it does not conflict with the data write described by
+ * @prev_list/@prev_n.
+ *
+ * Returns 0 when nothing is submitted (empty list, first line already fully
+ * mapped, or pblk_valid_meta_ppa() asks to postpone); otherwise returns the
+ * result of pblk_submit_meta_io().
+ */
+static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
+ int prev_n)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line *meta_line;
+
+ spin_lock(&l_mg->close_lock);
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return 0;
+ }
+ meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
+ /* A fully mapped line that is still queued is about to be removed by
+ * whoever mapped its last sectors, which requires close_lock. Looping
+ * here with the lock held can never observe that removal, so back off
+ * and let the next data write try again.
+ */
+ if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line)) {
+ spin_unlock(&l_mg->close_lock);
+ return 0;
+ }
+ spin_unlock(&l_mg->close_lock);
+
+ if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
+ return 0;
+
+ return pblk_submit_meta_io(pblk, meta_line);
+}
+
+/*
+ * Map and submit a data write request together with the I/O that accompanies
+ * it: either a metadata write for a queued line (when the mapping produced
+ * no erase address) or an asynchronous erase for the next line.
+ *
+ * Returns NVM_IO_OK on success and NVM_IO_ERR when the request cannot be set
+ * up or when the metadata or data submission fails. A failed erase
+ * submission is not reported; the erase bookkeeping of the erase line is
+ * restored instead.
+ */
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct ppa_addr erase_ppa;
+ int err;
+
+ ppa_set_empty(&erase_ppa);
+
+ /* Assign lbas to ppas and populate request structure */
+ err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+ if (err) {
+ pr_err("pblk: could not setup write request: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ if (likely(ppa_empty(erase_ppa))) {
+ /* Submit metadata write for previous data line */
+ err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
+ if (err) {
+ pr_err("pblk: metadata I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+ } else {
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit available erase for next data line */
+ if (pblk_blk_erase_async(pblk, erase_ppa)) {
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int bit;
+
+ /* The erase was not issued: count the block as still
+ * pending and clear its bit in the erase bitmap.
+ */
+ atomic_inc(&e_line->left_eblks);
+ bit = pblk_ppa_to_pos(geo, erase_ppa);
+ WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+ }
+ }
+
+ return NVM_IO_OK;
+}
+
+/*
+ * Hand the request's bio to pblk_bio_free_pages() for its nr_padded pages,
+ * using rqd->nr_ppas as the offset argument. Does nothing when the write
+ * context reports no padded sectors.
+ */
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+
+ if (!c_ctx->nr_padded)
+ return;
+
+ pblk_bio_free_pages(pblk, rqd->bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
static int pblk_submit_write(struct pblk *pblk)
{
struct bio *bio;
struct nvm_rq *rqd;
- struct pblk_c_ctx *c_ctx;
- unsigned int pgs_read;
unsigned int secs_avail, secs_to_sync, secs_to_com;
unsigned int secs_to_flush;
unsigned long pos;
- int err;
/* If there are no sectors in the cache, flushes (bios without data)
* will be cleared on the cache threads
@@ -338,7 +548,6 @@ static int pblk_submit_write(struct pblk *pblk)
pr_err("pblk: cannot allocate write req.\n");
return 1;
}
- c_ctx = nvm_rq_to_pdu(rqd);
bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
if (!bio) {
@@ -358,29 +567,14 @@ static int pblk_submit_write(struct pblk *pblk)
secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
- pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
- secs_to_sync, secs_avail);
- if (!pgs_read) {
+ if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+ secs_avail)) {
pr_err("pblk: corrupted write bio\n");
goto fail_put_bio;
}
- if (c_ctx->nr_padded)
- if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
- goto fail_put_bio;
-
- /* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, c_ctx);
- if (err) {
- pr_err("pblk: could not setup write request\n");
- goto fail_free_bio;
- }
-
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: I/O submission failed: %d\n", err);
+ if (pblk_submit_io_set(pblk, rqd))
goto fail_free_bio;
- }
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +583,7 @@ static int pblk_submit_write(struct pblk *pblk)
return 0;
fail_free_bio:
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+ pblk_free_write_rqd(pblk, rqd);
fail_put_bio:
bio_put(bio);
fail_free_rqd:
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 99f3186b5288..0c5692cc2f60 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -40,6 +40,12 @@
#define PBLK_MAX_REQ_ADDRS (64)
#define PBLK_MAX_REQ_ADDRS_PW (6)
+#define PBLK_WS_POOL_SIZE (128)
+#define PBLK_META_POOL_SIZE (128)
+#define PBLK_READ_REQ_POOL_SIZE (1024)
+
+#define PBLK_NR_CLOSE_JOBS (4)
+
#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
#define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
PBLK_BLK_ST_CLOSED = 0x2,
};
+struct pblk_sec_meta {
+ u64 reserved;
+ __le64 lba;
+};
+
/* The number of GC lists and the rate-limiter states go together. This way the
* rate-limiter can dictate how much GC is needed based on resource utilization.
*/
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
enum {
PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
PBLK_RL_LOW = 3,
};
-struct pblk_sec_meta {
- u64 reserved;
- __le64 lba;
-};
-
#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
-/* write completion context */
+/* write buffer completion context */
struct pblk_c_ctx {
struct list_head list; /* Head for out-of-order completion */
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
unsigned int nr_padded;
};
-/* Read context */
-struct pblk_r_ctx {
- struct bio *orig_bio;
+/* generic context */
+struct pblk_g_ctx {
+ void *private;
+};
+
+/* Pad context */
+struct pblk_pad_rq {
+ struct pblk *pblk;
+ struct completion wait;
+ struct kref ref;
};
/* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
struct pblk_gc_rq {
struct pblk_line *line;
void *data;
- u64 *lba_list;
+ u64 lba_list[PBLK_MAX_REQ_ADDRS];
int nr_secs;
int secs_to_gc;
struct list_head list;
};
struct pblk_gc {
+ /* These states are not protected by a lock since (i) they are in the
+ * fast path, and (ii) they are not critical.
+ */
int gc_active;
int gc_enabled;
int gc_forced;
- int gc_jobs_active;
- atomic_t inflight_gc;
struct task_struct *gc_ts;
struct task_struct *gc_writer_ts;
+ struct task_struct *gc_reader_ts;
+
+ struct workqueue_struct *gc_line_reader_wq;
struct workqueue_struct *gc_reader_wq;
+
struct timer_list gc_timer;
+ struct semaphore gc_sem;
+ atomic_t inflight_gc;
int w_entries;
+
struct list_head w_list;
+ struct list_head r_list;
spinlock_t lock;
spinlock_t w_lock;
+ spinlock_t r_lock;
};
struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
*/
unsigned int high_pw; /* High rounded up as a power of 2 */
-#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent
- * available blks
- */
-#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
int rb_windows_pw; /* Number of rate windows in the write buffer
* given as a power-of-2. This guarantees that
@@ -244,13 +264,19 @@ struct pblk_rl {
*/
int rb_budget; /* Total number of entries available for I/O */
int rb_user_max; /* Max buffer entries available for user I/O */
- atomic_t rb_user_cnt; /* User I/O buffer counter */
int rb_gc_max; /* Max buffer entries available for GC I/O */
int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
int rb_state; /* Rate-limiter current state */
+
+ atomic_t rb_user_cnt; /* User I/O buffer counter */
atomic_t rb_gc_cnt; /* GC I/O buffer counter */
+ atomic_t rb_space; /* Space limit in case of reaching capacity */
+
+ int rsv_blocks; /* Reserved blocks for GC */
int rb_user_active;
+ int rb_gc_active;
+
struct timer_list u_timer;
unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
atomic_t free_blocks;
};
-#define PBLK_LINE_NR_LUN_BITMAP 2
-#define PBLK_LINE_NR_SEC_BITMAP 2
#define PBLK_LINE_EMPTY (~0U)
enum {
@@ -310,16 +334,19 @@ struct line_smeta {
__le32 window_wr_lun; /* Number of parallel LUNs to write */
__le32 rsvd[2];
+
+ __le64 lun_bitmap[];
};
/*
- * Metadata Layout:
- * 1. struct pblk_emeta
- * 2. nr_lbas u64 forming lba list
- * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
- * 4. nr_luns bits (u64 format) forming line bad block bitmap
- *
- * 3. and 4. will be part of FTL log
+ * Metadata layout in media:
+ * First sector:
+ * 1. struct line_emeta
+ * 2. bad block bitmap (u64 * window_wr_lun)
+ * Mid sectors (start at lbas_sector):
+ * 3. nr_lbas (u64) forming lba list
+ * Last sectors (start at vsc_sector):
+ * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
*/
struct line_emeta {
struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
__le32 next_id; /* Line id for next line */
__le64 nr_lbas; /* Number of lbas mapped in line */
__le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
+ __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
+};
+
+struct pblk_emeta {
+ struct line_emeta *buf; /* emeta buffer in media format */
+ int mem; /* Write offset - points to next
+ * writable entry in memory
+ */
+ atomic_t sync; /* Synced - backpointer that signals the
+ * last entry that has been successfully
+ * persisted to media
+ */
+ unsigned int nr_entries; /* Number of emeta entries */
+};
+
+struct pblk_smeta {
+ struct line_smeta *buf; /* smeta buffer in persistent format */
};
struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
- struct line_smeta *smeta; /* Start metadata */
- struct line_emeta *emeta; /* End metadata */
+ struct pblk_smeta *smeta; /* Start metadata */
+ struct pblk_emeta *emeta; /* End metadata */
+
int meta_line; /* Metadata line id */
+ int meta_distance; /* Distance between data and metadata */
+
u64 smeta_ssec; /* Sector where smeta starts */
u64 emeta_ssec; /* Sector where emeta starts */
@@ -374,9 +421,10 @@ struct pblk_line {
atomic_t left_seblks; /* Blocks left for sync erasing */
int left_msecs; /* Sectors left for mapping */
- int left_ssecs; /* Sectors left to sync */
unsigned int cur_sec; /* Sector map pointer */
- unsigned int vsc; /* Valid sector count in line */
+ unsigned int nr_valid_lbas; /* Number of valid lbas in line */
+
+ __le32 *vsc; /* Valid sector count in line */
struct kref ref; /* Write buffer L2P references */
@@ -385,13 +433,15 @@ struct pblk_line {
#define PBLK_DATA_LINES 4
-enum{
+enum {
PBLK_KMALLOC_META = 1,
PBLK_VMALLOC_META = 2,
};
-struct pblk_line_metadata {
- void *meta;
+enum {
+ PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
+ PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
+ PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
};
struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
struct list_head bad_list; /* Full lines bad */
/* GC lists - use gc_lock */
- struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+ struct list_head *gc_lists[PBLK_GC_NR_LISTS];
struct list_head gc_high_list; /* Full lines ready to GC, high isc */
struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
struct list_head gc_low_list; /* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
struct pblk_line *log_next; /* Next FTL log line */
struct pblk_line *data_next; /* Next data line */
+ struct list_head emeta_list; /* Lines queued to schedule emeta */
+
+ __le32 *vsc_list; /* Valid sector counts for all lines */
+
/* Metadata allocation type: VMALLOC | KMALLOC */
- int smeta_alloc_type;
int emeta_alloc_type;
/* Pre-allocated metadata for data lines */
- struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
- struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+ struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
+ struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
unsigned long meta_bitmap;
/* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
unsigned long l_seq_nr; /* Log line unique sequence number */
spinlock_t free_lock;
+ spinlock_t close_lock;
spinlock_t gc_lock;
};
struct pblk_line_meta {
unsigned int smeta_len; /* Total length for smeta */
- unsigned int smeta_sec; /* Sectors needed for smeta*/
- unsigned int emeta_len; /* Total length for emeta */
- unsigned int emeta_sec; /* Sectors needed for emeta*/
+ unsigned int smeta_sec; /* Sectors needed for smeta */
+
+ unsigned int emeta_len[4]; /* Lengths for emeta:
+ * [0]: Total length
+ * [1]: struct line_emeta length
+ * [2]: L2P portion length
+ * [3]: vsc list length
+ */
+ unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
+ * as emeta_len
+ */
+
unsigned int emeta_bb; /* Boundary for bb that affects emeta */
+
+ unsigned int vsc_list_len; /* Length for vsc list */
unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
unsigned int blk_bitmap_len; /* Length for block bitmap in line */
unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
unsigned int blk_per_line; /* Number of blocks in a full line */
unsigned int sec_per_line; /* Number of sectors in a line */
+ unsigned int dsec_per_line; /* Number of data sectors in a line */
unsigned int min_blk_line; /* Min. number of good blocks in line */
unsigned int mid_thrs; /* Threshold for GC mid list */
unsigned int high_thrs; /* Threshold for GC high list */
+
+ unsigned int meta_distance; /* Distance between data and metadata */
};
struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
u8 sec_offset;
};
+enum {
+ PBLK_STATE_RUNNING = 0,
+ PBLK_STATE_STOPPING = 1,
+ PBLK_STATE_RECOVERING = 2,
+ PBLK_STATE_STOPPED = 3,
+};
+
struct pblk {
struct nvm_tgt_dev *dev;
struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
struct pblk_rb rwb;
+ int state; /* pblk line state */
+
int min_write_pgs; /* Minimum amount of pages required by controller */
int max_write_pgs; /* Maximum amount of pages supported by controller */
int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
/* pblk provisioning values. Used by rate limiter */
struct pblk_rl rl;
- struct semaphore erase_sem;
+ int sec_per_write;
unsigned char instance_uuid[16];
#ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
atomic_long_t req_writes; /* Sectors stored on write buffer */
atomic_long_t sub_writes; /* Sectors submitted from buffer */
atomic_long_t sync_writes; /* Sectors synced to media */
- atomic_long_t compl_writes; /* Sectors completed in write bio */
atomic_long_t inflight_reads; /* Inflight sector read requests */
+ atomic_long_t cache_reads; /* Read requests that hit the cache */
atomic_long_t sync_reads; /* Completed sector read requests */
atomic_long_t recov_writes; /* Sectors submitted from recovery */
atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
atomic_long_t write_failed;
atomic_long_t erase_failed;
+ atomic_t inflight_io; /* General inflight I/O counter */
+
struct task_struct *writer_ts;
/* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
mempool_t *page_pool;
mempool_t *line_ws_pool;
mempool_t *rec_pool;
- mempool_t *r_rq_pool;
+ mempool_t *g_rq_pool;
mempool_t *w_rq_pool;
mempool_t *line_meta_pool;
- struct workqueue_struct *kw_wq;
+ struct workqueue_struct *close_wq;
+ struct workqueue_struct *bb_wq;
+
struct timer_list wtimer;
struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
struct work_struct ws;
};
-#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
/*
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
unsigned int pos);
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+void pblk_rb_flush(struct pblk_rb *rb);
void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count);
unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
struct list_head *list,
unsigned int max);
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter);
+ struct ppa_addr ppa, int bio_iter);
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,55 +693,67 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
* pblk core
*/
struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx);
void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
-void pblk_flush_writer(struct pblk *pblk);
+void pblk_wait_for_meta(struct pblk *pblk);
struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
void pblk_discard(struct pblk *pblk, struct bio *bio);
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask);
+ int alloc_type, gfp_t gfp_mask);
struct pblk_line *pblk_line_get(struct pblk *pblk);
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+void pblk_line_replace_data(struct pblk *pblk);
int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
int pblk_line_is_full(struct pblk_line *line);
void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_meta_sync(struct pblk *pblk);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_pipeline_stop(struct pblk *pblk);
void pblk_line_mark_bb(struct work_struct *work);
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *));
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq);
u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf);
int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
void pblk_line_put(struct kref *ref);
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush);
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
unsigned long *lun_bitmap);
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
unsigned long *lun_bitmap);
void pblk_end_bio_sync(struct bio *bio);
void pblk_end_io_sync(struct nvm_rq *rqd);
int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
int nr_pages);
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr);
void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
int nr_pages);
void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr);
void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
struct ppa_addr ppa);
@@ -702,6 +795,7 @@ void pblk_write_should_kick(struct pblk *pblk);
/*
* pblk read path
*/
+extern struct bio_set *pblk_bio_set;
int pblk_submit_read(struct pblk *pblk, struct bio *bio);
int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +805,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
*/
void pblk_submit_rec(struct work_struct *work);
struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-void pblk_recov_pad(struct pblk *pblk);
+int pblk_recov_pad(struct pblk *pblk);
__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +814,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
/*
* pblk gc
*/
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
int pblk_gc_init(struct pblk *pblk);
void pblk_gc_exit(struct pblk *pblk);
void pblk_gc_should_start(struct pblk *pblk);
void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
/*
* pblk rate limiter
*/
void pblk_rl_init(struct pblk_rl *rl, int budget);
void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
+int pblk_rl_is_limit(struct pblk_rl *rl);
/*
* pblk sysfs
@@ -774,9 +875,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
return c_ctx - sizeof(struct nvm_rq);
}
-static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+static inline void *emeta_to_bb(struct line_emeta *emeta)
+{
+ return emeta->bb_bitmap;
+}
+
+static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
+{
+ return ((void *)emeta + pblk->lm.emeta_len[1]);
+}
+
+static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
{
- return (emeta) + 1;
+ return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
+}
+
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+ int vsc;
+
+ spin_lock(&line->lock);
+ vsc = le32_to_cpu(*line->vsc);
+ spin_unlock(&line->lock);
+
+ return vsc;
}
#define NVM_MEM_PAGE_WRITE (8)
@@ -917,6 +1039,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
ppa_addr->ppa = ADDR_EMPTY;
}
+/* Return true when both addresses hold the same raw ppa value */
+static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
+{
+ return lppa.ppa == rppa.ppa;
+}
+
static inline int pblk_addr_in_cache(struct ppa_addr ppa)
{
return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1094,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
}
static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
- struct line_smeta *smeta)
+ struct line_header *header)
{
u32 crc = ~(u32)0;
- crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+ crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
sizeof(struct line_header) - sizeof(crc));
return crc;
@@ -996,7 +1126,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
crc = crc32_le(crc, (unsigned char *)emeta +
sizeof(struct line_header) + sizeof(crc),
- lm->emeta_len -
+ lm->emeta_len[0] -
sizeof(struct line_header) - sizeof(crc));
return crc;
@@ -1016,9 +1146,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
return flags;
}
-static inline int pblk_set_read_mode(struct pblk *pblk)
+enum {
+ PBLK_READ_RANDOM = 0,
+ PBLK_READ_SEQUENTIAL = 1,
+};
+
+/*
+ * Compute the I/O flags for a read request. Suspend and scrambling are always
+ * requested; for PBLK_READ_SEQUENTIAL the device plane mode, shifted right by
+ * one, is OR-ed in as well.
+ */
+static inline int pblk_set_read_mode(struct pblk *pblk, int type)
+{
+ struct nvm_geo *geo = &pblk->dev->geo;
+ int flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+
+ if (type == PBLK_READ_SEQUENTIAL)
+ flags |= geo->plane_mode >> 1;
+
+ return flags;
+}
+
+static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
{
- return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ return !(nr_secs % pblk->min_write_pgs);
}
#ifdef CONFIG_NVM_DEBUG
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a0ff61..267f01ae87e4 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
{
struct completion *waiting = bio->bi_private;
- if (bio->bi_error)
- pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+ if (bio->bi_status)
+ pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
complete(waiting);
}
@@ -359,7 +359,7 @@ try:
goto finished;
}
wait_for_completion_io(&wait);
- if (bio->bi_error) {
+ if (bio->bi_status) {
rrpc_inflight_laddr_release(rrpc, rqd);
goto finished;
}
@@ -385,7 +385,7 @@ try:
wait_for_completion_io(&wait);
rrpc_inflight_laddr_release(rrpc, rqd);
- if (bio->bi_error)
+ if (bio->bi_status)
goto finished;
bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
struct nvm_rq *rqd;
int err;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_op(bio) == REQ_OP_DISCARD) {
rrpc_discard(rrpc, bio);