diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-29 22:51:49 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-29 22:51:49 +0300 |
commit | 0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch) | |
tree | cefccd67dc1f27bb45830f6b8065dd4a1c05e83b /drivers/lightnvm/pblk.h | |
parent | 9697e9da84299d0d715d515dd2cc48f1eceb277d (diff) | |
parent | 796baeeef85a40b3495a907fb7425086e7010102 (diff) | |
download | linux-0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0.tar.xz |
Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block IO related changes for the
4.16 kernel. Nothing major in this pull request, but a good amount of
improvements and fixes all over the map. This contains:
- BFQ improvements, fixes, and cleanups from Angelo, Chiara, and
Paolo.
- Support for SMR zones for deadline and mq-deadline from Damien and
Christoph.
- Set of fixes for bcache by way of Michael Lyle, including fixes
from himself, Kent, Rui, Tang, and Coly.
- Series from Matias for lightnvm with fixes from Hans Holmberg,
Javier, and Matias. Mostly centered around pblk, and the removing
rrpc 1.2 in preparation for supporting 2.0.
- A couple of NVMe pull requests from Christoph. Nothing major in
here, just fixes and cleanups, and support for command tracing from
Johannes.
- Support for blk-throttle for tracking reads and writes separately.
From Joseph Qi. A few cleanups/fixes also for blk-throttle from
Weiping.
- Series from Mike Snitzer that enables dm to register its queue more
logically, something that's alwways been problematic on dm since
it's a stacked device.
- Series from Ming cleaning up some of the bio accessor use, in
preparation for supporting multipage bvecs.
- Various fixes from Ming closing up holes around queue mapping and
quiescing.
- BSD partition fix from Richard Narron, fixing a problem where we
can't mount newer (10/11) FreeBSD partitions.
- Series from Tejun reworking blk-mq timeout handling. The previous
scheme relied on atomic bits, but it had races where we would think
a request had timed out if it to reused at the wrong time.
- null_blk now supports faking timeouts, to enable us to better
exercise and test that functionality separately. From me.
- Kill the separate atomic poll bit in the request struct. After
this, we don't use the atomic bits on blk-mq anymore at all. From
me.
- sgl_alloc/free helpers from Bart.
- Heavily contended tag case scalability improvement from me.
- Various little fixes and cleanups from Arnd, Bart, Corentin,
Douglas, Eryu, Goldwyn, and myself"
* 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits)
block: remove smart1,2.h
nvme: add tracepoint for nvme_complete_rq
nvme: add tracepoint for nvme_setup_cmd
nvme-pci: introduce RECONNECTING state to mark initializing procedure
nvme-rdma: remove redundant boolean for inline_data
nvme: don't free uuid pointer before printing it
nvme-pci: Suspend queues after deleting them
bsg: use pr_debug instead of hand crafted macros
blk-mq-debugfs: don't allow write on attributes with seq_operations set
nvme-pci: Fix queue double allocations
block: Set BIO_TRACE_COMPLETION on new bio during split
blk-throttle: use queue_is_rq_based
block: Remove kblockd_schedule_delayed_work{,_on}()
blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays
blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly()
lib/scatterlist: Fix chaining support in sgl_alloc_order()
blk-throttle: track read and write request individually
block: add bdev_read_only() checks to common helpers
block: fail op_is_write() requests to read-only partitions
blk-throttle: export io_serviced_recursive, io_service_bytes_recursive
...
Diffstat (limited to 'drivers/lightnvm/pblk.h')
-rw-r--r-- | drivers/lightnvm/pblk.h | 163 |
1 files changed, 68 insertions, 95 deletions
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 59a64d461a5d..8c357fb6538e 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -51,17 +51,16 @@ #define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR) -#define pblk_for_each_lun(pblk, rlun, i) \ - for ((i) = 0, rlun = &(pblk)->luns[0]; \ - (i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)]) - /* Static pool sizes */ #define PBLK_GEN_WS_POOL_SIZE (2) +#define PBLK_DEFAULT_OP (11) + enum { PBLK_READ = READ, PBLK_WRITE = WRITE,/* Write from write buffer */ PBLK_WRITE_INT, /* Internal write - no write buffer */ + PBLK_READ_RECOV, /* Recovery read - errors allowed */ PBLK_ERASE, }; @@ -114,6 +113,7 @@ struct pblk_c_ctx { /* read context */ struct pblk_g_ctx { void *private; + unsigned long start_time; u64 lba; }; @@ -170,7 +170,7 @@ struct pblk_rb { * the last submitted entry that has * been successfully persisted to media */ - unsigned int sync_point; /* Sync point - last entry that must be + unsigned int flush_point; /* Sync point - last entry that must be * flushed to the media. Used with * REQ_FLUSH and REQ_FUA */ @@ -193,7 +193,7 @@ struct pblk_rb { spinlock_t s_lock; /* Sync lock */ #ifdef CONFIG_NVM_DEBUG - atomic_t inflight_sync_point; /* Not served REQ_FLUSH | REQ_FUA */ + atomic_t inflight_flush_point; /* Not served REQ_FLUSH | REQ_FUA */ #endif }; @@ -256,9 +256,6 @@ struct pblk_rl { unsigned int high; /* Upper threshold for rate limiter (free run - * user I/O rate limiter */ - unsigned int low; /* Lower threshold for rate limiter (user I/O - * rate limiter - stall) - */ unsigned int high_pw; /* High rounded up as a power of 2 */ #define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */ @@ -292,7 +289,9 @@ struct pblk_rl { unsigned long long nr_secs; unsigned long total_blocks; - atomic_t free_blocks; + + atomic_t free_blocks; /* Total number of free blocks (+ OP) */ + atomic_t free_user_blocks; /* Number of user free blocks (no OP) */ }; #define PBLK_LINE_EMPTY (~0U) @@ -583,7 +582,9 @@ struct pblk { */ sector_t capacity; /* Device capacity when bad blocks are subtracted */ - int over_pct; /* Percentage of device used for over-provisioning */ + + int op; /* Percentage of device used for over-provisioning */ + int op_blks; /* Number of blocks used for over-provisioning */ /* pblk provisioning values. Used by rate limiter */ struct pblk_rl rl; @@ -691,7 +692,7 @@ unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, struct ppa_addr *ppa); void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); -unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb); +unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb); unsigned int pblk_rb_read_count(struct pblk_rb *rb); unsigned int pblk_rb_sync_count(struct pblk_rb *rb); @@ -812,7 +813,7 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); void pblk_submit_rec(struct work_struct *work); struct pblk_line *pblk_recov_l2p(struct pblk *pblk); int pblk_recov_pad(struct pblk *pblk); -__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta); +int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta); int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, struct pblk_rec_ctx *recovery, u64 *comp_bits, unsigned int comp); @@ -843,6 +844,7 @@ void pblk_rl_free(struct pblk_rl *rl); void pblk_rl_update_rates(struct pblk_rl *rl); int pblk_rl_high_thrs(struct pblk_rl *rl); unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); +unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl); int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries); void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); @@ -851,7 +853,8 @@ void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); int pblk_rl_max_io(struct pblk_rl *rl); void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); -void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); +void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, + bool used); int pblk_rl_is_limit(struct pblk_rl *rl); /* @@ -907,28 +910,47 @@ static inline int pblk_pad_distance(struct pblk *pblk) struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - return NVM_MEM_PAGE_WRITE * geo->nr_luns * geo->sec_per_pl; + return NVM_MEM_PAGE_WRITE * geo->all_luns * geo->sec_per_pl; } -static inline int pblk_dev_ppa_to_line(struct ppa_addr p) +static inline int pblk_ppa_to_line(struct ppa_addr p) { return p.g.blk; } -static inline int pblk_tgt_ppa_to_line(struct ppa_addr p) +static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) { - return p.g.blk; + return p.g.lun * geo->nr_chnls + p.g.ch; } -static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) +static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, + u64 line_id) { - return p.g.lun * geo->nr_chnls + p.g.ch; + struct ppa_addr ppa; + + ppa.ppa = 0; + ppa.g.blk = line_id; + ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset; + ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset; + ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset; + ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset; + ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset; + + return ppa; } -/* A block within a line corresponds to the lun */ -static inline int pblk_dev_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) +static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, + struct ppa_addr p) { - return p.g.lun * geo->nr_chnls + p.g.ch; + u64 paddr; + + paddr = (u64)p.g.pg << pblk->ppaf.pg_offset; + paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset; + paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset; + paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset; + paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset; + + return paddr; } static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) @@ -960,24 +982,6 @@ static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) return ppa64; } -static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, - sector_t lba) -{ - struct ppa_addr ppa; - - if (pblk->ppaf_bitsize < 32) { - u32 *map = (u32 *)pblk->trans_map; - - ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); - } else { - struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map; - - ppa = map[lba]; - } - - return ppa; -} - static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) { u32 ppa32 = 0; @@ -999,33 +1003,36 @@ static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) return ppa32; } -static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa) +static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, + sector_t lba) { + struct ppa_addr ppa; + if (pblk->ppaf_bitsize < 32) { u32 *map = (u32 *)pblk->trans_map; - map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); + ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); } else { - u64 *map = (u64 *)pblk->trans_map; + struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map; - map[lba] = ppa.ppa; + ppa = map[lba]; } + + return ppa; } -static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, - struct ppa_addr p) +static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa) { - u64 paddr; + if (pblk->ppaf_bitsize < 32) { + u32 *map = (u32 *)pblk->trans_map; - paddr = 0; - paddr |= (u64)p.g.pg << pblk->ppaf.pg_offset; - paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset; - paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset; - paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset; - paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset; + map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); + } else { + u64 *map = (u64 *)pblk->trans_map; - return paddr; + map[lba] = ppa.ppa; + } } static inline int pblk_ppa_empty(struct ppa_addr ppa_addr) @@ -1040,10 +1047,7 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr) static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa) { - if (lppa.ppa == rppa.ppa) - return true; - - return false; + return (lppa.ppa == rppa.ppa); } static inline int pblk_addr_in_cache(struct ppa_addr ppa) @@ -1066,32 +1070,6 @@ static inline struct ppa_addr pblk_cacheline_to_addr(int addr) return p; } -static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, - u64 line_id) -{ - struct ppa_addr ppa; - - ppa.ppa = 0; - ppa.g.blk = line_id; - ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset; - ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset; - ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset; - ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset; - ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset; - - return ppa; -} - -static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr, - u64 line_id) -{ - struct ppa_addr ppa; - - ppa = addr_to_gen_ppa(pblk, paddr, line_id); - - return ppa; -} - static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk, struct line_header *header) { @@ -1212,10 +1190,10 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, if (!ppa->c.is_cached && ppa->g.ch < geo->nr_chnls && - ppa->g.lun < geo->luns_per_chnl && + ppa->g.lun < geo->nr_luns && ppa->g.pl < geo->nr_planes && - ppa->g.blk < geo->blks_per_lun && - ppa->g.pg < geo->pgs_per_blk && + ppa->g.blk < geo->nr_chks && + ppa->g.pg < geo->ws_per_chk && ppa->g.sec < geo->sec_per_pg) continue; @@ -1245,7 +1223,7 @@ static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) for (i = 0; i < rqd->nr_ppas; i++) { ppa = ppa_list[i]; - line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + line = &pblk->lines[pblk_ppa_to_line(ppa)]; spin_lock(&line->lock); if (line->state != PBLK_LINESTATE_OPEN) { @@ -1288,11 +1266,6 @@ static inline unsigned int pblk_get_secs(struct bio *bio) return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE; } -static inline sector_t pblk_get_sector(sector_t lba) -{ - return lba * NR_PHY_IN_LOG; -} - static inline void pblk_setup_uuid(struct pblk *pblk) { uuid_le uuid; |