diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bio-integrity.c | 10 | ||||
-rw-r--r-- | block/bio.c | 55 | ||||
-rw-r--r-- | block/blk-cgroup.c | 4 | ||||
-rw-r--r-- | block/blk-core.c | 189 | ||||
-rw-r--r-- | block/blk-exec.c | 2 | ||||
-rw-r--r-- | block/blk-flush.c | 23 | ||||
-rw-r--r-- | block/blk-lib.c | 63 | ||||
-rw-r--r-- | block/blk-map.c | 27 | ||||
-rw-r--r-- | block/blk-merge.c | 34 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 26 | ||||
-rw-r--r-- | block/blk-mq.c | 85 | ||||
-rw-r--r-- | block/blk-sysfs.c | 11 | ||||
-rw-r--r-- | block/blk.h | 2 | ||||
-rw-r--r-- | block/cfq-iosched.c | 432 | ||||
-rw-r--r-- | block/deadline-iosched.c | 7 | ||||
-rw-r--r-- | block/elevator.c | 29 | ||||
-rw-r--r-- | block/genhd.c | 23 | ||||
-rw-r--r-- | block/ioprio.c | 2 | ||||
-rw-r--r-- | block/partition-generic.c | 3 | ||||
-rw-r--r-- | block/partitions/atari.c | 7 |
20 files changed, 592 insertions, 442 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 711e4d8de6fa..f70cc3bdfd01 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -26,6 +26,7 @@ #include <linux/bio.h> #include <linux/workqueue.h> #include <linux/slab.h> +#include "blk.h" #define BIP_INLINE_VECS 4 @@ -53,7 +54,6 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, { struct bio_integrity_payload *bip; struct bio_set *bs = bio->bi_pool; - unsigned long idx = BIO_POOL_NONE; unsigned inline_vecs; if (!bs || !bs->bio_integrity_pool) { @@ -71,17 +71,19 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, memset(bip, 0, sizeof(*bip)); if (nr_vecs > inline_vecs) { + unsigned long idx = 0; + bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; bip->bip_max_vcnt = bvec_nr_vecs(idx); + bip->bip_slab = idx; } else { bip->bip_vec = bip->bip_inline_vecs; bip->bip_max_vcnt = inline_vecs; } - bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; bio->bi_rw |= REQ_INTEGRITY; @@ -110,9 +112,7 @@ void bio_integrity_free(struct bio *bio) bip->bip_vec->bv_offset); if (bs && bs->bio_integrity_pool) { - if (bip->bip_slab != BIO_POOL_NONE) - bvec_free(bs->bvec_integrity_pool, bip->bip_vec, - bip->bip_slab); + bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); mempool_free(bip, bs->bio_integrity_pool); } else { diff --git a/block/bio.c b/block/bio.c index 0e4aa42bc30d..54ee3846c3a5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -43,7 +43,7 @@ * unsigned short */ #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { +static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV @@ -160,11 +160,15 @@ unsigned int bvec_nr_vecs(unsigned short idx) void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) { - BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); + if (!idx) + return; + idx--; + + BIO_BUG_ON(idx >= BVEC_POOL_NR); - if (idx == BIOVEC_MAX_IDX) + if (idx == BVEC_POOL_MAX) { mempool_free(bv, pool); - else { + } else { struct biovec_slab *bvs = bvec_slabs + idx; kmem_cache_free(bvs->slab, bv); @@ -206,7 +210,7 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, * idx now points to the pool we want to allocate from. only the * 1-vec entry pool is mempool backed. */ - if (*idx == BIOVEC_MAX_IDX) { + if (*idx == BVEC_POOL_MAX) { fallback: bvl = mempool_alloc(pool, gfp_mask); } else { @@ -226,11 +230,12 @@ fallback: */ bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) { - *idx = BIOVEC_MAX_IDX; + *idx = BVEC_POOL_MAX; goto fallback; } } + (*idx)++; return bvl; } @@ -250,8 +255,7 @@ static void bio_free(struct bio *bio) __bio_free(bio); if (bs) { - if (bio_flagged(bio, BIO_OWNS_VEC)) - bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); + bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); /* * If we have front padding, adjust the bio pointer before freeing @@ -420,7 +424,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) gfp_t saved_gfp = gfp_mask; unsigned front_pad; unsigned inline_vecs; - unsigned long idx = BIO_POOL_NONE; struct bio_vec *bvl = NULL; struct bio *bio; void *p; @@ -480,6 +483,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (nr_iovecs > inline_vecs) { + unsigned long idx = 0; + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); @@ -490,13 +495,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (unlikely(!bvl)) goto err_free; - bio_set_flag(bio, BIO_OWNS_VEC); + bio->bi_flags |= idx << BVEC_POOL_OFFSET; } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } bio->bi_pool = bs; - bio->bi_flags |= idx << BIO_POOL_OFFSET; bio->bi_max_vecs = nr_iovecs; bio->bi_io_vec = bvl; return bio; @@ -568,7 +572,7 @@ EXPORT_SYMBOL(bio_phys_segments); */ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) { - BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE); + BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio)); /* * most users will be overriding ->bi_bdev with a new target, @@ -656,16 +660,15 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); if (!bio) return NULL; - bio->bi_bdev = bio_src->bi_bdev; bio->bi_rw = bio_src->bi_rw; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) goto integrity_clone; - if (bio->bi_rw & REQ_WRITE_SAME) { + if (bio_op(bio) == REQ_OP_WRITE_SAME) { bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; goto integrity_clone; } @@ -854,21 +857,20 @@ static void submit_bio_wait_endio(struct bio *bio) /** * submit_bio_wait - submit a bio, and wait until it completes - * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) * @bio: The &struct bio which describes the I/O * * Simple wrapper around submit_bio(). Returns 0 on success, or the error from * bio_endio() on failure. */ -int submit_bio_wait(int rw, struct bio *bio) +int submit_bio_wait(struct bio *bio) { struct submit_bio_ret ret; - rw |= REQ_SYNC; init_completion(&ret.event); bio->bi_private = &ret; bio->bi_end_io = submit_bio_wait_endio; - submit_bio(rw, bio); + bio->bi_rw |= REQ_SYNC; + submit_bio(bio); wait_for_completion_io(&ret.event); return ret.error; @@ -1099,7 +1101,6 @@ int bio_uncopy_user(struct bio *bio) bio_put(bio); return ret; } -EXPORT_SYMBOL(bio_uncopy_user); /** * bio_copy_user_iov - copy user data to bio @@ -1167,7 +1168,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, goto out_bmd; if (iter->type & WRITE) - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); ret = 0; @@ -1337,7 +1338,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, * set data direction, and check if mapped pages need bouncing */ if (iter->type & WRITE) - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio_set_flag(bio, BIO_USER_MAPPED); @@ -1394,7 +1395,6 @@ void bio_unmap_user(struct bio *bio) __bio_unmap_user(bio); bio_put(bio); } -EXPORT_SYMBOL(bio_unmap_user); static void bio_map_kern_endio(struct bio *bio) { @@ -1530,7 +1530,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, bio->bi_private = data; } else { bio->bi_end_io = bio_copy_kern_endio; - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); } return bio; @@ -1540,7 +1540,6 @@ cleanup: bio_put(bio); return ERR_PTR(-ENOMEM); } -EXPORT_SYMBOL(bio_copy_kern); /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions @@ -1785,7 +1784,7 @@ struct bio *bio_split(struct bio *bio, int sectors, * Discards need a mutable bio_vec to accommodate the payload * required by the DSM TRIM and UNMAP commands. */ - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) split = bio_clone_bioset(bio, gfp, bs); else split = bio_clone_fast(bio, gfp, bs); @@ -1834,7 +1833,7 @@ EXPORT_SYMBOL_GPL(bio_trim); */ mempool_t *biovec_create_pool(int pool_entries) { - struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; + struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX; return mempool_create_slab_pool(pool_entries, bp->slab); } @@ -2011,7 +2010,7 @@ static void __init biovec_init_slabs(void) { int i; - for (i = 0; i < BIOVEC_NR_POOLS; i++) { + for (i = 0; i < BVEC_POOL_NR; i++) { int size; struct biovec_slab *bvs = bvec_slabs + i; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 66e6f1aae02e..dd38e5ced4a3 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -905,7 +905,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) return 0; } -struct cftype blkcg_files[] = { +static struct cftype blkcg_files[] = { { .name = "stat", .flags = CFTYPE_NOT_ON_ROOT, @@ -914,7 +914,7 @@ struct cftype blkcg_files[] = { { } /* terminate */ }; -struct cftype blkcg_legacy_files[] = { +static struct cftype blkcg_legacy_files[] = { { .name = "reset_stats", .write_u64 = blkcg_reset_stats, diff --git a/block/blk-core.c b/block/blk-core.c index 2475b1c72773..a687e9cc16c2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -959,10 +959,10 @@ static void __freed_request(struct request_list *rl, int sync) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. */ -static void freed_request(struct request_list *rl, unsigned int flags) +static void freed_request(struct request_list *rl, int op, unsigned int flags) { struct request_queue *q = rl->q; - int sync = rw_is_sync(flags); + int sync = rw_is_sync(op, flags); q->nr_rqs[sync]--; rl->count[sync]--; @@ -1029,7 +1029,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio) * Flush requests do not use the elevator so skip initialization. * This allows a request to share the flush and elevator data. */ - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) + if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) return false; return true; @@ -1054,7 +1054,8 @@ static struct io_context *rq_ioc(struct bio *bio) /** * __get_request - get a free request * @rl: request list to allocate from - * @rw_flags: RW and SYNC flags + * @op: REQ_OP_READ/REQ_OP_WRITE + * @op_flags: rq_flag_bits * @bio: bio to allocate request for (can be %NULL) * @gfp_mask: allocation mask * @@ -1065,21 +1066,22 @@ static struct io_context *rq_ioc(struct bio *bio) * Returns ERR_PTR on failure, with @q->queue_lock held. * Returns request pointer on success, with @q->queue_lock *not held*. */ -static struct request *__get_request(struct request_list *rl, int rw_flags, - struct bio *bio, gfp_t gfp_mask) +static struct request *__get_request(struct request_list *rl, int op, + int op_flags, struct bio *bio, + gfp_t gfp_mask) { struct request_queue *q = rl->q; struct request *rq; struct elevator_type *et = q->elevator->type; struct io_context *ioc = rq_ioc(bio); struct io_cq *icq = NULL; - const bool is_sync = rw_is_sync(rw_flags) != 0; + const bool is_sync = rw_is_sync(op, op_flags) != 0; int may_queue; if (unlikely(blk_queue_dying(q))) return ERR_PTR(-ENODEV); - may_queue = elv_may_queue(q, rw_flags); + may_queue = elv_may_queue(q, op, op_flags); if (may_queue == ELV_MQUEUE_NO) goto rq_starved; @@ -1123,7 +1125,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, /* * Decide whether the new request will be managed by elevator. If - * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will + * so, mark @op_flags and increment elvpriv. Non-zero elvpriv will * prevent the current elevator from being destroyed until the new * request is freed. This guarantees icq's won't be destroyed and * makes creating new ones safe. @@ -1132,14 +1134,14 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, * it will be created after releasing queue_lock. */ if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { - rw_flags |= REQ_ELVPRIV; + op_flags |= REQ_ELVPRIV; q->nr_rqs_elvpriv++; if (et->icq_cache && ioc) icq = ioc_lookup_icq(ioc, q); } if (blk_queue_io_stat(q)) - rw_flags |= REQ_IO_STAT; + op_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); /* allocate and init request */ @@ -1149,10 +1151,10 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, blk_rq_init(q, rq); blk_rq_set_rl(rq, rl); - rq->cmd_flags = rw_flags | REQ_ALLOCED; + req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED); /* init elvpriv */ - if (rw_flags & REQ_ELVPRIV) { + if (op_flags & REQ_ELVPRIV) { if (unlikely(et->icq_cache && !icq)) { if (ioc) icq = ioc_create_icq(ioc, q, gfp_mask); @@ -1178,7 +1180,7 @@ out: if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; - trace_block_getrq(q, bio, rw_flags & 1); + trace_block_getrq(q, bio, op); return rq; fail_elvpriv: @@ -1208,7 +1210,7 @@ fail_alloc: * queue, but this is pretty rare. */ spin_lock_irq(q->queue_lock); - freed_request(rl, rw_flags); + freed_request(rl, op, op_flags); /* * in the very unlikely event that allocation failed and no @@ -1226,7 +1228,8 @@ rq_starved: /** * get_request - get a free request * @q: request_queue to allocate request from - * @rw_flags: RW and SYNC flags + * @op: REQ_OP_READ/REQ_OP_WRITE + * @op_flags: rq_flag_bits * @bio: bio to allocate request for (can be %NULL) * @gfp_mask: allocation mask * @@ -1237,17 +1240,18 @@ rq_starved: * Returns ERR_PTR on failure, with @q->queue_lock held. * Returns request pointer on success, with @q->queue_lock *not held*. */ -static struct request *get_request(struct request_queue *q, int rw_flags, - struct bio *bio, gfp_t gfp_mask) +static struct request *get_request(struct request_queue *q, int op, + int op_flags, struct bio *bio, + gfp_t gfp_mask) { - const bool is_sync = rw_is_sync(rw_flags) != 0; + const bool is_sync = rw_is_sync(op, op_flags) != 0; DEFINE_WAIT(wait); struct request_list *rl; struct request *rq; rl = blk_get_rl(q, bio); /* transferred to @rq on success */ retry: - rq = __get_request(rl, rw_flags, bio, gfp_mask); + rq = __get_request(rl, op, op_flags, bio, gfp_mask); if (!IS_ERR(rq)) return rq; @@ -1260,7 +1264,7 @@ retry: prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, TASK_UNINTERRUPTIBLE); - trace_block_sleeprq(q, bio, rw_flags & 1); + trace_block_sleeprq(q, bio, op); spin_unlock_irq(q->queue_lock); io_schedule(); @@ -1289,11 +1293,16 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, create_io_context(gfp_mask, q->node); spin_lock_irq(q->queue_lock); - rq = get_request(q, rw, NULL, gfp_mask); - if (IS_ERR(rq)) + rq = get_request(q, rw, 0, NULL, gfp_mask); + if (IS_ERR(rq)) { spin_unlock_irq(q->queue_lock); - /* q->queue_lock is unlocked at this point */ + return rq; + } + /* q->queue_lock is unlocked at this point */ + rq->__data_len = 0; + rq->__sector = (sector_t) -1; + rq->bio = rq->biotail = NULL; return rq; } @@ -1309,63 +1318,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) EXPORT_SYMBOL(blk_get_request); /** - * blk_make_request - given a bio, allocate a corresponding struct request. - * @q: target request queue - * @bio: The bio describing the memory mappings that will be submitted for IO. - * It may be a chained-bio properly constructed by block/bio layer. - * @gfp_mask: gfp flags to be used for memory allocation - * - * blk_make_request is the parallel of generic_make_request for BLOCK_PC - * type commands. Where the struct request needs to be farther initialized by - * the caller. It is passed a &struct bio, which describes the memory info of - * the I/O transfer. - * - * The caller of blk_make_request must make sure that bi_io_vec - * are set to describe the memory buffers. That bio_data_dir() will return - * the needed direction of the request. (And all bio's in the passed bio-chain - * are properly set accordingly) - * - * If called under none-sleepable conditions, mapped bio buffers must not - * need bouncing, by calling the appropriate masked or flagged allocator, - * suitable for the target device. Otherwise the call to blk_queue_bounce will - * BUG. - * - * WARNING: When allocating/cloning a bio-chain, careful consideration should be - * given to how you allocate bios. In particular, you cannot use - * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise - * you risk waiting for IO completion of a bio that hasn't been submitted yet, - * thus resulting in a deadlock. Alternatively bios should be allocated using - * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock. - * If possible a big IO should be split into smaller parts when allocation - * fails. Partial allocation should not be an error, or you risk a live-lock. - */ -struct request *blk_make_request(struct request_queue *q, struct bio *bio, - gfp_t gfp_mask) -{ - struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); - - if (IS_ERR(rq)) - return rq; - - blk_rq_set_block_pc(rq); - - for_each_bio(bio) { - struct bio *bounce_bio = bio; - int ret; - - blk_queue_bounce(q, &bounce_bio); - ret = blk_rq_append_bio(q, rq, bounce_bio); - if (unlikely(ret)) { - blk_put_request(rq); - return ERR_PTR(ret); - } - } - - return rq; -} -EXPORT_SYMBOL(blk_make_request); - -/** * blk_rq_set_block_pc - initialize a request to type BLOCK_PC * @rq: request to be initialized * @@ -1373,9 +1325,6 @@ EXPORT_SYMBOL(blk_make_request); void blk_rq_set_block_pc(struct request *rq) { rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->__data_len = 0; - rq->__sector = (sector_t) -1; - rq->bio = rq->biotail = NULL; memset(rq->__cmd, 0, sizeof(rq->__cmd)); } EXPORT_SYMBOL(blk_rq_set_block_pc); @@ -1491,13 +1440,14 @@ void __blk_put_request(struct request_queue *q, struct request *req) */ if (req->cmd_flags & REQ_ALLOCED) { unsigned int flags = req->cmd_flags; + int op = req_op(req); struct request_list *rl = blk_rq_rl(req); BUG_ON(!list_empty(&req->queuelist)); BUG_ON(ELV_ON_HASH(req)); blk_free_request(rl, req); - freed_request(rl, flags); + freed_request(rl, op, flags); blk_put_rl(rl); } } @@ -1712,7 +1662,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; - int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; + int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT; struct request *req; unsigned int request_count = 0; @@ -1731,7 +1681,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { + if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) { spin_lock_irq(q->queue_lock); where = ELEVATOR_INSERT_FLUSH; goto get_rq; @@ -1772,15 +1722,19 @@ get_rq: * but we need to set it earlier to expose the sync flag to the * rq allocator and io schedulers. */ - rw_flags = bio_data_dir(bio); if (sync) rw_flags |= REQ_SYNC; /* + * Add in META/PRIO flags, if set, before we get to the IO scheduler + */ + rw_flags |= (bio->bi_rw & (REQ_META | REQ_PRIO)); + + /* * Grab a free request. This is might sleep but can not fail. * Returns with the queue unlocked. */ - req = get_request(q, rw_flags, bio, GFP_NOIO); + req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO); if (IS_ERR(req)) { bio->bi_error = PTR_ERR(req); bio_endio(bio); @@ -1849,7 +1803,7 @@ static void handle_bad_sector(struct bio *bio) char b[BDEVNAME_SIZE]; printk(KERN_INFO "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", + printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", bdevname(bio->bi_bdev, b), bio->bi_rw, (unsigned long long)bio_end_sector(bio), @@ -1964,25 +1918,30 @@ generic_make_request_checks(struct bio *bio) * drivers without flush support don't have to worry * about them. */ - if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && + if ((bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) && !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { - bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); + bio->bi_rw &= ~(REQ_PREFLUSH | REQ_FUA); if (!nr_sectors) { err = 0; goto end_io; } } - if ((bio->bi_rw & REQ_DISCARD) && - (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { - err = -EOPNOTSUPP; - goto end_io; - } - - if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { - err = -EOPNOTSUPP; - goto end_io; + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + if (!blk_queue_discard(q)) + goto not_supported; + break; + case REQ_OP_SECURE_ERASE: + if (!blk_queue_secure_erase(q)) + goto not_supported; + break; + case REQ_OP_WRITE_SAME: + if (!bdev_write_same(bio->bi_bdev)) + goto not_supported; + break; + default: + break; } /* @@ -1999,6 +1958,8 @@ generic_make_request_checks(struct bio *bio) trace_block_bio_queue(q, bio); return true; +not_supported: + err = -EOPNOTSUPP; end_io: bio->bi_error = err; bio_endio(bio); @@ -2094,7 +2055,6 @@ EXPORT_SYMBOL(generic_make_request); /** * submit_bio - submit a bio to the block device layer for I/O - * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) * @bio: The &struct bio which describes the I/O * * submit_bio() is very similar in purpose to generic_make_request(), and @@ -2102,10 +2062,8 @@ EXPORT_SYMBOL(generic_make_request); * interfaces; @bio must be presetup and ready for I/O. * */ -blk_qc_t submit_bio(int rw, struct bio *bio) +blk_qc_t submit_bio(struct bio *bio) { - bio->bi_rw |= rw; - /* * If it's a regular read/write or a barrier with data attached, * go through the normal accounting stuff before submission. @@ -2113,12 +2071,12 @@ blk_qc_t submit_bio(int rw, struct bio *bio) if (bio_has_data(bio)) { unsigned int count; - if (unlikely(rw & REQ_WRITE_SAME)) + if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) count = bdev_logical_block_size(bio->bi_bdev) >> 9; else count = bio_sectors(bio); - if (rw & WRITE) { + if (op_is_write(bio_op(bio))) { count_vm_events(PGPGOUT, count); } else { task_io_account_read(bio->bi_iter.bi_size); @@ -2129,7 +2087,7 @@ blk_qc_t submit_bio(int rw, struct bio *bio) char b[BDEVNAME_SIZE]; printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", current->comm, task_pid_nr(current), - (rw & WRITE) ? "WRITE" : "READ", + op_is_write(bio_op(bio)) ? "WRITE" : "READ", (unsigned long long)bio->bi_iter.bi_sector, bdevname(bio->bi_bdev, b), count); @@ -2160,7 +2118,7 @@ EXPORT_SYMBOL(submit_bio); static int blk_cloned_rq_check_limits(struct request_queue *q, struct request *rq) { - if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { + if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } @@ -2216,7 +2174,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) + if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) where = ELEVATOR_INSERT_FLUSH; add_acct_request(q, rq, where); @@ -2979,8 +2937,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request_err); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { - /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ - rq->cmd_flags |= bio->bi_rw & REQ_WRITE; + req_set_op(rq, bio_op(bio)); if (bio_has_data(bio)) rq->nr_phys_segments = bio_phys_segments(q, bio); @@ -3065,7 +3022,8 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; + req_set_op_attrs(dst, req_op(src), + (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE); dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); @@ -3310,7 +3268,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) /* * rq is already accounted, so use raw insert */ - if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) + if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); else __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); @@ -3377,6 +3335,7 @@ bool blk_poll(struct request_queue *q, blk_qc_t cookie) return false; } +EXPORT_SYMBOL_GPL(blk_poll); #ifdef CONFIG_PM /** diff --git a/block/blk-exec.c b/block/blk-exec.c index 3fec8a29d0fa..7ea04325d02f 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -62,7 +62,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, /* * don't check dying flag for MQ because the request won't - * be resued after dying flag is set + * be reused after dying flag is set */ if (q->mq_ops) { blk_mq_insert_request(rq, at_head, true, false); diff --git a/block/blk-flush.c b/block/blk-flush.c index b1c91d229e5e..d308def812db 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -10,8 +10,8 @@ * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request * properties and hardware capability. * - * If a request doesn't have data, only REQ_FLUSH makes sense, which - * indicates a simple flush request. If there is data, REQ_FLUSH indicates + * If a request doesn't have data, only REQ_PREFLUSH makes sense, which + * indicates a simple flush request. If there is data, REQ_PREFLUSH indicates * that the device cache should be flushed before the data is executed, and * REQ_FUA means that the data must be on non-volatile media on request * completion. @@ -20,16 +20,16 @@ * difference. The requests are either completed immediately if there's no * data or executed as normal requests otherwise. * - * If the device has writeback cache and supports FUA, REQ_FLUSH is + * If the device has writeback cache and supports FUA, REQ_PREFLUSH is * translated to PREFLUSH but REQ_FUA is passed down directly with DATA. * - * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is - * translated to PREFLUSH and REQ_FUA to POSTFLUSH. + * If the device has writeback cache and doesn't support FUA, REQ_PREFLUSH + * is translated to PREFLUSH and REQ_FUA to POSTFLUSH. * * The actual execution of flush is double buffered. Whenever a request * needs to execute PRE or POSTFLUSH, it queues at * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a - * flush is issued and the pending_idx is toggled. When the flush + * REQ_OP_FLUSH is issued and the pending_idx is toggled. When the flush * completes, all the requests which were pending are proceeded to the next * step. This allows arbitrary merging of different types of FLUSH/FUA * requests. @@ -103,7 +103,7 @@ static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq) policy |= REQ_FSEQ_DATA; if (fflags & (1UL << QUEUE_FLAG_WC)) { - if (rq->cmd_flags & REQ_FLUSH) + if (rq->cmd_flags & REQ_PREFLUSH) policy |= REQ_FSEQ_PREFLUSH; if (!(fflags & (1UL << QUEUE_FLAG_FUA)) && (rq->cmd_flags & REQ_FUA)) @@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) } flush_rq->cmd_type = REQ_TYPE_FS; - flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; + req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH | REQ_FLUSH_SEQ); flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; @@ -391,9 +391,9 @@ void blk_insert_flush(struct request *rq) /* * @policy now records what operations need to be done. Adjust - * REQ_FLUSH and FUA for the driver. + * REQ_PREFLUSH and FUA for the driver. */ - rq->cmd_flags &= ~REQ_FLUSH; + rq->cmd_flags &= ~REQ_PREFLUSH; if (!(fflags & (1UL << QUEUE_FLAG_FUA))) rq->cmd_flags &= ~REQ_FUA; @@ -485,8 +485,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio = bio_alloc(gfp_mask, 0); bio->bi_bdev = bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); - ret = submit_bio_wait(WRITE_FLUSH, bio); + ret = submit_bio_wait(bio); /* * The driver must store the error location in ->bi_sector, if diff --git a/block/blk-lib.c b/block/blk-lib.c index 9e29dc351695..083e56f72308 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -9,33 +9,46 @@ #include "blk.h" -static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages, +static struct bio *next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp) { struct bio *new = bio_alloc(gfp, nr_pages); if (bio) { bio_chain(bio, new); - submit_bio(rw, bio); + submit_bio(bio); } return new; } int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop) + sector_t nr_sects, gfp_t gfp_mask, int flags, + struct bio **biop) { struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; unsigned int granularity; + enum req_op op; int alignment; if (!q) return -ENXIO; - if (!blk_queue_discard(q)) - return -EOPNOTSUPP; - if ((type & REQ_SECURE) && !blk_queue_secdiscard(q)) - return -EOPNOTSUPP; + + if (flags & BLKDEV_DISCARD_SECURE) { + if (flags & BLKDEV_DISCARD_ZERO) + return -EOPNOTSUPP; + if (!blk_queue_secure_erase(q)) + return -EOPNOTSUPP; + op = REQ_OP_SECURE_ERASE; + } else { + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + if ((flags & BLKDEV_DISCARD_ZERO) && + !q->limits.discard_zeroes_data) + return -EOPNOTSUPP; + op = REQ_OP_DISCARD; + } /* Zero-sector (unknown) and one-sector granularities are the same. */ granularity = max(q->limits.discard_granularity >> 9, 1U); @@ -62,9 +75,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, req_sects = end_sect - sector; } - bio = next_bio(bio, type, 1, gfp_mask); + bio = next_bio(bio, 1, gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; + bio_set_op_attrs(bio, op, 0); bio->bi_iter.bi_size = req_sects << 9; nr_sects -= req_sects; @@ -98,20 +112,16 @@ EXPORT_SYMBOL(__blkdev_issue_discard); int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) { - int type = REQ_WRITE | REQ_DISCARD; struct bio *bio = NULL; struct blk_plug plug; int ret; - if (flags & BLKDEV_DISCARD_SECURE) - type |= REQ_SECURE; - blk_start_plug(&plug); - ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, type, + ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags, &bio); if (!ret && bio) { - ret = submit_bio_wait(type, bio); - if (ret == -EOPNOTSUPP) + ret = submit_bio_wait(bio); + if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO)) ret = 0; bio_put(bio); } @@ -148,13 +158,14 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, max_write_same_sectors = UINT_MAX >> 9; while (nr_sects) { - bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask); + bio = next_bio(bio, 1, gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; bio->bi_vcnt = 1; bio->bi_io_vec->bv_page = page; bio->bi_io_vec->bv_offset = 0; bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); + bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0); if (nr_sects > max_write_same_sectors) { bio->bi_iter.bi_size = max_write_same_sectors << 9; @@ -167,10 +178,10 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, } if (bio) { - ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio); + ret = submit_bio_wait(bio); bio_put(bio); } - return ret != -EOPNOTSUPP ? ret : 0; + return ret; } EXPORT_SYMBOL(blkdev_issue_write_same); @@ -193,11 +204,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, unsigned int sz; while (nr_sects != 0) { - bio = next_bio(bio, WRITE, - min(nr_sects, (sector_t)BIO_MAX_PAGES), + bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES), gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); while (nr_sects != 0) { sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); @@ -210,7 +221,7 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, } if (bio) { - ret = submit_bio_wait(WRITE, bio); + ret = submit_bio_wait(bio); bio_put(bio); return ret; } @@ -241,11 +252,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, bool discard) { - struct request_queue *q = bdev_get_queue(bdev); - - if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data && - blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0) - return 0; + if (discard) { + if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, + BLKDEV_DISCARD_ZERO)) + return 0; + } if (bdev_write_same(bdev) && blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, diff --git a/block/blk-map.c b/block/blk-map.c index b9f88b7751fb..b8657fa8dc9a 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -9,21 +9,26 @@ #include "blk.h" -int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio) +/* + * Append a bio to a passthrough request. Only works can be merged into + * the request based on the driver constraints. + */ +int blk_rq_append_bio(struct request *rq, struct bio *bio) { - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) - return -EINVAL; - else { + if (!rq->bio) { + blk_rq_bio_prep(rq->q, rq, bio); + } else { + if (!ll_back_merge_fn(rq->q, rq, bio)) + return -EINVAL; + rq->biotail->bi_next = bio; rq->biotail = bio; - rq->__data_len += bio->bi_iter.bi_size; } + return 0; } +EXPORT_SYMBOL(blk_rq_append_bio); static int __blk_rq_unmap_user(struct bio *bio) { @@ -71,7 +76,7 @@ static int __blk_rq_map_user_iov(struct request *rq, */ bio_get(bio); - ret = blk_rq_append_bio(q, rq, bio); + ret = blk_rq_append_bio(rq, bio); if (ret) { bio_endio(bio); __blk_rq_unmap_user(orig_bio); @@ -224,12 +229,12 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, return PTR_ERR(bio); if (!reading) - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); if (do_copy) rq->cmd_flags |= REQ_COPY_USER; - ret = blk_rq_append_bio(q, rq, bio); + ret = blk_rq_append_bio(rq, bio); if (unlikely(ret)) { /* request is too big */ bio_put(bio); diff --git a/block/blk-merge.c b/block/blk-merge.c index 261353166dcf..41cbd4878958 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -172,9 +172,9 @@ void blk_queue_split(struct request_queue *q, struct bio **bio, struct bio *split, *res; unsigned nsegs; - if ((*bio)->bi_rw & REQ_DISCARD) + if (bio_op(*bio) == REQ_OP_DISCARD) split = blk_bio_discard_split(q, *bio, bs, &nsegs); - else if ((*bio)->bi_rw & REQ_WRITE_SAME) + else if (bio_op(*bio) == REQ_OP_WRITE_SAME) split = blk_bio_write_same_split(q, *bio, bs, &nsegs); else split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); @@ -213,10 +213,10 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, * This should probably be returning 0, but blk_add_request_payload() * (Christoph!!!!) */ - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) return 1; - if (bio->bi_rw & REQ_WRITE_SAME) + if (bio_op(bio) == REQ_OP_WRITE_SAME) return 1; fbio = bio; @@ -385,7 +385,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, nsegs = 0; cluster = blk_queue_cluster(q); - if (bio->bi_rw & REQ_DISCARD) { + if (bio_op(bio) == REQ_OP_DISCARD) { /* * This is a hack - drivers should be neither modifying the * biovec, nor relying on bi_vcnt - but because of @@ -400,7 +400,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, return 0; } - if (bio->bi_rw & REQ_WRITE_SAME) { + if (bio_op(bio) == REQ_OP_WRITE_SAME) { single_segment: *sg = sglist; bvec = bio_iovec(bio); @@ -439,7 +439,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, } if (q->dma_drain_size && q->dma_drain_needed(rq)) { - if (rq->cmd_flags & REQ_WRITE) + if (op_is_write(req_op(rq))) memset(q->dma_drain_buffer, 0, q->dma_drain_size); sg_unmark_end(sg); @@ -500,7 +500,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, integrity_req_gap_back_merge(req, bio)) return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > - blk_rq_get_max_sectors(req)) { + blk_rq_get_max_sectors(req, blk_rq_pos(req))) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -524,7 +524,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, integrity_req_gap_front_merge(req, bio)) return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > - blk_rq_get_max_sectors(req)) { + blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -570,7 +570,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, * Will it become too large? */ if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > - blk_rq_get_max_sectors(req)) + blk_rq_get_max_sectors(req, blk_rq_pos(req))) return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; @@ -649,7 +649,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (!rq_mergeable(req) || !rq_mergeable(next)) return 0; - if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) + if (req_op(req) != req_op(next)) return 0; /* @@ -663,7 +663,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, || req_no_special_merge(next)) return 0; - if (req->cmd_flags & REQ_WRITE_SAME && + if (req_op(req) == REQ_OP_WRITE_SAME && !blk_write_same_mergeable(req->bio, next->bio)) return 0; @@ -743,6 +743,12 @@ int attempt_front_merge(struct request_queue *q, struct request *rq) int blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next) { + struct elevator_queue *e = q->elevator; + + if (e->type->ops.elevator_allow_rq_merge_fn) + if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) + return 0; + return attempt_merge(q, rq, next); } @@ -751,7 +757,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (!rq_mergeable(rq) || !bio_mergeable(bio)) return false; - if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) + if (req_op(rq) != bio_op(bio)) return false; /* different data direction or already started, don't merge */ @@ -767,7 +773,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) return false; /* must be using the same buffer */ - if (rq->cmd_flags & REQ_WRITE_SAME && + if (req_op(rq) == REQ_OP_WRITE_SAME && !blk_write_same_mergeable(rq->bio, bio)) return false; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 56a0c37a3d06..729bac3a673b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -485,6 +485,32 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); +int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) +{ + int i, j, ret = 0; + + if (!set->ops->reinit_request) + goto out; + + for (i = 0; i < set->nr_hw_queues; i++) { + struct blk_mq_tags *tags = set->tags[i]; + + for (j = 0; j < tags->nr_tags; j++) { + if (!tags->rqs[j]) + continue; + + ret = set->ops->reinit_request(set->driver_data, + tags->rqs[j]); + if (ret) + goto out; + } + } + +out: + return ret; +} +EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset); + void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { diff --git a/block/blk-mq.c b/block/blk-mq.c index f9b9049b1284..576e7112f807 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -159,16 +159,17 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) EXPORT_SYMBOL(blk_mq_can_queue); static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, - struct request *rq, unsigned int rw_flags) + struct request *rq, int op, + unsigned int op_flags) { if (blk_queue_io_stat(q)) - rw_flags |= REQ_IO_STAT; + op_flags |= REQ_IO_STAT; INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ rq->q = q; rq->mq_ctx = ctx; - rq->cmd_flags |= rw_flags; + req_set_op_attrs(rq, op, op_flags); /* do not touch atomic flags, it needs atomic ops against the timer */ rq->cpu = -1; INIT_HLIST_NODE(&rq->hash); @@ -203,11 +204,11 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->end_io_data = NULL; rq->next_rq = NULL; - ctx->rq_dispatched[rw_is_sync(rw_flags)]++; + ctx->rq_dispatched[rw_is_sync(op, op_flags)]++; } static struct request * -__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) +__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags) { struct request *rq; unsigned int tag; @@ -222,7 +223,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) } rq->tag = tag; - blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw); + blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags); return rq; } @@ -246,7 +247,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); + rq = __blk_mq_alloc_request(&alloc_data, rw, 0); if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) { __blk_mq_run_hw_queue(hctx); blk_mq_put_ctx(ctx); @@ -254,7 +255,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); + rq = __blk_mq_alloc_request(&alloc_data, rw, 0); ctx = alloc_data.ctx; } blk_mq_put_ctx(ctx); @@ -262,10 +263,53 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, blk_queue_exit(q); return ERR_PTR(-EWOULDBLOCK); } + + rq->__data_len = 0; + rq->__sector = (sector_t) -1; + rq->bio = rq->biotail = NULL; return rq; } EXPORT_SYMBOL(blk_mq_alloc_request); +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, + unsigned int flags, unsigned int hctx_idx) +{ + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + struct request *rq; + struct blk_mq_alloc_data alloc_data; + int ret; + + /* + * If the tag allocator sleeps we could get an allocation for a + * different hardware context. No need to complicate the low level + * allocator for this for the rare use case of a command tied to + * a specific queue. + */ + if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT))) + return ERR_PTR(-EINVAL); + + if (hctx_idx >= q->nr_hw_queues) + return ERR_PTR(-EIO); + + ret = blk_queue_enter(q, true); + if (ret) + return ERR_PTR(ret); + + hctx = q->queue_hw_ctx[hctx_idx]; + ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); + + blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); + rq = __blk_mq_alloc_request(&alloc_data, rw, 0); + if (!rq) { + blk_queue_exit(q); + return ERR_PTR(-EWOULDBLOCK); + } + + return rq; +} +EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); + static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq) { @@ -784,7 +828,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) switch (ret) { case BLK_MQ_RQ_QUEUE_OK: queued++; - continue; + break; case BLK_MQ_RQ_QUEUE_BUSY: list_add(&rq->queuelist, &rq_list); __blk_mq_requeue_request(rq); @@ -1169,28 +1213,29 @@ static struct request *blk_mq_map_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct request *rq; - int rw = bio_data_dir(bio); + int op = bio_data_dir(bio); + int op_flags = 0; struct blk_mq_alloc_data alloc_data; blk_queue_enter_live(q); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); - if (rw_is_sync(bio->bi_rw)) - rw |= REQ_SYNC; + if (rw_is_sync(bio_op(bio), bio->bi_rw)) + op_flags |= REQ_SYNC; - trace_block_getrq(q, bio, rw); + trace_block_getrq(q, bio, op); blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); + rq = __blk_mq_alloc_request(&alloc_data, op, op_flags); if (unlikely(!rq)) { __blk_mq_run_hw_queue(hctx); blk_mq_put_ctx(ctx); - trace_block_sleeprq(q, bio, rw); + trace_block_sleeprq(q, bio, op); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); + rq = __blk_mq_alloc_request(&alloc_data, op, op_flags); ctx = alloc_data.ctx; hctx = alloc_data.hctx; } @@ -1244,8 +1289,8 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie) */ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { - const int is_sync = rw_is_sync(bio->bi_rw); - const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); + const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw); + const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA); struct blk_map_ctx data; struct request *rq; unsigned int request_count = 0; @@ -1338,8 +1383,8 @@ done: */ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) { - const int is_sync = rw_is_sync(bio->bi_rw); - const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); + const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw); + const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA); struct blk_plug *plug; unsigned int request_count = 0; struct blk_map_ctx data; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 99205965f559..f87a7e747d36 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -379,6 +379,11 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page, return count; } +static ssize_t queue_dax_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_dax(q), page); +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -516,6 +521,11 @@ static struct queue_sysfs_entry queue_wc_entry = { .store = queue_wc_store, }; +static struct queue_sysfs_entry queue_dax_entry = { + .attr = {.name = "dax", .mode = S_IRUGO }, + .show = queue_dax_show, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -542,6 +552,7 @@ static struct attribute *default_attrs[] = { &queue_random_entry.attr, &queue_poll_entry.attr, &queue_wc_entry.attr, + &queue_dax_entry.attr, NULL, }; diff --git a/block/blk.h b/block/blk.h index 70e4aee9cdcb..c37492f5edaa 100644 --- a/block/blk.h +++ b/block/blk.h @@ -64,8 +64,6 @@ void blk_exit_rl(struct request_list *rl); void init_request_from_bio(struct request *req, struct bio *bio); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); -int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio); void blk_queue_bypass_start(struct request_queue *q); void blk_queue_bypass_end(struct request_queue *q); void blk_dequeue_request(struct request *rq); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4a349787bc62..acabba198de9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -10,7 +10,7 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/elevator.h> -#include <linux/jiffies.h> +#include <linux/ktime.h> #include <linux/rbtree.h> #include <linux/ioprio.h> #include <linux/blktrace_api.h> @@ -22,28 +22,28 @@ */ /* max queue in one round of service */ static const int cfq_quantum = 8; -static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; +static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; /* maximum backwards seek, in KiB */ static const int cfq_back_max = 16 * 1024; /* penalty of a backwards seek */ static const int cfq_back_penalty = 2; -static const int cfq_slice_sync = HZ / 10; -static int cfq_slice_async = HZ / 25; +static const u64 cfq_slice_sync = NSEC_PER_SEC / 10; +static u64 cfq_slice_async = NSEC_PER_SEC / 25; static const int cfq_slice_async_rq = 2; -static int cfq_slice_idle = HZ / 125; -static int cfq_group_idle = HZ / 125; -static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ +static u64 cfq_slice_idle = NSEC_PER_SEC / 125; +static u64 cfq_group_idle = NSEC_PER_SEC / 125; +static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; /* * offset from end of service tree */ -#define CFQ_IDLE_DELAY (HZ / 5) +#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) /* * below this threshold, we consider thinktime immediate */ -#define CFQ_MIN_TT (2) +#define CFQ_MIN_TT (2 * NSEC_PER_SEC / HZ) #define CFQ_SLICE_SCALE (5) #define CFQ_HW_QUEUE_MIN (5) @@ -73,11 +73,11 @@ static struct kmem_cache *cfq_pool; #define CFQ_WEIGHT_LEGACY_MAX 1000 struct cfq_ttime { - unsigned long last_end_request; + u64 last_end_request; - unsigned long ttime_total; + u64 ttime_total; + u64 ttime_mean; unsigned long ttime_samples; - unsigned long ttime_mean; }; /* @@ -94,7 +94,7 @@ struct cfq_rb_root { struct cfq_ttime ttime; }; #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ - .ttime = {.last_end_request = jiffies,},} + .ttime = {.last_end_request = ktime_get_ns(),},} /* * Per process-grouping structure @@ -109,7 +109,7 @@ struct cfq_queue { /* service_tree member */ struct rb_node rb_node; /* service_tree key */ - unsigned long rb_key; + u64 rb_key; /* prio tree member */ struct rb_node p_node; /* prio tree root we belong to, if any */ @@ -126,13 +126,13 @@ struct cfq_queue { struct list_head fifo; /* time when queue got scheduled in to dispatch first request. */ - unsigned long dispatch_start; - unsigned int allocated_slice; - unsigned int slice_dispatch; + u64 dispatch_start; + u64 allocated_slice; + u64 slice_dispatch; /* time when first request from queue completed and slice started. */ - unsigned long slice_start; - unsigned long slice_end; - long slice_resid; + u64 slice_start; + u64 slice_end; + s64 slice_resid; /* pending priority requests */ int prio_pending; @@ -141,7 +141,7 @@ struct cfq_queue { /* io prio of this group */ unsigned short ioprio, org_ioprio; - unsigned short ioprio_class; + unsigned short ioprio_class, org_ioprio_class; pid_t pid; @@ -290,7 +290,7 @@ struct cfq_group { struct cfq_rb_root service_trees[2][3]; struct cfq_rb_root service_tree_idle; - unsigned long saved_wl_slice; + u64 saved_wl_slice; enum wl_type_t saved_wl_type; enum wl_class_t saved_wl_class; @@ -329,7 +329,7 @@ struct cfq_data { */ enum wl_class_t serving_wl_class; enum wl_type_t serving_wl_type; - unsigned long workload_expires; + u64 workload_expires; struct cfq_group *serving_group; /* @@ -362,7 +362,7 @@ struct cfq_data { /* * idle window management */ - struct timer_list idle_slice_timer; + struct hrtimer idle_slice_timer; struct work_struct unplug_work; struct cfq_queue *active_queue; @@ -374,22 +374,22 @@ struct cfq_data { * tunables, see top of file */ unsigned int cfq_quantum; - unsigned int cfq_fifo_expire[2]; unsigned int cfq_back_penalty; unsigned int cfq_back_max; - unsigned int cfq_slice[2]; unsigned int cfq_slice_async_rq; - unsigned int cfq_slice_idle; - unsigned int cfq_group_idle; unsigned int cfq_latency; - unsigned int cfq_target_latency; + u64 cfq_fifo_expire[2]; + u64 cfq_slice[2]; + u64 cfq_slice_idle; + u64 cfq_group_idle; + u64 cfq_target_latency; /* * Fallback dummy cfqq for extreme OOM conditions */ struct cfq_queue oom_cfqq; - unsigned long last_delayed_sync; + u64 last_delayed_sync; }; static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); @@ -667,15 +667,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) } while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, - struct cfq_group *curr_cfqg, int rw) + struct cfq_group *curr_cfqg, int op, + int op_flags) { - blkg_rwstat_add(&cfqg->stats.queued, rw, 1); + blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, 1); cfqg_stats_end_empty_time(&cfqg->stats); cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg); } static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, - unsigned long time, unsigned long unaccounted_time) + uint64_t time, unsigned long unaccounted_time) { blkg_stat_add(&cfqg->stats.time, time); #ifdef CONFIG_DEBUG_BLK_CGROUP @@ -683,26 +684,30 @@ static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, #endif } -static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) +static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, + int op_flags) { - blkg_rwstat_add(&cfqg->stats.queued, rw, -1); + blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, -1); } -static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) +static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, + int op_flags) { - blkg_rwstat_add(&cfqg->stats.merged, rw, 1); + blkg_rwstat_add(&cfqg->stats.merged, op, op_flags, 1); } static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, - uint64_t start_time, uint64_t io_start_time, int rw) + uint64_t start_time, uint64_t io_start_time, int op, + int op_flags) { struct cfqg_stats *stats = &cfqg->stats; unsigned long long now = sched_clock(); if (time_after64(now, io_start_time)) - blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); + blkg_rwstat_add(&stats->service_time, op, op_flags, + now - io_start_time); if (time_after64(io_start_time, start_time)) - blkg_rwstat_add(&stats->wait_time, rw, + blkg_rwstat_add(&stats->wait_time, op, op_flags, io_start_time - start_time); } @@ -781,13 +786,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { } #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, - struct cfq_group *curr_cfqg, int rw) { } + struct cfq_group *curr_cfqg, int op, int op_flags) { } static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, - unsigned long time, unsigned long unaccounted_time) { } -static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { } -static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { } + uint64_t time, unsigned long unaccounted_time) { } +static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, + int op_flags) { } +static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, + int op_flags) { } static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, - uint64_t start_time, uint64_t io_start_time, int rw) { } + uint64_t start_time, uint64_t io_start_time, int op, + int op_flags) { } #endif /* CONFIG_CFQ_GROUP_IOSCHED */ @@ -807,7 +815,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, struct cfq_ttime *ttime, bool group_idle) { - unsigned long slice; + u64 slice; if (!sample_valid(ttime->ttime_samples)) return false; if (group_idle) @@ -930,17 +938,18 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) * if a queue is marked sync and has sync io queued. A sync queue with async * io only, should not get full sync slice length. */ -static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, +static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync, unsigned short prio) { - const int base_slice = cfqd->cfq_slice[sync]; + u64 base_slice = cfqd->cfq_slice[sync]; + u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE); WARN_ON(prio >= IOPRIO_BE_NR); - return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); + return base_slice + (slice * (4 - prio)); } -static inline int +static inline u64 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); @@ -958,15 +967,14 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) * * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. */ -static inline u64 cfqg_scale_charge(unsigned long charge, +static inline u64 cfqg_scale_charge(u64 charge, unsigned int vfraction) { u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ /* charge / vfraction */ c <<= CFQ_SERVICE_SHIFT; - do_div(c, vfraction); - return c; + return div_u64(c, vfraction); } static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) @@ -1019,16 +1027,16 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, return cfqg->busy_queues_avg[rt]; } -static inline unsigned +static inline u64 cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) { return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; } -static inline unsigned +static inline u64 cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - unsigned slice = cfq_prio_to_slice(cfqd, cfqq); + u64 slice = cfq_prio_to_slice(cfqd, cfqq); if (cfqd->cfq_latency) { /* * interested queues (we consider only the ones with the same @@ -1036,20 +1044,22 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) */ unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, cfq_class_rt(cfqq)); - unsigned sync_slice = cfqd->cfq_slice[1]; - unsigned expect_latency = sync_slice * iq; - unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); + u64 sync_slice = cfqd->cfq_slice[1]; + u64 expect_latency = sync_slice * iq; + u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg); if (expect_latency > group_slice) { - unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; + u64 base_low_slice = 2 * cfqd->cfq_slice_idle; + u64 low_slice; + /* scale low_slice according to IO priority * and sync vs async */ - unsigned low_slice = - min(slice, base_low_slice * slice / sync_slice); + low_slice = div64_u64(base_low_slice*slice, sync_slice); + low_slice = min(slice, low_slice); /* the adapted slice value is scaled to fit all iqs * into the target latency */ - slice = max(slice * group_slice / expect_latency, - low_slice); + slice = div64_u64(slice*group_slice, expect_latency); + slice = max(slice, low_slice); } } return slice; @@ -1058,12 +1068,13 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) static inline void cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); + u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq); + u64 now = ktime_get_ns(); - cfqq->slice_start = jiffies; - cfqq->slice_end = jiffies + slice; + cfqq->slice_start = now; + cfqq->slice_end = now + slice; cfqq->allocated_slice = slice; - cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); + cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now); } /* @@ -1075,7 +1086,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq) { if (cfq_cfqq_slice_new(cfqq)) return false; - if (time_before(jiffies, cfqq->slice_end)) + if (ktime_get_ns() < cfqq->slice_end) return false; return true; @@ -1241,8 +1252,8 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); } -static unsigned long cfq_slice_offset(struct cfq_data *cfqd, - struct cfq_queue *cfqq) +static u64 cfq_slice_offset(struct cfq_data *cfqd, + struct cfq_queue *cfqq) { /* * just an approximation, should be ok. @@ -1435,31 +1446,32 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) cfqg_stats_update_dequeue(cfqg); } -static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, - unsigned int *unaccounted_time) +static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq, + u64 *unaccounted_time) { - unsigned int slice_used; + u64 slice_used; + u64 now = ktime_get_ns(); /* * Queue got expired before even a single request completed or * got expired immediately after first request completion. */ - if (!cfqq->slice_start || cfqq->slice_start == jiffies) { + if (!cfqq->slice_start || cfqq->slice_start == now) { /* * Also charge the seek time incurred to the group, otherwise * if there are mutiple queues in the group, each can dispatch * a single request on seeky media and cause lots of seek time * and group will never know it. */ - slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), - 1); + slice_used = max_t(u64, (now - cfqq->dispatch_start), + jiffies_to_nsecs(1)); } else { - slice_used = jiffies - cfqq->slice_start; + slice_used = now - cfqq->slice_start; if (slice_used > cfqq->allocated_slice) { *unaccounted_time = slice_used - cfqq->allocated_slice; slice_used = cfqq->allocated_slice; } - if (time_after(cfqq->slice_start, cfqq->dispatch_start)) + if (cfqq->slice_start > cfqq->dispatch_start) *unaccounted_time += cfqq->slice_start - cfqq->dispatch_start; } @@ -1471,10 +1483,11 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, struct cfq_queue *cfqq) { struct cfq_rb_root *st = &cfqd->grp_service_tree; - unsigned int used_sl, charge, unaccounted_sl = 0; + u64 used_sl, charge, unaccounted_sl = 0; int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) - cfqg->service_tree_idle.count; unsigned int vfr; + u64 now = ktime_get_ns(); BUG_ON(nr_sync < 0); used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); @@ -1496,9 +1509,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_group_service_tree_add(st, cfqg); /* This group is being expired. Save the context */ - if (time_after(cfqd->workload_expires, jiffies)) { - cfqg->saved_wl_slice = cfqd->workload_expires - - jiffies; + if (cfqd->workload_expires > now) { + cfqg->saved_wl_slice = cfqd->workload_expires - now; cfqg->saved_wl_type = cfqd->serving_wl_type; cfqg->saved_wl_class = cfqd->serving_wl_class; } else @@ -1507,7 +1519,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); cfq_log_cfqq(cfqq->cfqd, cfqq, - "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", + "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu", used_sl, cfqq->slice_dispatch, charge, iops_mode(cfqd), cfqq->nr_sectors); cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); @@ -1530,7 +1542,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) *st = CFQ_RB_ROOT; RB_CLEAR_NODE(&cfqg->rb_node); - cfqg->ttime.last_end_request = jiffies; + cfqg->ttime.last_end_request = ktime_get_ns(); } #ifdef CONFIG_CFQ_GROUP_IOSCHED @@ -2213,10 +2225,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, { struct rb_node **p, *parent; struct cfq_queue *__cfqq; - unsigned long rb_key; + u64 rb_key; struct cfq_rb_root *st; int left; int new_cfqq = 1; + u64 now = ktime_get_ns(); st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); if (cfq_class_idle(cfqq)) { @@ -2226,7 +2239,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, __cfqq = rb_entry(parent, struct cfq_queue, rb_node); rb_key += __cfqq->rb_key; } else - rb_key += jiffies; + rb_key += now; } else if (!add_front) { /* * Get our rb key offset. Subtract any residual slice @@ -2234,13 +2247,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, * count indicates slice overrun, and this should position * the next service time further away in the tree. */ - rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; + rb_key = cfq_slice_offset(cfqd, cfqq) + now; rb_key -= cfqq->slice_resid; cfqq->slice_resid = 0; } else { - rb_key = -HZ; + rb_key = -NSEC_PER_SEC; __cfqq = cfq_rb_first(st); - rb_key += __cfqq ? __cfqq->rb_key : jiffies; + rb_key += __cfqq ? __cfqq->rb_key : now; } if (!RB_EMPTY_NODE(&cfqq->rb_node)) { @@ -2266,7 +2279,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, /* * sort by key, that represents service time. */ - if (time_before(rb_key, __cfqq->rb_key)) + if (rb_key < __cfqq->rb_key) p = &parent->rb_left; else { p = &parent->rb_right; @@ -2461,10 +2474,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; - cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); + cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags); cfq_add_rq_rb(rq); cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group, - rq->cmd_flags); + req_op(rq), rq->cmd_flags); } static struct request * @@ -2517,7 +2530,7 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; - cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); + cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags); if (rq->cmd_flags & REQ_PRIO) { WARN_ON(!cfqq->prio_pending); cfqq->prio_pending--; @@ -2531,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req, struct request *__rq; __rq = cfq_find_rq_fmerge(cfqd, bio); - if (__rq && elv_rq_merge_ok(__rq, bio)) { + if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; return ELEVATOR_FRONT_MERGE; } @@ -2552,7 +2565,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, static void cfq_bio_merged(struct request_queue *q, struct request *req, struct bio *bio) { - cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw); + cfqg_stats_update_io_merged(RQ_CFQG(req), bio_op(bio), bio->bi_rw); } static void @@ -2566,7 +2579,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, * reposition in fifo if next is older than rq */ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && - time_before(next->fifo_time, rq->fifo_time) && + next->fifo_time < rq->fifo_time && cfqq == RQ_CFQQ(next)) { list_move(&rq->queuelist, &next->queuelist); rq->fifo_time = next->fifo_time; @@ -2575,7 +2588,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, if (cfqq->next_rq == next) cfqq->next_rq = rq; cfq_remove_request(next); - cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags); + cfqg_stats_update_io_merged(RQ_CFQG(rq), req_op(next), next->cmd_flags); cfqq = RQ_CFQQ(next); /* @@ -2588,8 +2601,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, cfq_del_cfqq_rr(cfqd, cfqq); } -static int cfq_allow_merge(struct request_queue *q, struct request *rq, - struct bio *bio) +static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq, + struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_io_cq *cic; @@ -2613,9 +2626,15 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, return cfqq == RQ_CFQQ(rq); } +static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq, + struct request *next) +{ + return RQ_CFQQ(rq) == RQ_CFQQ(next); +} + static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - del_timer(&cfqd->idle_slice_timer); + hrtimer_try_to_cancel(&cfqd->idle_slice_timer); cfqg_stats_update_idle_time(cfqq->cfqg); } @@ -2627,7 +2646,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, cfqd->serving_wl_class, cfqd->serving_wl_type); cfqg_stats_update_avg_queue_size(cfqq->cfqg); cfqq->slice_start = 0; - cfqq->dispatch_start = jiffies; + cfqq->dispatch_start = ktime_get_ns(); cfqq->allocated_slice = 0; cfqq->slice_end = 0; cfqq->slice_dispatch = 0; @@ -2676,8 +2695,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfq_cfqq_slice_new(cfqq)) cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); else - cfqq->slice_resid = cfqq->slice_end - jiffies; - cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); + cfqq->slice_resid = cfqq->slice_end - ktime_get_ns(); + cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid); } cfq_group_served(cfqd, cfqq->cfqg, cfqq); @@ -2911,7 +2930,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) struct cfq_queue *cfqq = cfqd->active_queue; struct cfq_rb_root *st = cfqq->service_tree; struct cfq_io_cq *cic; - unsigned long sl, group_idle = 0; + u64 sl, group_idle = 0; + u64 now = ktime_get_ns(); /* * SSD device without seek penalty, disable idling. But only do so @@ -2954,8 +2974,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) * time slice. */ if (sample_valid(cic->ttime.ttime_samples) && - (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { - cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", + (cfqq->slice_end - now < cic->ttime.ttime_mean)) { + cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu", cic->ttime.ttime_mean); return; } @@ -2976,9 +2996,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) else sl = cfqd->cfq_slice_idle; - mod_timer(&cfqd->idle_slice_timer, jiffies + sl); + hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl), + HRTIMER_MODE_REL); cfqg_stats_set_start_idle_time(cfqq->cfqg); - cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, + cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl, group_idle ? 1 : 0); } @@ -3018,7 +3039,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) return NULL; rq = rq_entry_fifo(cfqq->fifo.next); - if (time_before(jiffies, rq->fifo_time)) + if (ktime_get_ns() < rq->fifo_time) rq = NULL; cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); @@ -3096,14 +3117,14 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, struct cfq_queue *queue; int i; bool key_valid = false; - unsigned long lowest_key = 0; + u64 lowest_key = 0; enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; for (i = 0; i <= SYNC_WORKLOAD; ++i) { /* select the one with lowest rb_key */ queue = cfq_rb_first(st_for(cfqg, wl_class, i)); if (queue && - (!key_valid || time_before(queue->rb_key, lowest_key))) { + (!key_valid || queue->rb_key < lowest_key)) { lowest_key = queue->rb_key; cur_best = i; key_valid = true; @@ -3116,11 +3137,12 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, static void choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) { - unsigned slice; + u64 slice; unsigned count; struct cfq_rb_root *st; - unsigned group_slice; + u64 group_slice; enum wl_class_t original_class = cfqd->serving_wl_class; + u64 now = ktime_get_ns(); /* Choose next priority. RT > BE > IDLE */ if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) @@ -3129,7 +3151,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) cfqd->serving_wl_class = BE_WORKLOAD; else { cfqd->serving_wl_class = IDLE_WORKLOAD; - cfqd->workload_expires = jiffies + 1; + cfqd->workload_expires = now + jiffies_to_nsecs(1); return; } @@ -3147,7 +3169,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) /* * check workload expiration, and that we still have other queues ready */ - if (count && !time_after(jiffies, cfqd->workload_expires)) + if (count && !(now > cfqd->workload_expires)) return; new_workload: @@ -3164,13 +3186,13 @@ new_workload: */ group_slice = cfq_group_slice(cfqd, cfqg); - slice = group_slice * count / + slice = div_u64(group_slice * count, max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, - cfqg)); + cfqg))); if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { - unsigned int tmp; + u64 tmp; /* * Async queues are currently system wide. Just taking @@ -3181,19 +3203,19 @@ new_workload: */ tmp = cfqd->cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg); - tmp = tmp/cfqd->busy_queues; - slice = min_t(unsigned, slice, tmp); + tmp = div_u64(tmp, cfqd->busy_queues); + slice = min_t(u64, slice, tmp); /* async workload slice is scaled down according to * the sync/async slice ratio. */ - slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; + slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]); } else /* sync workload slice is at least 2 * cfq_slice_idle */ slice = max(slice, 2 * cfqd->cfq_slice_idle); - slice = max_t(unsigned, slice, CFQ_MIN_TT); - cfq_log(cfqd, "workload slice:%d", slice); - cfqd->workload_expires = jiffies + slice; + slice = max_t(u64, slice, CFQ_MIN_TT); + cfq_log(cfqd, "workload slice:%llu", slice); + cfqd->workload_expires = now + slice; } static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) @@ -3211,16 +3233,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) static void cfq_choose_cfqg(struct cfq_data *cfqd) { struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); + u64 now = ktime_get_ns(); cfqd->serving_group = cfqg; /* Restore the workload type data */ if (cfqg->saved_wl_slice) { - cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; + cfqd->workload_expires = now + cfqg->saved_wl_slice; cfqd->serving_wl_type = cfqg->saved_wl_type; cfqd->serving_wl_class = cfqg->saved_wl_class; } else - cfqd->workload_expires = jiffies - 1; + cfqd->workload_expires = now - 1; choose_wl_class_and_type(cfqd, cfqg); } @@ -3232,6 +3255,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd) static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) { struct cfq_queue *cfqq, *new_cfqq = NULL; + u64 now = ktime_get_ns(); cfqq = cfqd->active_queue; if (!cfqq) @@ -3292,7 +3316,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) * flight or is idling for a new request, allow either of these * conditions to happen (or time out) before selecting a new queue. */ - if (timer_pending(&cfqd->idle_slice_timer)) { + if (hrtimer_active(&cfqd->idle_slice_timer)) { cfqq = NULL; goto keep_queue; } @@ -3303,7 +3327,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) **/ if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && (cfq_cfqq_slice_new(cfqq) || - (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { + (cfqq->slice_end - now > now - cfqq->slice_start))) { cfq_clear_cfqq_deep(cfqq); cfq_clear_cfqq_idle_window(cfqq); } @@ -3381,11 +3405,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, struct cfq_queue *cfqq) { + u64 now = ktime_get_ns(); + /* the queue hasn't finished any request, can't estimate */ if (cfq_cfqq_slice_new(cfqq)) return true; - if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, - cfqq->slice_end)) + if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end) return true; return false; @@ -3460,10 +3485,10 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) * based on the last sync IO we serviced */ if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { - unsigned long last_sync = jiffies - cfqd->last_delayed_sync; + u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync; unsigned int depth; - depth = last_sync / cfqd->cfq_slice[1]; + depth = div64_u64(last_sync, cfqd->cfq_slice[1]); if (!depth && !cfqq->dispatched) depth = 1; if (depth < max_dispatch) @@ -3546,7 +3571,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || cfq_class_idle(cfqq))) { - cfqq->slice_end = jiffies + 1; + cfqq->slice_end = ktime_get_ns() + 1; cfq_slice_expired(cfqd, 0); } @@ -3624,7 +3649,7 @@ static void cfq_init_icq(struct io_cq *icq) { struct cfq_io_cq *cic = icq_to_cic(icq); - cic->ttime.last_end_request = jiffies; + cic->ttime.last_end_request = ktime_get_ns(); } static void cfq_exit_icq(struct io_cq *icq) @@ -3682,6 +3707,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic) * elevate the priority of this queue */ cfqq->org_ioprio = cfqq->ioprio; + cfqq->org_ioprio_class = cfqq->ioprio_class; cfq_clear_cfqq_prio_changed(cfqq); } @@ -3845,14 +3871,15 @@ out: } static void -__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) +__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle) { - unsigned long elapsed = jiffies - ttime->last_end_request; + u64 elapsed = ktime_get_ns() - ttime->last_end_request; elapsed = min(elapsed, 2UL * slice_idle); ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; - ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; - ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; + ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8); + ttime->ttime_mean = div64_ul(ttime->ttime_total + 128, + ttime->ttime_samples); } static void @@ -4105,10 +4132,10 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) cfq_log_cfqq(cfqd, cfqq, "insert_request"); cfq_init_prio_data(cfqq, RQ_CIC(rq)); - rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; + rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); - cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, + cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq), rq->cmd_flags); cfq_rq_enqueued(cfqd, cfqq, rq); } @@ -4153,6 +4180,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd) static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) { struct cfq_io_cq *cic = cfqd->active_cic; + u64 now = ktime_get_ns(); /* If the queue already has requests, don't wait */ if (!RB_EMPTY_ROOT(&cfqq->sort_list)) @@ -4171,7 +4199,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) /* if slice left is less than think time, wait busy */ if (cic && sample_valid(cic->ttime.ttime_samples) - && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) + && (cfqq->slice_end - now < cic->ttime.ttime_mean)) return true; /* @@ -4181,7 +4209,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) * case where think time is less than a jiffy, mark the queue wait * busy if only 1 jiffy is left in the slice. */ - if (cfqq->slice_end - jiffies == 1) + if (cfqq->slice_end - now <= jiffies_to_nsecs(1)) return true; return false; @@ -4192,9 +4220,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; const int sync = rq_is_sync(rq); - unsigned long now; + u64 now = ktime_get_ns(); - now = jiffies; cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!(rq->cmd_flags & REQ_NOIDLE)); @@ -4206,7 +4233,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqq->dispatched--; (RQ_CFQG(rq))->dispatched--; cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), - rq_io_start_time_ns(rq), rq->cmd_flags); + rq_io_start_time_ns(rq), req_op(rq), + rq->cmd_flags); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; @@ -4222,7 +4250,16 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqq_type(cfqq)); st->ttime.last_end_request = now; - if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) + /* + * We have to do this check in jiffies since start_time is in + * jiffies and it is not trivial to convert to ns. If + * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test + * will become problematic but so far we are fine (the default + * is 128 ms). + */ + if (!time_after(rq->start_time + + nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]), + jiffies)) cfqd->last_delayed_sync = now; } @@ -4247,10 +4284,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) * the queue. */ if (cfq_should_wait_busy(cfqd, cfqq)) { - unsigned long extend_sl = cfqd->cfq_slice_idle; + u64 extend_sl = cfqd->cfq_slice_idle; if (!cfqd->cfq_slice_idle) extend_sl = cfqd->cfq_group_idle; - cfqq->slice_end = jiffies + extend_sl; + cfqq->slice_end = now + extend_sl; cfq_mark_cfqq_wait_busy(cfqq); cfq_log_cfqq(cfqd, cfqq, "will busy wait"); } @@ -4275,6 +4312,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_schedule_dispatch(cfqd); } +static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int op_flags) +{ + /* + * If REQ_PRIO is set, boost class and prio level, if it's below + * BE/NORM. If prio is not set, restore the potentially boosted + * class/prio level. + */ + if (!(op_flags & REQ_PRIO)) { + cfqq->ioprio_class = cfqq->org_ioprio_class; + cfqq->ioprio = cfqq->org_ioprio; + } else { + if (cfq_class_idle(cfqq)) + cfqq->ioprio_class = IOPRIO_CLASS_BE; + if (cfqq->ioprio > IOPRIO_NORM) + cfqq->ioprio = IOPRIO_NORM; + } +} + static inline int __cfq_may_queue(struct cfq_queue *cfqq) { if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { @@ -4285,7 +4340,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq) return ELV_MQUEUE_MAY; } -static int cfq_may_queue(struct request_queue *q, int rw) +static int cfq_may_queue(struct request_queue *q, int op, int op_flags) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -4302,9 +4357,10 @@ static int cfq_may_queue(struct request_queue *q, int rw) if (!cic) return ELV_MQUEUE_MAY; - cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); + cfqq = cic_to_cfqq(cic, rw_is_sync(op, op_flags)); if (cfqq) { cfq_init_prio_data(cfqq, cic); + cfqq_boost_on_prio(cfqq, op_flags); return __cfq_may_queue(cfqq); } @@ -4435,9 +4491,10 @@ static void cfq_kick_queue(struct work_struct *work) /* * Timer running if the active_queue is currently idling inside its time slice */ -static void cfq_idle_slice_timer(unsigned long data) +static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer) { - struct cfq_data *cfqd = (struct cfq_data *) data; + struct cfq_data *cfqd = container_of(timer, struct cfq_data, + idle_slice_timer); struct cfq_queue *cfqq; unsigned long flags; int timed_out = 1; @@ -4486,11 +4543,12 @@ out_kick: cfq_schedule_dispatch(cfqd); out_cont: spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); + return HRTIMER_NORESTART; } static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) { - del_timer_sync(&cfqd->idle_slice_timer); + hrtimer_cancel(&cfqd->idle_slice_timer); cancel_work_sync(&cfqd->unplug_work); } @@ -4586,9 +4644,9 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) cfqg_put(cfqd->root_group); spin_unlock_irq(q->queue_lock); - init_timer(&cfqd->idle_slice_timer); + hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); cfqd->idle_slice_timer.function = cfq_idle_slice_timer; - cfqd->idle_slice_timer.data = (unsigned long) cfqd; INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); @@ -4609,7 +4667,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) * we optimistically start assuming sync ops weren't delayed in last * second, in order to have larger depth for async operations. */ - cfqd->last_delayed_sync = jiffies - HZ; + cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC; return 0; out_free: @@ -4652,9 +4710,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count) static ssize_t __FUNC(struct elevator_queue *e, char *page) \ { \ struct cfq_data *cfqd = e->elevator_data; \ - unsigned int __data = __VAR; \ + u64 __data = __VAR; \ if (__CONV) \ - __data = jiffies_to_msecs(__data); \ + __data = div_u64(__data, NSEC_PER_MSEC); \ return cfq_var_show(__data, (page)); \ } SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); @@ -4671,6 +4729,21 @@ SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1); #undef SHOW_FUNCTION +#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct cfq_data *cfqd = e->elevator_data; \ + u64 __data = __VAR; \ + __data = div_u64(__data, NSEC_PER_USEC); \ + return cfq_var_show(__data, (page)); \ +} +USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle); +USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle); +USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]); +USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]); +USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency); +#undef USEC_SHOW_FUNCTION + #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ { \ @@ -4682,7 +4755,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) else if (__data > (MAX)) \ __data = (MAX); \ if (__CONV) \ - *(__PTR) = msecs_to_jiffies(__data); \ + *(__PTR) = (u64)__data * NSEC_PER_MSEC; \ else \ *(__PTR) = __data; \ return ret; \ @@ -4705,6 +4778,26 @@ STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1); #undef STORE_FUNCTION +#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct cfq_data *cfqd = e->elevator_data; \ + unsigned int __data; \ + int ret = cfq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + *(__PTR) = (u64)__data * NSEC_PER_USEC; \ + return ret; \ +} +USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX); +USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX); +USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX); +USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX); +USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX); +#undef USEC_STORE_FUNCTION + #define CFQ_ATTR(name) \ __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) @@ -4715,12 +4808,17 @@ static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(back_seek_max), CFQ_ATTR(back_seek_penalty), CFQ_ATTR(slice_sync), + CFQ_ATTR(slice_sync_us), CFQ_ATTR(slice_async), + CFQ_ATTR(slice_async_us), CFQ_ATTR(slice_async_rq), CFQ_ATTR(slice_idle), + CFQ_ATTR(slice_idle_us), CFQ_ATTR(group_idle), + CFQ_ATTR(group_idle_us), CFQ_ATTR(low_latency), CFQ_ATTR(target_latency), + CFQ_ATTR(target_latency_us), __ATTR_NULL }; @@ -4729,7 +4827,8 @@ static struct elevator_type iosched_cfq = { .elevator_merge_fn = cfq_merge, .elevator_merged_fn = cfq_merged_request, .elevator_merge_req_fn = cfq_merged_requests, - .elevator_allow_merge_fn = cfq_allow_merge, + .elevator_allow_bio_merge_fn = cfq_allow_bio_merge, + .elevator_allow_rq_merge_fn = cfq_allow_rq_merge, .elevator_bio_merged_fn = cfq_bio_merged, .elevator_dispatch_fn = cfq_dispatch_requests, .elevator_add_req_fn = cfq_insert_request, @@ -4776,18 +4875,7 @@ static int __init cfq_init(void) { int ret; - /* - * could be 0 on HZ < 1000 setups - */ - if (!cfq_slice_async) - cfq_slice_async = 1; - if (!cfq_slice_idle) - cfq_slice_idle = 1; - #ifdef CONFIG_CFQ_GROUP_IOSCHED - if (!cfq_group_idle) - cfq_group_idle = 1; - ret = blkcg_policy_register(&blkcg_policy_cfq); if (ret) return ret; diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index d0dd7882d8c7..55e0bb6d7da7 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -137,7 +137,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) if (__rq) { BUG_ON(sector != blk_rq_pos(__rq)); - if (elv_rq_merge_ok(__rq, bio)) { + if (elv_bio_merge_ok(__rq, bio)) { ret = ELEVATOR_FRONT_MERGE; goto out; } @@ -173,7 +173,8 @@ deadline_merged_requests(struct request_queue *q, struct request *req, * and move into next position (next will be deleted) in fifo */ if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { - if (time_before(next->fifo_time, req->fifo_time)) { + if (time_before((unsigned long)next->fifo_time, + (unsigned long)req->fifo_time)) { list_move(&req->queuelist, &next->queuelist); req->fifo_time = next->fifo_time; } @@ -227,7 +228,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) /* * rq is expired! */ - if (time_after_eq(jiffies, rq->fifo_time)) + if (time_after_eq(jiffies, (unsigned long)rq->fifo_time)) return 1; return 0; diff --git a/block/elevator.c b/block/elevator.c index c3555c9c672f..7096c22041e7 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -53,13 +53,13 @@ static LIST_HEAD(elv_list); * Query io scheduler to see if the current process issuing bio may be * merged with rq. */ -static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) +static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) { struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_allow_merge_fn) - return e->type->ops.elevator_allow_merge_fn(q, rq, bio); + if (e->type->ops.elevator_allow_bio_merge_fn) + return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio); return 1; } @@ -67,17 +67,17 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) /* * can we safely merge with this request? */ -bool elv_rq_merge_ok(struct request *rq, struct bio *bio) +bool elv_bio_merge_ok(struct request *rq, struct bio *bio) { if (!blk_rq_merge_ok(rq, bio)) - return 0; + return false; - if (!elv_iosched_allow_merge(rq, bio)) - return 0; + if (!elv_iosched_allow_bio_merge(rq, bio)) + return false; - return 1; + return true; } -EXPORT_SYMBOL(elv_rq_merge_ok); +EXPORT_SYMBOL(elv_bio_merge_ok); static struct elevator_type *elevator_find(const char *name) { @@ -366,8 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); - if ((rq->cmd_flags & REQ_DISCARD) != - (pos->cmd_flags & REQ_DISCARD)) + if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD)) break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; @@ -426,7 +425,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) /* * First try one-hit cache. */ - if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) { + if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) { ret = blk_try_merge(q->last_merge, bio); if (ret != ELEVATOR_NO_MERGE) { *req = q->last_merge; @@ -441,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) * See if our hash lookup can find a potential backmerge. */ __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); - if (__rq && elv_rq_merge_ok(__rq, bio)) { + if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; return ELEVATOR_BACK_MERGE; } @@ -717,12 +716,12 @@ void elv_put_request(struct request_queue *q, struct request *rq) e->type->ops.elevator_put_req_fn(rq); } -int elv_may_queue(struct request_queue *q, int rw) +int elv_may_queue(struct request_queue *q, int op, int op_flags) { struct elevator_queue *e = q->elevator; if (e->type->ops.elevator_may_queue_fn) - return e->type->ops.elevator_may_queue_fn(q, rw); + return e->type->ops.elevator_may_queue_fn(q, op, op_flags); return ELV_MQUEUE_MAY; } diff --git a/block/genhd.c b/block/genhd.c index 9f42526b4d62..3c9dede4e04f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -506,7 +506,7 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void register_disk(struct gendisk *disk) +static void register_disk(struct device *parent, struct gendisk *disk) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -514,7 +514,7 @@ static void register_disk(struct gendisk *disk) struct hd_struct *part; int err; - ddev->parent = disk->driverfs_dev; + ddev->parent = parent; dev_set_name(ddev, "%s", disk->disk_name); @@ -573,7 +573,8 @@ exit: } /** - * add_disk - add partitioning information to kernel list + * device_add_disk - add partitioning information to kernel list + * @parent: parent device for the disk * @disk: per-device partitioning information * * This function registers the partitioning information in @disk @@ -581,7 +582,7 @@ exit: * * FIXME: error handling */ -void add_disk(struct gendisk *disk) +void device_add_disk(struct device *parent, struct gendisk *disk) { struct backing_dev_info *bdi; dev_t devt; @@ -617,7 +618,7 @@ void add_disk(struct gendisk *disk) blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); - register_disk(disk); + register_disk(parent, disk); blk_register_queue(disk); /* @@ -633,7 +634,7 @@ void add_disk(struct gendisk *disk) disk_add_events(disk); blk_integrity_add(disk); } -EXPORT_SYMBOL(add_disk); +EXPORT_SYMBOL(device_add_disk); void del_gendisk(struct gendisk *disk) { @@ -799,10 +800,9 @@ void __init printk_all_partitions(void) , disk_name(disk, part->partno, name_buf), part->info ? part->info->uuid : ""); if (is_part0) { - if (disk->driverfs_dev != NULL && - disk->driverfs_dev->driver != NULL) + if (dev->parent && dev->parent->driver) printk(" driver: %s\n", - disk->driverfs_dev->driver->name); + dev->parent->driver->name); else printk(" (driver?)\n"); } else @@ -1523,12 +1523,7 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) if (--ev->block) goto out_unlock; - /* - * Not exactly a latency critical operation, set poll timer - * slack to 25% and kick event check. - */ intv = disk_events_poll_jiffies(disk); - set_timer_slack(&ev->dwork.timer, intv / 4); if (check_now) queue_delayed_work(system_freezable_power_efficient_wq, &ev->dwork, 0); diff --git a/block/ioprio.c b/block/ioprio.c index cc7800e9eb44..01b8116298a1 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -150,8 +150,10 @@ static int get_task_ioprio(struct task_struct *p) if (ret) goto out; ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM); + task_lock(p); if (p->io_context) ret = p->io_context->ioprio; + task_unlock(p); out: return ret; } diff --git a/block/partition-generic.c b/block/partition-generic.c index d7eb77e1e3a8..71d9ed9df8da 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -495,7 +495,6 @@ rescan: /* add partitions */ for (p = 1; p < state->limit; p++) { sector_t size, from; - struct partition_meta_info *info = NULL; size = state->parts[p].size; if (!size) @@ -530,8 +529,6 @@ rescan: } } - if (state->parts[p].has_info) - info = &state->parts[p].info; part = add_partition(disk, p, from, size, state->parts[p].flags, &state->parts[p].info); diff --git a/block/partitions/atari.c b/block/partitions/atari.c index 9875b05e80a2..ff1fb93712c1 100644 --- a/block/partitions/atari.c +++ b/block/partitions/atari.c @@ -42,6 +42,13 @@ int atari_partition(struct parsed_partitions *state) int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ #endif + /* + * ATARI partition scheme supports 512 lba only. If this is not + * the case, bail early to avoid miscalculating hd_size. + */ + if (bdev_logical_block_size(state->bdev) != 512) + return 0; + rs = read_part_sector(state, 0, §); if (!rs) return -1; |