From 34053979fb1d923217685cf166349f1899980581 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 21 Feb 2009 11:16:36 +0100 Subject: block: cleanup bio_alloc_bioset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this warning (which got fixed by commit b2bf968): fs/bio.c: In function ‘bio_alloc_bioset’: fs/bio.c:305: warning: ‘p’ may be used uninitialized in this function Triggered because the code flow in bio_alloc_bioset() is correct but a bit complex for the compiler to see through. Streamline it a bit - this also makes the code a tiny bit more compact: text data bss dec hex filename 7540 256 40 7836 1e9c bio.o.before 7539 256 40 7835 1e9b bio.o.after Also remove an older compiler-warnings annotation from this function, it's not needed. Signed-off-by: Ingo Molnar Signed-off-by: Jens Axboe --- fs/bio.c | 71 +++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index d4f06327c810..cef6258b8943 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -301,48 +301,51 @@ void bio_init(struct bio *bio) **/ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { + struct bio_vec *bvl = NULL; struct bio *bio = NULL; - void *uninitialized_var(p); + unsigned long idx = 0; + void *p = NULL; if (bs) { p = mempool_alloc(bs->bio_pool, gfp_mask); - - if (p) - bio = p + bs->front_pad; - } else + if (!p) + goto err; + bio = p + bs->front_pad; + } else { bio = kmalloc(sizeof(*bio), gfp_mask); + if (!bio) + goto err; + } - if (likely(bio)) { - struct bio_vec *bvl = NULL; - - bio_init(bio); - if (likely(nr_iovecs)) { - unsigned long uninitialized_var(idx); - - if (nr_iovecs <= BIO_INLINE_VECS) { - idx = 0; - bvl = bio->bi_inline_vecs; - nr_iovecs = BIO_INLINE_VECS; - } else { - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, - bs); - nr_iovecs = bvec_nr_vecs(idx); - } - if (unlikely(!bvl)) { - if (bs) - mempool_free(p, bs->bio_pool); - 
else - kfree(bio); - bio = NULL; - goto out; - } - bio->bi_flags |= idx << BIO_POOL_OFFSET; - bio->bi_max_vecs = nr_iovecs; - } - bio->bi_io_vec = bvl; + bio_init(bio); + + if (unlikely(!nr_iovecs)) + goto out_set; + + if (nr_iovecs <= BIO_INLINE_VECS) { + bvl = bio->bi_inline_vecs; + nr_iovecs = BIO_INLINE_VECS; + } else { + bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + if (unlikely(!bvl)) + goto err_free; + + nr_iovecs = bvec_nr_vecs(idx); } -out: + bio->bi_flags |= idx << BIO_POOL_OFFSET; + bio->bi_max_vecs = nr_iovecs; +out_set: + bio->bi_io_vec = bvl; + return bio; + +err_free: + if (bs) + mempool_free(p, bs->bio_pool); + else + kfree(bio); +err: + return NULL; } struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) -- cgit v1.2.3 From a7fcd37cdcb47806fb8a9070f006ee34061defa6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 5 Dec 2008 16:10:29 +0100 Subject: block: don't create bio_vec slabs of less than the inline number If we don't have CONFIG_BLK_DEV_INTEGRITY set, then we don't have any external dependencies on the bio_vec slabs. So don't create the ones that we will inline anyway. Signed-off-by: Jens Axboe --- fs/bio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/bio.c b/fs/bio.c index cef6258b8943..9cc1430b4495 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1589,6 +1589,13 @@ static void __init biovec_init_slabs(void) int size; struct biovec_slab *bvs = bvec_slabs + i; +#ifndef CONFIG_BLK_DEV_INTEGRITY + if (bvs->nr_vecs <= BIO_INLINE_VECS) { + bvs->slab = NULL; + continue; + } +#endif + size = bvs->nr_vecs * sizeof(struct bio_vec); bvs->slab = kmem_cache_create(bvs->name, size, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); -- cgit v1.2.3 From 10cbda97e73c7d537d7174eadb2d098484f8f1da Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 Feb 2009 20:14:20 +0100 Subject: cciss: add BUILD_BUG_ON() for catching bad CommandList_struct alignment The hardware requires 64-bit alignment of commands, so add a build bug check for that. 
The recent commit 8a3173de4ab4cdacc43675dc5c077f9a5bf17f5f didn't change the size of the command, but other additions/changes may and thus break badly at runtime. Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 4f9b6d792017..5d0e135824f9 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3898,6 +3898,13 @@ static struct pci_driver cciss_pci_driver = { */ static int __init cciss_init(void) { + /* + * The hardware requires that commands are aligned on a 64-bit + * boundary. Given that we use pci_alloc_consistent() to allocate an + * array of them, the size must be a multiple of 8 bytes. + */ + BUILD_BUG_ON(sizeof(CommandList_struct) % 8); + printk(KERN_INFO DRIVER_NAME "\n"); /* Register for our PCI devices */ -- cgit v1.2.3 From f3b144aa7f2861e1024682af3bf3dbf1c29184b9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Mar 2009 08:48:33 +0100 Subject: block: remove various blk_queue_*() setting functions in blk_init_queue_node() It calls blk_queue_make_request(), which sets the identical set of limits. 
Signed-off-by: Jens Axboe --- block/blk-core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 29bcfac6c688..5e14b3f4510f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -603,13 +603,10 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->queue_flags = QUEUE_FLAG_DEFAULT; q->queue_lock = lock; - blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK); - + /* + * This also sets hw/phys segments, boundary and size + */ blk_queue_make_request(q, __make_request); - blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); - - blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); - blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); q->sg_reserved_size = INT_MAX; -- cgit v1.2.3 From 50e174931051bf4849cd7931667bb0a4d681ff60 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Mar 2009 11:12:17 +0100 Subject: block: get rid of unused blkdev_free_rq() define Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 5e14b3f4510f..7b63c9b6333d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -732,7 +732,6 @@ static void freed_request(struct request_queue *q, int rw, int priv) __freed_request(q, rw ^ 1); } -#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* * Get a free request, queue_lock must be held. * Returns NULL on failure, with queue_lock held. -- cgit v1.2.3 From 32ca163c9cdb33151d79e95a7cf244f62b5d4418 Mon Sep 17 00:00:00 2001 From: Petros Koutoupis Date: Tue, 10 Mar 2009 08:25:54 +0100 Subject: block: genhd.h comment needs updating The include/linux/genhd.h file, on line 338-352 declares some function prototypes in which the comment on line 338 states that the definition of these prototypes are to be found at drivers/block/genhd.c. The problem is that genhd.c has been relocated to block/genhd.c. See attached patch to correct this minor cosmetic typo. 
Signed-off-by: Petros Koutoupis Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 16948eaecae3..56946b21ab78 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -336,7 +336,7 @@ static inline void part_dec_in_flight(struct hd_struct *part) /* drivers/block/ll_rw_blk.c */ extern void part_round_stats(int cpu, struct hd_struct *part); -/* drivers/block/genhd.c */ +/* block/genhd.c */ extern int get_blkdev_list(char *, int); extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); -- cgit v1.2.3 From 6d2a78e783416ba99e36beb1d4395b785b34e867 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 10 Mar 2009 08:27:39 +0100 Subject: block: add private bio_set for bio integrity allocations The integrity bio allocation needs its own bio_set to avoid violating the mempool allocation rules and risking deadlocks. Signed-off-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 85 ++++++++++++++++------------------------------------- fs/bio.c | 9 ++---- include/linux/bio.h | 18 +++--------- 3 files changed, 32 insertions(+), 80 deletions(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index fe2b1aa2464e..31c46a241bac 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -26,23 +26,23 @@ #include static struct kmem_cache *bio_integrity_slab __read_mostly; +static mempool_t *bio_integrity_pool; +static struct bio_set *integrity_bio_set; static struct workqueue_struct *kintegrityd_wq; /** - * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio + * bio_integrity_alloc - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to * @gfp_mask: Memory allocation mask * @nr_vecs: Number of integrity metadata scatter-gather elements - * @bs: bio_set to allocate from * * Description: This function prepares a bio for attaching integrity * metadata. nr_vecs specifies the maximum number of pages containing * integrity metadata that can be attached. 
*/ -struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, - gfp_t gfp_mask, - unsigned int nr_vecs, - struct bio_set *bs) +struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs) { struct bio_integrity_payload *bip; struct bio_vec *iv; @@ -50,7 +50,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, BUG_ON(bio == NULL); - bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); + bip = mempool_alloc(bio_integrity_pool, gfp_mask); if (unlikely(bip == NULL)) { printk(KERN_ERR "%s: could not alloc bip\n", __func__); return NULL; @@ -58,10 +58,10 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, memset(bip, 0, sizeof(*bip)); - iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs); + iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set); if (unlikely(iv == NULL)) { printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); - mempool_free(bip, bs->bio_integrity_pool); + mempool_free(bip, bio_integrity_pool); return NULL; } @@ -72,35 +72,16 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, return bip; } -EXPORT_SYMBOL(bio_integrity_alloc_bioset); - -/** - * bio_integrity_alloc - Allocate integrity payload and attach it to bio - * @bio: bio to attach integrity metadata to - * @gfp_mask: Memory allocation mask - * @nr_vecs: Number of integrity metadata scatter-gather elements - * - * Description: This function prepares a bio for attaching integrity - * metadata. nr_vecs specifies the maximum number of pages containing - * integrity metadata that can be attached. 
- */ -struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, - gfp_t gfp_mask, - unsigned int nr_vecs) -{ - return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); -} EXPORT_SYMBOL(bio_integrity_alloc); /** * bio_integrity_free - Free bio integrity payload * @bio: bio containing bip to be freed - * @bs: bio_set this bio was allocated from * * Description: Used to free the integrity portion of a bio. Usually * called from bio_free(). */ -void bio_integrity_free(struct bio *bio, struct bio_set *bs) +void bio_integrity_free(struct bio *bio) { struct bio_integrity_payload *bip = bio->bi_integrity; @@ -111,8 +92,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) && bip->bip_buf != NULL) kfree(bip->bip_buf); - bvec_free_bs(bs, bip->bip_vec, bip->bip_pool); - mempool_free(bip, bs->bio_integrity_pool); + bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); + mempool_free(bip, bio_integrity_pool); bio->bi_integrity = NULL; } @@ -686,19 +667,17 @@ EXPORT_SYMBOL(bio_integrity_split); * @bio: New bio * @bio_src: Original bio * @gfp_mask: Memory allocation mask - * @bs: bio_set to allocate bip from * * Description: Called to allocate a bip when cloning a bio */ -int bio_integrity_clone(struct bio *bio, struct bio *bio_src, - gfp_t gfp_mask, struct bio_set *bs) +int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) { struct bio_integrity_payload *bip_src = bio_src->bi_integrity; struct bio_integrity_payload *bip; BUG_ON(bip_src == NULL); - bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); + bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); if (bip == NULL) return -EIO; @@ -714,37 +693,25 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, } EXPORT_SYMBOL(bio_integrity_clone); -int bioset_integrity_create(struct bio_set *bs, int pool_size) +static int __init bio_integrity_init(void) { - bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, - 
bio_integrity_slab); - if (!bs->bio_integrity_pool) - return -1; - - return 0; -} -EXPORT_SYMBOL(bioset_integrity_create); + kintegrityd_wq = create_workqueue("kintegrityd"); -void bioset_integrity_free(struct bio_set *bs) -{ - if (bs->bio_integrity_pool) - mempool_destroy(bs->bio_integrity_pool); -} -EXPORT_SYMBOL(bioset_integrity_free); + if (!kintegrityd_wq) + panic("Failed to create kintegrityd\n"); -void __init bio_integrity_init_slab(void) -{ bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, SLAB_HWCACHE_ALIGN|SLAB_PANIC); -} -static int __init integrity_init(void) -{ - kintegrityd_wq = create_workqueue("kintegrityd"); + bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE, + bio_integrity_slab); + if (!bio_integrity_pool) + panic("bio_integrity: can't allocate bip pool\n"); - if (!kintegrityd_wq) - panic("Failed to create kintegrityd\n"); + integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0); + if (!integrity_bio_set) + panic("bio_integrity: can't allocate bio_set\n"); return 0; } -subsys_initcall(integrity_init); +subsys_initcall(bio_integrity_init); diff --git a/fs/bio.c b/fs/bio.c index 9cc1430b4495..a040cde7f6fd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -248,7 +248,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) - bio_integrity_free(bio, bs); + bio_integrity_free(bio); /* * If we have front padding, adjust the bio pointer before freeing @@ -466,7 +466,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) if (bio_integrity(bio)) { int ret; - ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); + ret = bio_integrity_clone(b, bio, gfp_mask); if (ret < 0) { bio_put(b); @@ -1529,7 +1529,6 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); - bioset_integrity_free(bs); biovec_free_pools(bs); bio_put_slab(bs); @@ -1570,9 +1569,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if 
(!bs->bio_pool) goto bad; - if (bioset_integrity_create(bs, pool_size)) - goto bad; - if (!biovec_create_pools(bs, pool_size)) return bs; @@ -1610,7 +1606,6 @@ static int __init init_bio(void) if (!bio_slabs) panic("bio: can't allocate bios\n"); - bio_integrity_init_slab(); biovec_init_slabs(); fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); diff --git a/include/linux/bio.h b/include/linux/bio.h index d8bd43bfdcf5..b05b1d4d17d2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -426,9 +426,6 @@ struct bio_set { unsigned int front_pad; mempool_t *bio_pool; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - mempool_t *bio_integrity_pool; -#endif mempool_t *bvec_pool; }; @@ -519,9 +516,8 @@ static inline int bio_has_data(struct bio *bio) #define bio_integrity(bio) (bio->bi_integrity != NULL) -extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -extern void bio_integrity_free(struct bio *, struct bio_set *); +extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern int bio_integrity_enabled(struct bio *bio); extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); @@ -531,27 +527,21 @@ extern void bio_integrity_endio(struct bio *, int); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); extern void bio_integrity_split(struct bio *, struct bio_pair *, int); -extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *); -extern int bioset_integrity_create(struct bio_set *, int); -extern void bioset_integrity_free(struct bio_set *); -extern void bio_integrity_init_slab(void); +extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); #else /* CONFIG_BLK_DEV_INTEGRITY */ #define bio_integrity(a) (0) 
-#define bioset_integrity_create(a, b) (0) #define bio_integrity_prep(a) (0) #define bio_integrity_enabled(a) (0) -#define bio_integrity_clone(a, b, c,d ) (0) -#define bioset_integrity_free(a) do { } while (0) -#define bio_integrity_free(a, b) do { } while (0) +#define bio_integrity_clone(a, b, c) (0) +#define bio_integrity_free(a) do { } while (0) #define bio_integrity_endio(a, b) do { } while (0) #define bio_integrity_advance(a, b) do { } while (0) #define bio_integrity_trim(a, b, c) do { } while (0) #define bio_integrity_split(a, b, c) do { } while (0) #define bio_integrity_set_tag(a, b, c) do { } while (0) #define bio_integrity_get_tag(a, b, c) do { } while (0) -#define bio_integrity_init_slab(a) do { } while (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v1.2.3 From d399228646e26db315d6233bed65ec9d08c57f57 Mon Sep 17 00:00:00 2001 From: Petros Koutoupis Date: Wed, 11 Mar 2009 10:49:35 +0100 Subject: block: genhd.h cleanup patch In include/linux/genhd.h: Line 335 has a comment that needs to be updated from: /* drivers/block/ll_rw_blk.c */ to /* block/blk-core.c */. Also as of kernel 2.6.16, the function definition for get_blkdev_list was removed from block/genhd.c but the function declaration is still present on line 339. This patch addresses both those fixes, by updating the comment and removing the declaration. 
Signed-off-by: Petros Koutoupis Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 56946b21ab78..634c53028fb8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -333,11 +333,10 @@ static inline void part_dec_in_flight(struct hd_struct *part) part_to_disk(part)->part0.in_flight--; } -/* drivers/block/ll_rw_blk.c */ +/* block/blk-core.c */ extern void part_round_stats(int cpu, struct hd_struct *part); /* block/genhd.c */ -extern int get_blkdev_list(char *, int); extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); -- cgit v1.2.3 From 0061d38642244892e17156f005bd7055fe744644 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 16 Mar 2009 10:06:05 +0100 Subject: cpqarray: enable bus mastering We've been carrying this patch for the last 3 years in Fedora, long past time we got it upstream... Call pci_set_master to enable bus-mastering if the BIOS hasn't done it already. Signed-off-by: Kyle McMartin Signed-off-by: Dave Jones Signed-off-by: Jens Axboe --- drivers/block/cpqarray.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 5d39df14ed90..ca268ca11159 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -617,6 +617,7 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev) int i; c->pci_dev = pdev; + pci_set_master(pdev); if (pci_enable_device(pdev)) { printk(KERN_ERR "cpqarray: Unable to Enable PCI device\n"); return -1; -- cgit v1.2.3 From 05378940caf979a8655c18b18a17213dcfa52412 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 24 Mar 2009 12:23:40 +0100 Subject: bsg: add support for tail queuing Currently inherited from sg.c bsg will submit asynchronous request at the head-of-the-queue, (using "at_head" set in the call to blk_execute_rq_nowait()). 
This is bad in situations where the queues are full: requests will execute out of order, which can cause starvation of the first submitted requests. The sg_io_v4->flags member is used and a bit is allocated to denote the Q_AT_TAIL. Zero is to queue at_head as before, to be compatible with old code at the write/read path. SG_IO code path behavior was changed to be the same as write/read behavior. SG_IO was very rarely used and breaking compatibility with it is OK at this stage. sg_io_hdr at sg.h also has a flags member and uses 3 bits from the first nibble and one bit from the last nibble. Even though none of these bits are supported by bsg, the second nibble is allocated for use by bsg. Just in case. Signed-off-by: Boaz Harrosh CC: Douglas Gilbert Signed-off-by: Jens Axboe --- block/bsg.c | 9 +++++++-- include/linux/bsg.h | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 0ce8806dd0c1..0f63b91d0af6 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -353,6 +353,8 @@ static void bsg_rq_end_io(struct request *rq, int uptodate) static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, struct bsg_command *bc, struct request *rq) { + int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL)); + /* * add bc command to busy queue and submit rq for io */ @@ -368,7 +370,7 @@ static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc); rq->end_io_data = bc; - blk_execute_rq_nowait(q, NULL, rq, 1, bsg_rq_end_io); + blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io); } static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd) @@ -924,6 +926,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct request *rq; struct bio *bio, *bidi_bio = NULL; struct sg_io_v4 hdr; + int at_head; u8 sense[SCSI_SENSE_BUFFERSIZE]; if (copy_from_user(&hdr, uarg, sizeof(hdr))) @@ -936,7 +939,9 @@ static long 
bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) bio = rq->bio; if (rq->next_rq) bidi_bio = rq->next_rq->bio; - blk_execute_rq(bd->queue, NULL, rq, 0); + + at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL)); + blk_execute_rq(bd->queue, NULL, rq, at_head); ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio); if (copy_to_user(uarg, &hdr, sizeof(hdr))) diff --git a/include/linux/bsg.h b/include/linux/bsg.h index cf0303a60611..3f0c64ace424 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -7,6 +7,14 @@ #define BSG_SUB_PROTOCOL_SCSI_TMF 1 #define BSG_SUB_PROTOCOL_SCSI_TRANSPORT 2 +/* + * For flags member below + * sg.h sg_io_hdr also has bits defined for it's flags member. However + * none of these bits are implemented/used by bsg. The bits below are + * allocated to not conflict with sg.h ones anyway. + */ +#define BSG_FLAG_Q_AT_TAIL 0x10 /* default, == 0 at this bit, is Q_AT_HEAD */ + struct sg_io_v4 { __s32 guard; /* [i] 'Q' to differentiate from v3 */ __u32 protocol; /* [i] 0 -> SCSI , .... */ -- cgit v1.2.3 From 68db1961bbf4e16c220ccec4a780e966bc1fece3 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 24 Mar 2009 12:29:54 +0100 Subject: loop: support barrier writes Honour barrier requests in the loopback block device driver. In case of barrier bios, flush the backing file once before processing the barrier and once after to guarantee ordering. For filesystems that do not support fsync, barrier bios are failed with -EOPNOTSUPP. 
Signed-off-by: Nikanth Karthikesan Signed-off-by: Jens Axboe --- drivers/block/loop.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index bf0345577672..9721d100caf1 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -474,10 +474,35 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) int ret; pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (bio_rw(bio) == WRITE) + + if (bio_rw(bio) == WRITE) { + int barrier = bio_barrier(bio); + struct file *file = lo->lo_backing_file; + + if (barrier) { + if (unlikely(!file->f_op->fsync)) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = vfs_fsync(file, file->f_path.dentry, 0); + if (unlikely(ret)) { + ret = -EIO; + goto out; + } + } + ret = lo_send(lo, bio, pos); - else + + if (barrier && !ret) { + ret = vfs_fsync(file, file->f_path.dentry, 0); + if (unlikely(ret)) + ret = -EIO; + } + } else ret = lo_receive(lo, bio, lo->lo_blocksize, pos); + +out: return ret; } @@ -826,6 +851,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_queue->queuedata = lo; lo->lo_queue->unplug_fn = loop_unplug; + if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) + blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN, NULL); + set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << 9); -- cgit v1.2.3 From f028f3b2f987ebc61cef382ab7a5c449917b728e Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 24 Mar 2009 12:33:41 +0100 Subject: loop: fix circular locking in loop_clr_fd() With CONFIG_PROVE_LOCKING enabled $ losetup /dev/loop0 file $ losetup -o 32256 /dev/loop1 /dev/loop0 $ losetup -d /dev/loop1 $ losetup -d /dev/loop0 triggers a [ INFO: possible circular locking dependency detected ] I think this warning is a false positive. Open/close on a loop device acquires bd_mutex of the device before acquiring lo_ctl_mutex of the same device. 
For ioctl(LOOP_CLR_FD) after acquiring lo_ctl_mutex, fput on the backing_file might acquire the bd_mutex of a device, if the backing file is a device and this is the last reference to the file being dropped. But it is guaranteed that it is impossible to have a circular list of backing devices (say, loop2->loop1->loop0->loop2 is not possible), which guarantees that this can never deadlock. So this warning should be suppressed. It is very difficult to annotate lockdep not to warn here in the correct way. A simple way to silence lockdep could be to mark the lo_ctl_mutex in ioctl to be a sub class, but this might mask some other real bugs. @@ -1164,7 +1164,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, struct loop_device *lo = bdev->bd_disk->private_data; int err; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock_nested(&lo->lo_ctl_mutex, 1); switch (cmd) { case LOOP_SET_FD: err = loop_set_fd(lo, mode, bdev, arg); Or actually marking the bd_mutex after lo_ctl_mutex as a sub class could be a better solution. Luckily it is easy to avoid calling fput on backing file with lo_ctl_mutex held, so no lockdep annotation is required. If you do not like the special handling of the lo_ctl_mutex just for the LOOP_CLR_FD ioctl in lo_ioctl(), the mutex handling could be moved inside each of the individual ioctl handlers and I could send you another patch. Signed-off-by: Jens Axboe --- drivers/block/loop.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 9721d100caf1..2621ed2ce6d2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -969,11 +969,18 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) bd_set_size(bdev, 0); mapping_set_gfp_mask(filp->f_mapping, gfp); lo->lo_state = Lo_unbound; - fput(filp); /* This is safe: open() is still holding a reference. 
*/ module_put(THIS_MODULE); if (max_part > 0) ioctl_by_bdev(bdev, BLKRRPART, 0); + mutex_unlock(&lo->lo_ctl_mutex); + /* + * Need not hold lo_ctl_mutex to fput backing file. + * Calling fput holding lo_ctl_mutex triggers a circular + * lock dependency possibility warning as fput can take + * bd_mutex which is usually taken before lo_ctl_mutex. + */ + fput(filp); return 0; } @@ -1191,7 +1198,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, struct loop_device *lo = bdev->bd_disk->private_data; int err; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock_nested(&lo->lo_ctl_mutex, 1); switch (cmd) { case LOOP_SET_FD: err = loop_set_fd(lo, mode, bdev, arg); @@ -1200,7 +1207,10 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, err = loop_change_fd(lo, bdev, arg); break; case LOOP_CLR_FD: + /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ err = loop_clr_fd(lo, bdev); + if (!err) + goto out_unlocked; break; case LOOP_SET_STATUS: err = loop_set_status_old(lo, (struct loop_info __user *) arg); @@ -1218,6 +1228,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } mutex_unlock(&lo->lo_ctl_mutex); + +out_unlocked: return err; } -- cgit v1.2.3 From 1cd96c242a829d52f7a5ae98f554ca9775429685 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 24 Mar 2009 12:35:07 +0100 Subject: block: WARN in __blk_put_request() for potential bio leak Put a WARN_ON in __blk_put_request if it is about to leak bio(s). This is a serious bug that can happen in error handling code paths. For this to work I have fixed a couple of places in block/ where request->bio != NULL ownership was not honored. And a small cleanup at sg_io() while at it. 
Signed-off-by: Boaz Harrosh Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +++ block/blk-merge.c | 2 ++ block/scsi_ioctl.c | 21 ++++----------------- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 7b63c9b6333d..996ed906d8ca 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1062,6 +1062,9 @@ void __blk_put_request(struct request_queue *q, struct request *req) elv_completed_request(q, req); + /* this is a bio leak */ + WARN_ON(req->bio != NULL); + /* * Request may not have originated from ll_rw_blk. if not, * it didn't come out of our reserved rq pools diff --git a/block/blk-merge.c b/block/blk-merge.c index 5a244f05360f..e39cb24b7679 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -403,6 +403,8 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (blk_rq_cpu_valid(next)) req->cpu = next->cpu; + /* owner-ship of bio passed from next to req */ + next->bio = NULL; __blk_put_request(q, next); return 1; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index ee9c67d7e1be..626ee274c5c4 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -214,21 +214,10 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, return 0; } -/* - * unmap a request that was previously mapped to this sg_io_hdr. handles - * both sg and non-sg sg_io_hdr. 
- */ -static int blk_unmap_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr) -{ - blk_rq_unmap_user(rq->bio); - blk_put_request(rq); - return 0; -} - static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, struct bio *bio) { - int r, ret = 0; + int ret = 0; /* * fill in all the output members @@ -253,12 +242,10 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, ret = -EFAULT; } - rq->bio = bio; - r = blk_unmap_sghdr_rq(rq, hdr); - if (ret) - r = ret; + blk_rq_unmap_user(bio); + blk_put_request(rq); - return r; + return ret; } static int sg_io(struct request_queue *q, struct gendisk *bd_disk, -- cgit v1.2.3 From e7cbbf1bf17e3c706f874e867f7b744e1c86fed9 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 24 Mar 2009 12:37:50 +0100 Subject: bsg: Remove bogus check against request_queue->max_sectors bsg submits REQ_TYPE_BLOCK_PC so the right check is max_hw_sectors. But I've removed this check because right after, bsg proceeds with calling blk_rq_map_user() which does all the right checks. Signed-off-by: Boaz Harrosh Signed-off-by: Jens Axboe --- block/bsg.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 0f63b91d0af6..206060e795da 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -218,9 +218,6 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw) if (hdr->guard != 'Q') return -EINVAL; - if (hdr->dout_xfer_len > (q->max_sectors << 9) || - hdr->din_xfer_len > (q->max_sectors << 9)) - return -EIO; switch (hdr->protocol) { case BSG_PROTOCOL_SCSI: -- cgit v1.2.3 From 07e86f405addc6436eb969b8279bb14a6dcacce4 Mon Sep 17 00:00:00 2001 From: Avishay Traeger Date: Tue, 24 Mar 2009 12:40:18 +0100 Subject: block: Repeated lines in switching-sched.txt These lines appear in this file twice - removed one occurrence. 
Signed-off-by: Avishay Traeger Signed-off-by: Jens Axboe --- Documentation/block/switching-sched.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt index 634c952e1964..d5af3f630814 100644 --- a/Documentation/block/switching-sched.txt +++ b/Documentation/block/switching-sched.txt @@ -35,9 +35,3 @@ noop anticipatory deadline [cfq] # echo anticipatory > /sys/block/hda/queue/scheduler # cat /sys/block/hda/queue/scheduler noop [anticipatory] deadline cfq - -Each io queue has a set of io scheduler tunables associated with it. These -tunables control how the io scheduler works. You can find these entries -in: - -/sys/block/<device>/queue/iosched -- cgit v1.2.3 From 26160158d3d3df548f4ee046cc6147fe048cfa9c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Mar 2009 09:35:06 +0100 Subject: Move the default_backing_dev_info out of readahead.c and into backing-dev.c It really makes no sense to have it in readahead.c, so move it where it belongs. 
Signed-off-by: Jens Axboe --- mm/backing-dev.c | 26 +++++++++++++++++++++++++- mm/readahead.c | 25 ------------------------- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8e8587444132..be68c956a660 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -2,11 +2,24 @@ #include #include #include +#include #include #include #include #include +void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) +{ +} +EXPORT_SYMBOL(default_unplug_io_fn); + +struct backing_dev_info default_backing_dev_info = { + .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, + .state = 0, + .capabilities = BDI_CAP_MAP_COPY, + .unplug_io_fn = default_unplug_io_fn, +}; +EXPORT_SYMBOL_GPL(default_backing_dev_info); static struct class *bdi_class; @@ -166,9 +179,20 @@ static __init int bdi_class_init(void) bdi_debug_init(); return 0; } - postcore_initcall(bdi_class_init); +static int __init default_bdi_init(void) +{ + int err; + + err = bdi_init(&default_backing_dev_info); + if (!err) + bdi_register(&default_backing_dev_info, NULL, "default"); + + return err; +} +subsys_initcall(default_bdi_init); + int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...) { diff --git a/mm/readahead.c b/mm/readahead.c index bec83c15a78f..9ce303d4b810 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -17,19 +17,6 @@ #include #include -void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) -{ -} -EXPORT_SYMBOL(default_unplug_io_fn); - -struct backing_dev_info default_backing_dev_info = { - .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, - .state = 0, - .capabilities = BDI_CAP_MAP_COPY, - .unplug_io_fn = default_unplug_io_fn, -}; -EXPORT_SYMBOL_GPL(default_backing_dev_info); - /* * Initialise a struct file's readahead state. Assumes that the caller has * memset *ra to zero. 
@@ -233,18 +220,6 @@ unsigned long max_sane_readahead(unsigned long nr) + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); } -static int __init readahead_init(void) -{ - int err; - - err = bdi_init(&default_backing_dev_info); - if (!err) - bdi_register(&default_backing_dev_info, NULL, "default"); - - return err; -} -subsys_initcall(readahead_init); - /* * Submit IO for the read-ahead request in file_ra_state. */ -- cgit v1.2.3 From 6933c02e9cc47c2df3c016621a013ec79fb4203f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Mar 2009 09:36:37 +0100 Subject: btrfs: get rid of current_is_pdflush() in btrfs_btree_balance_dirty Chris says it's safe to kill. Acked-by: Chris Mason Signed-off-by: Jens Axboe --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3e18175248e0..6ec80c0fc869 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2385,7 +2385,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) unsigned long thresh = 32 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; - if (current_is_pdflush() || current->flags & PF_MEMALLOC) + if (current->flags & PF_MEMALLOC) return; num_dirty = count_range_bits(tree, &start, (u64)-1, -- cgit v1.2.3 From a2a9537ac0b37a5da6fbe7e1e9cb06c524d2a9c4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Mar 2009 09:38:40 +0100 Subject: Get rid of pdflush_operation() in emergency sync and remount Opencode a cheesy approach with kevent. The idea here is that we'll add some generic delayed work infrastructure, which probably won't be based on pdflush (or maybe it will, in which case we can just add it back). This is in preparation for getting rid of pdflush completely. 
Signed-off-by: Jens Axboe --- fs/super.c | 11 +++++++++-- fs/sync.c | 14 +++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/super.c b/fs/super.c index 6ce501447ada..dd4acb158b5e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -674,7 +674,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) return 0; } -static void do_emergency_remount(unsigned long foo) +static void do_emergency_remount(struct work_struct *work) { struct super_block *sb; @@ -697,12 +697,19 @@ static void do_emergency_remount(unsigned long foo) spin_lock(&sb_lock); } spin_unlock(&sb_lock); + kfree(work); printk("Emergency Remount complete\n"); } void emergency_remount(void) { - pdflush_operation(do_emergency_remount, 0); + struct work_struct *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + INIT_WORK(work, do_emergency_remount); + schedule_work(work); + } } /* diff --git a/fs/sync.c b/fs/sync.c index a16d53e5fe9d..ec95a69d17aa 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -42,9 +42,21 @@ SYSCALL_DEFINE0(sync) return 0; } +static void do_sync_work(struct work_struct *work) +{ + do_sync(0); + kfree(work); +} + void emergency_sync(void) { - pdflush_operation(do_sync, 0); + struct work_struct *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + INIT_WORK(work, do_sync_work); + schedule_work(work); + } } /* -- cgit v1.2.3