From 5df97b91b5d7ed426034fcc84cb6e7cf682b8838 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:20:02 +0200 Subject: drop vmerge accounting Remove hw_segments field from struct bio and struct request. Without virtual merge accounting they have no purpose. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- drivers/md/raid1.c | 3 --- drivers/md/raid10.c | 3 --- 2 files changed, 6 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 03a5ab705c20..28a3869dcfd2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1302,9 +1302,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) sbio->bi_size = r1_bio->sectors << 9; sbio->bi_idx = 0; sbio->bi_phys_segments = 0; - sbio->bi_hw_segments = 0; - sbio->bi_hw_front_size = 0; - sbio->bi_hw_back_size = 0; sbio->bi_flags &= ~(BIO_POOL_MASK - 1); sbio->bi_flags |= 1 << BIO_UPTODATE; sbio->bi_next = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e34cd0e62473..0f40688503e7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1345,9 +1345,6 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) tbio->bi_size = r10_bio->sectors << 9; tbio->bi_idx = 0; tbio->bi_phys_segments = 0; - tbio->bi_hw_segments = 0; - tbio->bi_hw_front_size = 0; - tbio->bi_hw_back_size = 0; tbio->bi_flags &= ~(BIO_POOL_MASK - 1); tbio->bi_flags |= 1 << BIO_UPTODATE; tbio->bi_next = NULL; -- cgit v1.2.3 From 960e739d9e9f1c2346d8bdc65299ee2e1ed42218 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2008 10:41:18 +0200 Subject: block: raid fixups for removal of bi_hw_segments Signed-off-by: Jens Axboe --- drivers/md/raid1.c | 1 - drivers/md/raid10.c | 1 - drivers/md/raid5.c | 66 +++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 51 insertions(+), 17 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 28a3869dcfd2..0b82030c265d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1787,7 +1787,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; bio->bi_size = 0; bio->bi_end_io = NULL; bio->bi_private = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0f40688503e7..d3b9aa096285 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1944,7 +1944,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; bio->bi_size = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 224de022e7c5..05b22925cce4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -101,6 +101,40 @@ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); #endif +/* + * We maintain a biased count of active stripes in the bottom 8 bits of + * bi_phys_segments, and a count of processed stripes in the upper 8 bits + */ +static inline int raid5_bi_phys_segments(struct bio *bio) +{ + return bio->bi_phys_segments & 0xff; +} + +static inline int raid5_bi_hw_segments(struct bio *bio) +{ + return (bio->bi_phys_segments >> 8) & 0xff; +} + +static inline int raid5_dec_bi_phys_segments(struct bio *bio) +{ + --bio->bi_phys_segments; + return raid5_bi_phys_segments(bio); +} + +static inline int raid5_dec_bi_hw_segments(struct bio *bio) +{ + unsigned short val = raid5_bi_hw_segments(bio); + + --val; + bio->bi_phys_segments = (val << 8) | raid5_bi_phys_segments(bio); + return val; +} + +static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) +{ + bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 8); +} + static inline int raid6_next_disk(int disk, int raid_disks) { disk++; @@ -507,7 +541,7 @@ static void ops_complete_biofill(void *stripe_head_ref) while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { rbi2 = r5_next_bio(rbi, dev->sector); - if (--rbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(rbi)) { rbi->bi_next = return_bi; return_bi = rbi; } @@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in if (*bip) bi->bi_next = *bip; *bip = bi; - bi->bi_phys_segments ++; + bi->bi_phys_segments++; spin_unlock_irq(&conf->device_lock); spin_unlock(&sh->lock); @@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; *return_bi = bi; @@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; *return_bi = bi; @@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { bi->bi_next = *return_bi; *return_bi = bi; } @@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(raid5_conf_t *conf, while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); - if (--wbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(wbi)) { md_write_end(conf->mddev); wbi->bi_next = *return_bi; *return_bi = wbi; @@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) copy_data(0, rbi, dev->page, dev->sector); rbi2 = r5_next_bio(rbi, dev->sector); spin_lock_irq(&conf->device_lock); - if (--rbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(rbi)) { rbi->bi_next = return_bi; return_bi = rbi; } @@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf) if(bi) { conf->retry_read_aligned_list = bi->bi_next; bi->bi_next = NULL; + /* + * this sets the active strip count to 1 and the processed + * strip count to zero (upper 8 bits) + */ bi->bi_phys_segments = 1; /* biased count of active stripes */ - bi->bi_hw_segments = 0; /* count of processed stripes */ } return bi; @@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi) if ((bi->bi_size>>9) > q->max_sectors) return 0; blk_recount_segments(q, bi); - if (bi->bi_phys_segments > q->max_phys_segments || - bi->bi_hw_segments > q->max_hw_segments) + if (bi->bi_phys_segments > q->max_phys_segments) return 0; if (q->merge_bvec_fn) @@ -3468,7 +3504,7 @@ static int make_request(struct request_queue *q, struct bio * bi) } spin_lock_irq(&conf->device_lock); - remaining = --bi->bi_phys_segments; + remaining = raid5_dec_bi_phys_segments(bi); spin_unlock_irq(&conf->device_lock); if (remaining == 0) { @@ -3752,7 +3788,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) sector += STRIPE_SECTORS, scnt++) { - if (scnt < raid_bio->bi_hw_segments) + if (scnt < raid5_bi_hw_segments(raid_bio)) /* already done this stripe */ continue; @@ -3760,7 +3796,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) if (!sh) { /* failed to get a stripe - must wait */ - raid_bio->bi_hw_segments = scnt; + raid5_set_bi_hw_segments(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; } @@ -3768,7 +3804,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) set_bit(R5_ReadError, &sh->dev[dd_idx].flags); if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { release_stripe(sh); - raid_bio->bi_hw_segments = scnt; + raid5_set_bi_hw_segments(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; } @@ -3778,7 +3814,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) handled++; } spin_lock_irq(&conf->device_lock); - remaining = --raid_bio->bi_phys_segments; + remaining = raid5_dec_bi_phys_segments(raid_bio); spin_unlock_irq(&conf->device_lock); if (remaining == 0) bio_endio(raid_bio, 0); -- cgit v1.2.3 From 5b99c2ffa980528a197f26c7d876cceeccce8dd5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2008 10:56:11 +0200 Subject: block: make bi_phys_segments an unsigned int instead of short raid5 can overflow with more than 255 stripes, and we can increase it to an int for free on both 32 and 64-bit archs due to the padding. Signed-off-by: Jens Axboe --- drivers/md/raid5.c | 12 ++++++------ include/linux/bio.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 05b22925cce4..37e546528f9c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -102,17 +102,17 @@ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); #endif /* - * We maintain a biased count of active stripes in the bottom 8 bits of - * bi_phys_segments, and a count of processed stripes in the upper 8 bits + * We maintain a biased count of active stripes in the bottom 16 bits of + * bi_phys_segments, and a count of processed stripes in the upper 16 bits */ static inline int raid5_bi_phys_segments(struct bio *bio) { - return bio->bi_phys_segments & 0xff; + return bio->bi_phys_segments & 0xffff; } static inline int raid5_bi_hw_segments(struct bio *bio) { - return (bio->bi_phys_segments >> 8) & 0xff; + return (bio->bi_phys_segments >> 16) & 0xffff; } static inline int raid5_dec_bi_phys_segments(struct bio *bio) @@ -126,13 +126,13 @@ static inline int raid5_dec_bi_hw_segments(struct bio *bio) unsigned short val = raid5_bi_hw_segments(bio); --val; - bio->bi_phys_segments = (val << 8) | raid5_bi_phys_segments(bio); + bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); return val; } static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) { - bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 8); + bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); } static inline int raid6_next_disk(int disk, int raid_disks) diff --git a/include/linux/bio.h b/include/linux/bio.h index dfc3556d311c..2c0c09034fd2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -75,7 +75,7 @@ struct bio { /* Number of segments in this BIO after * physical address coalescing is performed. */ - unsigned short bi_phys_segments; + unsigned int bi_phys_segments; unsigned int bi_size; /* residual I/O count */ -- cgit v1.2.3 From f331c0296f2a9fee0d396a70598b954062603015 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:48 +0200 Subject: block: don't depend on consecutive minor space * Implement disk_devt() and part_devt() and use them to directly access devt instead of computing it from ->major and ->first_minor. Note that all references to ->major and ->first_minor outside of block layer is used to determine devt of the disk (the part0) and as ->major and ->first_minor will continue to represent devt for the disk, converting these users aren't strictly necessary. However, convert them for consistency. * Implement disk_max_parts() to avoid directly deferencing genhd->minors. * Update bdget_disk() such that it doesn't assume consecutive minor space. * Move devt computation from register_disk() to add_disk() and make it the only one (all other usages use the initially determined value). These changes clean up the code and will help disk->part dereference fix and extended block device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 107 +++++++++++++++++++++++++----------- block/ioctl.c | 6 +- drivers/block/pktcdvd.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/char/random.c | 6 +- drivers/md/dm-ioctl.c | 4 +- drivers/md/dm-stripe.c | 4 +- drivers/md/dm.c | 7 ++- drivers/memstick/core/mspro_block.c | 2 +- drivers/mmc/card/block.c | 2 +- drivers/s390/block/dasd_proc.c | 3 +- drivers/s390/block/dcssblk.c | 4 +- drivers/scsi/sr.c | 2 +- fs/block_dev.c | 2 +- fs/partitions/check.c | 19 ++++--- include/linux/genhd.h | 27 +++++++-- 16 files changed, 132 insertions(+), 67 deletions(-) (limited to 'drivers/md') diff --git a/block/genhd.c b/block/genhd.c index dc9ad4c171e2..fa32d09fda24 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -186,13 +186,14 @@ void add_disk(struct gendisk *disk) int retval; disk->flags |= GENHD_FL_UP; - blk_register_region(MKDEV(disk->major, disk->first_minor), - disk->minors, NULL, exact_match, exact_lock, disk); + disk->dev.devt = MKDEV(disk->major, disk->first_minor); + blk_register_region(disk_devt(disk), disk->minors, NULL, + exact_match, exact_lock, disk); register_disk(disk); blk_register_queue(disk); bdi = &disk->queue->backing_dev_info; - bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); + bdi_register_dev(bdi, disk_devt(disk)); retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); WARN_ON(retval); } @@ -205,8 +206,7 @@ void unlink_gendisk(struct gendisk *disk) sysfs_remove_link(&disk->dev.kobj, "bdi"); bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); - blk_unregister_region(MKDEV(disk->major, disk->first_minor), - disk->minors); + blk_unregister_region(disk_devt(disk), disk->minors); } /** @@ -225,6 +225,38 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) return kobj ? dev_to_disk(dev) : NULL; } +/** + * bdget_disk - do bdget() by gendisk and partition number + * @disk: gendisk of interest + * @partno: partition number + * + * Find partition @partno from @disk, do bdget() on it. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * Resulting block_device on success, NULL on failure. + */ +extern struct block_device *bdget_disk(struct gendisk *disk, int partno) +{ + dev_t devt = MKDEV(0, 0); + + if (partno == 0) + devt = disk_devt(disk); + else { + struct hd_struct *part = disk->part[partno - 1]; + + if (part && part->nr_sects) + devt = part_devt(part); + } + + if (likely(devt != MKDEV(0, 0))) + return bdget(devt); + return NULL; +} +EXPORT_SYMBOL(bdget_disk); + /* * print a full list of all partitions - intended for places where the root * filesystem can't be mounted and thus to give the victim some idea of what @@ -255,7 +287,7 @@ void __init printk_all_partitions(void) * option takes. */ printk("%02x%02x %10llu %s", - disk->major, disk->first_minor, + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)), (unsigned long long)get_capacity(disk) >> 1, disk_name(disk, 0, buf)); if (disk->driverfs_dev != NULL && @@ -266,15 +298,15 @@ void __init printk_all_partitions(void) printk(" (driver?)\n"); /* now show the partitions */ - for (n = 0; n < disk->minors - 1; ++n) { - if (disk->part[n] == NULL) - continue; - if (disk->part[n]->nr_sects == 0) + for (n = 0; n < disk_max_parts(disk); ++n) { + struct hd_struct *part = disk->part[n]; + + if (!part || !part->nr_sects) continue; printk(" %02x%02x %10llu %s\n", - disk->major, n + 1 + disk->first_minor, - (unsigned long long)disk->part[n]->nr_sects >> 1, - disk_name(disk, n + 1, buf)); + MAJOR(part_devt(part)), MINOR(part_devt(part)), + (unsigned long long)part->nr_sects >> 1, + disk_name(disk, part->partno, buf)); } } class_dev_iter_exit(&iter); @@ -343,26 +375,27 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || - (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) + if (!get_capacity(sgp) || (!disk_max_parts(sgp) && + (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) return 0; /* show the full disk and all non-0 size partitions of it */ seq_printf(seqf, "%4d %4d %10llu %s\n", - sgp->major, sgp->first_minor, + MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), (unsigned long long)get_capacity(sgp) >> 1, disk_name(sgp, 0, buf)); - for (n = 0; n < sgp->minors - 1; n++) { - if (!sgp->part[n]) + for (n = 0; n < disk_max_parts(sgp); n++) { + struct hd_struct *part = sgp->part[n]; + if (!part) continue; - if (sgp->part[n]->nr_sects == 0) + if (part->nr_sects == 0) continue; seq_printf(seqf, "%4d %4d %10llu %s\n", - sgp->major, n + 1 + sgp->first_minor, - (unsigned long long)sgp->part[n]->nr_sects >> 1 , - disk_name(sgp, n + 1, buf)); + MAJOR(part_devt(part)), MINOR(part_devt(part)), + (unsigned long long)part->nr_sects >> 1, + disk_name(sgp, part->partno, buf)); } return 0; @@ -578,7 +611,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_round_stats(gp); preempt_enable(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - gp->major, gp->first_minor, disk_name(gp, 0, buf), + MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), + disk_name(gp, 0, buf), disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), (unsigned long long)disk_stat_read(gp, sectors[0]), jiffies_to_msecs(disk_stat_read(gp, ticks[0])), @@ -590,7 +624,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); /* now show all non-0 size partitions of it */ - for (n = 0; n < gp->minors - 1; n++) { + for (n = 0; n < disk_max_parts(gp); n++) { struct hd_struct *hd = gp->part[n]; if (!hd || !hd->nr_sects) @@ -601,8 +635,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) preempt_enable(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", - gp->major, n + gp->first_minor + 1, - disk_name(gp, n + 1, buf), + MAJOR(part_devt(hd)), MINOR(part_devt(hd)), + disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[0]), part_stat_read(hd, merges[0]), (unsigned long long)part_stat_read(hd, sectors[0]), @@ -661,11 +695,22 @@ dev_t blk_lookup_devt(const char *name, int partno) while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); - if (!strcmp(dev->bus_id, name) && partno < disk->minors) { - devt = MKDEV(MAJOR(dev->devt), - MINOR(dev->devt) + partno); - break; + if (strcmp(dev->bus_id, name)) + continue; + if (partno < 0 || partno > disk_max_parts(disk)) + continue; + + if (partno == 0) + devt = disk_devt(disk); + else { + struct hd_struct *part = disk->part[partno - 1]; + + if (!part || !part->nr_sects) + continue; + + devt = part_devt(part); } + break; } class_dev_iter_exit(&iter); return devt; @@ -755,7 +800,7 @@ void set_disk_ro(struct gendisk *disk, int flag) { int i; disk->policy = flag; - for (i = 0; i < disk->minors - 1; i++) + for (i = 0; i < disk_max_parts(disk); i++) if (disk->part[i]) disk->part[i]->policy = flag; } diff --git a/block/ioctl.c b/block/ioctl.c index d77f5e280a6e..403f7d7e0c28 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -29,7 +29,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user if (bdev != bdev->bd_contains) return -EINVAL; partno = p.pno; - if (partno <= 0 || partno >= disk->minors) + if (partno <= 0 || partno > disk_max_parts(disk)) return -EINVAL; switch (a.op) { case BLKPG_ADD_PARTITION: @@ -47,7 +47,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user mutex_lock(&bdev->bd_mutex); /* overlap? */ - for (i = 0; i < disk->minors - 1; i++) { + for (i = 0; i < disk_max_parts(disk); i++) { struct hd_struct *s = disk->part[i]; if (!s) @@ -96,7 +96,7 @@ static int blkdev_reread_part(struct block_device *bdev) struct gendisk *disk = bdev->bd_disk; int res; - if (disk->minors == 1 || bdev != bdev->bd_contains) + if (!disk_max_parts(disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 29b7a648cc6e..e1a90bbb4747 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2911,7 +2911,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) if (!disk->queue) goto out_mem2; - pd->pkt_dev = MKDEV(disk->major, disk->first_minor); + pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) goto out_new_dev; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 4b0d6c7f4c66..936466f62afd 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -541,7 +541,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) struct ps3disk_private *priv = dev->sbd.core.driver_data; mutex_lock(&ps3disk_mask_mutex); - __clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS, + __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, &ps3disk_mask); mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); diff --git a/drivers/char/random.c b/drivers/char/random.c index 7ce1ac4baa6d..6af435b89867 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -661,10 +661,10 @@ void add_disk_randomness(struct gendisk *disk) if (!disk || !disk->random) return; /* first major is 1, so we get >= 0x200 here */ - DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor); + DEBUG_ENT("disk event %d:%d\n", + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk))); - add_timer_randomness(disk->random, - 0x100 + MKDEV(disk->major, disk->first_minor)); + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b262c0042de3..c3de311117a1 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -426,7 +426,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size) old_nl->next = (uint32_t) ((void *) nl - (void *) old_nl); disk = dm_disk(hc->md); - nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + nl->dev = huge_encode_dev(disk_devt(disk)); nl->next = 0; strcpy(nl->name, hc->name); @@ -539,7 +539,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (dm_suspended(md)) param->flags |= DM_SUSPEND_FLAG; - param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + param->dev = huge_encode_dev(disk_devt(disk)); /* * Yes, this will be out of date by the time it gets back diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4de90ab3968b..b745d8ac625b 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -284,8 +284,8 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, memset(major_minor, 0, sizeof(major_minor)); sprintf(major_minor, "%d:%d", - bio->bi_bdev->bd_disk->major, - bio->bi_bdev->bd_disk->first_minor); + MAJOR(disk_devt(bio->bi_bdev->bd_disk)), + MINOR(disk_devt(bio->bi_bdev->bd_disk))); /* * Test to see which stripe drive triggered the event diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ace998ce59f6..a78caad29996 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1146,7 +1146,7 @@ static void unlock_fs(struct mapped_device *md); static void free_dev(struct mapped_device *md) { - int minor = md->disk->first_minor; + int minor = MINOR(disk_devt(md->disk)); if (md->suspended_bdev) { unlock_fs(md); @@ -1267,7 +1267,7 @@ static struct mapped_device *dm_find_md(dev_t dev) md = idr_find(&_minor_idr, minor); if (md && (md == MINOR_ALLOCED || - (dm_disk(md)->first_minor != minor) || + (MINOR(disk_devt(dm_disk(md))) != minor) || test_bit(DMF_FREEING, &md->flags))) { md = NULL; goto out; @@ -1318,7 +1318,8 @@ void dm_put(struct mapped_device *md) if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { map = dm_get_table(md); - idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); + idr_replace(&_minor_idr, MINOR_ALLOCED, + MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); if (!dm_suspended(md)) { diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index d2d2318dafa4..82bf649ef138 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -197,7 +197,7 @@ static int mspro_block_bd_open(struct inode *inode, struct file *filp) static int mspro_block_disk_release(struct gendisk *disk) { struct mspro_block_data *msb = disk->private_data; - int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT; + int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT; mutex_lock(&mspro_block_disk_lock); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index ebc8b9d77613..97156b689e82 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -83,7 +83,7 @@ static void mmc_blk_put(struct mmc_blk_data *md) mutex_lock(&open_lock); md->usage--; if (md->usage == 0) { - int devidx = md->disk->first_minor >> MMC_SHIFT; + int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT; __clear_bit(devidx, dev_use); put_disk(md->disk); diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 03c0e40a92ff..e3b5c4d3036e 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -76,7 +76,8 @@ dasd_devices_show(struct seq_file *m, void *v) /* Print kdev. */ if (block->gdp) seq_printf(m, " at (%3d:%6d)", - block->gdp->major, block->gdp->first_minor); + MAJOR(disk_devt(block->gdp)), + MINOR(disk_devt(block->gdp))); else seq_printf(m, " at (???:??????)"); /* Print device name. */ diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 711b3004b3e6..9481e4a3f76e 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -114,7 +114,7 @@ dcssblk_assign_free_minor(struct dcssblk_dev_info *dev_info) found = 0; // test if minor available list_for_each_entry(entry, &dcssblk_devices, lh) - if (minor == entry->gd->first_minor) + if (minor == MINOR(disk_devt(entry->gd))) found++; if (!found) break; // got unused minor } @@ -397,7 +397,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char goto unload_seg; } sprintf(dev_info->gd->disk_name, "dcssblk%d", - dev_info->gd->first_minor); + MINOR(disk_devt(dev_info->gd))); list_add_tail(&dev_info->lh, &dcssblk_devices); if (!try_module_get(THIS_MODULE)) { diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 27f5bfd1def3..8dbe3798d5fd 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -878,7 +878,7 @@ static void sr_kref_release(struct kref *kref) struct gendisk *disk = cd->disk; spin_lock(&sr_index_lock); - clear_bit(disk->first_minor, sr_index_bits); + clear_bit(MINOR(disk_devt(disk)), sr_index_bits); spin_unlock(&sr_index_lock); unregister_cdrom(&cd->cdi); diff --git a/fs/block_dev.c b/fs/block_dev.c index de0776cd7215..72e0a2887cb7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -892,7 +892,7 @@ int check_disk_change(struct block_device *bdev) if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); - if (bdev->bd_disk->minors > 1) + if (disk_max_parts(bdev->bd_disk)) bdev->bd_invalidated = 1; return 1; } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index b86aab1b0df6..e77fa144a07d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -134,7 +134,11 @@ char *disk_name(struct gendisk *hd, int partno, char *buf) const char *bdevname(struct block_device *bdev, char *buf) { - int partno = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; + int partno = 0; + + if (bdev->bd_part) + partno = bdev->bd_part->partno; + return disk_name(bdev->bd_disk, partno, buf); } @@ -169,7 +173,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) if (isdigit(state->name[strlen(state->name)-1])) sprintf(state->name, "p"); - state->limit = hd->minors; + state->limit = disk_max_parts(hd) + 1; i = res = err = 0; while (!res && check_part[i]) { memset(&state->parts, 0, sizeof(state->parts)); @@ -416,7 +420,6 @@ void register_disk(struct gendisk *disk) int err; disk->dev.parent = disk->driverfs_dev; - disk->dev.devt = MKDEV(disk->major, disk->first_minor); strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); /* ewww... some of these buggers have / in the name... */ @@ -440,7 +443,7 @@ void register_disk(struct gendisk *disk) disk_sysfs_add_subdirs(disk); /* No minors to use for partitions */ - if (disk->minors == 1) + if (!disk_max_parts(disk)) goto exit; /* No such device (e.g., media were just removed) */ @@ -463,8 +466,8 @@ exit: kobject_uevent(&disk->dev.kobj, KOBJ_ADD); /* announce possible partitions */ - for (i = 1; i < disk->minors; i++) { - p = disk->part[i-1]; + for (i = 0; i < disk_max_parts(disk); i++) { + p = disk->part[i]; if (!p || !p->nr_sects) continue; kobject_uevent(&p->dev.kobj, KOBJ_ADD); @@ -482,7 +485,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (res) return res; bdev->bd_invalidated = 0; - for (p = 1; p < disk->minors; p++) + for (p = 1; p <= disk_max_parts(disk); p++) delete_partition(disk, p); if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); @@ -545,7 +548,7 @@ void del_gendisk(struct gendisk *disk) int p; /* invalidate stuff */ - for (p = disk->minors - 1; p > 0; p--) { + for (p = disk_max_parts(disk); p > 0; p--) { invalidate_partition(disk, p); delete_partition(disk, p); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index d1723c0a8600..0ff75329199c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -111,10 +111,14 @@ struct hd_struct { #define GENHD_FL_FAIL 64 struct gendisk { + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). + */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + char disk_name[32]; /* name of major driver */ struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; @@ -152,6 +156,21 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } +static inline int disk_max_parts(struct gendisk *disk) +{ + return disk->minors - 1; +} + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return disk->dev.devt; +} + +static inline dev_t part_devt(struct hd_struct *part) +{ + return part->dev.devt; +} + /* * Macros to operate on percpu disk statistics: * @@ -163,7 +182,7 @@ static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, { struct hd_struct *part; int i; - for (i = 0; i < gendiskp->minors - 1; i++) { + for (i = 0; i < disk_max_parts(gendiskp); i++) { part = gendiskp->part[i]; if (part && part->start_sect <= sector && sector < part->start_sect + part->nr_sects) @@ -366,6 +385,7 @@ extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); +extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -553,11 +573,6 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -static inline struct block_device *bdget_disk(struct gendisk *disk, int partno) -{ - return bdget(MKDEV(disk->major, disk->first_minor) + partno); -} - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3 From c9959059161ddd7bf4670cf47367033d6b2f79c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:21 +0900 Subject: block: fix diskstats access There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemtion is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disables preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted. disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now has @cpu argument to help RT. This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- block/blk-core.c | 52 +++++++++-------- block/blk-merge.c | 11 ++-- block/genhd.c | 20 ++++--- drivers/block/aoe/aoecmd.c | 15 ++--- drivers/md/dm.c | 26 +++++---- drivers/md/linear.c | 7 ++- drivers/md/multipath.c | 7 ++- drivers/md/raid0.c | 7 ++- drivers/md/raid1.c | 8 ++- drivers/md/raid10.c | 7 ++- drivers/md/raid5.c | 8 ++- fs/partitions/check.c | 7 ++- include/linux/genhd.h | 139 +++++++++++++++++++-------------------------- 13 files changed, 158 insertions(+), 156 deletions(-) (limited to 'drivers/md') diff --git a/block/blk-core.c b/block/blk-core.c index d6128d9ad601..e0a5ee36849c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -56,25 +56,26 @@ static void drive_stat_acct(struct request *rq, int new_io) { struct hd_struct *part; int rw = rq_data_dir(rq); + int cpu; if (!blk_fs_request(rq) || !rq->rq_disk) return; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(rq->rq_disk, rq->sector); + if (!new_io) - __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); + all_stat_inc(cpu, rq->rq_disk, part, merges[rw], rq->sector); else { - disk_round_stats(rq->rq_disk); + disk_round_stats(cpu, rq->rq_disk); rq->rq_disk->in_flight++; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight++; } } - rcu_read_unlock(); + disk_stat_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -997,7 +998,7 @@ static inline void add_request(struct request_queue *q, struct request *req) * /proc/diskstats. This accounts immediately for all queue usage up to * the current jiffies and restarts the counters again. */ -void disk_round_stats(struct gendisk *disk) +void disk_round_stats(int cpu, struct gendisk *disk) { unsigned long now = jiffies; @@ -1005,15 +1006,15 @@ void disk_round_stats(struct gendisk *disk) return; if (disk->in_flight) { - __disk_stat_add(disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); - __disk_stat_add(disk, io_ticks, (now - disk->stamp)); + disk_stat_add(cpu, disk, time_in_queue, + disk->in_flight * (now - disk->stamp)); + disk_stat_add(cpu, disk, io_ticks, (now - disk->stamp)); } disk->stamp = now; } EXPORT_SYMBOL_GPL(disk_round_stats); -void part_round_stats(struct hd_struct *part) +void part_round_stats(int cpu, struct hd_struct *part) { unsigned long now = jiffies; @@ -1021,9 +1022,9 @@ void part_round_stats(struct hd_struct *part) return; if (part->in_flight) { - __part_stat_add(part, time_in_queue, - part->in_flight * (now - part->stamp)); - __part_stat_add(part, io_ticks, (now - part->stamp)); + part_stat_add(cpu, part, time_in_queue, + part->in_flight * (now - part->stamp)); + part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; } @@ -1563,12 +1564,13 @@ static int __end_that_request_first(struct request *req, int error, if (blk_fs_request(req) && req->rq_disk) { const int rw = rq_data_dir(req); struct hd_struct *part; + int cpu; - rcu_read_lock(); + cpu = disk_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - all_stat_add(req->rq_disk, part, sectors[rw], - nr_bytes >> 9, req->sector); - rcu_read_unlock(); + all_stat_add(cpu, req->rq_disk, part, sectors[rw], + nr_bytes >> 9, req->sector); + disk_stat_unlock(); } total_bytes = bio_nbytes = 0; @@ -1753,21 +1755,21 @@ static void end_that_request_last(struct request *req, int error) unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(disk, req->sector); - __all_stat_inc(disk, part, ios[rw], req->sector); - __all_stat_add(disk, part, ticks[rw], duration, req->sector); - disk_round_stats(disk); + all_stat_inc(cpu, disk, part, ios[rw], req->sector); + all_stat_add(cpu, disk, part, ticks[rw], duration, req->sector); + disk_round_stats(cpu, disk); disk->in_flight--; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight--; } - rcu_read_unlock(); + disk_stat_unlock(); } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index eb2a3ca58303..d926a24bf1fd 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -388,18 +388,19 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (req->rq_disk) { struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - disk_round_stats(req->rq_disk); + + disk_round_stats(cpu, req->rq_disk); req->rq_disk->in_flight--; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight--; } - rcu_read_unlock(); + disk_stat_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index b431d6543942..430626e440f0 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -633,10 +633,11 @@ static ssize_t disk_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); + int cpu; - preempt_disable(); - disk_round_stats(disk); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, disk); + disk_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -749,6 +750,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) struct disk_part_iter piter; struct hd_struct *hd; char buf[BDEVNAME_SIZE]; + int cpu; /* if (&gp->dev.kobj.entry == block_class.devices.next) @@ -758,9 +760,9 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - preempt_disable(); - disk_round_stats(gp); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, gp); + disk_stat_unlock(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), disk_name(gp, 0, buf), @@ -777,9 +779,9 @@ static int diskstats_show(struct seq_file *seqf, void *v) /* now show all non-0 size partitions of it */ disk_part_iter_init(&piter, gp, 0); while ((hd = disk_part_iter_next(&piter))) { - preempt_disable(); - part_round_stats(hd); - preempt_enable(); + cpu = disk_stat_lock(); + part_round_stats(cpu, hd); + disk_stat_unlock(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 84c03d65dcc5..17eed8c025d0 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -756,16 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector unsigned long n_sect = bio->bi_size >> 9; const int rw = bio_data_dir(bio); struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(disk, part, ios[rw], sector); - all_stat_add(disk, part, ticks[rw], duration, sector); - all_stat_add(disk, part, sectors[rw], n_sect, sector); - all_stat_add(disk, part, io_ticks, duration, sector); - rcu_read_unlock(); + all_stat_inc(cpu, disk, part, ios[rw], sector); + all_stat_add(cpu, disk, part, ticks[rw], duration, sector); + all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); + all_stat_add(cpu, disk, part, io_ticks, duration, sector); + + disk_stat_unlock(); } void diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a78caad29996..653624792eaf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -377,12 +377,13 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; + int cpu; io->start_time = jiffies; - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_unlock(); dm_disk(md)->in_flight = atomic_inc_return(&md->pending); } @@ -391,15 +392,15 @@ static int end_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->bio; unsigned long duration = jiffies - io->start_time; - int pending; + int pending, cpu; int rw = bio_data_dir(bio); - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); + disk_stat_unlock(); - disk_stat_add(dm_disk(md), ticks[rw], duration); + dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); return !pending; } @@ -885,6 +886,7 @@ static int dm_request(struct request_queue *q, struct bio *bio) int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; + int cpu; /* * There is no use in forwarding any barrier request since we can't @@ -897,8 +899,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - disk_stat_inc(dm_disk(md), ios[rw]); - disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, dm_disk(md), ios[rw]); + disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * If we're suspended we have to queue diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b1eebf88c209..00cbc8e47294 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -318,14 +318,17 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) mddev_t *mddev = q->queuedata; dev_info_t *tmp_dev; sector_t block; + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c4779ccba1c3..182f5a94cdc5 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -147,6 +147,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); @@ -158,8 +159,10 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 183610635661..e26030fa59ab 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,14 +399,17 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) sector_t chunk; sector_t block, rsect; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0b82030c265d..babb13036f93 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -779,7 +779,7 @@ static int make_request(struct request_queue *q, struct bio * bio) struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); const int do_sync = bio_sync(bio); - int do_barriers; + int cpu, do_barriers; mdk_rdev_t *blocked_rdev; /* @@ -804,8 +804,10 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * make_request() can abort the operation when READA is being diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d3b9aa096285..5ec80da0a9d7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -789,6 +789,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; + int cpu; int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); @@ -843,8 +844,10 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37e546528f9c..5899f211515f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3387,7 +3387,7 @@ static int make_request(struct request_queue *q, struct bio * bi) sector_t logical_sector, last_sector; struct stripe_head *sh; const int rw = bio_data_dir(bi); - int remaining; + int cpu, remaining; if (unlikely(bio_barrier(bi))) { bio_endio(bi, -EOPNOTSUPP); @@ -3396,8 +3396,10 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi)); + disk_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 96c8bf41e455..c442f0aadac3 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -219,10 +219,11 @@ static ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); + int cpu; - preempt_disable(); - part_round_stats(p); - preempt_enable(); + cpu = disk_stat_lock(); + part_round_stats(cpu, p); + disk_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7fbba19e076b..ac8a901f2002 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -209,16 +209,24 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter); extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); -/* +/* * Macros to operate on percpu disk statistics: * - * The __ variants should only be called in critical sections. The full - * variants disable/enable preemption. + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. */ - #ifdef CONFIG_SMP -#define __disk_stat_add(gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) #define disk_stat_read(gendiskp, field) \ ({ \ @@ -229,7 +237,8 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, res; \ }) -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { +static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +{ int i; for_each_possible_cpu(i) @@ -237,14 +246,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { sizeof(struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ - (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) +#define part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr(part->dkstats, cpu)->field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - __part_stat_add(part, field, addnd); \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) \ @@ -264,10 +273,13 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) memset(per_cpu_ptr(part->dkstats, i), value, sizeof(struct disk_stats)); } - + #else /* !CONFIG_SMP */ -#define __disk_stat_add(gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); 0; }) +#define disk_stat_unlock() rcu_read_unlock() + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (gendiskp->dkstats.field += addnd) #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) @@ -275,14 +287,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ +#define part_stat_add(cpu, part, field, addnd) \ (part->dkstats.field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part->dkstats.field += addnd; \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) (part->dkstats.field) @@ -294,63 +306,26 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #endif /* CONFIG_SMP */ -#define disk_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __disk_stat_add(gendiskp, field, addnd); \ - preempt_enable(); \ - } while (0) - -#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1) -#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1) - -#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1) -#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1) - -#define __disk_stat_sub(gendiskp, field, subnd) \ - __disk_stat_add(gendiskp, field, -subnd) -#define disk_stat_sub(gendiskp, field, subnd) \ - disk_stat_add(gendiskp, field, -subnd) - -#define part_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __part_stat_add(gendiskp, field, addnd);\ - preempt_enable(); \ - } while (0) - -#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) -#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) - -#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) -#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) - -#define __part_stat_sub(gendiskp, field, subnd) \ - __part_stat_add(gendiskp, field, -subnd) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define all_stat_add(gendiskp, part, field, addnd, sector) \ - do { \ - preempt_disable(); \ - __all_stat_add(gendiskp, part, field, addnd, sector); \ - preempt_enable(); \ - } while (0) - -#define __all_stat_dec(gendiskp, field, sector) \ - __all_stat_add(gendiskp, field, -1, sector) -#define all_stat_dec(gendiskp, field, sector) \ - all_stat_add(gendiskp, field, -1, sector) - -#define __all_stat_inc(gendiskp, part, field, sector) \ - __all_stat_add(gendiskp, part, field, 1, sector) -#define all_stat_inc(gendiskp, part, field, sector) \ - all_stat_add(gendiskp, part, field, 1, sector) - -#define __all_stat_sub(gendiskp, part, field, subnd, sector) \ - __all_stat_add(gendiskp, part, field, -subnd, sector) -#define all_stat_sub(gendiskp, part, field, subnd, sector) \ - all_stat_add(gendiskp, part, field, -subnd, sector) +#define disk_stat_dec(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, -1) +#define disk_stat_inc(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, 1) +#define disk_stat_sub(cpu, gendiskp, field, subnd) \ + disk_stat_add(cpu, gendiskp, field, -subnd) + +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +#define all_stat_dec(cpu, gendiskp, field, sector) \ + all_stat_add(cpu, gendiskp, field, -1, sector) +#define all_stat_inc(cpu, gendiskp, part, field, sector) \ + all_stat_add(cpu, gendiskp, part, field, 1, sector) +#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ + all_stat_add(cpu, gendiskp, part, field, -subnd, sector) /* Inlines to alloc and free disk stats in struct gendisk */ #ifdef CONFIG_SMP @@ -401,8 +376,8 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(struct gendisk *disk); -extern void part_round_stats(struct hd_struct *part); +extern void disk_round_stats(int cpu, struct gendisk *disk); +extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ extern int get_blkdev_list(char *, int); -- cgit v1.2.3 From ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:05 +0900 Subject: block: implement and use {disk|part}_to_dev() Implement {disk|part}_to_dev() and use them to access generic device instead of directly dereferencing {disk|part}->dev. To make sure no user is left behind, rename generic devices fields to __dev. This is in preparation of unifying partition 0 handling with other partitions. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-integrity.c | 5 +-- block/blk-sysfs.c | 4 +-- block/genhd.c | 27 ++++++++-------- drivers/block/aoe/aoeblk.c | 4 +-- drivers/block/nbd.c | 4 +-- drivers/ide/ide-probe.c | 2 +- drivers/md/dm.c | 4 +-- drivers/md/md.c | 10 +++--- fs/block_dev.c | 4 +-- fs/partitions/check.c | 79 ++++++++++++++++++++++++---------------------- include/linux/genhd.h | 20 ++++++------ 11 files changed, 86 insertions(+), 77 deletions(-) (limited to 'drivers/md') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index d87606eaca1d..69023da63151 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) return -1; if (kobject_init_and_add(&bi->kobj, &integrity_ktype, - &disk->dev.kobj, "%s", "integrity")) { + &disk_to_dev(disk)->kobj, + "%s", "integrity")) { kmem_cache_free(integrity_cachep, bi); return -1; } @@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk) kobject_uevent(&bi->kobj, KOBJ_REMOVE); kobject_del(&bi->kobj); - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); kmem_cache_free(integrity_cachep, bi); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 304ec73ab821..b9a6ed166649 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -310,7 +310,7 @@ int blk_register_queue(struct gendisk *disk) if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), + ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), "%s", "queue"); if (ret < 0) return ret; @@ -339,6 +339,6 @@ void blk_unregister_queue(struct gendisk *disk) kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); } } diff --git a/block/genhd.c b/block/genhd.c index 67e5a59ced2a..0a2f16bd54b7 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -59,7 +59,7 @@ struct hd_struct *disk_get_part(struct gendisk *disk, int partno) rcu_read_lock(); part = rcu_dereference(disk->__part[partno - 1]); if (part) - get_device(&part->dev); + get_device(part_to_dev(part)); rcu_read_unlock(); return part; @@ -130,7 +130,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) continue; - get_device(&part->dev); + get_device(part_to_dev(part)); piter->part = part; piter->idx += inc; break; @@ -435,7 +435,7 @@ static struct kobject *exact_match(dev_t devt, int *partno, void *data) { struct gendisk *p = data; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int exact_lock(dev_t devt, void *data) @@ -460,7 +460,7 @@ void add_disk(struct gendisk *disk) int retval; disk->flags |= GENHD_FL_UP; - disk->dev.devt = MKDEV(disk->major, disk->first_minor); + disk_to_dev(disk)->devt = MKDEV(disk->major, disk->first_minor); blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); @@ -468,7 +468,8 @@ void add_disk(struct gendisk *disk) bdi = &disk->queue->backing_dev_info; bdi_register_dev(bdi, disk_devt(disk)); - retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); + retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, + "bdi"); WARN_ON(retval); } @@ -477,7 +478,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ void unlink_gendisk(struct gendisk *disk) { - sysfs_remove_link(&disk->dev.kobj, "bdi"); + sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); blk_unregister_region(disk_devt(disk), disk->minors); @@ -903,7 +904,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) int cpu; /* - if (&gp->dev.kobj.entry == block_class.devices.next) + if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) seq_puts(seqf, "major minor name" " rio rmerge rsect ruse wio wmerge " "wsect wuse running use aveq" @@ -972,7 +973,7 @@ static void media_change_notify_thread(struct work_struct *work) * set enviroment vars to indicate which event this is for * so that user space will know to go check the media status. */ - kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp); + kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); put_device(gd->driverfs_dev); } @@ -1062,9 +1063,9 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) disk->minors = minors; disk->ext_minors = ext_minors; rand_initialize_disk(disk); - disk->dev.class = &block_class; - disk->dev.type = &disk_type; - device_initialize(&disk->dev); + disk_to_dev(disk)->class = &block_class; + disk_to_dev(disk)->type = &disk_type; + device_initialize(disk_to_dev(disk)); INIT_WORK(&disk->async_notify, media_change_notify_thread); } @@ -1086,7 +1087,7 @@ struct kobject *get_disk(struct gendisk *disk) owner = disk->fops->owner; if (owner && !try_module_get(owner)) return NULL; - kobj = kobject_get(&disk->dev.kobj); + kobj = kobject_get(&disk_to_dev(disk)->kobj); if (kobj == NULL) { module_put(owner); return NULL; @@ -1100,7 +1101,7 @@ EXPORT_SYMBOL(get_disk); void put_disk(struct gendisk *disk) { if (disk) - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); } EXPORT_SYMBOL(put_disk); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 0c39782b2660..3edb6cb7d68f 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -109,12 +109,12 @@ static const struct attribute_group attr_group = { static int aoedisk_add_sysfs(struct aoedev *d) { - return sysfs_create_group(&d->gd->dev.kobj, &attr_group); + return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group); } void aoedisk_rm_sysfs(struct aoedev *d) { - sysfs_remove_group(&d->gd->dev.kobj, &attr_group); + sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group); } static int diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1778e4a2c672..7b3351260d56 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -403,7 +403,7 @@ static int nbd_do_it(struct nbd_device *lo) BUG_ON(lo->magic != LO_MAGIC); lo->pid = current->pid; - ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr); + ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); if (ret) { printk(KERN_ERR "nbd: sysfs_create_file failed!"); return ret; @@ -412,7 +412,7 @@ static int nbd_do_it(struct nbd_device *lo) while ((req = nbd_read_stat(lo)) != NULL) nbd_end_request(req); - sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr); + sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); return 0; } diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index a51a30e9eab3..70aa86c8807e 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1188,7 +1188,7 @@ static struct kobject *exact_match(dev_t dev, int *part, void *data) { struct gendisk *p = data; *part &= (1 << PARTN_BITS) - 1; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int exact_lock(dev_t dev, void *data) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 653624792eaf..637806695bb9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1186,7 +1186,7 @@ static void event_callback(void *context) list_splice_init(&md->uevent_list, &uevents); spin_unlock_irqrestore(&md->uevent_lock, flags); - dm_send_uevents(&uevents, &md->disk->dev.kobj); + dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); wake_up(&md->eventq); @@ -1643,7 +1643,7 @@ out: *---------------------------------------------------------------*/ void dm_kobject_uevent(struct mapped_device *md) { - kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); } uint32_t dm_next_uevent_seq(struct mapped_device *md) diff --git a/drivers/md/md.c b/drivers/md/md.c index deeac4b44173..96e9fccd2eab 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1465,9 +1465,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) goto fail; if (rdev->bdev->bd_part) - ko = &rdev->bdev->bd_part->dev.kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; else - ko = &rdev->bdev->bd_disk->dev.kobj; + ko = &disk_to_dev(rdev->bdev->bd_disk)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; @@ -3470,8 +3470,8 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->queue = mddev->queue; add_disk(disk); mddev->gendisk = disk; - error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, - "%s", "md"); + error = kobject_init_and_add(&mddev->kobj, &md_ktype, + &disk_to_dev(disk)->kobj, "%s", "md"); mutex_unlock(&disks_mutex); if (error) printk(KERN_WARNING "md: cannot register %s/md - name in use\n", @@ -3761,7 +3761,7 @@ static int do_md_run(mddev_t * mddev) sysfs_notify(&mddev->kobj, NULL, "array_state"); sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify(&mddev->kobj, NULL, "degraded"); - kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 2f2873b9a041..a02df22f37c3 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -543,9 +543,9 @@ EXPORT_SYMBOL(bd_release); static struct kobject *bdev_get_kobj(struct block_device *bdev) { if (bdev->bd_contains != bdev) - return kobject_get(&bdev->bd_part->dev.kobj); + return kobject_get(&part_to_dev(bdev->bd_part)->kobj); else - return kobject_get(&bdev->bd_disk->dev.kobj); + return kobject_get(&disk_to_dev(bdev->bd_disk)->kobj); } static struct kobject *bdev_get_holder(struct block_device *bdev) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0d4b7f28f13f..ac0df3acdcda 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -309,7 +309,7 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) { struct kobject *k; - k = kobject_get(&disk->dev.kobj); + k = kobject_get(&disk_to_dev(disk)->kobj); disk->holder_dir = kobject_create_and_add("holders", k); disk->slave_dir = kobject_create_and_add("slaves", k); kobject_put(k); @@ -322,7 +322,7 @@ static void delete_partition_rcu_cb(struct rcu_head *head) part->start_sect = 0; part->nr_sects = 0; part_stat_set_all(part, 0); - put_device(&part->dev); + put_device(part_to_dev(part)); } void delete_partition(struct gendisk *disk, int partno) @@ -336,7 +336,7 @@ void delete_partition(struct gendisk *disk, int partno) blk_free_devt(part_devt(part)); rcu_assign_pointer(disk->__part[partno-1], NULL); kobject_put(part->holder_dir); - device_del(&part->dev); + device_del(part_to_dev(part)); call_rcu(&part->rcu_head, delete_partition_rcu_cb); } @@ -354,6 +354,9 @@ int add_partition(struct gendisk *disk, int partno, { struct hd_struct *p; dev_t devt = MKDEV(0, 0); + struct device *ddev = disk_to_dev(disk); + struct device *pdev; + const char *dname; int err; if (disk->__part[partno - 1]) @@ -367,42 +370,43 @@ int add_partition(struct gendisk *disk, int partno, err = -ENOMEM; goto out_free; } + pdev = part_to_dev(p); + p->start_sect = start; p->nr_sects = len; p->partno = partno; p->policy = disk->policy; - if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) - snprintf(p->dev.bus_id, BUS_ID_SIZE, - "%sp%d", disk->dev.bus_id, partno); + dname = dev_name(ddev); + if (isdigit(dname[strlen(dname) - 1])) + snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); else - snprintf(p->dev.bus_id, BUS_ID_SIZE, - "%s%d", disk->dev.bus_id, partno); + snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); - device_initialize(&p->dev); - p->dev.class = &block_class; - p->dev.type = &part_type; - p->dev.parent = &disk->dev; + device_initialize(pdev); + pdev->class = &block_class; + pdev->type = &part_type; + pdev->parent = ddev; err = blk_alloc_devt(p, &devt); if (err) - goto out_put; - p->dev.devt = devt; + goto out_free; + pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ - p->dev.uevent_suppress = 1; - err = device_add(&p->dev); + pdev->uevent_suppress = 1; + err = device_add(pdev); if (err) goto out_put; err = -ENOMEM; - p->holder_dir = kobject_create_and_add("holders", &p->dev.kobj); + p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); if (!p->holder_dir) goto out_del; - p->dev.uevent_suppress = 0; + pdev->uevent_suppress = 0; if (flags & ADDPART_FLAG_WHOLEDISK) { - err = device_create_file(&p->dev, &dev_attr_whole_disk); + err = device_create_file(pdev, &dev_attr_whole_disk); if (err) goto out_del; } @@ -412,8 +416,8 @@ int add_partition(struct gendisk *disk, int partno, rcu_assign_pointer(disk->__part[partno - 1], p); /* suppress uevent if the disk supresses it */ - if (!disk->dev.uevent_suppress) - kobject_uevent(&p->dev.kobj, KOBJ_ADD); + if (!ddev->uevent_suppress) + kobject_uevent(&pdev->kobj, KOBJ_ADD); return 0; @@ -422,9 +426,9 @@ out_free: return err; out_del: kobject_put(p->holder_dir); - device_del(&p->dev); + device_del(pdev); out_put: - put_device(&p->dev); + put_device(pdev); blk_free_devt(devt); return err; } @@ -432,30 +436,31 @@ out_put: /* Not exported, helper to add_disk(). */ void register_disk(struct gendisk *disk) { + struct device *ddev = disk_to_dev(disk); struct block_device *bdev; struct disk_part_iter piter; struct hd_struct *part; char *s; int err; - disk->dev.parent = disk->driverfs_dev; + ddev->parent = disk->driverfs_dev; - strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); + strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); /* ewww... some of these buggers have / in the name... */ - s = strchr(disk->dev.bus_id, '/'); + s = strchr(ddev->bus_id, '/'); if (s) *s = '!'; /* delay uevents, until we scanned partition table */ - disk->dev.uevent_suppress = 1; + ddev->uevent_suppress = 1; - if (device_add(&disk->dev)) + if (device_add(ddev)) return; #ifndef CONFIG_SYSFS_DEPRECATED - err = sysfs_create_link(block_depr, &disk->dev.kobj, - kobject_name(&disk->dev.kobj)); + err = sysfs_create_link(block_depr, &ddev->kobj, + kobject_name(&ddev->kobj)); if (err) { - device_del(&disk->dev); + device_del(ddev); return; } #endif @@ -481,13 +486,13 @@ void register_disk(struct gendisk *disk) exit: /* announce disk after possible partitions are created */ - disk->dev.uevent_suppress = 0; - kobject_uevent(&disk->dev.kobj, KOBJ_ADD); + ddev->uevent_suppress = 0; + kobject_uevent(&ddev->kobj, KOBJ_ADD); /* announce possible partitions */ disk_part_iter_init(&piter, disk, 0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part->dev.kobj, KOBJ_ADD); + kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); disk_part_iter_exit(&piter); } @@ -518,7 +523,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) return -EIO; /* tell userspace that the media / partition table may have changed */ - kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); for (p = 1; p < state->limit; p++) { sector_t size = state->parts[p].size; @@ -591,7 +596,7 @@ void del_gendisk(struct gendisk *disk) kobject_put(disk->slave_dir); disk->driverfs_dev = NULL; #ifndef CONFIG_SYSFS_DEPRECATED - sysfs_remove_link(block_depr, disk->dev.bus_id); + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); #endif - device_del(&disk->dev); + device_del(disk_to_dev(disk)); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fc532424062..e4e18c509ac5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -15,9 +15,11 @@ #ifdef CONFIG_BLOCK -#define kobj_to_dev(k) container_of(k, struct device, kobj) -#define dev_to_disk(device) container_of(device, struct gendisk, dev) -#define dev_to_part(device) container_of(device, struct hd_struct, dev) +#define kobj_to_dev(k) container_of((k), struct device, kobj) +#define dev_to_disk(device) container_of((device), struct gendisk, __dev) +#define dev_to_part(device) container_of((device), struct hd_struct, __dev) +#define disk_to_dev(disk) (&((disk)->__dev)) +#define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; extern struct kobject *block_depr; @@ -88,7 +90,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; - struct device dev; + struct device __dev; struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -139,7 +141,7 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct device dev; + struct device __dev; struct kobject *holder_dir; struct kobject *slave_dir; @@ -163,7 +165,7 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { if (likely(part)) - return dev_to_disk((part)->dev.parent); + return dev_to_disk(part_to_dev(part)->parent); return NULL; } @@ -174,12 +176,12 @@ static inline int disk_max_parts(struct gendisk *disk) static inline dev_t disk_devt(struct gendisk *disk) { - return disk->dev.devt; + return disk_to_dev(disk)->devt; } static inline dev_t part_devt(struct hd_struct *part) { - return part->dev.devt; + return part_to_dev(part)->devt; } extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); @@ -187,7 +189,7 @@ extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) { if (likely(part)) - put_device(&part->dev); + put_device(part_to_dev(part)); } /* -- cgit v1.2.3 From b7db9956e57c8151b930d5e5fe5c766e6aad3ff7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:10 +0900 Subject: block: move policy from disk to part0 Move disk->policy to part0->policy. Implement and use get_disk_ro(). Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 16 +++++----------- drivers/ide/ide-cd.c | 2 +- drivers/md/dm-ioctl.c | 2 +- fs/partitions/check.c | 2 +- include/linux/genhd.h | 6 +++++- 5 files changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/md') diff --git a/block/genhd.c b/block/genhd.c index c70db35076a0..70358f3c7423 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -757,7 +757,7 @@ static ssize_t disk_ro_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", disk->policy ? 1 : 0); + return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); } static ssize_t disk_capability_show(struct device *dev, @@ -1090,10 +1090,7 @@ EXPORT_SYMBOL(put_disk); void set_device_ro(struct block_device *bdev, int flag) { - if (bdev->bd_contains != bdev) - bdev->bd_part->policy = flag; - else - bdev->bd_disk->policy = flag; + bdev->bd_part->policy = flag; } EXPORT_SYMBOL(set_device_ro); @@ -1103,8 +1100,8 @@ void set_disk_ro(struct gendisk *disk, int flag) struct disk_part_iter piter; struct hd_struct *part; - disk->policy = flag; - disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) part->policy = flag; disk_part_iter_exit(&piter); @@ -1116,10 +1113,7 @@ int bdev_read_only(struct block_device *bdev) { if (!bdev) return 0; - else if (bdev->bd_contains != bdev) - return bdev->bd_part->policy; - else - return bdev->bd_disk->policy; + return bdev->bd_part->policy; } EXPORT_SYMBOL(bdev_read_only); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index f16bb4667238..03c2cb6a58bc 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1113,7 +1113,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) if (write) { /* disk has become write protected */ - if (cd->disk->policy) { + if (get_disk_ro(cd->disk)) { cdrom_end_request(drive, 0); return ide_stopped; } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c3de311117a1..5b919159f084 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -548,7 +548,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) */ param->open_count = dm_open_count(md); - if (disk->policy) + if (get_disk_ro(disk)) param->flags |= DM_READONLY_FLAG; param->event_nr = dm_get_event_nr(md); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 24d2c56d7d2d..ace6d03602c7 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -375,7 +375,7 @@ int add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->nr_sects = len; p->partno = partno; - p->policy = disk->policy; + p->policy = get_disk_ro(disk); dname = dev_name(ddev); if (isdigit(dname[strlen(dname) - 1])) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9cb8380cf0eb..4411bdd671dd 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,7 +145,6 @@ struct gendisk { struct kobject *slave_dir; struct timer_rand_state *random; - int policy; atomic_t sync_io; /* RAID */ unsigned long stamp; @@ -403,6 +402,11 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0.policy; +} + /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk); extern void rand_initialize_disk(struct gendisk *disk); -- cgit v1.2.3 From 0762b8bde9729f10f8e6249809660ff2ec3ad735 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:12 +0900 Subject: block: always set bdev->bd_part Till now, bdev->bd_part is set only if the bdev was for parts other than part0. This patch makes bdev->bd_part always set so that code paths don't have to differenciate common handling. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- drivers/md/md.c | 5 +--- fs/block_dev.c | 67 ++++++++++++++++++++++++--------------------------- fs/partitions/check.c | 7 +----- include/linux/genhd.h | 2 +- 5 files changed, 35 insertions(+), 48 deletions(-) (limited to 'drivers/md') diff --git a/block/blk-core.c b/block/blk-core.c index e0a5ee36849c..a4a7c08d2f20 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1274,7 +1274,7 @@ __setup("fail_make_request=", setup_fail_make_request); static int should_fail_request(struct bio *bio) { if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || - (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) + bio->bi_bdev->bd_part->make_it_fail) return should_fail(&fail_make_request, bio->bi_size); return 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index 96e9fccd2eab..2bd9cf416123 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1464,10 +1464,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; - if (rdev->bdev->bd_part) - ko = &part_to_dev(rdev->bdev->bd_part)->kobj; - else - ko = &disk_to_dev(rdev->bdev->bd_disk)->kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; diff --git a/fs/block_dev.c b/fs/block_dev.c index 57d572642854..c3fa19bd64df 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -540,14 +540,6 @@ EXPORT_SYMBOL(bd_release); * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 */ -static struct kobject *bdev_get_kobj(struct block_device *bdev) -{ - if (bdev->bd_contains != bdev) - return kobject_get(&part_to_dev(bdev->bd_part)->kobj); - else - return kobject_get(&disk_to_dev(bdev->bd_disk)->kobj); -} - static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) @@ -596,7 +588,7 @@ static int bd_holder_grab_dirs(struct block_device *bdev, if (!bo->hdev) goto fail_put_sdir; - bo->sdev = bdev_get_kobj(bdev); + bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); if (!bo->sdev) goto fail_put_hdev; @@ -919,7 +911,6 @@ static int __blkdev_put(struct block_device *bdev, int for_part); static int do_open(struct block_device *bdev, struct file *file, int for_part) { - struct module *owner = NULL; struct gendisk *disk; struct hd_struct *part = NULL; int ret; @@ -941,25 +932,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; + lock_kernel(); + disk = get_gendisk(bdev->bd_dev, &partno); - if (!disk) { - unlock_kernel(); - bdput(bdev); - return ret; - } - owner = disk->fops->owner; + if (!disk) + goto out_unlock_kernel; + part = disk_get_part(disk, partno); + if (!part) + goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; + bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; if (disk->fops->open) { ret = disk->fops->open(bdev->bd_inode, file); if (ret) - goto out_first; + goto out_clear; } if (!bdev->bd_openers) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); @@ -975,31 +968,32 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) whole = bdget_disk(disk, 0); ret = -ENOMEM; if (!whole) - goto out_first; + goto out_clear; BUG_ON(for_part); ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); if (ret) - goto out_first; + goto out_clear; bdev->bd_contains = whole; - part = disk_get_part(disk, partno); bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !part || !part->nr_sects) { ret = -ENXIO; - goto out_first; + goto out_clear; } - bdev->bd_part = part; bd_set_size(bdev, (loff_t)part->nr_sects << 9); } } else { + disk_put_part(part); put_disk(disk); - module_put(owner); + module_put(disk->fops->owner); + part = NULL; + disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); if (ret) - goto out; + goto out_unlock_bdev; } if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); @@ -1012,20 +1006,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) unlock_kernel(); return 0; -out_first: + out_clear: bdev->bd_disk = NULL; + bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; - put_disk(disk); - disk_put_part(part); - module_put(owner); -out: + out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); + out_unlock_kernel: unlock_kernel(); - if (ret) - bdput(bdev); + + disk_put_part(part); + if (disk) + module_put(disk->fops->owner); + put_disk(disk); + bdput(bdev); + return ret; } @@ -1110,11 +1108,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) put_disk(disk); module_put(owner); - - if (bdev->bd_contains != bdev) { - disk_put_part(bdev->bd_part); - bdev->bd_part = NULL; - } + disk_put_part(bdev->bd_part); + bdev->bd_part = NULL; bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f0f604950ff4..87298c0fc8ce 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -134,12 +134,7 @@ char *disk_name(struct gendisk *hd, int partno, char *buf) const char *bdevname(struct block_device *bdev, char *buf) { - int partno = 0; - - if (bdev->bd_part) - partno = bdev->bd_part->partno; - - return disk_name(bdev->bd_disk, partno, buf); + return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); } EXPORT_SYMBOL(bdevname); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 2c0e1b597ab4..45a3682b5d87 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -412,7 +412,7 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect; + return bdev->bd_part->start_sect; } static inline sector_t get_capacity(struct gendisk *disk) { -- cgit v1.2.3 From 074a7aca7afa6f230104e8e65eba3420263714a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:14 +0900 Subject: block: move stats from disk to part0 Move stats related fields - stamp, in_flight, dkstats - from disk to part0 and unify stat handling such that... * part_stat_*() now updates part0 together if the specified partition is not part0. ie. part_stat_*() are now essentially all_stat_*(). * {disk|all}_stat_*() are gone. * part_round_stats() is updated similary. It handles part0 stats automatically and disk_round_stats() is killed. * part_{inc|dec}_in_fligh() is implemented which automatically updates part0 stats for parts other than part0. * disk_map_sector_rcu() is updated to return part0 if no part matches. Combined with the above changes, this makes NULL special case handling in callers unnecessary. * Separate stats show code paths for disk are collapsed into part stats show code paths. * Rename disk_stat_lock/unlock() to part_stat_lock/unlock() While at it, reposition stat handling macros a bit and add missing parentheses around macro parameters. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 84 ++++++++++-------------- block/blk-merge.c | 12 ++-- block/genhd.c | 97 +++++++-------------------- drivers/block/aoe/aoecmd.c | 12 ++-- drivers/md/dm.c | 27 ++++---- drivers/md/linear.c | 9 +-- drivers/md/md.c | 4 +- drivers/md/multipath.c | 9 +-- drivers/md/raid0.c | 9 +-- drivers/md/raid1.c | 9 +-- drivers/md/raid10.c | 9 +-- drivers/md/raid5.c | 9 +-- fs/partitions/check.c | 12 ++-- include/linux/genhd.h | 159 +++++++++++++-------------------------------- 14 files changed, 165 insertions(+), 296 deletions(-) (limited to 'drivers/md') diff --git a/block/blk-core.c b/block/blk-core.c index 505ec61067df..98138f002524 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -61,21 +61,17 @@ static void drive_stat_acct(struct request *rq, int new_io) if (!blk_fs_request(rq) || !rq->rq_disk) return; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(rq->rq_disk, rq->sector); if (!new_io) - all_stat_inc(cpu, rq->rq_disk, part, merges[rw], rq->sector); + part_stat_inc(cpu, part, merges[rw]); else { - disk_round_stats(cpu, rq->rq_disk); - rq->rq_disk->in_flight++; - if (part) { - part_round_stats(cpu, part); - part->in_flight++; - } + part_round_stats(cpu, part); + part_inc_in_flight(part); } - disk_stat_unlock(); + part_stat_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -983,8 +979,22 @@ static inline void add_request(struct request_queue *q, struct request *req) __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); } -/* - * disk_round_stats() - Round off the performance stats on a struct +static void part_round_stats_single(int cpu, struct hd_struct *part, + unsigned long now) +{ + if (now == part->stamp) + return; + + if (part->in_flight) { + __part_stat_add(cpu, part, time_in_queue, + part->in_flight * (now - part->stamp)); + __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); + } + part->stamp = now; +} + +/** + * part_round_stats() - Round off the performance stats on a struct * disk_stats. * * The average IO queue length and utilisation statistics are maintained @@ -998,36 +1008,15 @@ static inline void add_request(struct request_queue *q, struct request *req) * /proc/diskstats. This accounts immediately for all queue usage up to * the current jiffies and restarts the counters again. */ -void disk_round_stats(int cpu, struct gendisk *disk) -{ - unsigned long now = jiffies; - - if (now == disk->stamp) - return; - - if (disk->in_flight) { - disk_stat_add(cpu, disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); - disk_stat_add(cpu, disk, io_ticks, (now - disk->stamp)); - } - disk->stamp = now; -} -EXPORT_SYMBOL_GPL(disk_round_stats); - void part_round_stats(int cpu, struct hd_struct *part) { unsigned long now = jiffies; - if (now == part->stamp) - return; - - if (part->in_flight) { - part_stat_add(cpu, part, time_in_queue, - part->in_flight * (now - part->stamp)); - part_stat_add(cpu, part, io_ticks, (now - part->stamp)); - } - part->stamp = now; + if (part->partno) + part_round_stats_single(cpu, &part_to_disk(part)->part0, now); + part_round_stats_single(cpu, part, now); } +EXPORT_SYMBOL_GPL(part_round_stats); /* * queue lock must be held @@ -1567,11 +1556,10 @@ static int __end_that_request_first(struct request *req, int error, struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - all_stat_add(cpu, req->rq_disk, part, sectors[rw], - nr_bytes >> 9, req->sector); - disk_stat_unlock(); + part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); + part_stat_unlock(); } total_bytes = bio_nbytes = 0; @@ -1758,19 +1746,15 @@ static void end_that_request_last(struct request *req, int error) struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(disk, req->sector); - all_stat_inc(cpu, disk, part, ios[rw], req->sector); - all_stat_add(cpu, disk, part, ticks[rw], duration, req->sector); - disk_round_stats(cpu, disk); - disk->in_flight--; - if (part) { - part_round_stats(cpu, part); - part->in_flight--; - } + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_round_stats(cpu, part); + part_dec_in_flight(part); - disk_stat_unlock(); + part_stat_unlock(); } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index d926a24bf1fd..c77196d55899 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -390,17 +390,13 @@ static int attempt_merge(struct request_queue *q, struct request *req, struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - disk_round_stats(cpu, req->rq_disk); - req->rq_disk->in_flight--; - if (part) { - part_round_stats(cpu, part); - part->in_flight--; - } + part_round_stats(cpu, part); + part_dec_in_flight(part); - disk_stat_unlock(); + part_stat_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index 06a252f2b967..e1cb96fb883e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); * while preemption is disabled. * * RETURNS: - * Found partition on success, NULL if there's no matching partition. + * Found partition on success, part0 is returned if no partition matches */ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) { @@ -189,7 +189,7 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) sector < part->start_sect + part->nr_sects) return part; } - return NULL; + return &disk->part0; } EXPORT_SYMBOL_GPL(disk_map_sector_rcu); @@ -580,24 +580,24 @@ void __init printk_all_partitions(void) * numbers in hex - the same format as the root= * option takes. */ - printk("%s %10llu %s", - bdevt_str(disk_devt(disk), devt_buf), - (unsigned long long)get_capacity(disk) >> 1, - disk_name(disk, 0, name_buf)); - if (disk->driverfs_dev != NULL && - disk->driverfs_dev->driver != NULL) - printk(" driver: %s\n", - disk->driverfs_dev->driver->name); - else - printk(" (driver?)\n"); + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); + while ((part = disk_part_iter_next(&piter))) { + bool is_part0 = part == &disk->part0; - /* now show the partitions */ - disk_part_iter_init(&piter, disk, 0); - while ((part = disk_part_iter_next(&piter))) - printk(" %s %10llu %s\n", + printk("%s%s %10llu %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), (unsigned long long)part->nr_sects >> 1, disk_name(disk, part->partno, name_buf)); + if (is_part0) { + if (disk->driverfs_dev != NULL && + disk->driverfs_dev->driver != NULL) + printk(" driver: %s\n", + disk->driverfs_dev->driver->name); + else + printk(" (driver?)\n"); + } else + printk("\n"); + } disk_part_iter_exit(&piter); } class_dev_iter_exit(&iter); @@ -674,12 +674,7 @@ static int show_partition(struct seq_file *seqf, void *v) return 0; /* show the full disk and all non-0 size partitions of it */ - seq_printf(seqf, "%4d %7d %10llu %s\n", - MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), - (unsigned long long)get_capacity(sgp) >> 1, - disk_name(sgp, 0, buf)); - - disk_part_iter_init(&piter, sgp, 0); + disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %7d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), @@ -768,40 +763,13 @@ static ssize_t disk_capability_show(struct device *dev, return sprintf(buf, "%x\n", disk->flags); } -static ssize_t disk_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gendisk *disk = dev_to_disk(dev); - int cpu; - - cpu = disk_stat_lock(); - disk_round_stats(cpu, disk); - disk_stat_unlock(); - return sprintf(buf, - "%8lu %8lu %8llu %8u " - "%8lu %8lu %8llu %8u " - "%8u %8u %8u" - "\n", - disk_stat_read(disk, ios[READ]), - disk_stat_read(disk, merges[READ]), - (unsigned long long)disk_stat_read(disk, sectors[READ]), - jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), - disk_stat_read(disk, ios[WRITE]), - disk_stat_read(disk, merges[WRITE]), - (unsigned long long)disk_stat_read(disk, sectors[WRITE]), - jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), - disk->in_flight, - jiffies_to_msecs(disk_stat_read(disk, io_ticks)), - jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); -} - static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); -static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); +static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -836,7 +804,7 @@ static void disk_release(struct device *dev) kfree(disk->random); kfree(disk->__part); - free_disk_stats(disk); + free_part_stats(&disk->part0); kfree(disk); } struct class block_class = { @@ -873,28 +841,11 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - cpu = disk_stat_lock(); - disk_round_stats(cpu, gp); - disk_stat_unlock(); - seq_printf(seqf, "%4d %7d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), - disk_name(gp, 0, buf), - disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), - (unsigned long long)disk_stat_read(gp, sectors[0]), - jiffies_to_msecs(disk_stat_read(gp, ticks[0])), - disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]), - (unsigned long long)disk_stat_read(gp, sectors[1]), - jiffies_to_msecs(disk_stat_read(gp, ticks[1])), - gp->in_flight, - jiffies_to_msecs(disk_stat_read(gp, io_ticks)), - jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); - - /* now show all non-0 size partitions of it */ - disk_part_iter_init(&piter, gp, 0); + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); while ((hd = disk_part_iter_next(&piter))) { - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part_round_stats(cpu, hd); - disk_stat_unlock(); + part_stat_unlock(); seq_printf(seqf, "%4d %7d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), @@ -1000,7 +951,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) int tot_minors = minors + ext_minors; int size = tot_minors * sizeof(struct hd_struct *); - if (!init_disk_stats(disk)) { + if (!init_part_stats(&disk->part0)) { kfree(disk); return NULL; } @@ -1008,7 +959,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) disk->__part = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, node_id); if (!disk->__part) { - free_disk_stats(disk); + free_part_stats(&disk->part0); kfree(disk); return NULL; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 934800f979c9..961d29a53cab 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -758,15 +758,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(cpu, disk, part, ios[rw], sector); - all_stat_add(cpu, disk, part, ticks[rw], duration, sector); - all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); - all_stat_add(cpu, disk, part, io_ticks, duration, sector); + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_stat_add(cpu, part, sectors[rw], n_sect); + part_stat_add(cpu, part, io_ticks, duration); - disk_stat_unlock(); + part_stat_unlock(); } void diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 637806695bb9..327de03a5bdf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -381,10 +381,10 @@ static void start_io_acct(struct dm_io *io) io->start_time = jiffies; - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_unlock(); - dm_disk(md)->in_flight = atomic_inc_return(&md->pending); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_unlock(); + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); } static int end_io_acct(struct dm_io *io) @@ -395,12 +395,13 @@ static int end_io_acct(struct dm_io *io) int pending, cpu; int rw = bio_data_dir(bio); - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); + part_stat_unlock(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + dm_disk(md)->part0.in_flight = pending = + atomic_dec_return(&md->pending); return !pending; } @@ -899,10 +900,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, dm_disk(md), ios[rw]); - disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); + part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); /* * If we're suspended we have to queue diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 00cbc8e47294..c80ea90593d3 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -325,10 +325,11 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) return 0; } - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; diff --git a/drivers/md/md.c b/drivers/md/md.c index 2bd9cf416123..0a3a4bdcd4af 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5546,8 +5546,8 @@ static int is_mddev_idle(mddev_t *mddev) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = disk_stat_read(disk, sectors[0]) + - disk_stat_read(disk, sectors[1]) - + curr_events = part_stat_read(&disk->part0, sectors[0]) + + part_stat_read(&disk->part0, sectors[1]) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 182f5a94cdc5..8bb8794129b3 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -159,10 +159,11 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e26030fa59ab..f52f442a735f 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -406,10 +406,11 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) return 0; } - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index babb13036f93..b9764429d856 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -804,10 +804,11 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); /* * make_request() can abort the operation when READA is being diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5ec80da0a9d7..5f990133f5ef 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -844,10 +844,11 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5899f211515f..ae16794bef20 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3396,10 +3396,11 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bi)); + part_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 60592d9f43b6..f517869e8d10 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -210,15 +210,15 @@ ssize_t part_size_show(struct device *dev, return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); } -static ssize_t part_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part_round_stats(cpu, p); - disk_stat_unlock(); + part_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -575,8 +575,8 @@ void del_gendisk(struct gendisk *disk) set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; unlink_gendisk(disk); - disk_stat_set_all(disk, 0); - disk->stamp = 0; + part_stat_set_all(&disk->part0, 0); + disk->part0.stamp = 0; kobject_put(disk->part0.holder_dir); kobject_put(disk->slave_dir); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3d15b42dc352..c90e1b4fbe5a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,13 +145,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ - unsigned long stamp; - int in_flight; -#ifdef CONFIG_SMP - struct disk_stats *dkstats; -#else - struct disk_stats dkstats; -#endif struct work_struct async_notify; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; @@ -232,46 +225,18 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, * internal use only. */ #ifdef CONFIG_SMP -#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) - -#define disk_stat_read(gendiskp, field) \ -({ \ - typeof(gendiskp->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ - res; \ -}) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) -{ - int i; - - for_each_possible_cpu(i) - memset(per_cpu_ptr(gendiskp->dkstats, i), value, - sizeof(struct disk_stats)); -} +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define part_stat_add(cpu, part, field, addnd) \ - (per_cpu_ptr(part->dkstats, cpu)->field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) +#define __part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) #define part_stat_read(part, field) \ ({ \ - typeof(part->dkstats->field) res = 0; \ + typeof((part)->dkstats->field) res = 0; \ int i; \ for_each_possible_cpu(i) \ - res += per_cpu_ptr(part->dkstats, i)->field; \ + res += per_cpu_ptr((part)->dkstats, i)->field; \ res; \ }) @@ -284,109 +249,73 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) sizeof(struct disk_stats)); } -#else /* !CONFIG_SMP */ -#define disk_stat_lock() ({ rcu_read_lock(); 0; }) -#define disk_stat_unlock() rcu_read_unlock() - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) -#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +static inline int init_part_stats(struct hd_struct *part) { - memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; } -#define part_stat_add(cpu, part, field, addnd) \ - (part->dkstats.field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) - -#define part_stat_read(part, field) (part->dkstats.field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void free_part_stats(struct hd_struct *part) { - memset(&part->dkstats, value, sizeof(struct disk_stats)); + free_percpu(part->dkstats); } -#endif /* CONFIG_SMP */ - -#define disk_stat_dec(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, -1) -#define disk_stat_inc(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, 1) -#define disk_stat_sub(cpu, gendiskp, field, subnd) \ - disk_stat_add(cpu, gendiskp, field, -subnd) - -#define part_stat_dec(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, -1) -#define part_stat_inc(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, 1) -#define part_stat_sub(cpu, gendiskp, field, subnd) \ - part_stat_add(cpu, gendiskp, field, -subnd) +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() -#define all_stat_dec(cpu, gendiskp, field, sector) \ - all_stat_add(cpu, gendiskp, field, -1, sector) -#define all_stat_inc(cpu, gendiskp, part, field, sector) \ - all_stat_add(cpu, gendiskp, part, field, 1, sector) -#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ - all_stat_add(cpu, gendiskp, part, field, -subnd, sector) +#define __part_stat_add(cpu, part, field, addnd) \ + ((part)->dkstats.field += addnd) -/* Inlines to alloc and free disk stats in struct gendisk */ -#ifdef CONFIG_SMP -static inline int init_disk_stats(struct gendisk *disk) -{ - disk->dkstats = alloc_percpu(struct disk_stats); - if (!disk->dkstats) - return 0; - return 1; -} +#define part_stat_read(part, field) ((part)->dkstats.field) -static inline void free_disk_stats(struct gendisk *disk) +static inline void part_stat_set_all(struct hd_struct *part, int value) { - free_percpu(disk->dkstats); + memset(&part->dkstats, value, sizeof(struct disk_stats)); } static inline int init_part_stats(struct hd_struct *part) { - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; return 1; } static inline void free_part_stats(struct hd_struct *part) { - free_percpu(part->dkstats); } -#else /* CONFIG_SMP */ -static inline int init_disk_stats(struct gendisk *disk) -{ - return 1; -} +#endif /* CONFIG_SMP */ -static inline void free_disk_stats(struct gendisk *disk) -{ -} +#define part_stat_add(cpu, part, field, addnd) do { \ + __part_stat_add((cpu), (part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add((cpu), &part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) -static inline int init_part_stats(struct hd_struct *part) +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +static inline void part_inc_in_flight(struct hd_struct *part) { - return 1; + part->in_flight++; + if (part->partno) + part_to_disk(part)->part0.in_flight++; } -static inline void free_part_stats(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part) { + part->in_flight--; + if (part->partno) + part_to_disk(part)->part0.in_flight--; } -#endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(int cpu, struct gendisk *disk); extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ @@ -595,6 +524,8 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3 From 224cb3e981f1b2f9f93dbd49eaef505d17d894c2 Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Fri, 29 Aug 2008 09:36:09 +0200 Subject: dm: Call blk_abort_queue on failed paths Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- drivers/md/dm-mpath.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c2fcf28b4c70..3d3848132c69 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -33,6 +33,7 @@ struct pgpath { unsigned fail_count; /* Cumulative failure count */ struct dm_path path; + struct work_struct deactivate_path; }; #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) @@ -112,6 +113,7 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void process_queued_ios(struct work_struct *work); static void trigger_event(struct work_struct *work); static void activate_path(struct work_struct *work); +static void deactivate_path(struct work_struct *work); /*----------------------------------------------- @@ -122,8 +124,10 @@ static struct pgpath *alloc_pgpath(void) { struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); - if (pgpath) + if (pgpath) { pgpath->path.is_active = 1; + INIT_WORK(&pgpath->deactivate_path, deactivate_path); + } return pgpath; } @@ -133,6 +137,14 @@ static void free_pgpath(struct pgpath *pgpath) kfree(pgpath); } +static void deactivate_path(struct work_struct *work) +{ + struct pgpath *pgpath = + container_of(work, struct pgpath, deactivate_path); + + blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue); +} + static struct priority_group *alloc_priority_group(void) { struct priority_group *pg; @@ -870,6 +882,7 @@ static int fail_path(struct pgpath *pgpath) pgpath->path.dev->name, m->nr_valid_paths); queue_work(kmultipathd, &m->trigger_event); + queue_work(kmultipathd, &pgpath->deactivate_path); out: spin_unlock_irqrestore(&m->lock, flags); -- cgit v1.2.3 From 6feef531f55cf4a20fd9eb39f5352e5745203603 Mon Sep 17 00:00:00 2001 From: Denis ChengRq Date: Thu, 9 Oct 2008 08:57:05 +0200 Subject: block: mark bio_split_pool static Since all bio_split calls refer the same single bio_split_pool, the bio_split function can use bio_split_pool directly instead of the mempool_t parameter; then the mempool_t parameter can be removed from bio_split param list, and bio_split_pool is only referred in fs/bio.c file, can be marked static. Signed-off-by: Denis ChengRq Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 +- drivers/md/linear.c | 2 +- drivers/md/raid0.c | 2 +- drivers/md/raid10.c | 2 +- fs/bio.c | 9 ++++----- include/linux/bio.h | 4 +--- 6 files changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers/md') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index e1a90bbb4747..0e077150568b 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2544,7 +2544,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_sector; - bp = bio_split(bio, bio_split_pool, first_sectors); + bp = bio_split(bio, first_sectors); BUG_ON(!bp); pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index c80ea90593d3..b9cbee688fae 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -353,7 +353,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) * split it. */ struct bio_pair *bp; - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f52f442a735f..53508a8a981d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -427,7 +427,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5f990133f5ef..8bdc9bfc2887 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -817,7 +817,7 @@ static int make_request(struct request_queue *q, struct bio * bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); if (make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/fs/bio.c b/fs/bio.c index a5af5809f566..77a55bcceedb 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -30,7 +30,7 @@ static struct kmem_cache *bio_slab __read_mostly; -mempool_t *bio_split_pool __read_mostly; +static mempool_t *bio_split_pool __read_mostly; /* * if you change this list, also change bvec_alloc or things will @@ -1256,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) * split a bio - only worry about a bio with a single page * in it's iovec */ -struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) +struct bio_pair *bio_split(struct bio *bi, int first_sectors) { - struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); + struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); if (!bp) return bp; @@ -1292,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) bp->bio2.bi_end_io = bio_pair_end_2; bp->bio1.bi_private = bi; - bp->bio2.bi_private = pool; + bp->bio2.bi_private = bio_split_pool; if (bio_integrity(bi)) bio_integrity_split(bi, bp, first_sectors); @@ -1455,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); -EXPORT_SYMBOL(bio_split_pool); EXPORT_SYMBOL(bio_copy_user); EXPORT_SYMBOL(bio_uncopy_user); EXPORT_SYMBOL(bioset_create); diff --git a/include/linux/bio.h b/include/linux/bio.h index fe12d0f9ebaa..fb97221d7c30 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -300,9 +300,7 @@ struct bio_pair { atomic_t cnt; int error; }; -extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, - int first_sectors); -extern mempool_t *bio_split_pool; +extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int); -- cgit v1.2.3 From 01460f3520c100010aacc8f8500cafcb17ce4665 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 10 Oct 2008 13:36:57 +0100 Subject: dm mpath: use more error codes This patch allows path errors from the multipath ctr function to propagate up to userspace as errno values from the ioctl() call. This is in response to https://www.redhat.com/archives/dm-devel/2008-May/msg00000.html and https://bugzilla.redhat.com/show_bug.cgi?id=444421 The patch only lets through the errors that it needs to in order to get the path errors from parse_path(). Signed-off-by: Benjamin Marzinski Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c2fcf28b4c70..c2ff77d77a5e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -563,12 +563,12 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, /* we need at least a path arg */ if (as->argc < 1) { ti->error = "no device given"; - return NULL; + return ERR_PTR(-EINVAL); } p = alloc_pgpath(); if (!p) - return NULL; + return ERR_PTR(-ENOMEM); r = dm_get_device(ti, shift(as), ti->begin, ti->len, dm_table_get_mode(ti->table), &p->path.dev); @@ -596,7 +596,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, bad: free_pgpath(p); - return NULL; + return ERR_PTR(r); } static struct priority_group *parse_priority_group(struct arg_set *as, @@ -614,14 +614,14 @@ static struct priority_group *parse_priority_group(struct arg_set *as, if (as->argc < 2) { as->argc = 0; - ti->error = "not enough priority group aruments"; - return NULL; + ti->error = "not enough priority group arguments"; + return ERR_PTR(-EINVAL); } pg = alloc_priority_group(); if (!pg) { ti->error = "couldn't allocate priority group"; - return NULL; + return ERR_PTR(-ENOMEM); } pg->m = m; @@ -654,8 +654,10 @@ static struct priority_group *parse_priority_group(struct arg_set *as, path_args.argv = as->argv; pgpath = parse_path(&path_args, &pg->ps, ti); - if (!pgpath) + if (IS_ERR(pgpath)) { + r = PTR_ERR(pgpath); goto bad; + } pgpath->pg = pg; list_add_tail(&pgpath->list, &pg->pgpaths); @@ -666,7 +668,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as, bad: free_priority_group(pg, ti); - return NULL; + return ERR_PTR(r); } static int parse_hw_handler(struct arg_set *as, struct multipath *m) @@ -785,8 +787,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, struct priority_group *pg; pg = parse_priority_group(&as, m); - if (!pg) { - r = -EINVAL; + if (IS_ERR(pg)) { + r = PTR_ERR(pg); goto bad; } -- cgit v1.2.3 From 6680073d3ec7c6dbdbf77870bf1fea869767d779 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Fri, 10 Oct 2008 13:36:58 +0100 Subject: dm mpath: remove is_active from struct dm_path This patch moves 'is_active' from struct dm_path to struct pgpath as it does not need exporting. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 13 +++++++------ drivers/md/dm-mpath.h | 2 -- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c2ff77d77a5e..b2ab8489d0eb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -30,6 +30,7 @@ struct pgpath { struct list_head list; struct priority_group *pg; /* Owning PG */ + unsigned is_active; /* Path status */ unsigned fail_count; /* Cumulative failure count */ struct dm_path path; @@ -123,7 +124,7 @@ static struct pgpath *alloc_pgpath(void) struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); if (pgpath) - pgpath->path.is_active = 1; + pgpath->is_active = 1; return pgpath; } @@ -854,13 +855,13 @@ static int fail_path(struct pgpath *pgpath) spin_lock_irqsave(&m->lock, flags); - if (!pgpath->path.is_active) + if (!pgpath->is_active) goto out; DMWARN("Failing path %s.", pgpath->path.dev->name); pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path); - pgpath->path.is_active = 0; + pgpath->is_active = 0; pgpath->fail_count++; m->nr_valid_paths--; @@ -890,7 +891,7 @@ static int reinstate_path(struct pgpath *pgpath) spin_lock_irqsave(&m->lock, flags); - if (pgpath->path.is_active) + if (pgpath->is_active) goto out; if (!pgpath->pg->ps.type->reinstate_path) { @@ -904,7 +905,7 @@ static int reinstate_path(struct pgpath *pgpath) if (r) goto out; - pgpath->path.is_active = 1; + pgpath->is_active = 1; m->current_pgpath = NULL; if (!m->nr_valid_paths++ && m->queue_size) @@ -1292,7 +1293,7 @@ static int multipath_status(struct dm_target *ti, status_type_t type, list_for_each_entry(p, &pg->pgpaths, list) { DMEMIT("%s %s %u ", p->path.dev->name, - p->path.is_active ? "A" : "F", + p->is_active ? "A" : "F", p->fail_count); if (pg->ps.type->status) sz += pg->ps.type->status(&pg->ps, diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h index c198b856a452..e230f7196259 100644 --- a/drivers/md/dm-mpath.h +++ b/drivers/md/dm-mpath.h @@ -13,8 +13,6 @@ struct dm_dev; struct dm_path { struct dm_dev *dev; /* Read-only */ - unsigned is_active; /* Read-only */ - void *pscontext; /* For path-selector use */ }; -- cgit v1.2.3 From f7c83e2e4783c4f7abe6f3a85a8c5e210f98bc7b Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 10 Oct 2008 13:36:59 +0100 Subject: dm raid1: kcopyd should stop on error if errors handled dm-raid1 is setting the 'DM_KCOPYD_IGNORE_ERROR' flag unconditionally when assigning kcopyd work. kcopyd is responsible for copying an assigned section of disk to one or more other disks. The 'DM_KCOPYD_IGNORE_ERROR' flag affects kcopyd in the following way: When not set: kcopyd will immediately stop the copy operation when an error is encountered. When set: kcopyd will try to proceed regardless of errors and try to continue copying any remaining amount. Since dm-raid1 tracks regions of the address space that are (or are not) in sync and it now has the ability to handle these errors, we can safely enable this optimization. This optimization is conditional on whether mirror error handling has been enabled. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ff05fe893083..29913e42c4ab 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -842,7 +842,9 @@ static int recover(struct mirror_set *ms, struct region *reg) } /* hand to kcopyd */ - set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); + if (!errors_handled(ms)) + set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); + r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, flags, recovery_complete, reg); -- cgit v1.2.3 From a481db784682b33d078c7bf8a1d0581dc09946c1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:00 +0100 Subject: dm exception store: introduce area_location function Move this logic to a function, because it will be reused later. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 41f408068a7c..824cf31967c5 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -208,6 +208,14 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) return req.result; } +/* + * Convert a metadata area index to a chunk index. + */ +static chunk_t area_location(struct pstore *ps, chunk_t area) +{ + return 1 + ((ps->exceptions_per_area + 1) * area); +} + /* * Read or write a metadata area. Remembering to skip the first * chunk which holds the header. @@ -217,8 +225,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw) int r; uint32_t chunk; - /* convert a metadata area index to a chunk index */ - chunk = 1 + ((ps->exceptions_per_area + 1) * area); + chunk = area_location(ps, area); r = chunk_io(ps, chunk, rw, 0); if (r) -- cgit v1.2.3 From fd14acf6fc9f4635be201960004d847b14236a20 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:01 +0100 Subject: dm exception store: use chunk_t for_areas Change uint32_t into chunk_t to remove 32-bit limitation on the number of chunks on systems with 64-bit sector numbers. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 824cf31967c5..769ab677f8e0 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -108,12 +108,12 @@ struct pstore { * Used to keep track of which metadata area the data in * 'chunk' refers to. */ - uint32_t current_area; + chunk_t current_area; /* * The next free chunk for an exception. */ - uint32_t next_free; + chunk_t next_free; /* * The index of next free exception in the current @@ -175,7 +175,7 @@ static void do_metadata(struct work_struct *work) /* * Read or write a chunk aligned and sized block of data from a device. */ -static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) +static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) { struct dm_io_region where = { .bdev = ps->snap->cow->bdev, @@ -220,10 +220,10 @@ static chunk_t area_location(struct pstore *ps, chunk_t area) * Read or write a metadata area. Remembering to skip the first * chunk which holds the header. */ -static int area_io(struct pstore *ps, uint32_t area, int rw) +static int area_io(struct pstore *ps, chunk_t area, int rw) { int r; - uint32_t chunk; + chunk_t chunk; chunk = area_location(ps, area); @@ -235,7 +235,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw) return 0; } -static int zero_area(struct pstore *ps, uint32_t area) +static int zero_area(struct pstore *ps, chunk_t area) { memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); return area_io(ps, area, WRITE); @@ -411,7 +411,7 @@ static int insert_exceptions(struct pstore *ps, int *full) static int read_exceptions(struct pstore *ps) { - uint32_t area; + chunk_t area; int r, full = 1; /* @@ -524,6 +524,7 @@ static int persistent_prepare(struct exception_store *store, { struct pstore *ps = get_info(store); uint32_t stride; + chunk_t next_free; sector_t size = get_dev_size(store->snap->cow->bdev); /* Is there enough room ? */ @@ -537,7 +538,8 @@ static int persistent_prepare(struct exception_store *store, * into account the location of the metadata chunks. */ stride = (ps->exceptions_per_area + 1); - if ((++ps->next_free % stride) == 1) + next_free = ++ps->next_free; + if (sector_div(next_free, stride) == 1) ps->next_free++; atomic_inc(&ps->pending_count); -- cgit v1.2.3 From 3e1a8bdd05d6b1734a8ccf7af28042d72c447780 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:02 +0100 Subject: dm crypt: tidy inc pending Move io pending to one place. No functional change, usefull to simplify debugging. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 13956437bc81..6b9be99eff5b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -517,6 +517,11 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) } } +static void crypt_inc_pending(struct dm_crypt_io *io) +{ + atomic_inc(&io->pending); +} + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -591,7 +596,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io) struct bio *base_bio = io->base_bio; struct bio *clone; - atomic_inc(&io->pending); + crypt_inc_pending(io); /* * The block layer might modify the bvec array, so always @@ -665,7 +670,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, if (async) kcryptd_queue_io(io); else { - atomic_inc(&io->pending); + crypt_inc_pending(io); generic_make_request(clone); } } @@ -701,7 +706,7 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) if (unlikely(r < 0)) return; } else - atomic_inc(&io->pending); + crypt_inc_pending(io); /* out of memory -> run queues */ if (unlikely(remaining)) { @@ -720,7 +725,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) /* * Prevent io from disappearing until this function completes. */ - atomic_inc(&io->pending); + crypt_inc_pending(io); crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector); kcryptd_crypt_write_convert_loop(io); @@ -741,7 +746,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) struct crypt_config *cc = io->target->private; int r = 0; - atomic_inc(&io->pending); + crypt_inc_pending(io); crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, io->sector); -- cgit v1.2.3 From dc440d1e56c481f80d5350daadc7d078a04ca729 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:03 +0100 Subject: dm crypt: tidy crypt alloc Factor out crypt io allocation code. Later patches will call it from another place. No functional change. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6b9be99eff5b..cac064d4aa72 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -517,6 +517,22 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) } } +static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, + struct bio *bio, sector_t sector) +{ + struct crypt_config *cc = ti->private; + struct dm_crypt_io *io; + + io = mempool_alloc(cc->io_pool, GFP_NOIO); + io->target = ti; + io->base_bio = bio; + io->sector = sector; + io->error = 0; + atomic_set(&io->pending, 0); + + return io; +} + static void crypt_inc_pending(struct dm_crypt_io *io) { atomic_inc(&io->pending); @@ -1113,15 +1129,9 @@ static void crypt_dtr(struct dm_target *ti) static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { - struct crypt_config *cc = ti->private; struct dm_crypt_io *io; - io = mempool_alloc(cc->io_pool, GFP_NOIO); - io->target = ti; - io->base_bio = bio; - io->sector = bio->bi_sector - ti->begin; - io->error = 0; - atomic_set(&io->pending, 0); + io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin); if (bio_data_dir(io->base_bio) == READ) kcryptd_queue_io(io); -- cgit v1.2.3 From fc5a5e9aa878f86642c962b309f793fb2db0727e Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:04 +0100 Subject: dm crypt: tidy write loop pending Move kcryptd_crypt_write_convert_loop inside kcryptd_crypt_write_convert. This change is needed for a later patch. No functional change. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cac064d4aa72..97b407582c03 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -691,13 +691,19 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, } } -static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) +static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct bio *clone; unsigned remaining = io->base_bio->bi_size; int r; + /* + * Prevent io from disappearing until this function completes. + */ + crypt_inc_pending(io); + crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector); + /* * The allocated buffers can be smaller than the whole bio, * so repeat the whole process until all the data can be handled. @@ -706,7 +712,7 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) clone = crypt_alloc_buffer(io, remaining); if (unlikely(!clone)) { io->error = -ENOMEM; - return; + break; } io->ctx.bio_out = clone; @@ -720,7 +726,7 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) /* processed, no running async crypto */ kcryptd_crypt_write_io_submit(io, r, 0); if (unlikely(r < 0)) - return; + break; } else crypt_inc_pending(io); @@ -732,19 +738,6 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) congestion_wait(WRITE, HZ/100); } } -} - -static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->target->private; - - /* - * Prevent io from disappearing until this function completes. - */ - crypt_inc_pending(io); - - crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector); - kcryptd_crypt_write_convert_loop(io); crypt_dec_pending(io); } -- cgit v1.2.3 From 1e37bb8e557a186d327eb4d1387953880ffc2cdd Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 10 Oct 2008 13:37:05 +0100 Subject: dm crypt: remove inc_pending from write_io_submit Make the caller reponsible for incrementing the pending count before calling kcryptd_crypt_write_io_submit() in the non-async case to bring it into line with the async case. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 97b407582c03..0042636ad375 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -685,10 +685,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, if (async) kcryptd_queue_io(io); - else { - crypt_inc_pending(io); + else generic_make_request(clone); - } } static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) @@ -724,9 +722,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) if (atomic_dec_and_test(&io->ctx.pending)) { /* processed, no running async crypto */ + crypt_inc_pending(io); kcryptd_crypt_write_io_submit(io, r, 0); - if (unlikely(r < 0)) + if (unlikely(r < 0)) { + crypt_dec_pending(io); break; + } } else crypt_inc_pending(io); -- cgit v1.2.3 From 6c031f41db15b6cb0cd33545cec28ca706cd3c7e Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:06 +0100 Subject: dm crypt: move dec_pending on error into write_io_submit Make kcryptd_crypt_write_io_submit() responsible for decrementing the pending count after an error. Also fixes a bug in the async path that forgot to decrement it. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0042636ad375..d8126ac82960 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -674,6 +674,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, crypt_free_buffer_pages(cc, clone); bio_put(clone); io->error = -EIO; + crypt_dec_pending(io); return; } @@ -724,10 +725,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) /* processed, no running async crypto */ crypt_inc_pending(io); kcryptd_crypt_write_io_submit(io, r, 0); - if (unlikely(r < 0)) { - crypt_dec_pending(io); + if (unlikely(r < 0)) break; - } } else crypt_inc_pending(io); -- cgit v1.2.3 From 4e59409891c9cc30cb4d5d73250b0c968af8e39b Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:07 +0100 Subject: dm crypt: fix async inc_pending The pending reference count must be incremented *before* the async work is queued to another thread, not after. Otherwise there's a race if the work completes and decrements the reference count before it gets incremented. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d8126ac82960..262ed1816695 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -719,16 +719,15 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) remaining -= clone->bi_size; + crypt_inc_pending(io); r = crypt_convert(cc, &io->ctx); if (atomic_dec_and_test(&io->ctx.pending)) { /* processed, no running async crypto */ - crypt_inc_pending(io); kcryptd_crypt_write_io_submit(io, r, 0); if (unlikely(r < 0)) break; - } else - crypt_inc_pending(io); + } /* out of memory -> run queues */ if (unlikely(remaining)) { -- cgit v1.2.3 From c8081618a9f832fdf7ca81eb087f9f61f2bf07d5 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:08 +0100 Subject: dm crypt: tidy ctx pending Move the initialisation of ctx->pending into one place, at the start of crypt_convert(). Introduce crypt_finished to indicate whether or not the encryption is finished, for use in a later patch. No functional change. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 262ed1816695..f6018f5961de 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -333,7 +333,6 @@ static void crypt_convert_init(struct crypt_config *cc, ctx->idx_out = bio_out ? bio_out->bi_idx : 0; ctx->sector = sector + cc->iv_offset; init_completion(&ctx->restart); - atomic_set(&ctx->pending, 1); } static int crypt_convert_block(struct crypt_config *cc, @@ -408,6 +407,8 @@ static int crypt_convert(struct crypt_config *cc, { int r; + atomic_set(&ctx->pending, 1); + while(ctx->idx_in < ctx->bio_in->bi_vcnt && ctx->idx_out < ctx->bio_out->bi_vcnt) { @@ -694,6 +695,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct bio *clone; + int crypt_finished; unsigned remaining = io->base_bio->bi_size; int r; @@ -721,19 +723,23 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); r = crypt_convert(cc, &io->ctx); + crypt_finished = atomic_dec_and_test(&io->ctx.pending); - if (atomic_dec_and_test(&io->ctx.pending)) { - /* processed, no running async crypto */ + /* Encryption was already finished, submit io now */ + if (crypt_finished) { kcryptd_crypt_write_io_submit(io, r, 0); + + /* + * If there was an error, do not try next fragments. + * For async, error is processed in async handler. + */ if (unlikely(r < 0)) break; } /* out of memory -> run queues */ if (unlikely(remaining)) { - /* wait for async crypto then reinitialize pending */ wait_event(cc->writeq, !atomic_read(&io->ctx.pending)); - atomic_set(&io->ctx.pending, 1); congestion_wait(WRITE, HZ/100); } } -- cgit v1.2.3 From 933f01d43326fb12a978a8e0bb062c28a2de4d5a Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:08 +0100 Subject: dm crypt: avoid unnecessary wait when splitting bio Don't wait between submitting crypt requests for a bio unless we are short of memory. There are two situations when we must split an encrypted bio: 1) there are no free pages; 2) the new bio would violate underlying device restrictions (e.g. max hw segments). In case (2) we do not need to wait. Add output variable to crypt_alloc_buffer() to distinguish between these cases. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f6018f5961de..682ef9e6acd3 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -457,9 +457,11 @@ static void dm_crypt_bio_destructor(struct bio *bio) /* * Generate a new unfragmented bio with the given size * This should never violate the device limitations - * May return a smaller bio when running out of pages + * May return a smaller bio when running out of pages, indicated by + * *out_of_pages set to 1. */ -static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) +static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, + unsigned *out_of_pages) { struct crypt_config *cc = io->target->private; struct bio *clone; @@ -473,11 +475,14 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) return NULL; clone_init(io, clone); + *out_of_pages = 0; for (i = 0; i < nr_iovecs; i++) { page = mempool_alloc(cc->page_pool, gfp_mask); - if (!page) + if (!page) { + *out_of_pages = 1; break; + } /* * if additional pages cannot be allocated without waiting, @@ -696,6 +701,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) struct crypt_config *cc = io->target->private; struct bio *clone; int crypt_finished; + unsigned out_of_pages = 0; unsigned remaining = io->base_bio->bi_size; int r; @@ -710,7 +716,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * so repeat the whole process until all the data can be handled. */ while (remaining) { - clone = crypt_alloc_buffer(io, remaining); + clone = crypt_alloc_buffer(io, remaining, &out_of_pages); if (unlikely(!clone)) { io->error = -ENOMEM; break; @@ -737,11 +743,15 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) break; } - /* out of memory -> run queues */ - if (unlikely(remaining)) { - wait_event(cc->writeq, !atomic_read(&io->ctx.pending)); + /* + * Out of memory -> run queues + * But don't wait if split was due to the io size restriction + */ + if (unlikely(out_of_pages)) congestion_wait(WRITE, HZ/100); - } + + if (unlikely(remaining)) + wait_event(cc->writeq, !atomic_read(&io->ctx.pending)); } crypt_dec_pending(io); -- cgit v1.2.3 From 82b1519b345d61dcfae526e3fcb08128f39f9bcc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:09 +0100 Subject: dm: export struct dm_dev Split struct dm_dev in two and publish the part that other targets need in include/linux/device-mapper.h. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 4 +-- drivers/md/dm-table.c | 71 ++++++++++++++++++++++++------------------- drivers/md/dm.h | 7 ++--- include/linux/device-mapper.h | 7 ++++- 4 files changed, 49 insertions(+), 40 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b262c0042de3..90e19f13f269 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1131,7 +1131,7 @@ static void retrieve_deps(struct dm_table *table, unsigned int count = 0; struct list_head *tmp; size_t len, needed; - struct dm_dev *dd; + struct dm_dev_internal *dd; struct dm_target_deps *deps; deps = get_result_buffer(param, param_size, &len); @@ -1157,7 +1157,7 @@ static void retrieve_deps(struct dm_table *table, deps->count = count; count = 0; list_for_each_entry (dd, dm_table_get_devices(table), list) - deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev); + deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev); param->data_size = param->data_start + needed; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 61f441409234..5dd32b8a57ac 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -250,7 +250,8 @@ static void free_devices(struct list_head *devices) struct list_head *tmp, *next; list_for_each_safe(tmp, next, devices) { - struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); + struct dm_dev_internal *dd = + list_entry(tmp, struct dm_dev_internal, list); kfree(dd); } } @@ -327,12 +328,12 @@ static int lookup_device(const char *path, dev_t *dev) /* * See if we've already got a device in the list. */ -static struct dm_dev *find_device(struct list_head *l, dev_t dev) +static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) { - struct dm_dev *dd; + struct dm_dev_internal *dd; list_for_each_entry (dd, l, list) - if (dd->bdev->bd_dev == dev) + if (dd->dm_dev.bdev->bd_dev == dev) return dd; return NULL; @@ -341,45 +342,47 @@ static struct dm_dev *find_device(struct list_head *l, dev_t dev) /* * Open a device so we can use it as a map destination. */ -static int open_dev(struct dm_dev *d, dev_t dev, struct mapped_device *md) +static int open_dev(struct dm_dev_internal *d, dev_t dev, + struct mapped_device *md) { static char *_claim_ptr = "I belong to device-mapper"; struct block_device *bdev; int r; - BUG_ON(d->bdev); + BUG_ON(d->dm_dev.bdev); - bdev = open_by_devnum(dev, d->mode); + bdev = open_by_devnum(dev, d->dm_dev.mode); if (IS_ERR(bdev)) return PTR_ERR(bdev); r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); if (r) blkdev_put(bdev); else - d->bdev = bdev; + d->dm_dev.bdev = bdev; return r; } /* * Close a device that we've been using. */ -static void close_dev(struct dm_dev *d, struct mapped_device *md) +static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) { - if (!d->bdev) + if (!d->dm_dev.bdev) return; - bd_release_from_disk(d->bdev, dm_disk(md)); - blkdev_put(d->bdev); - d->bdev = NULL; + bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); + blkdev_put(d->dm_dev.bdev); + d->dm_dev.bdev = NULL; } /* * If possible, this checks an area of a destination device is valid. */ -static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len) +static int check_device_area(struct dm_dev_internal *dd, sector_t start, + sector_t len) { - sector_t dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT; + sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT; if (!dev_size) return 1; @@ -392,16 +395,17 @@ static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len) * careful to leave things as they were if we fail to reopen the * device. */ -static int upgrade_mode(struct dm_dev *dd, int new_mode, struct mapped_device *md) +static int upgrade_mode(struct dm_dev_internal *dd, int new_mode, + struct mapped_device *md) { int r; - struct dm_dev dd_copy; - dev_t dev = dd->bdev->bd_dev; + struct dm_dev_internal dd_copy; + dev_t dev = dd->dm_dev.bdev->bd_dev; dd_copy = *dd; - dd->mode |= new_mode; - dd->bdev = NULL; + dd->dm_dev.mode |= new_mode; + dd->dm_dev.bdev = NULL; r = open_dev(dd, dev, md); if (!r) close_dev(&dd_copy, md); @@ -421,7 +425,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, { int r; dev_t uninitialized_var(dev); - struct dm_dev *dd; + struct dm_dev_internal *dd; unsigned int major, minor; BUG_ON(!t); @@ -443,20 +447,20 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, if (!dd) return -ENOMEM; - dd->mode = mode; - dd->bdev = NULL; + dd->dm_dev.mode = mode; + dd->dm_dev.bdev = NULL; if ((r = open_dev(dd, dev, t->md))) { kfree(dd); return r; } - format_dev_t(dd->name, dev); + format_dev_t(dd->dm_dev.name, dev); atomic_set(&dd->count, 0); list_add(&dd->list, &t->devices); - } else if (dd->mode != (mode | dd->mode)) { + } else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) { r = upgrade_mode(dd, mode, t->md); if (r) return r; @@ -465,11 +469,11 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, if (!check_device_area(dd, start, len)) { DMWARN("device %s too small for target", path); - dm_put_device(ti, dd); + dm_put_device(ti, &dd->dm_dev); return -EINVAL; } - *result = dd; + *result = &dd->dm_dev; return 0; } @@ -540,8 +544,11 @@ int dm_get_device(struct dm_target *ti, const char *path, sector_t start, /* * Decrement a devices use count and remove it if necessary. */ -void dm_put_device(struct dm_target *ti, struct dm_dev *dd) +void dm_put_device(struct dm_target *ti, struct dm_dev *d) { + struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal, + dm_dev); + if (atomic_dec_and_test(&dd->count)) { close_dev(dd, ti->table->md); list_del(&dd->list); @@ -937,12 +944,12 @@ int dm_table_resume_targets(struct dm_table *t) int dm_table_any_congested(struct dm_table *t, int bdi_bits) { - struct dm_dev *dd; + struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); int r = 0; list_for_each_entry(dd, devices, list) { - struct request_queue *q = bdev_get_queue(dd->bdev); + struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); r |= bdi_congested(&q->backing_dev_info, bdi_bits); } @@ -951,11 +958,11 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) void dm_table_unplug_all(struct dm_table *t) { - struct dm_dev *dd; + struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); list_for_each_entry(dd, devices, list) { - struct request_queue *q = bdev_get_queue(dd->bdev); + struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); blk_unplug(q); } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 1e59a0b0a78a..65f2f562220e 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -25,13 +25,10 @@ /* * List of devices that a metadevice uses and should open/close. */ -struct dm_dev { +struct dm_dev_internal { struct list_head list; - atomic_t count; - int mode; - struct block_device *bdev; - char name[16]; + struct dm_dev dm_dev; }; struct dm_table; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a90222e3297d..178390de195e 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -13,7 +13,6 @@ struct dm_target; struct dm_table; -struct dm_dev; struct mapped_device; struct bio_vec; @@ -84,6 +83,12 @@ void dm_error(const char *message); */ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev); +struct dm_dev { + struct block_device *bdev; + int mode; + char name[16]; +}; + /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. -- cgit v1.2.3 From 89343da077ad564ed130c46e5ea6a79388410fa5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:10 +0100 Subject: dm: publish dm_get_mapinfo Publish dm_get_mapinfo in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.h | 1 - include/linux/device-mapper.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 65f2f562220e..911d434361dd 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -91,7 +91,6 @@ int dm_stripe_init(void); void dm_stripe_exit(void); void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); -union map_info *dm_get_mapinfo(struct bio *bio); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 178390de195e..6b80961650f0 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -207,6 +207,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct mapped_device *md); int dm_noflush_suspending(struct dm_target *ti); +union map_info *dm_get_mapinfo(struct bio *bio); /* * Geometry functions. -- cgit v1.2.3 From ea0ec640940c2ae3a8d71af3249fccf06a9997a3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:11 +0100 Subject: dm: publish dm_table_unplug_all Publish dm_table_unplug_all in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.h | 1 - include/linux/device-mapper.h | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 911d434361dd..8812208978e3 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -46,7 +46,6 @@ void dm_table_presuspend_targets(struct dm_table *t); void dm_table_postsuspend_targets(struct dm_table *t); int dm_table_resume_targets(struct dm_table *t); int dm_table_any_congested(struct dm_table *t, int bdi_bits); -void dm_table_unplug_all(struct dm_table *t); /* * To check the return value from dm_table_find_target(). diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 6b80961650f0..e462c7f3b391 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -237,6 +237,11 @@ int dm_table_add_target(struct dm_table *t, const char *type, */ int dm_table_complete(struct dm_table *t); +/* + * Unplug all devices in a table. + */ +void dm_table_unplug_all(struct dm_table *t); + /* * Table reference counting. */ -- cgit v1.2.3 From 54160904260fa764ba6e2dc738770be30fdf9553 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:12 +0100 Subject: dm: publish dm_vcalloc Publish dm_vcalloc in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.h | 1 - include/linux/device-mapper.h | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 8812208978e3..cd189da2b2fa 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -89,7 +89,6 @@ void dm_linear_exit(void); int dm_stripe_init(void); void dm_stripe_exit(void); -void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e462c7f3b391..08d783592b73 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -267,6 +267,11 @@ void dm_table_event(struct dm_table *t); */ int dm_swap_table(struct mapped_device *md, struct dm_table *t); +/* + * A wrapper around vmalloc. + */ +void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); + /*----------------------------------------------------------------- * Macros. *---------------------------------------------------------------*/ -- cgit v1.2.3 From 0c2322e4ce144e130c03d813fe92de3798662c5e Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 10 Oct 2008 13:37:13 +0100 Subject: dm: detect lost queue Detect and report buggy drivers that destroy their request_queue. Signed-off-by: Alasdair G Kergon Cc: Stefan Raspl Cc: Jens Axboe Cc: Andrew Morton --- drivers/md/dm-table.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5dd32b8a57ac..a740a6950f59 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -482,6 +482,13 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); struct io_restrictions *rs = &ti->limits; + char b[BDEVNAME_SIZE]; + + if (unlikely(!q)) { + DMWARN("%s: Cannot set limits for nonexistent device %s", + dm_device_name(ti->table->md), bdevname(bdev, b)); + return; + } /* * Combine the device limits low. @@ -950,7 +957,14 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); - r |= bdi_congested(&q->backing_dev_info, bdi_bits); + char b[BDEVNAME_SIZE]; + + if (likely(q)) + r |= bdi_congested(&q->backing_dev_info, bdi_bits); + else + DMWARN_LIMIT("%s: any_congested: nonexistent device %s", + dm_device_name(t->md), + bdevname(dd->dm_dev.bdev, b)); } return r; @@ -963,8 +977,14 @@ void dm_table_unplug_all(struct dm_table *t) list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); - - blk_unplug(q); + char b[BDEVNAME_SIZE]; + + if (likely(q)) + blk_unplug(q); + else + DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s", + dm_device_name(t->md), + bdevname(dd->dm_dev.bdev, b)); } } -- cgit v1.2.3 From a364092a412975e506415f77f0628cbdd28c3913 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 21 Sep 2008 15:44:32 -0700 Subject: raid: make RAID autodetect default a KConfig option RAID autodetect has the side effect of requiring synchronisation of all device drivers, which can make the boot several seconds longer (I've measured 7 on one of my laptops).... even for systems that don't have RAID setup for the root filesystem (the only FS where this matters). This patch makes the default for autodetect a config option; either way the user can always override via the kernel command line. Signed-off-by: Arjan van de Ven Acked-by: NeilBrown --- drivers/md/Kconfig | 14 ++++++++++++++ init/do_mounts_md.c | 11 +++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 07d92c11b5d8..8e72c916535f 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -30,6 +30,20 @@ config BLK_DEV_MD If unsure, say N. +config MD_AUTODETECT + bool "Autodetect RAID arrays during kernel boot" + depends on BLK_DEV_MD + default y + ---help--- + If you say Y here, then the kernel will try to autodetect raid + arrays as part of its boot process. + + If you don't use raid and say Y, this autodetection can cause + a several-second delay in the boot time due to various + synchronisation steps that are part of this step. + + If unsure, say Y. + config MD_LINEAR tristate "Linear (append) mode" depends on BLK_DEV_MD diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index c0dfd3c1e91b..48b3fadd83ed 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -12,7 +12,12 @@ * The code for that is here. */ -static int __initdata raid_noautodetect, raid_autopart; +#ifdef CONFIG_MD_AUTODETECT +static int __initdata raid_noautodetect; +#else +static int __initdata raid_noautodetect=1; +#endif +static int __initdata raid_autopart; static struct { int minor; @@ -252,6 +257,8 @@ static int __init raid_setup(char *str) if (!strncmp(str, "noautodetect", wlen)) raid_noautodetect = 1; + if (!strncmp(str, "autodetect", wlen)) + raid_noautodetect = 0; if (strncmp(str, "partitionable", wlen)==0) raid_autopart = 1; if (strncmp(str, "part", wlen)==0) @@ -288,7 +295,7 @@ void __init md_run_setup(void) create_dev("/dev/md0", MKDEV(MD_MAJOR, 0)); if (raid_noautodetect) - printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=noautodetect)\n"); + printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n"); else autodetect_raid(); md_setup_drive(); -- cgit v1.2.3 From ce52aebd0219edc7a783278fbe80a6ccca0556c0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Fri, 10 Oct 2008 16:02:53 +0100 Subject: raid, fastboot: hide RAID autodetect option if MD is compiled as a module Signed-off-by: Alan Jenkins Signed-off-by: Ingo Molnar --- drivers/md/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 8e72c916535f..2281b5098e95 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -32,7 +32,7 @@ config BLK_DEV_MD config MD_AUTODETECT bool "Autodetect RAID arrays during kernel boot" - depends on BLK_DEV_MD + depends on BLK_DEV_MD=y default y ---help--- If you say Y here, then the kernel will try to autodetect raid -- cgit v1.2.3 From 7d3c6f8717ee6c2bf6cba5fa0bda3b28fbda6015 Mon Sep 17 00:00:00 2001 From: Chris Webb Date: Mon, 13 Oct 2008 11:55:11 +1100 Subject: md: Fix rdev_size_store with size == 0 Fix rdev_size_store with size == 0. size == 0 means to use the largest size allowed by the underlying device and is used when modifying an active array. This fixes a regression introduced by commit d7027458d68b2f1752a28016dcf2ffd0a7e8f567 Cc: Signed-off-by: Chris Webb Signed-off-by: NeilBrown --- drivers/md/md.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 0a3a4bdcd4af..7d8c2bb0a67c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2106,8 +2106,6 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) if (strict_strtoull(buf, 10, &size) < 0) return -EINVAL; - if (size < my_mddev->size) - return -EINVAL; if (my_mddev->pers && rdev->raid_disk >= 0) { if (my_mddev->persistent) { size = super_types[my_mddev->major_version]. @@ -2118,9 +2116,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) size = (rdev->bdev->bd_inode->i_size >> 10); size -= rdev->data_offset/2; } - if (size < my_mddev->size) - return -EINVAL; /* component must fit device */ } + if (size < my_mddev->size) + return -EINVAL; /* component must fit device */ rdev->size = size; if (size > oldsize && my_mddev->external) { -- cgit v1.2.3 From ea43ddd8491feccf36267349748ea91b1194481e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 13 Oct 2008 11:55:11 +1100 Subject: md: Allow metadata_version to be updated for externally managed metadata. For externally managed metadata, the 'metadata_version' sysfs attribute is really just a channel for user-space programs to communicate about how the array is being managed. It can be useful for this to be changed while the array is active. Normally changes to metadata_version are not permitted while the array is active. Change that so that if the metadata is externally managed, the metadata_version can be changed to a different flavour of external management. Signed-off-by: NeilBrown --- drivers/md/md.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 7d8c2bb0a67c..13dd7b276150 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2943,7 +2943,13 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) { int major, minor; char *e; - if (!list_empty(&mddev->disks)) + /* Changing the details of 'external' metadata is + * always permitted. Otherwise there must be + * no devices attached to the array. + */ + if (mddev->external && strncmp(buf, "external:", 9) == 0) + ; + else if (!list_empty(&mddev->disks)) return -EBUSY; if (cmd_match(buf, "none")) { -- cgit v1.2.3 From 80268ee9270ebe4847365a7426de91d179e870d0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: Don't try to set an array to 'read-auto' if it is already in that state. 'read-auto' is a variant of 'readonly' which will switch to writable on the first write attempt. Calling do_md_stop to set the array readonly when it is already readonly returns an error. So make sure not to do that. Signed-off-by: NeilBrown --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 13dd7b276150..fc33082d5ffd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2725,9 +2725,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) break; case read_auto: if (mddev->pers) { - if (mddev->ro != 1) + if (mddev->ro == 0) err = do_md_stop(mddev, 1, 0); - else + else if (mddev->ro == 1) err = restart_array(mddev); if (err == 0) { mddev->ro = 2; -- cgit v1.2.3 From e61130228ea5740e31e9646ea6d1c9d9089746c3 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: linear.c: Fix typo in comment. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b9cbee688fae..61a980da8b51 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -161,7 +161,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) /* min_spacing is the minimum spacing that will fit the hash * table in one PAGE. This may be much smaller than needed. * We find the smallest non-terminal set of consecutive devices - * that is larger than min_spacing as use the size of that as + * that is larger than min_spacing and use the size of that as * the actual spacing */ conf->hash_spacing = conf->array_sectors / 2; -- cgit v1.2.3 From 481d86c7ebe2ce59dfb6ccb720efa9d3fc1cf7cd Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: linear.c: Remove pointless initialization of curr_offset. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 61a980da8b51..632898f3bc98 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -218,7 +218,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) conf->disks[i-1].size; table = conf->hash_table; - curr_offset = 0; i = 0; for (curr_offset = 0; curr_offset < conf->array_sectors / 2; -- cgit v1.2.3 From 451708d2a439accbce136637ed4f156fc27371ab Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: linear.c: Remove broken debug code. conf->smallest_size is undefined since day one of the git repo.. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 632898f3bc98..01ed03a0c7ee 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -371,29 +371,6 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) static void linear_status (struct seq_file *seq, mddev_t *mddev) { -#undef MD_DEBUG -#ifdef MD_DEBUG - int j; - linear_conf_t *conf = mddev_to_conf(mddev); - sector_t s = 0; - - seq_printf(seq, " "); - for (j = 0; j < mddev->raid_disks; j++) - { - char b[BDEVNAME_SIZE]; - s += conf->smallest_size; - seq_printf(seq, "[%s", - bdevname(conf->hash_table[j][0].rdev->bdev,b)); - - while (s > conf->hash_table[j][0].offset + - conf->hash_table[j][0].size) - seq_printf(seq, "/%s] ", - bdevname(conf->hash_table[j][1].rdev->bdev,b)); - else - seq_printf(seq, "] "); - } - seq_printf(seq, "\n"); -#endif seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); } -- cgit v1.2.3 From 6283815d1853b7daf31dc4adb83e5c1dc9568251 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: linear: Represent dev_info->size and dev_info->offset in sectors. Rename them to num_sectors and start_sector which is more descriptive. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 54 ++++++++++++++++++++++++--------------------- include/linux/raid/linear.h | 4 ++-- 2 files changed, 31 insertions(+), 27 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 01ed03a0c7ee..1dadb134e0bb 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -42,7 +42,7 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) (void)sector_div(block, conf->hash_spacing); hash = conf->hash_table[block]; - while ((sector>>1) >= (hash->size + hash->offset)) + while (sector >= hash->num_sectors + hash->start_sector) hash++; return hash; } @@ -65,7 +65,7 @@ static int linear_mergeable_bvec(struct request_queue *q, sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); dev0 = which_dev(mddev, sector); - maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1)); + maxsectors = dev0->num_sectors - (sector - dev0->start_sector); if (maxsectors < bio_sectors) maxsectors = 0; @@ -113,7 +113,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) mdk_rdev_t *rdev; int i, nb_zone, cnt; sector_t min_spacing; - sector_t curr_offset; + sector_t curr_sector; struct list_head *tmp; conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), @@ -145,7 +145,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) mddev->queue->max_sectors > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); - disk->size = rdev->size; + disk->num_sectors = rdev->size * 2; conf->array_sectors += rdev->size * 2; cnt++; @@ -169,7 +169,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) sector_t sz = 0; int j; for (j = i; j < cnt - 1 && sz < min_spacing; j++) - sz += conf->disks[j].size; + sz += conf->disks[j].num_sectors / 2; if (sz >= min_spacing && sz < conf->hash_spacing) conf->hash_spacing = sz; } @@ -211,20 +211,20 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) * Here we generate the linear hash table * First calculate the device offsets. */ - conf->disks[0].offset = 0; + conf->disks[0].start_sector = 0; for (i = 1; i < raid_disks; i++) - conf->disks[i].offset = - conf->disks[i-1].offset + - conf->disks[i-1].size; + conf->disks[i].start_sector = + conf->disks[i-1].start_sector + + conf->disks[i-1].num_sectors; table = conf->hash_table; i = 0; - for (curr_offset = 0; - curr_offset < conf->array_sectors / 2; - curr_offset += conf->hash_spacing) { + for (curr_sector = 0; + curr_sector < conf->array_sectors; + curr_sector += conf->hash_spacing * 2) { while (i < raid_disks-1 && - curr_offset >= conf->disks[i+1].offset) + curr_sector >= conf->disks[i+1].start_sector) i++; *table ++ = conf->disks + i; @@ -316,7 +316,6 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) const int rw = bio_data_dir(bio); mddev_t *mddev = q->queuedata; dev_info_t *tmp_dev; - sector_t block; int cpu; if (unlikely(bio_barrier(bio))) { @@ -331,29 +330,33 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) part_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); - block = bio->bi_sector >> 1; - if (unlikely(block >= (tmp_dev->size + tmp_dev->offset) - || block < tmp_dev->offset)) { + if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors + + tmp_dev->start_sector) + || (bio->bi_sector < + tmp_dev->start_sector))) { char b[BDEVNAME_SIZE]; - printk("linear_make_request: Block %llu out of bounds on " - "dev %s size %llu offset %llu\n", - (unsigned long long)block, + printk("linear_make_request: Sector %llu out of bounds on " + "dev %s: %llu sectors, offset %llu\n", + (unsigned long long)bio->bi_sector, bdevname(tmp_dev->rdev->bdev, b), - (unsigned long long)tmp_dev->size, - (unsigned long long)tmp_dev->offset); + (unsigned long long)tmp_dev->num_sectors, + (unsigned long long)tmp_dev->start_sector); bio_io_error(bio); return 0; } if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > - (tmp_dev->offset + tmp_dev->size)<<1)) { + tmp_dev->start_sector + tmp_dev->num_sectors)) { /* This bio crosses a device boundary, so we have to * split it. */ struct bio_pair *bp; + bp = bio_split(bio, - ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); + tmp_dev->start_sector + tmp_dev->num_sectors + - bio->bi_sector); + if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (linear_make_request(q, &bp->bio2)) @@ -363,7 +366,8 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) } bio->bi_bdev = tmp_dev->rdev->bdev; - bio->bi_sector = bio->bi_sector - (tmp_dev->offset << 1) + tmp_dev->rdev->data_offset; + bio->bi_sector = bio->bi_sector - tmp_dev->start_sector + + tmp_dev->rdev->data_offset; return 1; } diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h index 7e375111d007..87090e98529f 100644 --- a/include/linux/raid/linear.h +++ b/include/linux/raid/linear.h @@ -5,8 +5,8 @@ struct dev_info { mdk_rdev_t *rdev; - sector_t size; - sector_t offset; + sector_t num_sectors; + sector_t start_sector; }; typedef struct dev_info dev_info_t; -- cgit v1.2.3 From 23242fbb470ff4c8c4d41f178832cf1929273d7d Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: linear.c: Make two local variables sector-based. This is a preparation for representing also the remaining fields of struct linear_private_data as sectors. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 1dadb134e0bb..13e928bde7cd 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -112,7 +112,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) dev_info_t **table; mdk_rdev_t *rdev; int i, nb_zone, cnt; - sector_t min_spacing; + sector_t min_sectors; sector_t curr_sector; struct list_head *tmp; @@ -155,23 +155,23 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) goto out; } - min_spacing = conf->array_sectors / 2; - sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *)); + min_sectors = conf->array_sectors; + sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *)); - /* min_spacing is the minimum spacing that will fit the hash + /* min_sectors is the minimum spacing that will fit the hash * table in one PAGE. This may be much smaller than needed. * We find the smallest non-terminal set of consecutive devices - * that is larger than min_spacing and use the size of that as + * that is larger than min_sectors and use the size of that as * the actual spacing */ conf->hash_spacing = conf->array_sectors / 2; for (i=0; i < cnt-1 ; i++) { - sector_t sz = 0; + sector_t tmp = 0; int j; - for (j = i; j < cnt - 1 && sz < min_spacing; j++) - sz += conf->disks[j].num_sectors / 2; - if (sz >= min_spacing && sz < conf->hash_spacing) - conf->hash_spacing = sz; + for (j = i; j < cnt - 1 && tmp < min_sectors; j++) + tmp += conf->disks[j].num_sectors; + if (tmp >= min_sectors && tmp < conf->hash_spacing * 2) + conf->hash_spacing = tmp / 2; } /* hash_spacing may be too large for sector_div to work with, -- cgit v1.2.3 From ab5bd5cbc8d4b868378d062eed3d4240930fbb86 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: Convert remaining 1k representations in linear.c to sectors. This patch renames hash_spacing and preshift to spacing and sector_shift respectively with the following change of semantics: Case 1: (sizeof(sector_t) <= sizeof(u32)). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In this case, we have sector_shift = preshift = 0 and spacing = 2 * hash_spacing. Hence, the index for the hash table which is computed by the new code in which_dev() as sector / spacing equals the old value which was (sector/2) / hash_spacing. Note also that the value of nb_zone stays the same because both sz and base double. Case 2: (sizeof(sector_t) > sizeof(u32)). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (aka the shifting dance case). Here we have sector_shift = preshift + 1 and spacing = 2 * hash_spacing during the computation of nb_zone and curr_sector, but spacing = hash_spacing in which_dev() because in the last hunk of the patch for linear.c we shift down conf->spacing (= 2 * hash_spacing) by one more bit than in the old code. Hence in the computation of nb_zone, sz and base have the same value as before, so nb_zone is not affected. Also curr_sector in the next hunk stays the same. In which_dev() the hash table index is computed as (sector >> sector_shift) / spacing In view of sector_shift = preshift + 1 and spacing = hash_spacing, this equals ((sector/2) >> preshift) / hash_spacing which is the value computed by the old code. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 35 +++++++++++++++++------------------ include/linux/raid/linear.h | 6 ++++-- 2 files changed, 21 insertions(+), 20 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 13e928bde7cd..09a64b9cbde8 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -33,14 +33,13 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { dev_info_t *hash; linear_conf_t *conf = mddev_to_conf(mddev); - sector_t block = sector >> 1; /* * sector_div(a,b) returns the remainer and sets a to a/b */ - block >>= conf->preshift; - (void)sector_div(block, conf->hash_spacing); - hash = conf->hash_table[block]; + sector >>= conf->sector_shift; + (void)sector_div(sector, conf->spacing); + hash = conf->hash_table[sector]; while (sector >= hash->num_sectors + hash->start_sector) hash++; @@ -164,25 +163,25 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) * that is larger than min_sectors and use the size of that as * the actual spacing */ - conf->hash_spacing = conf->array_sectors / 2; + conf->spacing = conf->array_sectors; for (i=0; i < cnt-1 ; i++) { sector_t tmp = 0; int j; for (j = i; j < cnt - 1 && tmp < min_sectors; j++) tmp += conf->disks[j].num_sectors; - if (tmp >= min_sectors && tmp < conf->hash_spacing * 2) - conf->hash_spacing = tmp / 2; + if (tmp >= min_sectors && tmp < conf->spacing) + conf->spacing = tmp; } - /* hash_spacing may be too large for sector_div to work with, + /* spacing may be too large for sector_div to work with, * so we might need to pre-shift */ - conf->preshift = 0; + conf->sector_shift = 0; if (sizeof(sector_t) > sizeof(u32)) { - sector_t space = conf->hash_spacing; + sector_t space = conf->spacing; while (space > (sector_t)(~(u32)0)) { space >>= 1; - conf->preshift++; + conf->sector_shift++; } } /* @@ -194,9 +193,9 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) unsigned round; unsigned long base; - sz = conf->array_sectors >> (conf->preshift + 1); + sz = conf->array_sectors >> conf->sector_shift; sz += 1; /* force round-up */ - base = conf->hash_spacing >> conf->preshift; + base = conf->spacing >> conf->sector_shift; round = sector_div(sz, base); nb_zone = sz + (round ? 1 : 0); } @@ -221,7 +220,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) i = 0; for (curr_sector = 0; curr_sector < conf->array_sectors; - curr_sector += conf->hash_spacing * 2) { + curr_sector += conf->spacing) { while (i < raid_disks-1 && curr_sector >= conf->disks[i+1].start_sector) @@ -230,12 +229,12 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) *table ++ = conf->disks + i; } - if (conf->preshift) { - conf->hash_spacing >>= conf->preshift; - /* round hash_spacing up so that when we divide by it, + if (conf->sector_shift) { + conf->spacing >>= conf->sector_shift; + /* round spacing up so that when we divide by it, * we err on the side of "too-low", which is safest. */ - conf->hash_spacing++; + conf->spacing++; } BUG_ON(table - conf->hash_table > nb_zone); diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h index 87090e98529f..f38b9c586afb 100644 --- a/include/linux/raid/linear.h +++ b/include/linux/raid/linear.h @@ -15,9 +15,11 @@ struct linear_private_data { struct linear_private_data *prev; /* earlier version */ dev_info_t **hash_table; - sector_t hash_spacing; + sector_t spacing; sector_t array_sectors; - int preshift; /* shift before dividing by hash_spacing */ + int sector_shift; /* shift before dividing + * by spacing + */ dev_info_t disks[0]; }; -- cgit v1.2.3 From fb4d8c76e56a887b9eee99fbc55fe82b18625d30 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: Remove unnecessary #includes, #defines, and function declarations. A lot of cruft has gathered over the years. Time to remove it. Signed-off-by: NeilBrown --- drivers/md/linear.c | 8 -------- drivers/md/md.c | 21 ++++----------------- drivers/md/multipath.c | 9 --------- drivers/md/raid0.c | 5 ----- drivers/md/raid5.c | 5 ----- drivers/md/raid6.h | 9 --------- include/linux/raid/md.h | 22 ---------------------- 7 files changed, 4 insertions(+), 75 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 09a64b9cbde8..190147c79e79 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -16,16 +16,8 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include - -#include -#include #include -#define MAJOR_NR MD_MAJOR -#define MD_DRIVER -#define MD_PERSONALITY - /* * find which device holds a particular offset */ diff --git a/drivers/md/md.c b/drivers/md/md.c index fc33082d5ffd..cd97de5982e8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -32,31 +32,20 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include -#include #include -#include #include #include #include #include /* for invalidate_bdev */ #include -#include #include -#include - -#include - +#include +#include +#include +#include #include -#ifdef CONFIG_KMOD -#include -#endif - -#include - #define MAJOR_NR MD_MAJOR -#define MD_DRIVER /* 63 partitions with the alternate major number (mdp) */ #define MdpMinorShift 6 @@ -3559,12 +3548,10 @@ static int do_md_run(mddev_t * mddev) } } -#ifdef CONFIG_KMOD if (mddev->level != LEVEL_NONE) request_module("md-level-%d", mddev->level); else if (mddev->clevel[0]) request_module("md-%s", mddev->clevel); -#endif /* * Drop all container device buffers, from now on diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 8bb8794129b3..8744014b9d80 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -19,16 +19,7 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include -#include -#include #include -#include -#include - -#define MAJOR_NR MD_MAJOR -#define MD_DRIVER -#define MD_PERSONALITY #define MAX_WORK_PER_DISK 128 diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 53508a8a981d..8ac6488ad0dc 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -18,13 +18,8 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include #include -#define MAJOR_NR MD_MAJOR -#define MD_DRIVER -#define MD_PERSONALITY - static void raid0_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ae16794bef20..7e654543c97d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -43,12 +43,7 @@ * miss any bits. */ -#include -#include -#include -#include #include -#include #include "raid6.h" #include diff --git a/drivers/md/raid6.h b/drivers/md/raid6.h index 31cbee71365f..98dcde88470e 100644 --- a/drivers/md/raid6.h +++ b/drivers/md/raid6.h @@ -18,15 +18,6 @@ /* Set to 1 to use kernel-wide empty_zero_page */ #define RAID6_USE_EMPTY_ZERO_PAGE 0 -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index dc0e3fcb9f28..bb727fa1ce7f 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -19,27 +19,7 @@ #define _MD_H #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* * 'md_p.h' holds the 'physical' layout of RAID devices @@ -83,10 +63,8 @@ extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); extern void md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_end(mddev_t *mddev); -extern void md_handle_safemode(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev); -extern void md_unplug_mddev(mddev_t *mddev); extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page); -- cgit v1.2.3 From d710e13812600037a723a673dc5c96a071de98d3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: remove space after function name in declaration and call. Having function (args) instead of function(args) make is harder to search for calls of particular functions. So remove all those spaces. Signed-off-by: NeilBrown --- drivers/md/md.c | 26 +++++++++++++------------- drivers/md/raid5.c | 30 +++++++++++++++--------------- include/linux/raid/md.h | 10 +++++----- 3 files changed, 33 insertions(+), 33 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index cd97de5982e8..a29187d1fcf5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -55,7 +55,7 @@ #ifndef MODULE -static void autostart_arrays (int part); +static void autostart_arrays(int part); #endif static LIST_HEAD(pers_list); @@ -201,7 +201,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); ) -static int md_fail_request (struct request_queue *q, struct bio *bio) +static int md_fail_request(struct request_queue *q, struct bio *bio) { bio_io_error(bio); return 0; @@ -3962,10 +3962,10 @@ static void autorun_array(mddev_t *mddev) } printk("\n"); - err = do_md_run (mddev); + err = do_md_run(mddev); if (err) { printk(KERN_WARNING "md: do_md_run() returned %d\n", err); - do_md_stop (mddev, 0, 0); + do_md_stop(mddev, 0, 0); } } @@ -4324,7 +4324,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (!(info->state & (1<pers) { - mddev->pers->status (seq, mddev); + mddev->pers->status(seq, mddev); seq_printf(seq, "\n "); if (mddev->pers->sync_request) { if (mddev->curr_resync > 2) { - status_resync (seq, mddev); + status_resync(seq, mddev); seq_printf(seq, "\n "); } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) seq_printf(seq, "\tresync=DELAYED\n "); @@ -6251,7 +6251,7 @@ static int md_notify_reboot(struct notifier_block *this, * appears to still be in use. Hence * the '100'. */ - do_md_stop (mddev, 1, 100); + do_md_stop(mddev, 1, 100); mddev_unlock(mddev); } /* @@ -6295,7 +6295,7 @@ static int __init md_init(void) raid_table_header = register_sysctl_table(raid_root_table); md_geninit(); - return (0); + return 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7e654543c97d..d72be4b89e64 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -270,7 +270,7 @@ static int grow_buffers(struct stripe_head *sh, int num) return 0; } -static void raid5_build_block (struct stripe_head *sh, int i); +static void raid5_build_block(struct stripe_head *sh, int i); static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks) { @@ -1146,7 +1146,7 @@ static void raid5_end_read_request(struct bio * bi, int error) release_stripe(sh); } -static void raid5_end_write_request (struct bio *bi, int error) +static void raid5_end_write_request(struct bio *bi, int error) { struct stripe_head *sh = bi->bi_private; raid5_conf_t *conf = sh->raid_conf; @@ -1178,7 +1178,7 @@ static void raid5_end_write_request (struct bio *bi, int error) static sector_t compute_blocknr(struct stripe_head *sh, int i); -static void raid5_build_block (struct stripe_head *sh, int i) +static void raid5_build_block(struct stripe_head *sh, int i) { struct r5dev *dev = &sh->dev[i]; @@ -1216,10 +1216,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) set_bit(MD_RECOVERY_INTR, &mddev->recovery); } set_bit(Faulty, &rdev->flags); - printk (KERN_ALERT - "raid5: Disk failure on %s, disabling device.\n" - "raid5: Operation continuing on %d devices.\n", - bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); + printk(KERN_ALERT + "raid5: Disk failure on %s, disabling device.\n" + "raid5: Operation continuing on %d devices.\n", + bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); } } @@ -1315,8 +1315,8 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; break; default: - printk (KERN_CRIT "raid6: unsupported algorithm %d\n", - conf->algorithm); + printk(KERN_CRIT "raid6: unsupported algorithm %d\n", + conf->algorithm); } break; } @@ -1391,8 +1391,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) } break; default: - printk (KERN_CRIT "raid6: unsupported algorithm %d\n", - conf->algorithm); + printk(KERN_CRIT "raid6: unsupported algorithm %d\n", + conf->algorithm); } break; } @@ -1400,7 +1400,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) chunk_number = stripe * data_disks + i; r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset; - check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); + check = raid5_compute_sector(r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { printk(KERN_ERR "compute_blocknr: map not correct\n"); return 0; @@ -4284,7 +4284,7 @@ static int stop(mddev_t *mddev) } #ifdef DEBUG -static void print_sh (struct seq_file *seq, struct stripe_head *sh) +static void print_sh(struct seq_file *seq, struct stripe_head *sh) { int i; @@ -4300,7 +4300,7 @@ static void print_sh (struct seq_file *seq, struct stripe_head *sh) seq_printf(seq, "\n"); } -static void printall (struct seq_file *seq, raid5_conf_t *conf) +static void printall(struct seq_file *seq, raid5_conf_t *conf) { struct stripe_head *sh; struct hlist_node *hn; @@ -4318,7 +4318,7 @@ static void printall (struct seq_file *seq, raid5_conf_t *conf) } #endif -static void status (struct seq_file *seq, mddev_t *mddev) +static void status(struct seq_file *seq, mddev_t *mddev) { raid5_conf_t *conf = (raid5_conf_t *) mddev->private; int i; diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index bb727fa1ce7f..82bea14cae1a 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -54,17 +54,17 @@ extern int mdp_major; -extern int register_md_personality (struct mdk_personality *p); -extern int unregister_md_personality (struct mdk_personality *p); -extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev), +extern int register_md_personality(struct mdk_personality *p); +extern int unregister_md_personality(struct mdk_personality *p); +extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), mddev_t *mddev, const char *name); -extern void md_unregister_thread (mdk_thread_t *thread); +extern void md_unregister_thread(mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); extern void md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_end(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); -extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev); +extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page); -- cgit v1.2.3 From 4bbf3771ca40d0aaec8316d0e7476b16010288e5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: Relax minimum size restrictions on chunk_size. Currently, the 'chunk_size' of an array must be at-least PAGE_SIZE. This makes moving an array to a machine with a larger PAGE_SIZE, or changing the kernel to use a larger PAGE_SIZE, can stop an array from working. For RAID10 and RAID4/5/6, this is non-trivial to fix as the resync process works on whole pages at a time, and assumes them to be wholly within a stripe. For other raid personalities, this restriction is not needed at all and can be dropped. So remove the test on chunk_size from common can, and add it in just the places where it is needed: raid10 and raid4/5/6. Signed-off-by: NeilBrown --- drivers/md/md.c | 7 +------ drivers/md/raid10.c | 5 +++-- drivers/md/raid5.c | 7 +++++++ 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index a29187d1fcf5..be2014f6e37b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3520,17 +3520,12 @@ static int do_md_run(mddev_t * mddev) return -EINVAL; } /* - * chunk-size has to be a power of 2 and multiples of PAGE_SIZE + * chunk-size has to be a power of 2 */ if ( (1 << ffz(~chunk_size)) != chunk_size) { printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size); return -EINVAL; } - if (chunk_size < PAGE_SIZE) { - printk(KERN_ERR "too small chunk_size: %d < %ld\n", - chunk_size, PAGE_SIZE); - return -EINVAL; - } /* devices must have minimum size of one chunk */ rdev_for_each(rdev, tmp, mddev) { diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8bdc9bfc2887..e3794799f308 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2028,8 +2028,9 @@ static int run(mddev_t *mddev) int nc, fc, fo; sector_t stride, size; - if (mddev->chunk_size == 0) { - printk(KERN_ERR "md/raid10: non-zero chunk size required.\n"); + if (mddev->chunk_size < PAGE_SIZE) { + printk(KERN_ERR "md/raid10: chunk size must be " + "at least PAGE_SIZE(%ld).\n", PAGE_SIZE); return -EINVAL; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d72be4b89e64..a36a7435edf5 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4007,6 +4007,13 @@ static int run(mddev_t *mddev) return -EIO; } + if (mddev->chunk_size < PAGE_SIZE) { + printk(KERN_ERR "md/raid5: chunk_size must be at least " + "PAGE_SIZE but %d < %ld\n", + mddev->chunk_size, PAGE_SIZE); + return -EINVAL; + } + if (mddev->reshape_position != MaxSector) { /* Check that we can continue the reshape. * Currently only disks can change, it must -- cgit v1.2.3 From 6000a368cd8e6da1caf101411bdb494cd6fb8b09 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 19 Aug 2008 18:45:30 -0500 Subject: [SCSI] block: separate failfast into multiple bits. Multipath is best at handling transport errors. If it gets a device error then there is not much the multipath layer can do. It will just access the same device but from a different path. This patch breaks up failfast into device, transport and driver errors. The multipath layers (md and dm mutlipath) only ask the lower levels to fast fail transport errors. The user of failfast, read ahead, will ask to fast fail on all errors. Note that blk_noretry_request will return true if any failfast bit is set. This allows drivers that do not support the multipath failfast bits to continue to fail on any failfast error like before. Drivers like scsi that are able to fail fast specific errors can check for the specific fail fast type. In the next patch I will convert scsi. Signed-off-by: Mike Christie Cc: Jens Axboe Signed-off-by: James Bottomley --- block/blk-core.c | 11 +++++++++-- drivers/md/dm-mpath.c | 2 +- drivers/md/multipath.c | 4 ++-- drivers/s390/block/dasd_diag.c | 2 +- drivers/s390/block/dasd_eckd.c | 2 +- drivers/s390/block/dasd_fba.c | 2 +- drivers/scsi/device_handler/scsi_dh_alua.c | 3 ++- drivers/scsi/device_handler/scsi_dh_emc.c | 3 ++- drivers/scsi/device_handler/scsi_dh_hp_sw.c | 6 ++++-- drivers/scsi/device_handler/scsi_dh_rdac.c | 3 ++- drivers/scsi/scsi_transport_spi.c | 4 +++- include/linux/bio.h | 26 +++++++++++++++++--------- include/linux/blkdev.h | 15 ++++++++++++--- 13 files changed, 57 insertions(+), 26 deletions(-) (limited to 'drivers/md') diff --git a/block/blk-core.c b/block/blk-core.c index 2d053b584410..9e79a485e4f3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1075,8 +1075,15 @@ void init_request_from_bio(struct request *req, struct bio *bio) /* * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) */ - if (bio_rw_ahead(bio) || bio_failfast(bio)) - req->cmd_flags |= REQ_FAILFAST; + if (bio_rw_ahead(bio)) + req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER); + if (bio_failfast_dev(bio)) + req->cmd_flags |= REQ_FAILFAST_DEV; + if (bio_failfast_transport(bio)) + req->cmd_flags |= REQ_FAILFAST_TRANSPORT; + if (bio_failfast_driver(bio)) + req->cmd_flags |= REQ_FAILFAST_DRIVER; /* * REQ_BARRIER implies no merging, but lets make it explicit diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 103304c1e3b0..9bf3460c5540 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -849,7 +849,7 @@ static int multipath_map(struct dm_target *ti, struct bio *bio, dm_bio_record(&mpio->details, bio); map_context->ptr = mpio; - bio->bi_rw |= (1 << BIO_RW_FAILFAST); + bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); r = map_io(m, bio, mpio, 0); if (r < 0 || r == DM_MAPIO_REQUEUE) mempool_free(mpio, m->mpio_pool); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 8bb8794129b3..7ae33ebaf7ec 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -176,7 +176,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->bio = *bio; mp_bh->bio.bi_sector += multipath->rdev->data_offset; mp_bh->bio.bi_bdev = multipath->rdev->bdev; - mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST); + mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; generic_make_request(&mp_bh->bio); @@ -402,7 +402,7 @@ static void multipathd (mddev_t *mddev) *bio = *(mp_bh->master_bio); bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; - bio->bi_rw |= (1 << BIO_RW_FAILFAST); + bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); bio->bi_end_io = multipath_end_request; bio->bi_private = mp_bh; generic_make_request(bio); diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 85fcb4371054..7844461a995b 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -544,7 +544,7 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev, } cqr->retries = DIAG_MAX_RETRIES; cqr->buildclk = get_clock(); - if (req->cmd_flags & REQ_FAILFAST) + if (blk_noretry_request(req)) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->startdev = memdev; cqr->memdev = memdev; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 49f9d221e23d..2e60d5f968c8 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1700,7 +1700,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev, recid++; } } - if (req->cmd_flags & REQ_FAILFAST) + if (blk_noretry_request(req)) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->startdev = startdev; cqr->memdev = startdev; diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 93d9b6452a94..7d442aeff3d1 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -355,7 +355,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, recid++; } } - if (req->cmd_flags & REQ_FAILFAST) + if (blk_noretry_request(req)) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->startdev = memdev; cqr->memdev = memdev; diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 708e475896b9..cb8aa3b58c22 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -109,7 +109,8 @@ static struct request *get_alua_req(struct scsi_device *sdev, } rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; + rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER | REQ_NOMERGE; rq->retries = ALUA_FAILOVER_RETRIES; rq->timeout = ALUA_FAILOVER_TIMEOUT; diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 8f45570a8a01..0e572d2c5b0a 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -303,7 +303,8 @@ static struct request *get_req(struct scsi_device *sdev, int cmd, rq->cmd[4] = len; rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_FAILFAST; + rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; rq->timeout = CLARIION_TIMEOUT; rq->retries = CLARIION_RETRIES; diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 5e93c88ad66b..9aec4ca64e56 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -112,7 +112,8 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) return SCSI_DH_RES_TEMP_UNAVAIL; req->cmd_type = REQ_TYPE_BLOCK_PC; - req->cmd_flags |= REQ_FAILFAST; + req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY); req->cmd[0] = TEST_UNIT_READY; req->timeout = HP_SW_TIMEOUT; @@ -204,7 +205,8 @@ static int hp_sw_start_stop(struct scsi_device *sdev, struct hp_sw_dh_data *h) return SCSI_DH_RES_TEMP_UNAVAIL; req->cmd_type = REQ_TYPE_BLOCK_PC; - req->cmd_flags |= REQ_FAILFAST; + req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; req->cmd_len = COMMAND_SIZE(START_STOP); req->cmd[0] = START_STOP; req->cmd[4] = 1; /* Start spin cycle */ diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 50bf95f3b5c4..a43c3ed4df28 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -226,7 +226,8 @@ static struct request *get_rdac_req(struct scsi_device *sdev, } rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; + rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; rq->retries = RDAC_RETRIES; rq->timeout = RDAC_TIMEOUT; diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index b29360ed0bdc..7c2d28924d2a 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -109,7 +109,9 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd, for(i = 0; i < DV_RETRIES; i++) { result = scsi_execute(sdev, cmd, dir, buffer, bufflen, sense, DV_TIMEOUT, /* retries */ 1, - REQ_FAILFAST); + REQ_FAILFAST_DEV | + REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER); if (result & DRIVER_SENSE) { struct scsi_sense_hdr sshdr_tmp; if (!sshdr) diff --git a/include/linux/bio.h b/include/linux/bio.h index ff5b4cf9e2da..1beda208cbfb 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -129,25 +129,30 @@ struct bio { * bit 2 -- barrier * Insert a serialization point in the IO queue, forcing previously * submitted IO to be completed before this oen is issued. - * bit 3 -- fail fast, don't want low level driver retries - * bit 4 -- synchronous I/O hint: the block layer will unplug immediately + * bit 3 -- synchronous I/O hint: the block layer will unplug immediately * Note that this does NOT indicate that the IO itself is sync, just * that the block layer will not postpone issue of this IO by plugging. - * bit 5 -- metadata request + * bit 4 -- metadata request * Used for tracing to differentiate metadata and data IO. May also * get some preferential treatment in the IO scheduler - * bit 6 -- discard sectors + * bit 5 -- discard sectors * Informs the lower level device that this range of sectors is no longer * used by the file system and may thus be freed by the device. Used * for flash based storage. + * bit 6 -- fail fast device errors + * bit 7 -- fail fast transport errors + * bit 8 -- fail fast driver errors + * Don't want driver retries for any fast fail whatever the reason. */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ #define BIO_RW_BARRIER 2 -#define BIO_RW_FAILFAST 3 -#define BIO_RW_SYNC 4 -#define BIO_RW_META 5 -#define BIO_RW_DISCARD 6 +#define BIO_RW_SYNC 3 +#define BIO_RW_META 4 +#define BIO_RW_DISCARD 5 +#define BIO_RW_FAILFAST_DEV 6 +#define BIO_RW_FAILFAST_TRANSPORT 7 +#define BIO_RW_FAILFAST_DRIVER 8 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -174,7 +179,10 @@ struct bio { #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) -#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) +#define bio_failfast_dev(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DEV)) +#define bio_failfast_transport(bio) \ + ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT)) +#define bio_failfast_driver(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DRIVER)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) #define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a92d9e4ea96e..f3491d225268 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -87,7 +87,9 @@ enum { */ enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ - __REQ_FAILFAST, /* no low level driver retries */ + __REQ_FAILFAST_DEV, /* no driver retries of device errors */ + __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ + __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ @@ -111,8 +113,10 @@ enum rq_flag_bits { }; #define REQ_RW (1 << __REQ_RW) +#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) +#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) #define REQ_DISCARD (1 << __REQ_DISCARD) -#define REQ_FAILFAST (1 << __REQ_FAILFAST) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) @@ -560,7 +564,12 @@ enum { #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) -#define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) +#define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV) +#define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT) +#define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER) +#define blk_noretry_request(rq) (blk_failfast_dev(rq) || \ + blk_failfast_transport(rq) || \ + blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.2.3 From 255707274ea25d486b7de060a30ba4ac50593408 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 15 Oct 2008 09:09:21 +1100 Subject: md: build failure due to missing delay.h Today's linux-next build (powerpc ppc64_defconfig) failed like this: drivers/md/raid1.c: In function 'sync_request': drivers/md/raid1.c:1759: error: implicit declaration of function 'msleep_interruptible' make[3]: *** [drivers/md/raid1.o] Error 1 make[3]: *** Waiting for unfinished jobs.... drivers/md/raid10.c: In function 'sync_request': drivers/md/raid10.c:1749: error: implicit declaration of function 'msleep_interruptible' make[3]: *** [drivers/md/raid10.o] Error 1 drivers/md/md.c: In function 'md_do_sync': drivers/md/md.c:5915: error: implicit declaration of function 'msleep' Caused by commit 6caa3b0bbdb474647f6bdd8a958ffc46f78d8d58 ("md: Remove unnecessary #includes, #defines, and function declarations"). I added the following patch. Signed-off-by: Stephen Rothwell Signed-off-by: NeilBrown --- drivers/md/md.c | 1 + drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + 3 files changed, 3 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index be2014f6e37b..39c9c87a1342 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -44,6 +44,7 @@ #include #include #include +#include #define MAJOR_NR MD_MAJOR diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b9764429d856..9c788e2489b1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -32,6 +32,7 @@ */ #include "dm-bio-list.h" +#include #include #include diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e3794799f308..da5129a24b18 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -19,6 +19,7 @@ */ #include "dm-bio-list.h" +#include #include #include -- cgit v1.2.3 From 08ff39f1c8f2134f7d0f38123ca5952371665cc5 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Thu, 16 Oct 2008 14:16:53 +1100 Subject: md: check for memory allocation failure in faulty personality It's a fault injection module, but I don't think we should oops here. Signed-off-by: Sven Wegener Signed-off-by: Andrew Morton Signed-off-by: Neil Brown --- drivers/md/faulty.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 268547dbfbd3..f26c1f9a475b 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -287,6 +287,8 @@ static int run(mddev_t *mddev) int i; conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); + if (!conf) + return -ENOMEM; for (i=0; icounters[i], 0); -- cgit v1.2.3 From 97ce0a7f9caf9d715cee815a016ee21575f71c95 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 24 Sep 2008 22:48:19 -0700 Subject: md: fix input truncation in safe_delay_store() safe_delay_store() currently truncates the last character of input since it tells strlcpy that the buffer can only hold 'len' characters, off by one. sysfs already null terminates the buffer, so just increase the last argument to strlcpy. Signed-off-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/md.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 39c9c87a1342..aaa3d465de4e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2394,12 +2394,11 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) int i; unsigned long msec; char buf[30]; - char *e; + /* remove a period, and count digits after it */ if (len >= sizeof(buf)) return -EINVAL; - strlcpy(buf, cbuf, len); - buf[len] = 0; + strlcpy(buf, cbuf, sizeof(buf)); for (i=0; i