From e9bb8fb0b6d61a822201537b25206a0ca34b9d1d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Sep 2008 22:36:49 -0700 Subject: aoe: Use SKB interfaces for list management instead of home-grown stuff. Signed-off-by: David S. Miller --- drivers/block/aoe/aoe.h | 9 ++--- drivers/block/aoe/aoeblk.c | 8 ++--- drivers/block/aoe/aoechr.c | 8 ++++- drivers/block/aoe/aoecmd.c | 85 +++++++++++++++++---------------------------- drivers/block/aoe/aoedev.c | 12 +++---- drivers/block/aoe/aoemain.c | 1 + drivers/block/aoe/aoenet.c | 9 ++--- 7 files changed, 56 insertions(+), 76 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 5b4c6e649c11..93f3690396a5 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -159,11 +159,8 @@ struct aoedev { sector_t ssize; struct timer_list timer; spinlock_t lock; - struct sk_buff *sendq_hd; /* packets needing to be sent, list head */ - struct sk_buff *sendq_tl; - struct sk_buff *skbpool_hd; - struct sk_buff *skbpool_tl; - int nskbpool; + struct sk_buff_head sendq; + struct sk_buff_head skbpool; mempool_t *bufpool; /* for deadlock-free Buf allocation */ struct list_head bufq; /* queue of bios to work on */ struct buf *inprocess; /* the one we're currently working on */ @@ -199,7 +196,7 @@ int aoedev_flush(const char __user *str, size_t size); int aoenet_init(void); void aoenet_exit(void); -void aoenet_xmit(struct sk_buff *); +void aoenet_xmit(struct sk_buff_head *); int is_aoe_netif(struct net_device *ifp); int set_aoe_iflist(const char __user *str, size_t size); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 0c39782b2660..fd2cf5439a1c 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -158,9 +158,9 @@ aoeblk_release(struct inode *inode, struct file *filp) static int aoeblk_make_request(struct request_queue *q, struct bio *bio) { + struct sk_buff_head queue; struct aoedev *d; struct buf *buf; - struct sk_buff *sl; ulong flags; blk_queue_bounce(q, &bio); @@ -213,11 +213,11 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) list_add_tail(&buf->bufs, &d->bufq); aoecmd_work(d); - sl = d->sendq_hd; - d->sendq_hd = d->sendq_tl = NULL; + __skb_queue_head_init(&queue); + skb_queue_splice_init(&d->sendq, &queue); spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); + aoenet_xmit(&queue); return 0; } diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 181ebb85f0be..1f56d2c5b7fc 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "aoe.h" enum { @@ -103,7 +104,12 @@ loop: spin_lock_irqsave(&d->lock, flags); goto loop; } - aoenet_xmit(skb); + if (skb) { + struct sk_buff_head queue; + __skb_queue_head_init(&queue); + __skb_queue_tail(&queue, skb); + aoenet_xmit(&queue); + } aoecmd_cfg(major, minor); return 0; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 2f1746295d06..e33da30be4c4 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -114,29 +114,22 @@ ifrotate(struct aoetgt *t) static void skb_pool_put(struct aoedev *d, struct sk_buff *skb) { - if (!d->skbpool_hd) - d->skbpool_hd = skb; - else - d->skbpool_tl->next = skb; - d->skbpool_tl = skb; + __skb_queue_tail(&d->skbpool, skb); } static struct sk_buff * skb_pool_get(struct aoedev *d) { - struct sk_buff *skb; + struct sk_buff *skb = skb_peek(&d->skbpool); - skb = d->skbpool_hd; if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) { - d->skbpool_hd = skb->next; - skb->next = NULL; + __skb_unlink(skb, &d->skbpool); return skb; } - if (d->nskbpool < NSKBPOOLMAX - && (skb = new_skb(ETH_ZLEN))) { - d->nskbpool++; + if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX && + (skb = new_skb(ETH_ZLEN))) return skb; - } + return NULL; } @@ -293,29 +286,22 @@ aoecmd_ata_rw(struct aoedev *d) skb->dev = t->ifp->nd; skb = skb_clone(skb, GFP_ATOMIC); - if (skb) { - if (d->sendq_hd) - d->sendq_tl->next = skb; - else - d->sendq_hd = skb; - d->sendq_tl = skb; - } + if (skb) + __skb_queue_tail(&d->sendq, skb); return 1; } /* some callers cannot sleep, and they can call this function, * transmitting the packets later, when interrupts are on */ -static struct sk_buff * -aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) +static void +aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue) { struct aoe_hdr *h; struct aoe_cfghdr *ch; - struct sk_buff *skb, *sl, *sl_tail; + struct sk_buff *skb; struct net_device *ifp; - sl = sl_tail = NULL; - read_lock(&dev_base_lock); for_each_netdev(&init_net, ifp) { dev_hold(ifp); @@ -329,8 +315,7 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) } skb_put(skb, sizeof *h + sizeof *ch); skb->dev = ifp; - if (sl_tail == NULL) - sl_tail = skb; + __skb_queue_tail(queue, skb); h = (struct aoe_hdr *) skb_mac_header(skb); memset(h, 0, sizeof *h + sizeof *ch); @@ -342,16 +327,10 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) h->minor = aoeminor; h->cmd = AOECMD_CFG; - skb->next = sl; - sl = skb; cont: dev_put(ifp); } read_unlock(&dev_base_lock); - - if (tail != NULL) - *tail = sl_tail; - return sl; } static void @@ -406,11 +385,7 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f) skb = skb_clone(skb, GFP_ATOMIC); if (skb == NULL) return; - if (d->sendq_hd) - d->sendq_tl->next = skb; - else - d->sendq_hd = skb; - d->sendq_tl = skb; + __skb_queue_tail(&d->sendq, skb); } static int @@ -508,16 +483,15 @@ ata_scnt(unsigned char *packet) { static void rexmit_timer(ulong vp) { + struct sk_buff_head queue; struct aoedev *d; struct aoetgt *t, **tt, **te; struct aoeif *ifp; struct frame *f, *e; - struct sk_buff *sl; register long timeout; ulong flags, n; d = (struct aoedev *) vp; - sl = NULL; /* timeout is always ~150% of the moving average */ timeout = d->rttavg; @@ -589,7 +563,7 @@ rexmit_timer(ulong vp) } } - if (d->sendq_hd) { + if (!skb_queue_empty(&d->sendq)) { n = d->rttavg <<= 1; if (n > MAXTIMER) d->rttavg = MAXTIMER; @@ -600,15 +574,15 @@ rexmit_timer(ulong vp) aoecmd_work(d); } - sl = d->sendq_hd; - d->sendq_hd = d->sendq_tl = NULL; + __skb_queue_head_init(&queue); + skb_queue_splice_init(&d->sendq, &queue); d->timer.expires = jiffies + TIMERTICK; add_timer(&d->timer); spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); + aoenet_xmit(&queue); } /* enters with d->lock held */ @@ -767,12 +741,12 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector void aoecmd_ata_rsp(struct sk_buff *skb) { + struct sk_buff_head queue; struct aoedev *d; struct aoe_hdr *hin, *hout; struct aoe_atahdr *ahin, *ahout; struct frame *f; struct buf *buf; - struct sk_buff *sl; struct aoetgt *t; struct aoeif *ifp; register long n; @@ -893,21 +867,21 @@ aoecmd_ata_rsp(struct sk_buff *skb) aoecmd_work(d); xmit: - sl = d->sendq_hd; - d->sendq_hd = d->sendq_tl = NULL; + __skb_queue_head_init(&queue); + skb_queue_splice_init(&d->sendq, &queue); spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); + aoenet_xmit(&queue); } void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) { - struct sk_buff *sl; - - sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL); + struct sk_buff_head queue; - aoenet_xmit(sl); + __skb_queue_head_init(&queue); + aoecmd_cfg_pkts(aoemajor, aoeminor, &queue); + aoenet_xmit(&queue); } struct sk_buff * @@ -1076,7 +1050,12 @@ aoecmd_cfg_rsp(struct sk_buff *skb) spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); + if (sl) { + struct sk_buff_head queue; + __skb_queue_head_init(&queue); + __skb_queue_tail(&queue, sl); + aoenet_xmit(&queue); + } } void diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index a1d813ab0d6b..75a610adf515 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -188,14 +188,12 @@ skbfree(struct sk_buff *skb) static void skbpoolfree(struct aoedev *d) { - struct sk_buff *skb; + struct sk_buff *skb, *tmp; - while ((skb = d->skbpool_hd)) { - d->skbpool_hd = skb->next; - skb->next = NULL; + skb_queue_walk_safe(&d->skbpool, skb, tmp) skbfree(skb); - } - d->skbpool_tl = NULL; + + __skb_queue_head_init(&d->skbpool); } /* find it or malloc it */ @@ -217,6 +215,8 @@ aoedev_by_sysminor_m(ulong sysminor) goto out; INIT_WORK(&d->work, aoecmd_sleepwork); spin_lock_init(&d->lock); + skb_queue_head_init(&d->sendq); + skb_queue_head_init(&d->skbpool); init_timer(&d->timer); d->timer.data = (ulong) d; d->timer.function = dummy_timer; diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index 7b15a5e9cec0..7f83ad90e76f 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "aoe.h" MODULE_LICENSE("GPL"); diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 0c81ca731287..8fb26030eba8 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -95,15 +95,12 @@ mac_addr(char addr[6]) } void -aoenet_xmit(struct sk_buff *sl) +aoenet_xmit(struct sk_buff_head *queue) { - struct sk_buff *skb; + struct sk_buff *skb, *tmp; - while ((skb = sl)) { - sl = sl->next; - skb->next = skb->prev = NULL; + skb_queue_walk_safe(queue, skb, tmp) dev_queue_xmit(skb); - } } /* -- cgit v1.2.3 From d87798450a7635ab1bcc80271a13ce4a53b016a9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 20:47:22 -0700 Subject: aoe: Fix OOPS after SKB queue changes. Reported by Thomas Graf. If we don't unlink the SKB from the queue when we send it out in aoenet_xmit(), dev_hard_start_xmit() will see skb->next as non-NULL and interpret this to mean the SKB is part of a GSO segment list. Add __skb_unlink() call to fix that. Signed-off-by: David S. Miller --- drivers/block/aoe/aoenet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 8fb26030eba8..9157d64270cb 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -99,8 +99,10 @@ aoenet_xmit(struct sk_buff_head *queue) { struct sk_buff *skb, *tmp; - skb_queue_walk_safe(queue, skb, tmp) + skb_queue_walk_safe(queue, skb, tmp) { + __skb_unlink(skb, queue); dev_queue_xmit(skb); + } } /* -- cgit v1.2.3 From 1a8e2bddd5c29008f311613e75925fecbf522c5b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 13 Aug 2008 12:35:09 +0100 Subject: Kill REQ_TYPE_FLUSH It was only used by ps3disk, and it should probably have been REQ_TYPE_LINUX_BLOCK + REQ_LB_OP_FLUSH. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- drivers/block/ps3disk.c | 9 ++++++--- include/linux/blkdev.h | 6 +----- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index d797e209951d..4b0d6c7f4c66 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, if (blk_fs_request(req)) { if (ps3disk_submit_request_sg(dev, req)) break; - } else if (req->cmd_type == REQ_TYPE_FLUSH) { + } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_FLUSH) { if (ps3disk_submit_flush_request(dev, req)) break; } else { @@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) return IRQ_HANDLED; } - if (req->cmd_type == REQ_TYPE_FLUSH) { + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_FLUSH) { read = 0; num_sectors = req->hard_cur_sectors; op = "flush"; @@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req) dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - req->cmd_type = REQ_TYPE_FLUSH; + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_FLUSH; } static unsigned long ps3disk_mask; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e9eb35c9bf26..f131776f029e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -54,7 +54,6 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_FLUSH, /* flush request */ REQ_TYPE_SPECIAL, /* driver defined type */ REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ /* @@ -76,11 +75,8 @@ enum rq_cmd_type_bits { * */ enum { - /* - * just examples for now - */ REQ_LB_OP_EJECT = 0x40, /* eject request */ - REQ_LB_OP_FLUSH = 0x41, /* flush device */ + REQ_LB_OP_FLUSH = 0x41, /* flush request */ REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; -- cgit v1.2.3 From 766ca4428d1239a970926856c447310c9c191af2 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vázquez Cao Date: Thu, 14 Aug 2008 09:59:13 +0200 Subject: virtio_blk: use a wrapper function to access io context information of IO requests struct request has an ioprio member but it is never updated because currently bios do not hold io context information. The implication of this is that virtio_blk ends up passing useless information to the backend driver. That said, some IO schedulers such as CFQ do store io context information in struct request, but use private members for that, which means that that information cannot be directly accessed in a IO scheduler-independent way. This patch adds a function to obtain the ioprio of a request. We should avoid accessing ioprio directly and use this function instead, so that its users do not have to care about future changes in block layer structures or what the currently active IO controller is. This patch does not introduce any functional changes but paves the way for future clean-ups and enhancements. Signed-off-by: Fernando Luis Vazquez Cao Acked-by: Rusty Russell Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 4 ++-- include/linux/blkdev.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 42251095134f..879506a2c234 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -84,11 +84,11 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, if (blk_fs_request(vbr->req)) { vbr->out_hdr.type = 0; vbr->out_hdr.sector = vbr->req->sector; - vbr->out_hdr.ioprio = vbr->req->ioprio; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else if (blk_pc_request(vbr->req)) { vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = vbr->req->ioprio; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else { /* We don't put anything else in the queue. */ BUG(); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f131776f029e..490ce458b031 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -232,6 +232,11 @@ struct request { struct request *next_rq; }; +static inline unsigned short req_get_ioprio(struct request *req) +{ + return req->ioprio; +} + /* * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME * requests. Some step values could eventually be made generic. -- cgit v1.2.3 From 310a2c1012934f590192377f65940cad4aa72b15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:17 +0900 Subject: block: misc updates This patch makes the following misc updates in preparation for disk->part dereference fix and extended block devt support. * implment part_to_disk() * fix comment about gendisk->part indexing * rename get_part() to disk_map_sector() * don't use n which is always zero while printing disk information in diskstats_show() Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 7 ++++--- block/blk-merge.c | 4 ++-- block/genhd.c | 4 ++-- drivers/block/aoe/aoecmd.c | 2 +- include/linux/genhd.h | 13 ++++++++++--- 5 files changed, 19 insertions(+), 11 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-core.c b/block/blk-core.c index 86d22e7d65c5..a0dc2e72fcbb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -60,7 +60,7 @@ static void drive_stat_acct(struct request *rq, int new_io) if (!blk_fs_request(rq) || !rq->rq_disk) return; - part = get_part(rq->rq_disk, rq->sector); + part = disk_map_sector(rq->rq_disk, rq->sector); if (!new_io) __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); else { @@ -1557,7 +1557,8 @@ static int __end_that_request_first(struct request *req, int error, } if (blk_fs_request(req) && req->rq_disk) { - struct hd_struct *part = get_part(req->rq_disk, req->sector); + struct hd_struct *part = + disk_map_sector(req->rq_disk, req->sector); const int rw = rq_data_dir(req); all_stat_add(req->rq_disk, part, sectors[rw], @@ -1745,7 +1746,7 @@ static void end_that_request_last(struct request *req, int error) if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); - struct hd_struct *part = get_part(disk, req->sector); + struct hd_struct *part = disk_map_sector(disk, req->sector); __all_stat_inc(disk, part, ios[rw], req->sector); __all_stat_add(disk, part, ticks[rw], duration, req->sector); diff --git a/block/blk-merge.c b/block/blk-merge.c index d81d91419ff5..9b17da698d7c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -387,8 +387,8 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); if (req->rq_disk) { - struct hd_struct *part - = get_part(req->rq_disk, req->sector); + struct hd_struct *part = + disk_map_sector(req->rq_disk, req->sector); disk_round_stats(req->rq_disk); req->rq_disk->in_flight--; if (part) { diff --git a/block/genhd.c b/block/genhd.c index 8b9a9ff1a842..11038fbc75ed 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -568,7 +568,7 @@ static int diskstats_show(struct seq_file *s, void *v) { struct gendisk *gp = v; char buf[BDEVNAME_SIZE]; - int n = 0; + int n; /* if (&gp->dev.kobj.entry == block_class.devices.next) @@ -582,7 +582,7 @@ static int diskstats_show(struct seq_file *s, void *v) disk_round_stats(gp); preempt_enable(); seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - gp->major, n + gp->first_minor, disk_name(gp, n, buf), + gp->major, gp->first_minor, disk_name(gp, 0, buf), disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), (unsigned long long)disk_stat_read(gp, sectors[0]), jiffies_to_msecs(disk_stat_read(gp, ticks[0])), diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 2f1746295d06..885d1409521f 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -757,7 +757,7 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector const int rw = bio_data_dir(bio); struct hd_struct *part; - part = get_part(disk, sector); + part = disk_map_sector(disk, sector); all_stat_inc(disk, part, ios[rw], sector); all_stat_add(disk, part, ticks[rw], duration, sector); all_stat_add(disk, part, sectors[rw], n_sect, sector); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index be4f5e5bfe06..c64e659c9843 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -116,7 +116,7 @@ struct gendisk { int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor] */ + struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; struct request_queue *queue; void *private_data; @@ -145,14 +145,21 @@ struct gendisk { #endif }; +static inline struct gendisk *part_to_disk(struct hd_struct *part) +{ + if (likely(part)) + return dev_to_disk((part)->dev.parent); + return NULL; +} + /* * Macros to operate on percpu disk statistics: * * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ -static inline struct hd_struct *get_part(struct gendisk *gendiskp, - sector_t sector) +static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, + sector_t sector) { struct hd_struct *part; int i; -- cgit v1.2.3 From f331c0296f2a9fee0d396a70598b954062603015 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:48 +0200 Subject: block: don't depend on consecutive minor space * Implement disk_devt() and part_devt() and use them to directly access devt instead of computing it from ->major and ->first_minor. Note that all references to ->major and ->first_minor outside of block layer is used to determine devt of the disk (the part0) and as ->major and ->first_minor will continue to represent devt for the disk, converting these users aren't strictly necessary. However, convert them for consistency. * Implement disk_max_parts() to avoid directly deferencing genhd->minors. * Update bdget_disk() such that it doesn't assume consecutive minor space. * Move devt computation from register_disk() to add_disk() and make it the only one (all other usages use the initially determined value). These changes clean up the code and will help disk->part dereference fix and extended block device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 107 +++++++++++++++++++++++++----------- block/ioctl.c | 6 +- drivers/block/pktcdvd.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/char/random.c | 6 +- drivers/md/dm-ioctl.c | 4 +- drivers/md/dm-stripe.c | 4 +- drivers/md/dm.c | 7 ++- drivers/memstick/core/mspro_block.c | 2 +- drivers/mmc/card/block.c | 2 +- drivers/s390/block/dasd_proc.c | 3 +- drivers/s390/block/dcssblk.c | 4 +- drivers/scsi/sr.c | 2 +- fs/block_dev.c | 2 +- fs/partitions/check.c | 19 ++++--- include/linux/genhd.h | 27 +++++++-- 16 files changed, 132 insertions(+), 67 deletions(-) (limited to 'drivers/block') diff --git a/block/genhd.c b/block/genhd.c index dc9ad4c171e2..fa32d09fda24 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -186,13 +186,14 @@ void add_disk(struct gendisk *disk) int retval; disk->flags |= GENHD_FL_UP; - blk_register_region(MKDEV(disk->major, disk->first_minor), - disk->minors, NULL, exact_match, exact_lock, disk); + disk->dev.devt = MKDEV(disk->major, disk->first_minor); + blk_register_region(disk_devt(disk), disk->minors, NULL, + exact_match, exact_lock, disk); register_disk(disk); blk_register_queue(disk); bdi = &disk->queue->backing_dev_info; - bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); + bdi_register_dev(bdi, disk_devt(disk)); retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); WARN_ON(retval); } @@ -205,8 +206,7 @@ void unlink_gendisk(struct gendisk *disk) sysfs_remove_link(&disk->dev.kobj, "bdi"); bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); - blk_unregister_region(MKDEV(disk->major, disk->first_minor), - disk->minors); + blk_unregister_region(disk_devt(disk), disk->minors); } /** @@ -225,6 +225,38 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) return kobj ? dev_to_disk(dev) : NULL; } +/** + * bdget_disk - do bdget() by gendisk and partition number + * @disk: gendisk of interest + * @partno: partition number + * + * Find partition @partno from @disk, do bdget() on it. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * Resulting block_device on success, NULL on failure. + */ +extern struct block_device *bdget_disk(struct gendisk *disk, int partno) +{ + dev_t devt = MKDEV(0, 0); + + if (partno == 0) + devt = disk_devt(disk); + else { + struct hd_struct *part = disk->part[partno - 1]; + + if (part && part->nr_sects) + devt = part_devt(part); + } + + if (likely(devt != MKDEV(0, 0))) + return bdget(devt); + return NULL; +} +EXPORT_SYMBOL(bdget_disk); + /* * print a full list of all partitions - intended for places where the root * filesystem can't be mounted and thus to give the victim some idea of what @@ -255,7 +287,7 @@ void __init printk_all_partitions(void) * option takes. */ printk("%02x%02x %10llu %s", - disk->major, disk->first_minor, + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)), (unsigned long long)get_capacity(disk) >> 1, disk_name(disk, 0, buf)); if (disk->driverfs_dev != NULL && @@ -266,15 +298,15 @@ void __init printk_all_partitions(void) printk(" (driver?)\n"); /* now show the partitions */ - for (n = 0; n < disk->minors - 1; ++n) { - if (disk->part[n] == NULL) - continue; - if (disk->part[n]->nr_sects == 0) + for (n = 0; n < disk_max_parts(disk); ++n) { + struct hd_struct *part = disk->part[n]; + + if (!part || !part->nr_sects) continue; printk(" %02x%02x %10llu %s\n", - disk->major, n + 1 + disk->first_minor, - (unsigned long long)disk->part[n]->nr_sects >> 1, - disk_name(disk, n + 1, buf)); + MAJOR(part_devt(part)), MINOR(part_devt(part)), + (unsigned long long)part->nr_sects >> 1, + disk_name(disk, part->partno, buf)); } } class_dev_iter_exit(&iter); @@ -343,26 +375,27 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || - (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) + if (!get_capacity(sgp) || (!disk_max_parts(sgp) && + (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) return 0; /* show the full disk and all non-0 size partitions of it */ seq_printf(seqf, "%4d %4d %10llu %s\n", - sgp->major, sgp->first_minor, + MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), (unsigned long long)get_capacity(sgp) >> 1, disk_name(sgp, 0, buf)); - for (n = 0; n < sgp->minors - 1; n++) { - if (!sgp->part[n]) + for (n = 0; n < disk_max_parts(sgp); n++) { + struct hd_struct *part = sgp->part[n]; + if (!part) continue; - if (sgp->part[n]->nr_sects == 0) + if (part->nr_sects == 0) continue; seq_printf(seqf, "%4d %4d %10llu %s\n", - sgp->major, n + 1 + sgp->first_minor, - (unsigned long long)sgp->part[n]->nr_sects >> 1 , - disk_name(sgp, n + 1, buf)); + MAJOR(part_devt(part)), MINOR(part_devt(part)), + (unsigned long long)part->nr_sects >> 1, + disk_name(sgp, part->partno, buf)); } return 0; @@ -578,7 +611,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_round_stats(gp); preempt_enable(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - gp->major, gp->first_minor, disk_name(gp, 0, buf), + MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), + disk_name(gp, 0, buf), disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), (unsigned long long)disk_stat_read(gp, sectors[0]), jiffies_to_msecs(disk_stat_read(gp, ticks[0])), @@ -590,7 +624,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); /* now show all non-0 size partitions of it */ - for (n = 0; n < gp->minors - 1; n++) { + for (n = 0; n < disk_max_parts(gp); n++) { struct hd_struct *hd = gp->part[n]; if (!hd || !hd->nr_sects) @@ -601,8 +635,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) preempt_enable(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", - gp->major, n + gp->first_minor + 1, - disk_name(gp, n + 1, buf), + MAJOR(part_devt(hd)), MINOR(part_devt(hd)), + disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[0]), part_stat_read(hd, merges[0]), (unsigned long long)part_stat_read(hd, sectors[0]), @@ -661,11 +695,22 @@ dev_t blk_lookup_devt(const char *name, int partno) while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); - if (!strcmp(dev->bus_id, name) && partno < disk->minors) { - devt = MKDEV(MAJOR(dev->devt), - MINOR(dev->devt) + partno); - break; + if (strcmp(dev->bus_id, name)) + continue; + if (partno < 0 || partno > disk_max_parts(disk)) + continue; + + if (partno == 0) + devt = disk_devt(disk); + else { + struct hd_struct *part = disk->part[partno - 1]; + + if (!part || !part->nr_sects) + continue; + + devt = part_devt(part); } + break; } class_dev_iter_exit(&iter); return devt; @@ -755,7 +800,7 @@ void set_disk_ro(struct gendisk *disk, int flag) { int i; disk->policy = flag; - for (i = 0; i < disk->minors - 1; i++) + for (i = 0; i < disk_max_parts(disk); i++) if (disk->part[i]) disk->part[i]->policy = flag; } diff --git a/block/ioctl.c b/block/ioctl.c index d77f5e280a6e..403f7d7e0c28 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -29,7 +29,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user if (bdev != bdev->bd_contains) return -EINVAL; partno = p.pno; - if (partno <= 0 || partno >= disk->minors) + if (partno <= 0 || partno > disk_max_parts(disk)) return -EINVAL; switch (a.op) { case BLKPG_ADD_PARTITION: @@ -47,7 +47,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user mutex_lock(&bdev->bd_mutex); /* overlap? */ - for (i = 0; i < disk->minors - 1; i++) { + for (i = 0; i < disk_max_parts(disk); i++) { struct hd_struct *s = disk->part[i]; if (!s) @@ -96,7 +96,7 @@ static int blkdev_reread_part(struct block_device *bdev) struct gendisk *disk = bdev->bd_disk; int res; - if (disk->minors == 1 || bdev != bdev->bd_contains) + if (!disk_max_parts(disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 29b7a648cc6e..e1a90bbb4747 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2911,7 +2911,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) if (!disk->queue) goto out_mem2; - pd->pkt_dev = MKDEV(disk->major, disk->first_minor); + pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) goto out_new_dev; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 4b0d6c7f4c66..936466f62afd 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -541,7 +541,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) struct ps3disk_private *priv = dev->sbd.core.driver_data; mutex_lock(&ps3disk_mask_mutex); - __clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS, + __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, &ps3disk_mask); mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); diff --git a/drivers/char/random.c b/drivers/char/random.c index 7ce1ac4baa6d..6af435b89867 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -661,10 +661,10 @@ void add_disk_randomness(struct gendisk *disk) if (!disk || !disk->random) return; /* first major is 1, so we get >= 0x200 here */ - DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor); + DEBUG_ENT("disk event %d:%d\n", + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk))); - add_timer_randomness(disk->random, - 0x100 + MKDEV(disk->major, disk->first_minor)); + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b262c0042de3..c3de311117a1 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -426,7 +426,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size) old_nl->next = (uint32_t) ((void *) nl - (void *) old_nl); disk = dm_disk(hc->md); - nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + nl->dev = huge_encode_dev(disk_devt(disk)); nl->next = 0; strcpy(nl->name, hc->name); @@ -539,7 +539,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (dm_suspended(md)) param->flags |= DM_SUSPEND_FLAG; - param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + param->dev = huge_encode_dev(disk_devt(disk)); /* * Yes, this will be out of date by the time it gets back diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4de90ab3968b..b745d8ac625b 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -284,8 +284,8 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, memset(major_minor, 0, sizeof(major_minor)); sprintf(major_minor, "%d:%d", - bio->bi_bdev->bd_disk->major, - bio->bi_bdev->bd_disk->first_minor); + MAJOR(disk_devt(bio->bi_bdev->bd_disk)), + MINOR(disk_devt(bio->bi_bdev->bd_disk))); /* * Test to see which stripe drive triggered the event diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ace998ce59f6..a78caad29996 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1146,7 +1146,7 @@ static void unlock_fs(struct mapped_device *md); static void free_dev(struct mapped_device *md) { - int minor = md->disk->first_minor; + int minor = MINOR(disk_devt(md->disk)); if (md->suspended_bdev) { unlock_fs(md); @@ -1267,7 +1267,7 @@ static struct mapped_device *dm_find_md(dev_t dev) md = idr_find(&_minor_idr, minor); if (md && (md == MINOR_ALLOCED || - (dm_disk(md)->first_minor != minor) || + (MINOR(disk_devt(dm_disk(md))) != minor) || test_bit(DMF_FREEING, &md->flags))) { md = NULL; goto out; @@ -1318,7 +1318,8 @@ void dm_put(struct mapped_device *md) if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { map = dm_get_table(md); - idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); + idr_replace(&_minor_idr, MINOR_ALLOCED, + MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); if (!dm_suspended(md)) { diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index d2d2318dafa4..82bf649ef138 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -197,7 +197,7 @@ static int mspro_block_bd_open(struct inode *inode, struct file *filp) static int mspro_block_disk_release(struct gendisk *disk) { struct mspro_block_data *msb = disk->private_data; - int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT; + int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT; mutex_lock(&mspro_block_disk_lock); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index ebc8b9d77613..97156b689e82 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -83,7 +83,7 @@ static void mmc_blk_put(struct mmc_blk_data *md) mutex_lock(&open_lock); md->usage--; if (md->usage == 0) { - int devidx = md->disk->first_minor >> MMC_SHIFT; + int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT; __clear_bit(devidx, dev_use); put_disk(md->disk); diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 03c0e40a92ff..e3b5c4d3036e 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -76,7 +76,8 @@ dasd_devices_show(struct seq_file *m, void *v) /* Print kdev. */ if (block->gdp) seq_printf(m, " at (%3d:%6d)", - block->gdp->major, block->gdp->first_minor); + MAJOR(disk_devt(block->gdp)), + MINOR(disk_devt(block->gdp))); else seq_printf(m, " at (???:??????)"); /* Print device name. */ diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 711b3004b3e6..9481e4a3f76e 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -114,7 +114,7 @@ dcssblk_assign_free_minor(struct dcssblk_dev_info *dev_info) found = 0; // test if minor available list_for_each_entry(entry, &dcssblk_devices, lh) - if (minor == entry->gd->first_minor) + if (minor == MINOR(disk_devt(entry->gd))) found++; if (!found) break; // got unused minor } @@ -397,7 +397,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char goto unload_seg; } sprintf(dev_info->gd->disk_name, "dcssblk%d", - dev_info->gd->first_minor); + MINOR(disk_devt(dev_info->gd))); list_add_tail(&dev_info->lh, &dcssblk_devices); if (!try_module_get(THIS_MODULE)) { diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 27f5bfd1def3..8dbe3798d5fd 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -878,7 +878,7 @@ static void sr_kref_release(struct kref *kref) struct gendisk *disk = cd->disk; spin_lock(&sr_index_lock); - clear_bit(disk->first_minor, sr_index_bits); + clear_bit(MINOR(disk_devt(disk)), sr_index_bits); spin_unlock(&sr_index_lock); unregister_cdrom(&cd->cdi); diff --git a/fs/block_dev.c b/fs/block_dev.c index de0776cd7215..72e0a2887cb7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -892,7 +892,7 @@ int check_disk_change(struct block_device *bdev) if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); - if (bdev->bd_disk->minors > 1) + if (disk_max_parts(bdev->bd_disk)) bdev->bd_invalidated = 1; return 1; } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index b86aab1b0df6..e77fa144a07d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -134,7 +134,11 @@ char *disk_name(struct gendisk *hd, int partno, char *buf) const char *bdevname(struct block_device *bdev, char *buf) { - int partno = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; + int partno = 0; + + if (bdev->bd_part) + partno = bdev->bd_part->partno; + return disk_name(bdev->bd_disk, partno, buf); } @@ -169,7 +173,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) if (isdigit(state->name[strlen(state->name)-1])) sprintf(state->name, "p"); - state->limit = hd->minors; + state->limit = disk_max_parts(hd) + 1; i = res = err = 0; while (!res && check_part[i]) { memset(&state->parts, 0, sizeof(state->parts)); @@ -416,7 +420,6 @@ void register_disk(struct gendisk *disk) int err; disk->dev.parent = disk->driverfs_dev; - disk->dev.devt = MKDEV(disk->major, disk->first_minor); strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); /* ewww... some of these buggers have / in the name... */ @@ -440,7 +443,7 @@ void register_disk(struct gendisk *disk) disk_sysfs_add_subdirs(disk); /* No minors to use for partitions */ - if (disk->minors == 1) + if (!disk_max_parts(disk)) goto exit; /* No such device (e.g., media were just removed) */ @@ -463,8 +466,8 @@ exit: kobject_uevent(&disk->dev.kobj, KOBJ_ADD); /* announce possible partitions */ - for (i = 1; i < disk->minors; i++) { - p = disk->part[i-1]; + for (i = 0; i < disk_max_parts(disk); i++) { + p = disk->part[i]; if (!p || !p->nr_sects) continue; kobject_uevent(&p->dev.kobj, KOBJ_ADD); @@ -482,7 +485,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (res) return res; bdev->bd_invalidated = 0; - for (p = 1; p < disk->minors; p++) + for (p = 1; p <= disk_max_parts(disk); p++) delete_partition(disk, p); if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); @@ -545,7 +548,7 @@ void del_gendisk(struct gendisk *disk) int p; /* invalidate stuff */ - for (p = disk->minors - 1; p > 0; p--) { + for (p = disk_max_parts(disk); p > 0; p--) { invalidate_partition(disk, p); delete_partition(disk, p); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index d1723c0a8600..0ff75329199c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -111,10 +111,14 @@ struct hd_struct { #define GENHD_FL_FAIL 64 struct gendisk { + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). + */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + char disk_name[32]; /* name of major driver */ struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; @@ -152,6 +156,21 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } +static inline int disk_max_parts(struct gendisk *disk) +{ + return disk->minors - 1; +} + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return disk->dev.devt; +} + +static inline dev_t part_devt(struct hd_struct *part) +{ + return part->dev.devt; +} + /* * Macros to operate on percpu disk statistics: * @@ -163,7 +182,7 @@ static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, { struct hd_struct *part; int i; - for (i = 0; i < gendiskp->minors - 1; i++) { + for (i = 0; i < disk_max_parts(gendiskp); i++) { part = gendiskp->part[i]; if (part && part->start_sect <= sector && sector < part->start_sect + part->nr_sects) @@ -366,6 +385,7 @@ extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); +extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -553,11 +573,6 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -static inline struct block_device *bdget_disk(struct gendisk *disk, int partno) -{ - return bdget(MKDEV(disk->major, disk->first_minor) + partno); -} - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3 From e71bf0d0ee89e51b92776391c5634938236977d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:03:02 +0200 Subject: block: fix disk->part[] dereferencing race disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely. * Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 20 ++++- block/blk-merge.c | 9 +- block/genhd.c | 218 ++++++++++++++++++++++++++++++++++++++------- block/ioctl.c | 26 +++--- drivers/block/aoe/aoecmd.c | 6 +- fs/block_dev.c | 15 ++-- fs/partitions/check.c | 70 +++++++++------ include/linux/genhd.h | 53 ++++++++--- 8 files changed, 323 insertions(+), 94 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-core.c b/block/blk-core.c index a0dc2e72fcbb..d6128d9ad601 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -60,7 +60,9 @@ static void drive_stat_acct(struct request *rq, int new_io) if (!blk_fs_request(rq) || !rq->rq_disk) return; - part = disk_map_sector(rq->rq_disk, rq->sector); + rcu_read_lock(); + + part = disk_map_sector_rcu(rq->rq_disk, rq->sector); if (!new_io) __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); else { @@ -71,6 +73,8 @@ static void drive_stat_acct(struct request *rq, int new_io) part->in_flight++; } } + + rcu_read_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -1557,12 +1561,14 @@ static int __end_that_request_first(struct request *req, int error, } if (blk_fs_request(req) && req->rq_disk) { - struct hd_struct *part = - disk_map_sector(req->rq_disk, req->sector); const int rw = rq_data_dir(req); + struct hd_struct *part; + rcu_read_lock(); + part = disk_map_sector_rcu(req->rq_disk, req->sector); all_stat_add(req->rq_disk, part, sectors[rw], nr_bytes >> 9, req->sector); + rcu_read_unlock(); } total_bytes = bio_nbytes = 0; @@ -1746,7 +1752,11 @@ static void end_that_request_last(struct request *req, int error) if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); - struct hd_struct *part = disk_map_sector(disk, req->sector); + struct hd_struct *part; + + rcu_read_lock(); + + part = disk_map_sector_rcu(disk, req->sector); __all_stat_inc(disk, part, ios[rw], req->sector); __all_stat_add(disk, part, ticks[rw], duration, req->sector); @@ -1756,6 +1766,8 @@ static void end_that_request_last(struct request *req, int error) part_round_stats(part); part->in_flight--; } + + rcu_read_unlock(); } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index 9b17da698d7c..eb2a3ca58303 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -387,14 +387,19 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); if (req->rq_disk) { - struct hd_struct *part = - disk_map_sector(req->rq_disk, req->sector); + struct hd_struct *part; + + rcu_read_lock(); + + part = disk_map_sector_rcu(req->rq_disk, req->sector); disk_round_stats(req->rq_disk); req->rq_disk->in_flight--; if (part) { part_round_stats(part); part->in_flight--; } + + rcu_read_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index fa32d09fda24..b431d6543942 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -26,6 +26,158 @@ struct kobject *block_depr; static struct device_type disk_type; +/** + * disk_get_part - get partition + * @disk: disk to look partition from + * @partno: partition number + * + * Look for partition @partno from @disk. If found, increment + * reference count and return it. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * Pointer to the found partition on success, NULL if not found. + */ +struct hd_struct *disk_get_part(struct gendisk *disk, int partno) +{ + struct hd_struct *part; + + if (unlikely(partno < 1 || partno > disk_max_parts(disk))) + return NULL; + rcu_read_lock(); + part = rcu_dereference(disk->__part[partno - 1]); + if (part) + get_device(&part->dev); + rcu_read_unlock(); + + return part; +} +EXPORT_SYMBOL_GPL(disk_get_part); + +/** + * disk_part_iter_init - initialize partition iterator + * @piter: iterator to initialize + * @disk: disk to iterate over + * @flags: DISK_PITER_* flags + * + * Initialize @piter so that it iterates over partitions of @disk. + * + * CONTEXT: + * Don't care. + */ +void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, + unsigned int flags) +{ + piter->disk = disk; + piter->part = NULL; + + if (flags & DISK_PITER_REVERSE) + piter->idx = disk_max_parts(piter->disk) - 1; + else + piter->idx = 0; + + piter->flags = flags; +} +EXPORT_SYMBOL_GPL(disk_part_iter_init); + +/** + * disk_part_iter_next - proceed iterator to the next partition and return it + * @piter: iterator of interest + * + * Proceed @piter to the next partition and return it. + * + * CONTEXT: + * Don't care. + */ +struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) +{ + int inc, end; + + /* put the last partition */ + disk_put_part(piter->part); + piter->part = NULL; + + rcu_read_lock(); + + /* determine iteration parameters */ + if (piter->flags & DISK_PITER_REVERSE) { + inc = -1; + end = -1; + } else { + inc = 1; + end = disk_max_parts(piter->disk); + } + + /* iterate to the next partition */ + for (; piter->idx != end; piter->idx += inc) { + struct hd_struct *part; + + part = rcu_dereference(piter->disk->__part[piter->idx]); + if (!part) + continue; + if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) + continue; + + get_device(&part->dev); + piter->part = part; + piter->idx += inc; + break; + } + + rcu_read_unlock(); + + return piter->part; +} +EXPORT_SYMBOL_GPL(disk_part_iter_next); + +/** + * disk_part_iter_exit - finish up partition iteration + * @piter: iter of interest + * + * Called when iteration is over. Cleans up @piter. + * + * CONTEXT: + * Don't care. + */ +void disk_part_iter_exit(struct disk_part_iter *piter) +{ + disk_put_part(piter->part); + piter->part = NULL; +} +EXPORT_SYMBOL_GPL(disk_part_iter_exit); + +/** + * disk_map_sector_rcu - map sector to partition + * @disk: gendisk of interest + * @sector: sector to map + * + * Find out which partition @sector maps to on @disk. This is + * primarily used for stats accounting. + * + * CONTEXT: + * RCU read locked. The returned partition pointer is valid only + * while preemption is disabled. + * + * RETURNS: + * Found partition on success, NULL if there's no matching partition. + */ +struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) +{ + int i; + + for (i = 0; i < disk_max_parts(disk); i++) { + struct hd_struct *part = rcu_dereference(disk->__part[i]); + + if (part && part->start_sect <= sector && + sector < part->start_sect + part->nr_sects) + return part; + } + return NULL; +} +EXPORT_SYMBOL_GPL(disk_map_sector_rcu); + /* * Can be deleted altogether. Later. * @@ -245,10 +397,12 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno) if (partno == 0) devt = disk_devt(disk); else { - struct hd_struct *part = disk->part[partno - 1]; + struct hd_struct *part; + part = disk_get_part(disk, partno); if (part && part->nr_sects) devt = part_devt(part); + disk_put_part(part); } if (likely(devt != MKDEV(0, 0))) @@ -270,8 +424,9 @@ void __init printk_all_partitions(void) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); + struct disk_part_iter piter; + struct hd_struct *part; char buf[BDEVNAME_SIZE]; - int n; /* * Don't show empty devices or things that have been @@ -298,16 +453,13 @@ void __init printk_all_partitions(void) printk(" (driver?)\n"); /* now show the partitions */ - for (n = 0; n < disk_max_parts(disk); ++n) { - struct hd_struct *part = disk->part[n]; - - if (!part || !part->nr_sects) - continue; + disk_part_iter_init(&piter, disk, 0); + while ((part = disk_part_iter_next(&piter))) printk(" %02x%02x %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), (unsigned long long)part->nr_sects >> 1, disk_name(disk, part->partno, buf)); - } + disk_part_iter_exit(&piter); } class_dev_iter_exit(&iter); } @@ -371,7 +523,8 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos) static int show_partition(struct seq_file *seqf, void *v) { struct gendisk *sgp = v; - int n; + struct disk_part_iter piter; + struct hd_struct *part; char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ @@ -386,17 +539,14 @@ static int show_partition(struct seq_file *seqf, void *v) MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), (unsigned long long)get_capacity(sgp) >> 1, disk_name(sgp, 0, buf)); - for (n = 0; n < disk_max_parts(sgp); n++) { - struct hd_struct *part = sgp->part[n]; - if (!part) - continue; - if (part->nr_sects == 0) - continue; + + disk_part_iter_init(&piter, sgp, 0); + while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %4d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), (unsigned long long)part->nr_sects >> 1, disk_name(sgp, part->partno, buf)); - } + disk_part_iter_exit(&piter); return 0; } @@ -571,7 +721,7 @@ static void disk_release(struct device *dev) struct gendisk *disk = dev_to_disk(dev); kfree(disk->random); - kfree(disk->part); + kfree(disk->__part); free_disk_stats(disk); kfree(disk); } @@ -596,8 +746,9 @@ static struct device_type disk_type = { static int diskstats_show(struct seq_file *seqf, void *v) { struct gendisk *gp = v; + struct disk_part_iter piter; + struct hd_struct *hd; char buf[BDEVNAME_SIZE]; - int n; /* if (&gp->dev.kobj.entry == block_class.devices.next) @@ -624,12 +775,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); /* now show all non-0 size partitions of it */ - for (n = 0; n < disk_max_parts(gp); n++) { - struct hd_struct *hd = gp->part[n]; - - if (!hd || !hd->nr_sects) - continue; - + disk_part_iter_init(&piter, gp, 0); + while ((hd = disk_part_iter_next(&piter))) { preempt_disable(); part_round_stats(hd); preempt_enable(); @@ -650,6 +797,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) jiffies_to_msecs(part_stat_read(hd, time_in_queue)) ); } + disk_part_iter_exit(&piter); return 0; } @@ -703,12 +851,16 @@ dev_t blk_lookup_devt(const char *name, int partno) if (partno == 0) devt = disk_devt(disk); else { - struct hd_struct *part = disk->part[partno - 1]; + struct hd_struct *part; - if (!part || !part->nr_sects) + part = disk_get_part(disk, partno); + if (!part || !part->nr_sects) { + disk_put_part(part); continue; + } devt = part_devt(part); + disk_put_part(part); } break; } @@ -735,9 +887,9 @@ struct gendisk *alloc_disk_node(int minors, int node_id) } if (minors > 1) { int size = (minors - 1) * sizeof(struct hd_struct *); - disk->part = kmalloc_node(size, + disk->__part = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, node_id); - if (!disk->part) { + if (!disk->__part) { free_disk_stats(disk); kfree(disk); return NULL; @@ -798,10 +950,14 @@ EXPORT_SYMBOL(set_device_ro); void set_disk_ro(struct gendisk *disk, int flag) { - int i; + struct disk_part_iter piter; + struct hd_struct *part; + disk->policy = flag; - for (i = 0; i < disk_max_parts(disk); i++) - if (disk->part[i]) disk->part[i]->policy = flag; + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) + part->policy = flag; + disk_part_iter_exit(&piter); } EXPORT_SYMBOL(set_disk_ro); diff --git a/block/ioctl.c b/block/ioctl.c index 403f7d7e0c28..a5f672ad55f6 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -12,11 +12,12 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user { struct block_device *bdevp; struct gendisk *disk; + struct hd_struct *part; struct blkpg_ioctl_arg a; struct blkpg_partition p; + struct disk_part_iter piter; long long start, length; int partno; - int i; int err; if (!capable(CAP_SYS_ADMIN)) @@ -47,28 +48,33 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user mutex_lock(&bdev->bd_mutex); /* overlap? */ - for (i = 0; i < disk_max_parts(disk); i++) { - struct hd_struct *s = disk->part[i]; - - if (!s) - continue; - if (!(start+length <= s->start_sect || - start >= s->start_sect + s->nr_sects)) { + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) { + if (!(start + length <= part->start_sect || + start >= part->start_sect + part->nr_sects)) { + disk_part_iter_exit(&piter); mutex_unlock(&bdev->bd_mutex); return -EBUSY; } } + disk_part_iter_exit(&piter); + /* all seems OK */ err = add_partition(disk, partno, start, length, ADDPART_FLAG_NONE); mutex_unlock(&bdev->bd_mutex); return err; case BLKPG_DEL_PARTITION: - if (!disk->part[partno - 1]) + part = disk_get_part(disk, partno); + if (!part) return -ENXIO; - bdevp = bdget_disk(disk, partno); + + bdevp = bdget(part_devt(part)); + disk_put_part(part); if (!bdevp) return -ENOMEM; + mutex_lock(&bdevp->bd_mutex); if (bdevp->bd_openers) { mutex_unlock(&bdevp->bd_mutex); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 885d1409521f..84c03d65dcc5 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -757,11 +757,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector const int rw = bio_data_dir(bio); struct hd_struct *part; - part = disk_map_sector(disk, sector); + rcu_read_lock(); + + part = disk_map_sector_rcu(disk, sector); all_stat_inc(disk, part, ios[rw], sector); all_stat_add(disk, part, ticks[rw], duration, sector); all_stat_add(disk, part, sectors[rw], n_sect, sector); all_stat_add(disk, part, io_ticks, duration, sector); + + rcu_read_unlock(); } void diff --git a/fs/block_dev.c b/fs/block_dev.c index 72e0a2887cb7..2f2873b9a041 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -929,6 +929,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) { struct module *owner = NULL; struct gendisk *disk; + struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -978,7 +979,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (bdev->bd_invalidated) rescan_partitions(disk, bdev); } else { - struct hd_struct *p; struct block_device *whole; whole = bdget_disk(disk, 0); ret = -ENOMEM; @@ -989,16 +989,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (ret) goto out_first; bdev->bd_contains = whole; - p = disk->part[partno - 1]; + part = disk_get_part(disk, partno); bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; - if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { + if (!(disk->flags & GENHD_FL_UP) || + !part || !part->nr_sects) { ret = -ENXIO; goto out_first; } - kobject_get(&p->dev.kobj); - bdev->bd_part = p; - bd_set_size(bdev, (loff_t) p->nr_sects << 9); + bdev->bd_part = part; + bd_set_size(bdev, (loff_t)part->nr_sects << 9); } } else { put_disk(disk); @@ -1027,6 +1027,7 @@ out_first: __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; put_disk(disk); + disk_put_part(part); module_put(owner); out: mutex_unlock(&bdev->bd_mutex); @@ -1119,7 +1120,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) module_put(owner); if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->dev.kobj); + disk_put_part(bdev->bd_part); bdev->bd_part = NULL; } bdev->bd_disk = NULL; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e77fa144a07d..96c8bf41e455 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -314,19 +314,29 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) kobject_put(k); } +static void delete_partition_rcu_cb(struct rcu_head *head) +{ + struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); + + part->start_sect = 0; + part->nr_sects = 0; + part_stat_set_all(part, 0); + put_device(&part->dev); +} + void delete_partition(struct gendisk *disk, int partno) { - struct hd_struct *p = disk->part[partno - 1]; + struct hd_struct *part; - if (!p) + part = disk->__part[partno-1]; + if (!part) return; - disk->part[partno - 1] = NULL; - p->start_sect = 0; - p->nr_sects = 0; - part_stat_set_all(p, 0); - kobject_put(p->holder_dir); - device_del(&p->dev); - put_device(&p->dev); + + rcu_assign_pointer(disk->__part[partno-1], NULL); + kobject_put(part->holder_dir); + device_del(&part->dev); + + call_rcu(&part->rcu_head, delete_partition_rcu_cb); } static ssize_t whole_disk_show(struct device *dev, @@ -343,7 +353,7 @@ int add_partition(struct gendisk *disk, int partno, struct hd_struct *p; int err; - if (disk->part[partno - 1]) + if (disk->__part[partno - 1]) return -EBUSY; p = kzalloc(sizeof(*p), GFP_KERNEL); @@ -391,7 +401,8 @@ int add_partition(struct gendisk *disk, int partno, } /* everything is up and running, commence */ - disk->part[partno - 1] = p; + INIT_RCU_HEAD(&p->rcu_head); + rcu_assign_pointer(disk->__part[partno - 1], p); /* suppress uevent if the disk supresses it */ if (!disk->dev.uevent_suppress) @@ -414,9 +425,9 @@ out_put: void register_disk(struct gendisk *disk) { struct block_device *bdev; + struct disk_part_iter piter; + struct hd_struct *part; char *s; - int i; - struct hd_struct *p; int err; disk->dev.parent = disk->driverfs_dev; @@ -466,16 +477,16 @@ exit: kobject_uevent(&disk->dev.kobj, KOBJ_ADD); /* announce possible partitions */ - for (i = 0; i < disk_max_parts(disk); i++) { - p = disk->part[i]; - if (!p || !p->nr_sects) - continue; - kobject_uevent(&p->dev.kobj, KOBJ_ADD); - } + disk_part_iter_init(&piter, disk, 0); + while ((part = disk_part_iter_next(&piter))) + kobject_uevent(&part->dev.kobj, KOBJ_ADD); + disk_part_iter_exit(&piter); } int rescan_partitions(struct gendisk *disk, struct block_device *bdev) { + struct disk_part_iter piter; + struct hd_struct *part; struct parsed_partitions *state; int p, res; @@ -485,8 +496,12 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (res) return res; bdev->bd_invalidated = 0; - for (p = 1; p <= disk_max_parts(disk); p++) - delete_partition(disk, p); + + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) + delete_partition(disk, part->partno); + disk_part_iter_exit(&piter); + if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) @@ -545,13 +560,18 @@ EXPORT_SYMBOL(read_dev_sector); void del_gendisk(struct gendisk *disk) { - int p; + struct disk_part_iter piter; + struct hd_struct *part; /* invalidate stuff */ - for (p = disk_max_parts(disk); p > 0; p--) { - invalidate_partition(disk, p); - delete_partition(disk, p); + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); + while ((part = disk_part_iter_next(&piter))) { + invalidate_partition(disk, part->partno); + delete_partition(disk, part->partno); } + disk_part_iter_exit(&piter); + invalidate_partition(disk, 0); disk->capacity = 0; disk->flags &= ~GENHD_FL_UP; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0ff75329199c..7fbba19e076b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_BLOCK @@ -100,6 +101,7 @@ struct hd_struct { #else struct disk_stats dkstats; #endif + struct rcu_head rcu_head; }; #define GENHD_FL_REMOVABLE 1 @@ -120,7 +122,14 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor - 1] */ + + /* Array of pointers to partitions indexed by partno - 1. + * Protected with matching bdev lock but stat and other + * non-critical accesses use RCU. Always access through + * helpers. + */ + struct hd_struct **__part; + struct block_device_operations *fops; struct request_queue *queue; void *private_data; @@ -171,25 +180,41 @@ static inline dev_t part_devt(struct hd_struct *part) return part->dev.devt; } +extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); + +static inline void disk_put_part(struct hd_struct *part) +{ + if (likely(part)) + put_device(&part->dev); +} + +/* + * Smarter partition iterator without context limits. + */ +#define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ +#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ + +struct disk_part_iter { + struct gendisk *disk; + struct hd_struct *part; + int idx; + unsigned int flags; +}; + +extern void disk_part_iter_init(struct disk_part_iter *piter, + struct gendisk *disk, unsigned int flags); +extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +extern void disk_part_iter_exit(struct disk_part_iter *piter); + +extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, + sector_t sector); + /* * Macros to operate on percpu disk statistics: * * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ -static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, - sector_t sector) -{ - struct hd_struct *part; - int i; - for (i = 0; i < disk_max_parts(gendiskp); i++) { - part = gendiskp->part[i]; - if (part && part->start_sect <= sector - && sector < part->start_sect + part->nr_sects) - return part; - } - return NULL; -} #ifdef CONFIG_SMP #define __disk_stat_add(gendiskp, field, addnd) \ -- cgit v1.2.3 From c9959059161ddd7bf4670cf47367033d6b2f79c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:21 +0900 Subject: block: fix diskstats access There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemtion is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disables preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted. disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now has @cpu argument to help RT. This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- block/blk-core.c | 52 +++++++++-------- block/blk-merge.c | 11 ++-- block/genhd.c | 20 ++++--- drivers/block/aoe/aoecmd.c | 15 ++--- drivers/md/dm.c | 26 +++++---- drivers/md/linear.c | 7 ++- drivers/md/multipath.c | 7 ++- drivers/md/raid0.c | 7 ++- drivers/md/raid1.c | 8 ++- drivers/md/raid10.c | 7 ++- drivers/md/raid5.c | 8 ++- fs/partitions/check.c | 7 ++- include/linux/genhd.h | 139 +++++++++++++++++++-------------------------- 13 files changed, 158 insertions(+), 156 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-core.c b/block/blk-core.c index d6128d9ad601..e0a5ee36849c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -56,25 +56,26 @@ static void drive_stat_acct(struct request *rq, int new_io) { struct hd_struct *part; int rw = rq_data_dir(rq); + int cpu; if (!blk_fs_request(rq) || !rq->rq_disk) return; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(rq->rq_disk, rq->sector); + if (!new_io) - __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); + all_stat_inc(cpu, rq->rq_disk, part, merges[rw], rq->sector); else { - disk_round_stats(rq->rq_disk); + disk_round_stats(cpu, rq->rq_disk); rq->rq_disk->in_flight++; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight++; } } - rcu_read_unlock(); + disk_stat_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -997,7 +998,7 @@ static inline void add_request(struct request_queue *q, struct request *req) * /proc/diskstats. This accounts immediately for all queue usage up to * the current jiffies and restarts the counters again. */ -void disk_round_stats(struct gendisk *disk) +void disk_round_stats(int cpu, struct gendisk *disk) { unsigned long now = jiffies; @@ -1005,15 +1006,15 @@ void disk_round_stats(struct gendisk *disk) return; if (disk->in_flight) { - __disk_stat_add(disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); - __disk_stat_add(disk, io_ticks, (now - disk->stamp)); + disk_stat_add(cpu, disk, time_in_queue, + disk->in_flight * (now - disk->stamp)); + disk_stat_add(cpu, disk, io_ticks, (now - disk->stamp)); } disk->stamp = now; } EXPORT_SYMBOL_GPL(disk_round_stats); -void part_round_stats(struct hd_struct *part) +void part_round_stats(int cpu, struct hd_struct *part) { unsigned long now = jiffies; @@ -1021,9 +1022,9 @@ void part_round_stats(struct hd_struct *part) return; if (part->in_flight) { - __part_stat_add(part, time_in_queue, - part->in_flight * (now - part->stamp)); - __part_stat_add(part, io_ticks, (now - part->stamp)); + part_stat_add(cpu, part, time_in_queue, + part->in_flight * (now - part->stamp)); + part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; } @@ -1563,12 +1564,13 @@ static int __end_that_request_first(struct request *req, int error, if (blk_fs_request(req) && req->rq_disk) { const int rw = rq_data_dir(req); struct hd_struct *part; + int cpu; - rcu_read_lock(); + cpu = disk_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - all_stat_add(req->rq_disk, part, sectors[rw], - nr_bytes >> 9, req->sector); - rcu_read_unlock(); + all_stat_add(cpu, req->rq_disk, part, sectors[rw], + nr_bytes >> 9, req->sector); + disk_stat_unlock(); } total_bytes = bio_nbytes = 0; @@ -1753,21 +1755,21 @@ static void end_that_request_last(struct request *req, int error) unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(disk, req->sector); - __all_stat_inc(disk, part, ios[rw], req->sector); - __all_stat_add(disk, part, ticks[rw], duration, req->sector); - disk_round_stats(disk); + all_stat_inc(cpu, disk, part, ios[rw], req->sector); + all_stat_add(cpu, disk, part, ticks[rw], duration, req->sector); + disk_round_stats(cpu, disk); disk->in_flight--; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight--; } - rcu_read_unlock(); + disk_stat_unlock(); } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index eb2a3ca58303..d926a24bf1fd 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -388,18 +388,19 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (req->rq_disk) { struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - disk_round_stats(req->rq_disk); + + disk_round_stats(cpu, req->rq_disk); req->rq_disk->in_flight--; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight--; } - rcu_read_unlock(); + disk_stat_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index b431d6543942..430626e440f0 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -633,10 +633,11 @@ static ssize_t disk_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); + int cpu; - preempt_disable(); - disk_round_stats(disk); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, disk); + disk_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -749,6 +750,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) struct disk_part_iter piter; struct hd_struct *hd; char buf[BDEVNAME_SIZE]; + int cpu; /* if (&gp->dev.kobj.entry == block_class.devices.next) @@ -758,9 +760,9 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - preempt_disable(); - disk_round_stats(gp); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, gp); + disk_stat_unlock(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), disk_name(gp, 0, buf), @@ -777,9 +779,9 @@ static int diskstats_show(struct seq_file *seqf, void *v) /* now show all non-0 size partitions of it */ disk_part_iter_init(&piter, gp, 0); while ((hd = disk_part_iter_next(&piter))) { - preempt_disable(); - part_round_stats(hd); - preempt_enable(); + cpu = disk_stat_lock(); + part_round_stats(cpu, hd); + disk_stat_unlock(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 84c03d65dcc5..17eed8c025d0 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -756,16 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector unsigned long n_sect = bio->bi_size >> 9; const int rw = bio_data_dir(bio); struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(disk, part, ios[rw], sector); - all_stat_add(disk, part, ticks[rw], duration, sector); - all_stat_add(disk, part, sectors[rw], n_sect, sector); - all_stat_add(disk, part, io_ticks, duration, sector); - rcu_read_unlock(); + all_stat_inc(cpu, disk, part, ios[rw], sector); + all_stat_add(cpu, disk, part, ticks[rw], duration, sector); + all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); + all_stat_add(cpu, disk, part, io_ticks, duration, sector); + + disk_stat_unlock(); } void diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a78caad29996..653624792eaf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -377,12 +377,13 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; + int cpu; io->start_time = jiffies; - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_unlock(); dm_disk(md)->in_flight = atomic_inc_return(&md->pending); } @@ -391,15 +392,15 @@ static int end_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->bio; unsigned long duration = jiffies - io->start_time; - int pending; + int pending, cpu; int rw = bio_data_dir(bio); - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); + disk_stat_unlock(); - disk_stat_add(dm_disk(md), ticks[rw], duration); + dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); return !pending; } @@ -885,6 +886,7 @@ static int dm_request(struct request_queue *q, struct bio *bio) int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; + int cpu; /* * There is no use in forwarding any barrier request since we can't @@ -897,8 +899,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - disk_stat_inc(dm_disk(md), ios[rw]); - disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, dm_disk(md), ios[rw]); + disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * If we're suspended we have to queue diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b1eebf88c209..00cbc8e47294 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -318,14 +318,17 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) mddev_t *mddev = q->queuedata; dev_info_t *tmp_dev; sector_t block; + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c4779ccba1c3..182f5a94cdc5 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -147,6 +147,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); @@ -158,8 +159,10 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 183610635661..e26030fa59ab 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,14 +399,17 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) sector_t chunk; sector_t block, rsect; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0b82030c265d..babb13036f93 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -779,7 +779,7 @@ static int make_request(struct request_queue *q, struct bio * bio) struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); const int do_sync = bio_sync(bio); - int do_barriers; + int cpu, do_barriers; mdk_rdev_t *blocked_rdev; /* @@ -804,8 +804,10 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * make_request() can abort the operation when READA is being diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d3b9aa096285..5ec80da0a9d7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -789,6 +789,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; + int cpu; int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); @@ -843,8 +844,10 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37e546528f9c..5899f211515f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3387,7 +3387,7 @@ static int make_request(struct request_queue *q, struct bio * bi) sector_t logical_sector, last_sector; struct stripe_head *sh; const int rw = bio_data_dir(bi); - int remaining; + int cpu, remaining; if (unlikely(bio_barrier(bi))) { bio_endio(bi, -EOPNOTSUPP); @@ -3396,8 +3396,10 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi)); + disk_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 96c8bf41e455..c442f0aadac3 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -219,10 +219,11 @@ static ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); + int cpu; - preempt_disable(); - part_round_stats(p); - preempt_enable(); + cpu = disk_stat_lock(); + part_round_stats(cpu, p); + disk_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7fbba19e076b..ac8a901f2002 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -209,16 +209,24 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter); extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); -/* +/* * Macros to operate on percpu disk statistics: * - * The __ variants should only be called in critical sections. The full - * variants disable/enable preemption. + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. */ - #ifdef CONFIG_SMP -#define __disk_stat_add(gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) #define disk_stat_read(gendiskp, field) \ ({ \ @@ -229,7 +237,8 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, res; \ }) -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { +static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +{ int i; for_each_possible_cpu(i) @@ -237,14 +246,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { sizeof(struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ - (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) +#define part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr(part->dkstats, cpu)->field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - __part_stat_add(part, field, addnd); \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) \ @@ -264,10 +273,13 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) memset(per_cpu_ptr(part->dkstats, i), value, sizeof(struct disk_stats)); } - + #else /* !CONFIG_SMP */ -#define __disk_stat_add(gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); 0; }) +#define disk_stat_unlock() rcu_read_unlock() + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (gendiskp->dkstats.field += addnd) #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) @@ -275,14 +287,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ +#define part_stat_add(cpu, part, field, addnd) \ (part->dkstats.field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part->dkstats.field += addnd; \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) (part->dkstats.field) @@ -294,63 +306,26 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #endif /* CONFIG_SMP */ -#define disk_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __disk_stat_add(gendiskp, field, addnd); \ - preempt_enable(); \ - } while (0) - -#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1) -#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1) - -#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1) -#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1) - -#define __disk_stat_sub(gendiskp, field, subnd) \ - __disk_stat_add(gendiskp, field, -subnd) -#define disk_stat_sub(gendiskp, field, subnd) \ - disk_stat_add(gendiskp, field, -subnd) - -#define part_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __part_stat_add(gendiskp, field, addnd);\ - preempt_enable(); \ - } while (0) - -#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) -#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) - -#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) -#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) - -#define __part_stat_sub(gendiskp, field, subnd) \ - __part_stat_add(gendiskp, field, -subnd) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define all_stat_add(gendiskp, part, field, addnd, sector) \ - do { \ - preempt_disable(); \ - __all_stat_add(gendiskp, part, field, addnd, sector); \ - preempt_enable(); \ - } while (0) - -#define __all_stat_dec(gendiskp, field, sector) \ - __all_stat_add(gendiskp, field, -1, sector) -#define all_stat_dec(gendiskp, field, sector) \ - all_stat_add(gendiskp, field, -1, sector) - -#define __all_stat_inc(gendiskp, part, field, sector) \ - __all_stat_add(gendiskp, part, field, 1, sector) -#define all_stat_inc(gendiskp, part, field, sector) \ - all_stat_add(gendiskp, part, field, 1, sector) - -#define __all_stat_sub(gendiskp, part, field, subnd, sector) \ - __all_stat_add(gendiskp, part, field, -subnd, sector) -#define all_stat_sub(gendiskp, part, field, subnd, sector) \ - all_stat_add(gendiskp, part, field, -subnd, sector) +#define disk_stat_dec(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, -1) +#define disk_stat_inc(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, 1) +#define disk_stat_sub(cpu, gendiskp, field, subnd) \ + disk_stat_add(cpu, gendiskp, field, -subnd) + +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +#define all_stat_dec(cpu, gendiskp, field, sector) \ + all_stat_add(cpu, gendiskp, field, -1, sector) +#define all_stat_inc(cpu, gendiskp, part, field, sector) \ + all_stat_add(cpu, gendiskp, part, field, 1, sector) +#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ + all_stat_add(cpu, gendiskp, part, field, -subnd, sector) /* Inlines to alloc and free disk stats in struct gendisk */ #ifdef CONFIG_SMP @@ -401,8 +376,8 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(struct gendisk *disk); -extern void part_round_stats(struct hd_struct *part); +extern void disk_round_stats(int cpu, struct gendisk *disk); +extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ extern int get_blkdev_list(char *, int); -- cgit v1.2.3 From ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:05 +0900 Subject: block: implement and use {disk|part}_to_dev() Implement {disk|part}_to_dev() and use them to access generic device instead of directly dereferencing {disk|part}->dev. To make sure no user is left behind, rename generic devices fields to __dev. This is in preparation of unifying partition 0 handling with other partitions. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-integrity.c | 5 +-- block/blk-sysfs.c | 4 +-- block/genhd.c | 27 ++++++++-------- drivers/block/aoe/aoeblk.c | 4 +-- drivers/block/nbd.c | 4 +-- drivers/ide/ide-probe.c | 2 +- drivers/md/dm.c | 4 +-- drivers/md/md.c | 10 +++--- fs/block_dev.c | 4 +-- fs/partitions/check.c | 79 ++++++++++++++++++++++++---------------------- include/linux/genhd.h | 20 ++++++------ 11 files changed, 86 insertions(+), 77 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index d87606eaca1d..69023da63151 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) return -1; if (kobject_init_and_add(&bi->kobj, &integrity_ktype, - &disk->dev.kobj, "%s", "integrity")) { + &disk_to_dev(disk)->kobj, + "%s", "integrity")) { kmem_cache_free(integrity_cachep, bi); return -1; } @@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk) kobject_uevent(&bi->kobj, KOBJ_REMOVE); kobject_del(&bi->kobj); - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); kmem_cache_free(integrity_cachep, bi); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 304ec73ab821..b9a6ed166649 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -310,7 +310,7 @@ int blk_register_queue(struct gendisk *disk) if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), + ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), "%s", "queue"); if (ret < 0) return ret; @@ -339,6 +339,6 @@ void blk_unregister_queue(struct gendisk *disk) kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); } } diff --git a/block/genhd.c b/block/genhd.c index 67e5a59ced2a..0a2f16bd54b7 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -59,7 +59,7 @@ struct hd_struct *disk_get_part(struct gendisk *disk, int partno) rcu_read_lock(); part = rcu_dereference(disk->__part[partno - 1]); if (part) - get_device(&part->dev); + get_device(part_to_dev(part)); rcu_read_unlock(); return part; @@ -130,7 +130,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) continue; - get_device(&part->dev); + get_device(part_to_dev(part)); piter->part = part; piter->idx += inc; break; @@ -435,7 +435,7 @@ static struct kobject *exact_match(dev_t devt, int *partno, void *data) { struct gendisk *p = data; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int exact_lock(dev_t devt, void *data) @@ -460,7 +460,7 @@ void add_disk(struct gendisk *disk) int retval; disk->flags |= GENHD_FL_UP; - disk->dev.devt = MKDEV(disk->major, disk->first_minor); + disk_to_dev(disk)->devt = MKDEV(disk->major, disk->first_minor); blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); @@ -468,7 +468,8 @@ void add_disk(struct gendisk *disk) bdi = &disk->queue->backing_dev_info; bdi_register_dev(bdi, disk_devt(disk)); - retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); + retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, + "bdi"); WARN_ON(retval); } @@ -477,7 +478,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ void unlink_gendisk(struct gendisk *disk) { - sysfs_remove_link(&disk->dev.kobj, "bdi"); + sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); blk_unregister_region(disk_devt(disk), disk->minors); @@ -903,7 +904,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) int cpu; /* - if (&gp->dev.kobj.entry == block_class.devices.next) + if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) seq_puts(seqf, "major minor name" " rio rmerge rsect ruse wio wmerge " "wsect wuse running use aveq" @@ -972,7 +973,7 @@ static void media_change_notify_thread(struct work_struct *work) * set enviroment vars to indicate which event this is for * so that user space will know to go check the media status. */ - kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp); + kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); put_device(gd->driverfs_dev); } @@ -1062,9 +1063,9 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) disk->minors = minors; disk->ext_minors = ext_minors; rand_initialize_disk(disk); - disk->dev.class = &block_class; - disk->dev.type = &disk_type; - device_initialize(&disk->dev); + disk_to_dev(disk)->class = &block_class; + disk_to_dev(disk)->type = &disk_type; + device_initialize(disk_to_dev(disk)); INIT_WORK(&disk->async_notify, media_change_notify_thread); } @@ -1086,7 +1087,7 @@ struct kobject *get_disk(struct gendisk *disk) owner = disk->fops->owner; if (owner && !try_module_get(owner)) return NULL; - kobj = kobject_get(&disk->dev.kobj); + kobj = kobject_get(&disk_to_dev(disk)->kobj); if (kobj == NULL) { module_put(owner); return NULL; @@ -1100,7 +1101,7 @@ EXPORT_SYMBOL(get_disk); void put_disk(struct gendisk *disk) { if (disk) - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); } EXPORT_SYMBOL(put_disk); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 0c39782b2660..3edb6cb7d68f 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -109,12 +109,12 @@ static const struct attribute_group attr_group = { static int aoedisk_add_sysfs(struct aoedev *d) { - return sysfs_create_group(&d->gd->dev.kobj, &attr_group); + return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group); } void aoedisk_rm_sysfs(struct aoedev *d) { - sysfs_remove_group(&d->gd->dev.kobj, &attr_group); + sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group); } static int diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1778e4a2c672..7b3351260d56 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -403,7 +403,7 @@ static int nbd_do_it(struct nbd_device *lo) BUG_ON(lo->magic != LO_MAGIC); lo->pid = current->pid; - ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr); + ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); if (ret) { printk(KERN_ERR "nbd: sysfs_create_file failed!"); return ret; @@ -412,7 +412,7 @@ static int nbd_do_it(struct nbd_device *lo) while ((req = nbd_read_stat(lo)) != NULL) nbd_end_request(req); - sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr); + sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); return 0; } diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index a51a30e9eab3..70aa86c8807e 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1188,7 +1188,7 @@ static struct kobject *exact_match(dev_t dev, int *part, void *data) { struct gendisk *p = data; *part &= (1 << PARTN_BITS) - 1; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int exact_lock(dev_t dev, void *data) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 653624792eaf..637806695bb9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1186,7 +1186,7 @@ static void event_callback(void *context) list_splice_init(&md->uevent_list, &uevents); spin_unlock_irqrestore(&md->uevent_lock, flags); - dm_send_uevents(&uevents, &md->disk->dev.kobj); + dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); wake_up(&md->eventq); @@ -1643,7 +1643,7 @@ out: *---------------------------------------------------------------*/ void dm_kobject_uevent(struct mapped_device *md) { - kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); } uint32_t dm_next_uevent_seq(struct mapped_device *md) diff --git a/drivers/md/md.c b/drivers/md/md.c index deeac4b44173..96e9fccd2eab 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1465,9 +1465,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) goto fail; if (rdev->bdev->bd_part) - ko = &rdev->bdev->bd_part->dev.kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; else - ko = &rdev->bdev->bd_disk->dev.kobj; + ko = &disk_to_dev(rdev->bdev->bd_disk)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; @@ -3470,8 +3470,8 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->queue = mddev->queue; add_disk(disk); mddev->gendisk = disk; - error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, - "%s", "md"); + error = kobject_init_and_add(&mddev->kobj, &md_ktype, + &disk_to_dev(disk)->kobj, "%s", "md"); mutex_unlock(&disks_mutex); if (error) printk(KERN_WARNING "md: cannot register %s/md - name in use\n", @@ -3761,7 +3761,7 @@ static int do_md_run(mddev_t * mddev) sysfs_notify(&mddev->kobj, NULL, "array_state"); sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify(&mddev->kobj, NULL, "degraded"); - kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 2f2873b9a041..a02df22f37c3 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -543,9 +543,9 @@ EXPORT_SYMBOL(bd_release); static struct kobject *bdev_get_kobj(struct block_device *bdev) { if (bdev->bd_contains != bdev) - return kobject_get(&bdev->bd_part->dev.kobj); + return kobject_get(&part_to_dev(bdev->bd_part)->kobj); else - return kobject_get(&bdev->bd_disk->dev.kobj); + return kobject_get(&disk_to_dev(bdev->bd_disk)->kobj); } static struct kobject *bdev_get_holder(struct block_device *bdev) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0d4b7f28f13f..ac0df3acdcda 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -309,7 +309,7 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) { struct kobject *k; - k = kobject_get(&disk->dev.kobj); + k = kobject_get(&disk_to_dev(disk)->kobj); disk->holder_dir = kobject_create_and_add("holders", k); disk->slave_dir = kobject_create_and_add("slaves", k); kobject_put(k); @@ -322,7 +322,7 @@ static void delete_partition_rcu_cb(struct rcu_head *head) part->start_sect = 0; part->nr_sects = 0; part_stat_set_all(part, 0); - put_device(&part->dev); + put_device(part_to_dev(part)); } void delete_partition(struct gendisk *disk, int partno) @@ -336,7 +336,7 @@ void delete_partition(struct gendisk *disk, int partno) blk_free_devt(part_devt(part)); rcu_assign_pointer(disk->__part[partno-1], NULL); kobject_put(part->holder_dir); - device_del(&part->dev); + device_del(part_to_dev(part)); call_rcu(&part->rcu_head, delete_partition_rcu_cb); } @@ -354,6 +354,9 @@ int add_partition(struct gendisk *disk, int partno, { struct hd_struct *p; dev_t devt = MKDEV(0, 0); + struct device *ddev = disk_to_dev(disk); + struct device *pdev; + const char *dname; int err; if (disk->__part[partno - 1]) @@ -367,42 +370,43 @@ int add_partition(struct gendisk *disk, int partno, err = -ENOMEM; goto out_free; } + pdev = part_to_dev(p); + p->start_sect = start; p->nr_sects = len; p->partno = partno; p->policy = disk->policy; - if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) - snprintf(p->dev.bus_id, BUS_ID_SIZE, - "%sp%d", disk->dev.bus_id, partno); + dname = dev_name(ddev); + if (isdigit(dname[strlen(dname) - 1])) + snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); else - snprintf(p->dev.bus_id, BUS_ID_SIZE, - "%s%d", disk->dev.bus_id, partno); + snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); - device_initialize(&p->dev); - p->dev.class = &block_class; - p->dev.type = &part_type; - p->dev.parent = &disk->dev; + device_initialize(pdev); + pdev->class = &block_class; + pdev->type = &part_type; + pdev->parent = ddev; err = blk_alloc_devt(p, &devt); if (err) - goto out_put; - p->dev.devt = devt; + goto out_free; + pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ - p->dev.uevent_suppress = 1; - err = device_add(&p->dev); + pdev->uevent_suppress = 1; + err = device_add(pdev); if (err) goto out_put; err = -ENOMEM; - p->holder_dir = kobject_create_and_add("holders", &p->dev.kobj); + p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); if (!p->holder_dir) goto out_del; - p->dev.uevent_suppress = 0; + pdev->uevent_suppress = 0; if (flags & ADDPART_FLAG_WHOLEDISK) { - err = device_create_file(&p->dev, &dev_attr_whole_disk); + err = device_create_file(pdev, &dev_attr_whole_disk); if (err) goto out_del; } @@ -412,8 +416,8 @@ int add_partition(struct gendisk *disk, int partno, rcu_assign_pointer(disk->__part[partno - 1], p); /* suppress uevent if the disk supresses it */ - if (!disk->dev.uevent_suppress) - kobject_uevent(&p->dev.kobj, KOBJ_ADD); + if (!ddev->uevent_suppress) + kobject_uevent(&pdev->kobj, KOBJ_ADD); return 0; @@ -422,9 +426,9 @@ out_free: return err; out_del: kobject_put(p->holder_dir); - device_del(&p->dev); + device_del(pdev); out_put: - put_device(&p->dev); + put_device(pdev); blk_free_devt(devt); return err; } @@ -432,30 +436,31 @@ out_put: /* Not exported, helper to add_disk(). */ void register_disk(struct gendisk *disk) { + struct device *ddev = disk_to_dev(disk); struct block_device *bdev; struct disk_part_iter piter; struct hd_struct *part; char *s; int err; - disk->dev.parent = disk->driverfs_dev; + ddev->parent = disk->driverfs_dev; - strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); + strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); /* ewww... some of these buggers have / in the name... */ - s = strchr(disk->dev.bus_id, '/'); + s = strchr(ddev->bus_id, '/'); if (s) *s = '!'; /* delay uevents, until we scanned partition table */ - disk->dev.uevent_suppress = 1; + ddev->uevent_suppress = 1; - if (device_add(&disk->dev)) + if (device_add(ddev)) return; #ifndef CONFIG_SYSFS_DEPRECATED - err = sysfs_create_link(block_depr, &disk->dev.kobj, - kobject_name(&disk->dev.kobj)); + err = sysfs_create_link(block_depr, &ddev->kobj, + kobject_name(&ddev->kobj)); if (err) { - device_del(&disk->dev); + device_del(ddev); return; } #endif @@ -481,13 +486,13 @@ void register_disk(struct gendisk *disk) exit: /* announce disk after possible partitions are created */ - disk->dev.uevent_suppress = 0; - kobject_uevent(&disk->dev.kobj, KOBJ_ADD); + ddev->uevent_suppress = 0; + kobject_uevent(&ddev->kobj, KOBJ_ADD); /* announce possible partitions */ disk_part_iter_init(&piter, disk, 0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part->dev.kobj, KOBJ_ADD); + kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); disk_part_iter_exit(&piter); } @@ -518,7 +523,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) return -EIO; /* tell userspace that the media / partition table may have changed */ - kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); for (p = 1; p < state->limit; p++) { sector_t size = state->parts[p].size; @@ -591,7 +596,7 @@ void del_gendisk(struct gendisk *disk) kobject_put(disk->slave_dir); disk->driverfs_dev = NULL; #ifndef CONFIG_SYSFS_DEPRECATED - sysfs_remove_link(block_depr, disk->dev.bus_id); + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); #endif - device_del(&disk->dev); + device_del(disk_to_dev(disk)); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fc532424062..e4e18c509ac5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -15,9 +15,11 @@ #ifdef CONFIG_BLOCK -#define kobj_to_dev(k) container_of(k, struct device, kobj) -#define dev_to_disk(device) container_of(device, struct gendisk, dev) -#define dev_to_part(device) container_of(device, struct hd_struct, dev) +#define kobj_to_dev(k) container_of((k), struct device, kobj) +#define dev_to_disk(device) container_of((device), struct gendisk, __dev) +#define dev_to_part(device) container_of((device), struct hd_struct, __dev) +#define disk_to_dev(disk) (&((disk)->__dev)) +#define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; extern struct kobject *block_depr; @@ -88,7 +90,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; - struct device dev; + struct device __dev; struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -139,7 +141,7 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct device dev; + struct device __dev; struct kobject *holder_dir; struct kobject *slave_dir; @@ -163,7 +165,7 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { if (likely(part)) - return dev_to_disk((part)->dev.parent); + return dev_to_disk(part_to_dev(part)->parent); return NULL; } @@ -174,12 +176,12 @@ static inline int disk_max_parts(struct gendisk *disk) static inline dev_t disk_devt(struct gendisk *disk) { - return disk->dev.devt; + return disk_to_dev(disk)->devt; } static inline dev_t part_devt(struct hd_struct *part) { - return part->dev.devt; + return part_to_dev(part)->devt; } extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); @@ -187,7 +189,7 @@ extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) { if (likely(part)) - put_device(&part->dev); + put_device(part_to_dev(part)); } /* -- cgit v1.2.3 From 80795aefb76d10c5d698e60c7e7750b5330787da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:07 +0900 Subject: block: move capacity from disk to part0 Move disk->capacity to part0->nr_sects and convert all users who directly accessed the field to use {get|set}_capacity(). This is done early to allow the __dev field to be moved. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 2 +- drivers/block/aoe/aoecmd.c | 4 ++-- drivers/block/aoe/aoedev.c | 2 +- fs/partitions/check.c | 2 +- include/linux/genhd.h | 5 ++--- 5 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 3edb6cb7d68f..aa69556c3485 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -276,7 +276,7 @@ aoeblk_gdalloc(void *vp) gd->first_minor = d->sysminor * AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - gd->capacity = d->ssize; + set_capacity(gd, d->ssize); snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 17eed8c025d0..934800f979c9 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -645,7 +645,7 @@ aoecmd_sleepwork(struct work_struct *work) unsigned long flags; u64 ssize; - ssize = d->gd->capacity; + ssize = get_capacity(d->gd); bd = bdget_disk(d->gd, 0); if (bd) { @@ -707,7 +707,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) return; if (d->gd != NULL) { - d->gd->capacity = ssize; + set_capacity(d->gd, ssize); d->flags |= DEVFL_NEWSIZE; } else d->flags |= DEVFL_GDALLOC; diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index a1d813ab0d6b..6a8038d115b5 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -91,7 +91,7 @@ aoedev_downdev(struct aoedev *d) } if (d->gd) - d->gd->capacity = 0; + set_capacity(d->gd, 0); d->flags &= ~DEVFL_UP; } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index b60699c271ac..902b95f1f9d5 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -586,7 +586,7 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_exit(&piter); invalidate_partition(disk, 0); - disk->capacity = 0; + set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; unlink_gendisk(disk); disk_stat_set_all(disk, 0); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9e866a2aee50..1cf828148ec6 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -138,7 +138,6 @@ struct gendisk { struct block_device_operations *fops; struct request_queue *queue; void *private_data; - sector_t capacity; int flags; struct device *driverfs_dev; // FIXME: remove @@ -411,11 +410,11 @@ static inline sector_t get_start_sect(struct block_device *bdev) } static inline sector_t get_capacity(struct gendisk *disk) { - return disk->capacity; + return disk->part0.nr_sects; } static inline void set_capacity(struct gendisk *disk, sector_t size) { - disk->capacity = size; + disk->part0.nr_sects = size; } #ifdef CONFIG_SOLARIS_X86_PARTITION -- cgit v1.2.3 From 074a7aca7afa6f230104e8e65eba3420263714a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:14 +0900 Subject: block: move stats from disk to part0 Move stats related fields - stamp, in_flight, dkstats - from disk to part0 and unify stat handling such that... * part_stat_*() now updates part0 together if the specified partition is not part0. ie. part_stat_*() are now essentially all_stat_*(). * {disk|all}_stat_*() are gone. * part_round_stats() is updated similary. It handles part0 stats automatically and disk_round_stats() is killed. * part_{inc|dec}_in_fligh() is implemented which automatically updates part0 stats for parts other than part0. * disk_map_sector_rcu() is updated to return part0 if no part matches. Combined with the above changes, this makes NULL special case handling in callers unnecessary. * Separate stats show code paths for disk are collapsed into part stats show code paths. * Rename disk_stat_lock/unlock() to part_stat_lock/unlock() While at it, reposition stat handling macros a bit and add missing parentheses around macro parameters. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 84 ++++++++++-------------- block/blk-merge.c | 12 ++-- block/genhd.c | 97 +++++++-------------------- drivers/block/aoe/aoecmd.c | 12 ++-- drivers/md/dm.c | 27 ++++---- drivers/md/linear.c | 9 +-- drivers/md/md.c | 4 +- drivers/md/multipath.c | 9 +-- drivers/md/raid0.c | 9 +-- drivers/md/raid1.c | 9 +-- drivers/md/raid10.c | 9 +-- drivers/md/raid5.c | 9 +-- fs/partitions/check.c | 12 ++-- include/linux/genhd.h | 159 +++++++++++++-------------------------------- 14 files changed, 165 insertions(+), 296 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-core.c b/block/blk-core.c index 505ec61067df..98138f002524 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -61,21 +61,17 @@ static void drive_stat_acct(struct request *rq, int new_io) if (!blk_fs_request(rq) || !rq->rq_disk) return; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(rq->rq_disk, rq->sector); if (!new_io) - all_stat_inc(cpu, rq->rq_disk, part, merges[rw], rq->sector); + part_stat_inc(cpu, part, merges[rw]); else { - disk_round_stats(cpu, rq->rq_disk); - rq->rq_disk->in_flight++; - if (part) { - part_round_stats(cpu, part); - part->in_flight++; - } + part_round_stats(cpu, part); + part_inc_in_flight(part); } - disk_stat_unlock(); + part_stat_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -983,8 +979,22 @@ static inline void add_request(struct request_queue *q, struct request *req) __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); } -/* - * disk_round_stats() - Round off the performance stats on a struct +static void part_round_stats_single(int cpu, struct hd_struct *part, + unsigned long now) +{ + if (now == part->stamp) + return; + + if (part->in_flight) { + __part_stat_add(cpu, part, time_in_queue, + part->in_flight * (now - part->stamp)); + __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); + } + part->stamp = now; +} + +/** + * part_round_stats() - Round off the performance stats on a struct * disk_stats. * * The average IO queue length and utilisation statistics are maintained @@ -998,36 +1008,15 @@ static inline void add_request(struct request_queue *q, struct request *req) * /proc/diskstats. This accounts immediately for all queue usage up to * the current jiffies and restarts the counters again. */ -void disk_round_stats(int cpu, struct gendisk *disk) -{ - unsigned long now = jiffies; - - if (now == disk->stamp) - return; - - if (disk->in_flight) { - disk_stat_add(cpu, disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); - disk_stat_add(cpu, disk, io_ticks, (now - disk->stamp)); - } - disk->stamp = now; -} -EXPORT_SYMBOL_GPL(disk_round_stats); - void part_round_stats(int cpu, struct hd_struct *part) { unsigned long now = jiffies; - if (now == part->stamp) - return; - - if (part->in_flight) { - part_stat_add(cpu, part, time_in_queue, - part->in_flight * (now - part->stamp)); - part_stat_add(cpu, part, io_ticks, (now - part->stamp)); - } - part->stamp = now; + if (part->partno) + part_round_stats_single(cpu, &part_to_disk(part)->part0, now); + part_round_stats_single(cpu, part, now); } +EXPORT_SYMBOL_GPL(part_round_stats); /* * queue lock must be held @@ -1567,11 +1556,10 @@ static int __end_that_request_first(struct request *req, int error, struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - all_stat_add(cpu, req->rq_disk, part, sectors[rw], - nr_bytes >> 9, req->sector); - disk_stat_unlock(); + part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); + part_stat_unlock(); } total_bytes = bio_nbytes = 0; @@ -1758,19 +1746,15 @@ static void end_that_request_last(struct request *req, int error) struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(disk, req->sector); - all_stat_inc(cpu, disk, part, ios[rw], req->sector); - all_stat_add(cpu, disk, part, ticks[rw], duration, req->sector); - disk_round_stats(cpu, disk); - disk->in_flight--; - if (part) { - part_round_stats(cpu, part); - part->in_flight--; - } + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_round_stats(cpu, part); + part_dec_in_flight(part); - disk_stat_unlock(); + part_stat_unlock(); } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index d926a24bf1fd..c77196d55899 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -390,17 +390,13 @@ static int attempt_merge(struct request_queue *q, struct request *req, struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - disk_round_stats(cpu, req->rq_disk); - req->rq_disk->in_flight--; - if (part) { - part_round_stats(cpu, part); - part->in_flight--; - } + part_round_stats(cpu, part); + part_dec_in_flight(part); - disk_stat_unlock(); + part_stat_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index 06a252f2b967..e1cb96fb883e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); * while preemption is disabled. * * RETURNS: - * Found partition on success, NULL if there's no matching partition. + * Found partition on success, part0 is returned if no partition matches */ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) { @@ -189,7 +189,7 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) sector < part->start_sect + part->nr_sects) return part; } - return NULL; + return &disk->part0; } EXPORT_SYMBOL_GPL(disk_map_sector_rcu); @@ -580,24 +580,24 @@ void __init printk_all_partitions(void) * numbers in hex - the same format as the root= * option takes. */ - printk("%s %10llu %s", - bdevt_str(disk_devt(disk), devt_buf), - (unsigned long long)get_capacity(disk) >> 1, - disk_name(disk, 0, name_buf)); - if (disk->driverfs_dev != NULL && - disk->driverfs_dev->driver != NULL) - printk(" driver: %s\n", - disk->driverfs_dev->driver->name); - else - printk(" (driver?)\n"); + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); + while ((part = disk_part_iter_next(&piter))) { + bool is_part0 = part == &disk->part0; - /* now show the partitions */ - disk_part_iter_init(&piter, disk, 0); - while ((part = disk_part_iter_next(&piter))) - printk(" %s %10llu %s\n", + printk("%s%s %10llu %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), (unsigned long long)part->nr_sects >> 1, disk_name(disk, part->partno, name_buf)); + if (is_part0) { + if (disk->driverfs_dev != NULL && + disk->driverfs_dev->driver != NULL) + printk(" driver: %s\n", + disk->driverfs_dev->driver->name); + else + printk(" (driver?)\n"); + } else + printk("\n"); + } disk_part_iter_exit(&piter); } class_dev_iter_exit(&iter); @@ -674,12 +674,7 @@ static int show_partition(struct seq_file *seqf, void *v) return 0; /* show the full disk and all non-0 size partitions of it */ - seq_printf(seqf, "%4d %7d %10llu %s\n", - MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), - (unsigned long long)get_capacity(sgp) >> 1, - disk_name(sgp, 0, buf)); - - disk_part_iter_init(&piter, sgp, 0); + disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %7d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), @@ -768,40 +763,13 @@ static ssize_t disk_capability_show(struct device *dev, return sprintf(buf, "%x\n", disk->flags); } -static ssize_t disk_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gendisk *disk = dev_to_disk(dev); - int cpu; - - cpu = disk_stat_lock(); - disk_round_stats(cpu, disk); - disk_stat_unlock(); - return sprintf(buf, - "%8lu %8lu %8llu %8u " - "%8lu %8lu %8llu %8u " - "%8u %8u %8u" - "\n", - disk_stat_read(disk, ios[READ]), - disk_stat_read(disk, merges[READ]), - (unsigned long long)disk_stat_read(disk, sectors[READ]), - jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), - disk_stat_read(disk, ios[WRITE]), - disk_stat_read(disk, merges[WRITE]), - (unsigned long long)disk_stat_read(disk, sectors[WRITE]), - jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), - disk->in_flight, - jiffies_to_msecs(disk_stat_read(disk, io_ticks)), - jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); -} - static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); -static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); +static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -836,7 +804,7 @@ static void disk_release(struct device *dev) kfree(disk->random); kfree(disk->__part); - free_disk_stats(disk); + free_part_stats(&disk->part0); kfree(disk); } struct class block_class = { @@ -873,28 +841,11 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - cpu = disk_stat_lock(); - disk_round_stats(cpu, gp); - disk_stat_unlock(); - seq_printf(seqf, "%4d %7d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), - disk_name(gp, 0, buf), - disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), - (unsigned long long)disk_stat_read(gp, sectors[0]), - jiffies_to_msecs(disk_stat_read(gp, ticks[0])), - disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]), - (unsigned long long)disk_stat_read(gp, sectors[1]), - jiffies_to_msecs(disk_stat_read(gp, ticks[1])), - gp->in_flight, - jiffies_to_msecs(disk_stat_read(gp, io_ticks)), - jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); - - /* now show all non-0 size partitions of it */ - disk_part_iter_init(&piter, gp, 0); + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); while ((hd = disk_part_iter_next(&piter))) { - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part_round_stats(cpu, hd); - disk_stat_unlock(); + part_stat_unlock(); seq_printf(seqf, "%4d %7d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), @@ -1000,7 +951,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) int tot_minors = minors + ext_minors; int size = tot_minors * sizeof(struct hd_struct *); - if (!init_disk_stats(disk)) { + if (!init_part_stats(&disk->part0)) { kfree(disk); return NULL; } @@ -1008,7 +959,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) disk->__part = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, node_id); if (!disk->__part) { - free_disk_stats(disk); + free_part_stats(&disk->part0); kfree(disk); return NULL; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 934800f979c9..961d29a53cab 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -758,15 +758,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(cpu, disk, part, ios[rw], sector); - all_stat_add(cpu, disk, part, ticks[rw], duration, sector); - all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); - all_stat_add(cpu, disk, part, io_ticks, duration, sector); + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_stat_add(cpu, part, sectors[rw], n_sect); + part_stat_add(cpu, part, io_ticks, duration); - disk_stat_unlock(); + part_stat_unlock(); } void diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 637806695bb9..327de03a5bdf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -381,10 +381,10 @@ static void start_io_acct(struct dm_io *io) io->start_time = jiffies; - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_unlock(); - dm_disk(md)->in_flight = atomic_inc_return(&md->pending); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_unlock(); + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); } static int end_io_acct(struct dm_io *io) @@ -395,12 +395,13 @@ static int end_io_acct(struct dm_io *io) int pending, cpu; int rw = bio_data_dir(bio); - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); + part_stat_unlock(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + dm_disk(md)->part0.in_flight = pending = + atomic_dec_return(&md->pending); return !pending; } @@ -899,10 +900,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, dm_disk(md), ios[rw]); - disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); + part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); /* * If we're suspended we have to queue diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 00cbc8e47294..c80ea90593d3 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -325,10 +325,11 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) return 0; } - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; diff --git a/drivers/md/md.c b/drivers/md/md.c index 2bd9cf416123..0a3a4bdcd4af 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5546,8 +5546,8 @@ static int is_mddev_idle(mddev_t *mddev) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = disk_stat_read(disk, sectors[0]) + - disk_stat_read(disk, sectors[1]) - + curr_events = part_stat_read(&disk->part0, sectors[0]) + + part_stat_read(&disk->part0, sectors[1]) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 182f5a94cdc5..8bb8794129b3 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -159,10 +159,11 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e26030fa59ab..f52f442a735f 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -406,10 +406,11 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) return 0; } - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index babb13036f93..b9764429d856 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -804,10 +804,11 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); /* * make_request() can abort the operation when READA is being diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5ec80da0a9d7..5f990133f5ef 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -844,10 +844,11 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5899f211515f..ae16794bef20 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3396,10 +3396,11 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bi)); + part_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 60592d9f43b6..f517869e8d10 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -210,15 +210,15 @@ ssize_t part_size_show(struct device *dev, return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); } -static ssize_t part_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part_round_stats(cpu, p); - disk_stat_unlock(); + part_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -575,8 +575,8 @@ void del_gendisk(struct gendisk *disk) set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; unlink_gendisk(disk); - disk_stat_set_all(disk, 0); - disk->stamp = 0; + part_stat_set_all(&disk->part0, 0); + disk->part0.stamp = 0; kobject_put(disk->part0.holder_dir); kobject_put(disk->slave_dir); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3d15b42dc352..c90e1b4fbe5a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,13 +145,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ - unsigned long stamp; - int in_flight; -#ifdef CONFIG_SMP - struct disk_stats *dkstats; -#else - struct disk_stats dkstats; -#endif struct work_struct async_notify; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; @@ -232,46 +225,18 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, * internal use only. */ #ifdef CONFIG_SMP -#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) - -#define disk_stat_read(gendiskp, field) \ -({ \ - typeof(gendiskp->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ - res; \ -}) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) -{ - int i; - - for_each_possible_cpu(i) - memset(per_cpu_ptr(gendiskp->dkstats, i), value, - sizeof(struct disk_stats)); -} +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define part_stat_add(cpu, part, field, addnd) \ - (per_cpu_ptr(part->dkstats, cpu)->field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) +#define __part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) #define part_stat_read(part, field) \ ({ \ - typeof(part->dkstats->field) res = 0; \ + typeof((part)->dkstats->field) res = 0; \ int i; \ for_each_possible_cpu(i) \ - res += per_cpu_ptr(part->dkstats, i)->field; \ + res += per_cpu_ptr((part)->dkstats, i)->field; \ res; \ }) @@ -284,109 +249,73 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) sizeof(struct disk_stats)); } -#else /* !CONFIG_SMP */ -#define disk_stat_lock() ({ rcu_read_lock(); 0; }) -#define disk_stat_unlock() rcu_read_unlock() - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) -#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +static inline int init_part_stats(struct hd_struct *part) { - memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; } -#define part_stat_add(cpu, part, field, addnd) \ - (part->dkstats.field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) - -#define part_stat_read(part, field) (part->dkstats.field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void free_part_stats(struct hd_struct *part) { - memset(&part->dkstats, value, sizeof(struct disk_stats)); + free_percpu(part->dkstats); } -#endif /* CONFIG_SMP */ - -#define disk_stat_dec(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, -1) -#define disk_stat_inc(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, 1) -#define disk_stat_sub(cpu, gendiskp, field, subnd) \ - disk_stat_add(cpu, gendiskp, field, -subnd) - -#define part_stat_dec(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, -1) -#define part_stat_inc(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, 1) -#define part_stat_sub(cpu, gendiskp, field, subnd) \ - part_stat_add(cpu, gendiskp, field, -subnd) +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() -#define all_stat_dec(cpu, gendiskp, field, sector) \ - all_stat_add(cpu, gendiskp, field, -1, sector) -#define all_stat_inc(cpu, gendiskp, part, field, sector) \ - all_stat_add(cpu, gendiskp, part, field, 1, sector) -#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ - all_stat_add(cpu, gendiskp, part, field, -subnd, sector) +#define __part_stat_add(cpu, part, field, addnd) \ + ((part)->dkstats.field += addnd) -/* Inlines to alloc and free disk stats in struct gendisk */ -#ifdef CONFIG_SMP -static inline int init_disk_stats(struct gendisk *disk) -{ - disk->dkstats = alloc_percpu(struct disk_stats); - if (!disk->dkstats) - return 0; - return 1; -} +#define part_stat_read(part, field) ((part)->dkstats.field) -static inline void free_disk_stats(struct gendisk *disk) +static inline void part_stat_set_all(struct hd_struct *part, int value) { - free_percpu(disk->dkstats); + memset(&part->dkstats, value, sizeof(struct disk_stats)); } static inline int init_part_stats(struct hd_struct *part) { - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; return 1; } static inline void free_part_stats(struct hd_struct *part) { - free_percpu(part->dkstats); } -#else /* CONFIG_SMP */ -static inline int init_disk_stats(struct gendisk *disk) -{ - return 1; -} +#endif /* CONFIG_SMP */ -static inline void free_disk_stats(struct gendisk *disk) -{ -} +#define part_stat_add(cpu, part, field, addnd) do { \ + __part_stat_add((cpu), (part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add((cpu), &part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) -static inline int init_part_stats(struct hd_struct *part) +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +static inline void part_inc_in_flight(struct hd_struct *part) { - return 1; + part->in_flight++; + if (part->partno) + part_to_disk(part)->part0.in_flight++; } -static inline void free_part_stats(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part) { + part->in_flight--; + if (part->partno) + part_to_disk(part)->part0.in_flight--; } -#endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(int cpu, struct gendisk *disk); extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ @@ -595,6 +524,8 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3 From 9246b5f06deeea541e7c62437c2ad19a0b1172c0 Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Wed, 17 Sep 2008 14:30:32 -0700 Subject: block: Expand Xen blkfront for > 16 xvd Until recently, the maximum number of xvd block devices you could attach to a Xen domU was 16. This limitation turned out to be problematic for some users, so it was expanded to handle a much larger number of disks. However, this requires a couple of changes in the way that blkfront scans for disks. This functionality is already present in the Xen linux-2.6.18-xen.hg tree; the attached patch adds this functionality to the mainline xen-blkfront implementation. I successfully tested it on a 2.6.25 tree, and build tested it on 2.6.27-rc3. Signed-off-by: Chris Lalancette Acked-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 76 ++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 21 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3ca643cafccd..bff602ccccf3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -105,15 +105,17 @@ static DEFINE_SPINLOCK(blkif_io_lock); #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 +#define PARTS_PER_EXT_DISK 256 #define BLKIF_MAJOR(dev) ((dev)>>8) #define BLKIF_MINOR(dev) ((dev) & 0xff) -#define DEV_NAME "xvd" /* name in /dev */ +#define EXT_SHIFT 28 +#define EXTENDED (1<gd != NULL); BUG_ON(info->rq != NULL); - if ((minor % PARTS_PER_DISK) == 0) - nr_minors = PARTS_PER_DISK; + if ((info->vdevice>>EXT_SHIFT) > 1) { + /* this is above the extended range; something is wrong */ + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); + return -ENODEV; + } + + if (!VDEV_IS_EXTENDED(info->vdevice)) { + minor = BLKIF_MINOR(info->vdevice); + nr_parts = PARTS_PER_DISK; + } else { + minor = BLKIF_MINOR_EXT(info->vdevice); + nr_parts = PARTS_PER_EXT_DISK; + } + + if ((minor % nr_parts) == 0) + nr_minors = nr_parts; gd = alloc_disk(nr_minors); if (gd == NULL) goto out; - if (nr_minors > 1) - sprintf(gd->disk_name, "%s%c", DEV_NAME, - 'a' + minor / PARTS_PER_DISK); - else - sprintf(gd->disk_name, "%s%c%d", DEV_NAME, - 'a' + minor / PARTS_PER_DISK, - minor % PARTS_PER_DISK); + offset = minor / nr_parts; + + if (nr_minors > 1) { + if (offset < 26) + sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); + else + sprintf(gd->disk_name, "%s%c%c", DEV_NAME, + 'a' + ((offset / 26)-1), 'a' + (offset % 26)); + } else { + if (offset < 26) + sprintf(gd->disk_name, "%s%c%d", DEV_NAME, + 'a' + offset, + minor & (nr_parts - 1)); + else + sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, + 'a' + ((offset / 26) - 1), + 'a' + (offset % 26), + minor & (nr_parts - 1)); + } gd->major = XENVBD_MAJOR; gd->first_minor = minor; @@ -699,8 +730,13 @@ static int blkfront_probe(struct xenbus_device *dev, err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device", "%i", &vdevice); if (err != 1) { - xenbus_dev_fatal(dev, err, "reading virtual-device"); - return err; + /* go looking in the extended area instead */ + err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", + "%i", &vdevice); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading virtual-device"); + return err; + } } info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -861,9 +897,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) info->feature_barrier = 0; - err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), - sectors, info->vdevice, - binfo, sector_size, info); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); -- cgit v1.2.3 From 905bd78f2188da69e74966918e3d71df3dff382b Mon Sep 17 00:00:00 2001 From: "scameron@beardog.cca.cpqcorp.net" Date: Fri, 19 Sep 2008 18:27:47 -0700 Subject: cciss: Fix cciss SCSI rescan code to better notice device changes Fix cciss SCSI rescan code to better notice device changes. If you hot-unplug a tape drive, then hot-plug a different tape drive into the same slot in a storage enclosure, the cciss driver wouldn't notice anything had changed, as it was only looking at the LUN address and device type. Now it looks at the inquiry page 0x83 device identifier, and vendor and model strings as well. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss_scsi.c | 151 +++++++++++++++++++++++++++++---------------- drivers/block/cciss_scsi.h | 4 ++ 2 files changed, 102 insertions(+), 53 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index e1233aabda77..a3fd87b41444 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -365,7 +365,7 @@ struct scsi2map { static int cciss_scsi_add_entry(int ctlr, int hostno, - unsigned char *scsi3addr, int devtype, + struct cciss_scsi_dev_t *device, struct scsi2map *added, int *nadded) { /* assumes hba[ctlr]->scsi_ctlr->lock is held */ @@ -384,12 +384,12 @@ cciss_scsi_add_entry(int ctlr, int hostno, lun = 0; /* Is this device a non-zero lun of a multi-lun device */ /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */ - if (scsi3addr[4] != 0) { + if (device->scsi3addr[4] != 0) { /* Search through our list and find the device which */ /* has the same 8 byte LUN address, excepting byte 4. */ /* Assign the same bus and target for this new LUN. */ /* Use the logical unit number from the firmware. */ - memcpy(addr1, scsi3addr, 8); + memcpy(addr1, device->scsi3addr, 8); addr1[4] = 0; for (i = 0; i < n; i++) { sd = &ccissscsi[ctlr].dev[i]; @@ -399,7 +399,7 @@ cciss_scsi_add_entry(int ctlr, int hostno, if (memcmp(addr1, addr2, 8) == 0) { bus = sd->bus; target = sd->target; - lun = scsi3addr[4]; + lun = device->scsi3addr[4]; break; } } @@ -420,8 +420,12 @@ cciss_scsi_add_entry(int ctlr, int hostno, added[*nadded].lun = sd->lun; (*nadded)++; - memcpy(&sd->scsi3addr[0], scsi3addr, 8); - sd->devtype = devtype; + memcpy(sd->scsi3addr, device->scsi3addr, 8); + memcpy(sd->vendor, device->vendor, sizeof(sd->vendor)); + memcpy(sd->revision, device->revision, sizeof(sd->revision)); + memcpy(sd->device_id, device->device_id, sizeof(sd->device_id)); + sd->devtype = device->devtype; + ccissscsi[ctlr].ndevices++; /* initially, (before registering with scsi layer) we don't @@ -487,6 +491,22 @@ static void fixup_botched_add(int ctlr, char *scsi3addr) CPQ_TAPE_UNLOCK(ctlr, flags); } +static int device_is_the_same(struct cciss_scsi_dev_t *dev1, + struct cciss_scsi_dev_t *dev2) +{ + return dev1->devtype == dev2->devtype && + memcmp(dev1->scsi3addr, dev2->scsi3addr, + sizeof(dev1->scsi3addr)) == 0 && + memcmp(dev1->device_id, dev2->device_id, + sizeof(dev1->device_id)) == 0 && + memcmp(dev1->vendor, dev2->vendor, + sizeof(dev1->vendor)) == 0 && + memcmp(dev1->model, dev2->model, + sizeof(dev1->model)) == 0 && + memcmp(dev1->revision, dev2->revision, + sizeof(dev1->revision)) == 0; +} + static int adjust_cciss_scsi_table(int ctlr, int hostno, struct cciss_scsi_dev_t sd[], int nsds) @@ -532,7 +552,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, for (j=0;jscsi3addr)) { - if (sd[j].devtype == csd->devtype) + if (device_is_the_same(&sd[j], csd)) found=2; else found=1; @@ -548,22 +568,26 @@ adjust_cciss_scsi_table(int ctlr, int hostno, cciss_scsi_remove_entry(ctlr, hostno, i, removed, &nremoved); /* remove ^^^, hence i not incremented */ - } - else if (found == 1) { /* device is different kind */ + } else if (found == 1) { /* device is different in some way */ changes++; - printk("cciss%d: device c%db%dt%dl%d type changed " - "(device type now %s).\n", - ctlr, hostno, csd->bus, csd->target, csd->lun, - scsi_device_type(csd->devtype)); + printk("cciss%d: device c%db%dt%dl%d has changed.\n", + ctlr, hostno, csd->bus, csd->target, csd->lun); cciss_scsi_remove_entry(ctlr, hostno, i, removed, &nremoved); /* remove ^^^, hence i not incremented */ - if (cciss_scsi_add_entry(ctlr, hostno, - &sd[j].scsi3addr[0], sd[j].devtype, + if (cciss_scsi_add_entry(ctlr, hostno, &sd[j], added, &nadded) != 0) /* we just removed one, so add can't fail. */ BUG(); csd->devtype = sd[j].devtype; + memcpy(csd->device_id, sd[j].device_id, + sizeof(csd->device_id)); + memcpy(csd->vendor, sd[j].vendor, + sizeof(csd->vendor)); + memcpy(csd->model, sd[j].model, + sizeof(csd->model)); + memcpy(csd->revision, sd[j].revision, + sizeof(csd->revision)); } else /* device is same as it ever was, */ i++; /* so just move along. */ } @@ -577,7 +601,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, csd = &ccissscsi[ctlr].dev[j]; if (SCSI3ADDR_EQ(sd[i].scsi3addr, csd->scsi3addr)) { - if (sd[i].devtype == csd->devtype) + if (device_is_the_same(&sd[i], csd)) found=2; /* found device */ else found=1; /* found a bug. */ @@ -586,16 +610,14 @@ adjust_cciss_scsi_table(int ctlr, int hostno, } if (!found) { changes++; - if (cciss_scsi_add_entry(ctlr, hostno, - - &sd[i].scsi3addr[0], sd[i].devtype, + if (cciss_scsi_add_entry(ctlr, hostno, &sd[i], added, &nadded) != 0) break; } else if (found == 1) { /* should never happen... */ changes++; - printk("cciss%d: device unexpectedly changed type\n", - ctlr); + printk(KERN_WARNING "cciss%d: device " + "unexpectedly changed\n", ctlr); /* but if it does happen, we just ignore that device */ } } @@ -1012,7 +1034,8 @@ cciss_scsi_interpret_error(CommandList_struct *cp) static int cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, - unsigned char *buf, unsigned char bufsize) + unsigned char page, unsigned char *buf, + unsigned char bufsize) { int rc; CommandList_struct *cp; @@ -1032,8 +1055,8 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, ei = cp->err_info; cdb[0] = CISS_INQUIRY; - cdb[1] = 0; - cdb[2] = 0; + cdb[1] = (page != 0); + cdb[2] = page; cdb[3] = 0; cdb[4] = bufsize; cdb[5] = 0; @@ -1053,6 +1076,25 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, return rc; } +/* Get the device id from inquiry page 0x83 */ +static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr, + unsigned char *device_id, int buflen) +{ + int rc; + unsigned char *buf; + + if (buflen > 16) + buflen = 16; + buf = kzalloc(64, GFP_KERNEL); + if (!buf) + return -1; + rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64); + if (rc == 0) + memcpy(device_id, &buf[8], buflen); + kfree(buf); + return rc != 0; +} + static int cciss_scsi_do_report_phys_luns(ctlr_info_t *c, ReportLunData_struct *buf, int bufsize) @@ -1142,25 +1184,21 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) ctlr_info_t *c; __u32 num_luns=0; unsigned char *ch; - /* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */ - struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; + struct cciss_scsi_dev_t *currentsd, *this_device; int ncurrent=0; int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8; int i; c = (ctlr_info_t *) hba[cntl_num]; ld_buff = kzalloc(reportlunsize, GFP_KERNEL); - if (ld_buff == NULL) { - printk(KERN_ERR "cciss: out of memory\n"); - return; - } inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL); - if (inq_buff == NULL) { - printk(KERN_ERR "cciss: out of memory\n"); - kfree(ld_buff); - return; + currentsd = kzalloc(sizeof(*currentsd) * + (CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL); + if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) { + printk(KERN_ERR "cciss: out of memory\n"); + goto out; } - + this_device = ¤tsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) { ch = &ld_buff->LUNListLength[0]; num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8; @@ -1179,23 +1217,34 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) /* adjust our table of devices */ - for(i=0; iLUN[i][3] & 0xC0) continue; memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE); memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8); - if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff, - (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { + if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff, + (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) /* Inquiry failed (msg printed already) */ - devtype = 0; /* so we will skip this device. */ - } else /* what kind of device is this? */ - devtype = (inq_buff[0] & 0x1f); - - switch (devtype) + continue; /* so we will skip this device. */ + + this_device->devtype = (inq_buff[0] & 0x1f); + this_device->bus = -1; + this_device->target = -1; + this_device->lun = -1; + memcpy(this_device->scsi3addr, scsi3addr, 8); + memcpy(this_device->vendor, &inq_buff[8], + sizeof(this_device->vendor)); + memcpy(this_device->model, &inq_buff[16], + sizeof(this_device->model)); + memcpy(this_device->revision, &inq_buff[32], + sizeof(this_device->revision)); + memset(this_device->device_id, 0, + sizeof(this_device->device_id)); + cciss_scsi_get_device_id(hba[cntl_num], scsi3addr, + this_device->device_id, sizeof(this_device->device_id)); + + switch (this_device->devtype) { case 0x05: /* CD-ROM */ { @@ -1220,15 +1269,10 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) { printk(KERN_INFO "cciss%d: %s ignored, " "too many devices.\n", cntl_num, - scsi_device_type(devtype)); + scsi_device_type(this_device->devtype)); break; } - memcpy(¤tsd[ncurrent].scsi3addr[0], - &scsi3addr[0], 8); - currentsd[ncurrent].devtype = devtype; - currentsd[ncurrent].bus = -1; - currentsd[ncurrent].target = -1; - currentsd[ncurrent].lun = -1; + currentsd[ncurrent] = *this_device; ncurrent++; break; default: @@ -1240,6 +1284,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) out: kfree(inq_buff); kfree(ld_buff); + kfree(currentsd); return; } diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h index d9c2c586502f..7b750245ae76 100644 --- a/drivers/block/cciss_scsi.h +++ b/drivers/block/cciss_scsi.h @@ -66,6 +66,10 @@ struct cciss_scsi_dev_t { int devtype; int bus, target, lun; /* as presented to the OS */ unsigned char scsi3addr[8]; /* as presented to the HW */ + unsigned char device_id[16]; /* from inquiry pg. 0x83 */ + unsigned char vendor[8]; /* bytes 8-15 of inquiry data */ + unsigned char model[16]; /* bytes 16-31 of inquiry data */ + unsigned char revision[4]; /* bytes 32-35 of inquiry data */ }; struct cciss_scsi_hba_t { -- cgit v1.2.3 From 061837bc8687edc2739ef02f721b7ae0b8076390 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 22 Sep 2008 14:57:16 -0700 Subject: drivers/block: Use DIV_ROUND_UP The kernel.h macro DIV_ROUND_UP performs the computation (((n) + (d) - 1) / (d)) but is perhaps more readable. An extract of the semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @haskernel@ @@ #include @depends on haskernel@ expression n,d; @@ ( - (n + d - 1) / d + DIV_ROUND_UP(n,d) | - (n + (d - 1)) / d + DIV_ROUND_UP(n,d) ) @depends on haskernel@ expression n,d; @@ - DIV_ROUND_UP((n),d) + DIV_ROUND_UP(n,d) @depends on haskernel@ expression n,d; @@ - DIV_ROUND_UP(n,(d)) + DIV_ROUND_UP(n,d) // Signed-off-by: Julia Lawall Cc: Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 8 ++++---- drivers/block/cpqarray.c | 2 +- drivers/block/floppy.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b73116ef9236..1e1f9153000c 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3460,8 +3460,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not"); hba[i]->cmd_pool_bits = - kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG - - 1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL); + kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) + * sizeof(unsigned long), GFP_KERNEL); hba[i]->cmd_pool = (CommandList_struct *) pci_alloc_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct), @@ -3493,8 +3493,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* command and error info recs zeroed out before they are used */ memset(hba[i]->cmd_pool_bits, 0, - ((hba[i]->nr_cmds + BITS_PER_LONG - - 1) / BITS_PER_LONG) * sizeof(unsigned long)); + DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) + * sizeof(unsigned long)); hba[i]->num_luns = 0; hba[i]->highest_lun = -1; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 09c14341e6e3..3d967525e9a9 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -424,7 +424,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t), &(hba[i]->cmd_pool_dhandle)); hba[i]->cmd_pool_bits = kcalloc( - (NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long), + DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long), GFP_KERNEL); if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 395f8ea7981c..9c0b494f5e87 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1355,20 +1355,20 @@ static void fdc_specify(void) } /* Convert step rate from microseconds to milliseconds and 4 bits */ - srt = 16 - (DP->srt * scale_dtr / 1000 + NOMINAL_DTR - 1) / NOMINAL_DTR; + srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR); if (slow_floppy) { srt = srt / 4; } SUPBOUND(srt, 0xf); INFBOUND(srt, 0); - hlt = (DP->hlt * scale_dtr / 2 + NOMINAL_DTR - 1) / NOMINAL_DTR; + hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR); if (hlt < 0x01) hlt = 0x01; else if (hlt > 0x7f) hlt = hlt_max_code; - hut = (DP->hut * scale_dtr / 16 + NOMINAL_DTR - 1) / NOMINAL_DTR; + hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR); if (hut < 0x1) hut = 0x1; else if (hut > 0xf) @@ -2385,7 +2385,7 @@ static void rw_interrupt(void) #ifdef FLOPPY_SANITY_CHECK if (nr_sectors / ssize > - (in_sector_offset + current_count_sectors + ssize - 1) / ssize) { + DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) { DPRINT("long rw: %x instead of %lx\n", nr_sectors, current_count_sectors); printk("rs=%d s=%d\n", R_SECTOR, SECTOR); -- cgit v1.2.3 From 9e49184c82e9ec3ab4d45f9ea5a17ccaf43869f0 Mon Sep 17 00:00:00 2001 From: Keith Wansbrough Date: Mon, 22 Sep 2008 14:57:17 -0700 Subject: floppy: support arbitrary first-sector numbers The current floppy_struct allows floppies to number sectors starting from 0 or 1. This patch allows arbitrary first-sector numbers - for example, 0xC1 for Amstrad CPC disks. This extends the existing 1-bit field (FD_ZEROBASED, bit 2 of stretch) to 8 bits (FD_SECTMASK, bits 2 to 9). Currently 0x00 denotes a first sector number of 1, and 0x01 denotes a first sector number of 0. We extend this by interpreting FD_SECTMASK as the first sector number with the LSB flipped. Signed-off-by: Keith Wansbrough Cc: Alain Knaff Cc: Michael Kerrisk Cc: Karel Zak Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 23 +++++++++++++++-------- include/linux/fd.h | 8 +++++++- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9c0b494f5e87..cf64ddf5d839 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -423,8 +423,15 @@ static struct floppy_raw_cmd *raw_cmd, default_raw_cmd; * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical * side 0 is on physical side 0 (but with the misnamed sector IDs). * 'stretch' should probably be renamed to something more general, like - * 'options'. Other parameters should be self-explanatory (see also - * setfdprm(8)). + * 'options'. + * + * Bits 2 through 9 of 'stretch' tell the number of the first sector. + * The LSB (bit 2) is flipped. For most disks, the first sector + * is 1 (represented by 0x00<<2). For some CP/M and music sampler + * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2). + * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2). + * + * Other parameters should be self-explanatory (see also setfdprm(8)). */ /* Size @@ -2236,9 +2243,9 @@ static void setup_format_params(int track) } } } - if (_floppy->stretch & FD_ZEROBASED) { + if (_floppy->stretch & FD_SECTBASEMASK) { for (count = 0; count < F_SECT_PER_TRACK; count++) - here[count].sect--; + here[count].sect += FD_SECTBASE(_floppy) - 1; } } @@ -2649,7 +2656,7 @@ static int make_raw_rw_request(void) } HEAD = fsector_t / _floppy->sect; - if (((_floppy->stretch & (FD_SWAPSIDES | FD_ZEROBASED)) || + if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) || TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect) max_sector = _floppy->sect; @@ -2679,7 +2686,7 @@ static int make_raw_rw_request(void) CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + - ((_floppy->stretch & FD_ZEROBASED) ? 0 : 1); + FD_SECTBASE(_floppy); /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -3311,7 +3318,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, g->head <= 0 || g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || /* check if reserved bits are set */ - (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_ZEROBASED)) != 0) + (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0) return -EINVAL; if (type) { if (!capable(CAP_SYS_ADMIN)) @@ -3356,7 +3363,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, if (DRS->maxblock > user_params[drive].sect || DRS->maxtrack || ((user_params[drive].sect ^ oldStretch) & - (FD_SWAPSIDES | FD_ZEROBASED))) + (FD_SWAPSIDES | FD_SECTBASEMASK))) invalidate_drive(bdev); else process_fd_request(); diff --git a/include/linux/fd.h b/include/linux/fd.h index b6bd41d2b460..f5d194af07a8 100644 --- a/include/linux/fd.h +++ b/include/linux/fd.h @@ -15,10 +15,16 @@ struct floppy_struct { sect, /* sectors per track */ head, /* nr of heads */ track, /* nr of tracks */ - stretch; /* !=0 means double track steps */ + stretch; /* bit 0 !=0 means double track steps */ + /* bit 1 != 0 means swap sides */ + /* bits 2..9 give the first sector */ + /* number (the LSB is flipped) */ #define FD_STRETCH 1 #define FD_SWAPSIDES 2 #define FD_ZEROBASED 4 +#define FD_SECTBASEMASK 0x3FC +#define FD_MKSECTBASE(s) (((s) ^ 1) << 2) +#define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1) unsigned char gap, /* gap1 size */ -- cgit v1.2.3 From 8316982ac06d7d8875dc8738efbb030791dc33bb Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:11:20 -0400 Subject: virtio_blk: change to use __blk_end_request() This patch converts virtio_blk to use __blk_end_request() directly so that end_{queued|dequeued}_request() can be removed. Related 'uptodate' argument is converted to 'error'. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Rusty Russell Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 879506a2c234..6ec5fc052786 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -47,20 +47,20 @@ static void blk_done(struct virtqueue *vq) spin_lock_irqsave(&vblk->lock, flags); while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { - int uptodate; + int error; switch (vbr->status) { case VIRTIO_BLK_S_OK: - uptodate = 1; + error = 0; break; case VIRTIO_BLK_S_UNSUPP: - uptodate = -ENOTTY; + error = -ENOTTY; break; default: - uptodate = 0; + error = -EIO; break; } - end_dequeued_request(vbr->req, uptodate); + __blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req)); list_del(&vbr->list); mempool_free(vbr, vblk->pool); } -- cgit v1.2.3 From 6feef531f55cf4a20fd9eb39f5352e5745203603 Mon Sep 17 00:00:00 2001 From: Denis ChengRq Date: Thu, 9 Oct 2008 08:57:05 +0200 Subject: block: mark bio_split_pool static Since all bio_split calls refer the same single bio_split_pool, the bio_split function can use bio_split_pool directly instead of the mempool_t parameter; then the mempool_t parameter can be removed from bio_split param list, and bio_split_pool is only referred in fs/bio.c file, can be marked static. Signed-off-by: Denis ChengRq Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 +- drivers/md/linear.c | 2 +- drivers/md/raid0.c | 2 +- drivers/md/raid10.c | 2 +- fs/bio.c | 9 ++++----- include/linux/bio.h | 4 +--- 6 files changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index e1a90bbb4747..0e077150568b 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2544,7 +2544,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_sector; - bp = bio_split(bio, bio_split_pool, first_sectors); + bp = bio_split(bio, first_sectors); BUG_ON(!bp); pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index c80ea90593d3..b9cbee688fae 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -353,7 +353,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) * split it. */ struct bio_pair *bp; - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f52f442a735f..53508a8a981d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -427,7 +427,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5f990133f5ef..8bdc9bfc2887 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -817,7 +817,7 @@ static int make_request(struct request_queue *q, struct bio * bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); if (make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/fs/bio.c b/fs/bio.c index a5af5809f566..77a55bcceedb 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -30,7 +30,7 @@ static struct kmem_cache *bio_slab __read_mostly; -mempool_t *bio_split_pool __read_mostly; +static mempool_t *bio_split_pool __read_mostly; /* * if you change this list, also change bvec_alloc or things will @@ -1256,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) * split a bio - only worry about a bio with a single page * in it's iovec */ -struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) +struct bio_pair *bio_split(struct bio *bi, int first_sectors) { - struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); + struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); if (!bp) return bp; @@ -1292,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) bp->bio2.bi_end_io = bio_pair_end_2; bp->bio1.bi_private = bi; - bp->bio2.bi_private = pool; + bp->bio2.bi_private = bio_split_pool; if (bio_integrity(bi)) bio_integrity_split(bi, bp, first_sectors); @@ -1455,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); -EXPORT_SYMBOL(bio_split_pool); EXPORT_SYMBOL(bio_copy_user); EXPORT_SYMBOL(bio_uncopy_user); EXPORT_SYMBOL(bioset_create); diff --git a/include/linux/bio.h b/include/linux/bio.h index fe12d0f9ebaa..fb97221d7c30 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -300,9 +300,7 @@ struct bio_pair { atomic_t cnt; int error; }; -extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, - int first_sectors); -extern mempool_t *bio_split_pool; +extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int); -- cgit v1.2.3