author     Linus Torvalds <torvalds@linux-foundation.org>   2008-10-17 20:29:55 +0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2008-10-17 20:29:55 +0400
commit     c53dbf54863e7f3b0b8810dda2bdd0290006bdac (patch)
tree       f783074f1bec1112bf1148a077e0114a38403ad4
parent     b73b636e8987f8728c6c700377615757691b9a55 (diff)
parent     f73e2d13a16cc88c4faa4729967f92bfeec8a142 (diff)
download   linux-c53dbf54863e7f3b0b8810dda2bdd0290006bdac.tar.xz
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
block: remove __generic_unplug_device() from exports
block: move q->unplug_work initialization
blktrace: pass zfcp driver data
blktrace: add support for driver data
block: fix current kernel-doc warnings
block: only call ->request_fn when the queue is not stopped
block: simplify string handling in elv_iosched_store()
block: fix kernel-doc for blk_alloc_devt()
block: fix nr_phys_segments miscalculation bug
block: add partition attribute for partition number
block: add BIG FAT WARNING to CONFIG_DEBUG_BLOCK_EXT_DEVT
softirq: Add support for triggering softirq work on softirqs.
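Note: the softirq item above introduces the send_remote_softirq() interface implemented in kernel/softirq.c below. A minimal sketch of the intended usage, assuming a hypothetical driver structure ("my_request") that embeds the call_single_data and remembers the submitting cpu; illustrative only, not code from this merge:

#include <linux/interrupt.h>
#include <linux/smp.h>

struct my_request {
	struct call_single_data csd;	/* must stay live until the softirq runs */
	int submit_cpu;			/* cpu that issued the I/O */
	/* ... driver-private completion state ... */
};

/*
 * Hand completion work to the cpu that submitted the request.  If that
 * cpu is offline, send_remote_softirq() queues the work on the local
 * cpu instead.  The BLOCK_SOFTIRQ handler on the target cpu is then
 * expected to drain its softirq_work_list[BLOCK_SOFTIRQ] entry and
 * finish the request.
 */
static void my_complete_request(struct my_request *mrq)
{
	send_remote_softirq(&mrq->csd, mrq->submit_cpu, BLOCK_SOFTIRQ);
}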
-rw-r--r--  block/blk-core.c              |  26
-rw-r--r--  block/blk-merge.c             |  20
-rw-r--r--  block/blk-settings.c          |   2
-rw-r--r--  block/blk.h                   |   1
-rw-r--r--  block/elevator.c              |  16
-rw-r--r--  block/genhd.c                 |   3
-rw-r--r--  drivers/ide/ide-io.c          |   4
-rw-r--r--  drivers/s390/scsi/zfcp_def.h  |   2
-rw-r--r--  drivers/s390/scsi/zfcp_fsf.c  |  34
-rw-r--r--  drivers/s390/scsi/zfcp_fsf.h  |  12
-rw-r--r--  drivers/s390/scsi/zfcp_qdio.c |   1
-rw-r--r--  fs/block_dev.c                |   2
-rw-r--r--  fs/partitions/check.c         |  10
-rw-r--r--  include/linux/bio.h           |   7
-rw-r--r--  include/linux/blkdev.h        |   1
-rw-r--r--  include/linux/blktrace_api.h  |  32
-rw-r--r--  include/linux/interrupt.h     |  21
-rw-r--r--  include/linux/smp.h           |   4
-rw-r--r--  kernel/softirq.c              | 129
-rw-r--r--  lib/Kconfig.debug             |   5
20 files changed, 305 insertions(+), 27 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 9e79a485e4f3..c3df30cfb3fc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -257,7 +257,6 @@ void __generic_unplug_device(struct request_queue *q)
 
 	q->request_fn(q);
 }
-EXPORT_SYMBOL(__generic_unplug_device);
 
 /**
  * generic_unplug_device - fire a request queue
@@ -325,6 +324,9 @@ EXPORT_SYMBOL(blk_unplug);
 
 static void blk_invoke_request_fn(struct request_queue *q)
 {
+	if (unlikely(blk_queue_stopped(q)))
+		return;
+
 	/*
 	 * one level of recursion is ok and is much faster than kicking
 	 * the unplug handling
@@ -399,8 +401,13 @@ void blk_sync_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_sync_queue);
 
 /**
- * blk_run_queue - run a single device queue
+ * __blk_run_queue - run a single device queue
  * @q:	The queue to run
+ *
+ * Description:
+ *    See @blk_run_queue. This variant must be called with the queue lock
+ *    held and interrupts disabled.
+ *
  */
 void __blk_run_queue(struct request_queue *q)
 {
@@ -418,6 +425,12 @@ EXPORT_SYMBOL(__blk_run_queue);
 /**
  * blk_run_queue - run a single device queue
  * @q: The queue to run
+ *
+ * Description:
+ *    Invoke request handling on this queue, if it has pending work to do.
+ *    May be used to restart queueing when a request has completed. Also
+ *    See @blk_start_queueing.
+ *
  */
 void blk_run_queue(struct request_queue *q)
 {
@@ -501,6 +514,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	init_timer(&q->unplug_timer);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_WORK(&q->unplug_work, blk_unplug_work);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
@@ -884,7 +898,8 @@ EXPORT_SYMBOL(blk_get_request);
 *
 * This is basically a helper to remove the need to know whether a queue
 * is plugged or not if someone just wants to initiate dispatch of requests
- * for this queue.
+ * for this queue. Should be used to start queueing on a device outside
+ * of ->request_fn() context. Also see @blk_run_queue.
 *
 * The queue lock must be held with interrupts disabled.
 */
@@ -1003,8 +1018,9 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
 }
 
 /**
- * part_round_stats() - Round off the performance stats on a struct
- * disk_stats.
+ * part_round_stats() - Round off the performance stats on a struct disk_stats.
+ * @cpu: cpu number for stats access
+ * @part: target partition
 *
 * The average IO queue length and utilisation statistics are maintained
 * by observing the current state of the queue length and the amount of
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 908d3e11ac52..8681cd6f9911 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -77,12 +77,20 @@ void blk_recalc_rq_segments(struct request *rq)
 			continue;
 		}
 new_segment:
+		if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
+			rq->bio->bi_seg_front_size = seg_size;
+
 		nr_phys_segs++;
 		bvprv = bv;
 		seg_size = bv->bv_len;
 		highprv = high;
 	}
 
+	if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
+		rq->bio->bi_seg_front_size = seg_size;
+	if (seg_size > rq->biotail->bi_seg_back_size)
+		rq->biotail->bi_seg_back_size = seg_size;
+
 	rq->nr_phys_segments = nr_phys_segs;
 }
 
@@ -106,7 +114,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
 		return 0;
 
-	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
+	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
+	    q->max_segment_size)
 		return 0;
 
 	if (!bio_has_data(bio))
@@ -309,6 +318,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 				struct request *next)
 {
 	int total_phys_segments;
+	unsigned int seg_size =
+		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
 
 	/*
 	 * First check if the either of the requests are re-queued
@@ -324,8 +335,13 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 		return 0;
 
 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
-	if (blk_phys_contig_segment(q, req->biotail, next->bio))
+	if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
+		if (req->nr_phys_segments == 1)
+			req->bio->bi_seg_front_size = seg_size;
+		if (next->nr_phys_segments == 1)
+			next->biotail->bi_seg_back_size = seg_size;
 		total_phys_segments--;
+	}
 
 	if (total_phys_segments > q->max_phys_segments)
 		return 0;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b21dcdb64151..41392fbe19ff 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -141,8 +141,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	if (q->unplug_delay == 0)
 		q->unplug_delay = 1;
 
-	INIT_WORK(&q->unplug_work, blk_unplug_work);
-
 	q->unplug_timer.function = blk_unplug_timeout;
 	q->unplug_timer.data = (unsigned long)q;
 
diff --git a/block/blk.h b/block/blk.h
index e5c579769963..d2e49af90db5 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -20,6 +20,7 @@ void blk_unplug_timeout(unsigned long data);
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
+void __generic_unplug_device(struct request_queue *);
 
 /*
  * Internal atomic flags for request handling
diff --git a/block/elevator.c b/block/elevator.c
index 04518921db31..59173a69ebdf 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -612,7 +612,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		 *   processing.
 		 */
 		blk_remove_plug(q);
-		q->request_fn(q);
+		blk_start_queueing(q);
 		break;
 
 	case ELEVATOR_INSERT_SORT:
@@ -950,7 +950,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
 		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
-			q->request_fn(q);
+			blk_start_queueing(q);
 		}
 	}
 }
@@ -1109,8 +1109,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	elv_drain_elevator(q);
 
 	while (q->rq.elvpriv) {
-		blk_remove_plug(q);
-		q->request_fn(q);
+		blk_start_queueing(q);
 		spin_unlock_irq(q->queue_lock);
 		msleep(10);
 		spin_lock_irq(q->queue_lock);
@@ -1166,15 +1165,10 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 			  size_t count)
 {
 	char elevator_name[ELV_NAME_MAX];
-	size_t len;
 	struct elevator_type *e;
 
-	elevator_name[sizeof(elevator_name) - 1] = '\0';
-	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
-	len = strlen(elevator_name);
-
-	if (len && elevator_name[len - 1] == '\n')
-		elevator_name[len - 1] = '\0';
+	strlcpy(elevator_name, name, sizeof(elevator_name));
+	strstrip(elevator_name);
 
 	e = elevator_get(elevator_name);
 	if (!e) {
diff --git a/block/genhd.c b/block/genhd.c
index 4cd3433c99ac..646e1d2507c7 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -358,7 +358,6 @@ static int blk_mangle_minor(int minor)
 /**
 * blk_alloc_devt - allocate a dev_t for a partition
 * @part: partition to allocate dev_t for
- * @gfp_mask: memory allocation flag
 * @devt: out parameter for resulting dev_t
 *
 * Allocate a dev_t for block device.
@@ -535,7 +534,7 @@ void unlink_gendisk(struct gendisk *disk)
 /**
 * get_gendisk - get partitioning information for a given device
 * @devt: device to get partitioning information for
- * @part: returned partition index
+ * @partno: returned partition index
 *
 * This function gets the structure containing partitioning
 * information for the given device @devt.
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 77c6eaeacefa..7162d67562af 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1493,8 +1493,8 @@ void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq)
 
 	spin_lock_irqsave(&ide_lock, flags);
 	hwgroup->rq = NULL;
-	__elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 1);
-	__generic_unplug_device(drive->queue);
+	__elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
+	blk_start_queueing(drive->queue);
 	spin_unlock_irqrestore(&ide_lock, flags);
 }
 
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 8a13071c444c..9ce4c75bd190 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -583,6 +583,8 @@ struct zfcp_fsf_req {
 	unsigned long long	issued;		/* request sent time (STCK) */
 	struct zfcp_unit	*unit;
 	void			(*handler)(struct zfcp_fsf_req *);
+	u16			qdio_outb_usage;/* usage of outbound queue */
+	u16			qdio_inb_usage;	/* usage of inbound queue */
 };
 
 /* driver data */
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 739356a5c123..5ae1d497e5ed 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -6,6 +6,7 @@
 * Copyright IBM Corporation 2002, 2008
 */
 
+#include <linux/blktrace_api.h>
 #include "zfcp_ext.h"
 
 static void zfcp_fsf_request_timeout_handler(unsigned long data)
@@ -777,6 +778,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
 	list_add_tail(&req->list, &adapter->req_list[idx]);
 	spin_unlock(&adapter->req_list_lock);
 
+	req->qdio_outb_usage = atomic_read(&req_q->count);
 	req->issued = get_clock();
 	if (zfcp_qdio_send(req)) {
 		/* Queues are down..... */
@@ -2082,6 +2084,36 @@ static void zfcp_fsf_req_latency(struct zfcp_fsf_req *req)
 	spin_unlock_irqrestore(&unit->latencies.lock, flags);
 }
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+static void zfcp_fsf_trace_latency(struct zfcp_fsf_req *fsf_req)
+{
+	struct fsf_qual_latency_info *lat_inf;
+	struct scsi_cmnd *scsi_cmnd = (struct scsi_cmnd *)fsf_req->data;
+	struct request *req = scsi_cmnd->request;
+	struct zfcp_blk_drv_data trace;
+	int ticks = fsf_req->adapter->timer_ticks;
+
+	trace.flags = 0;
+	trace.magic = ZFCP_BLK_DRV_DATA_MAGIC;
+	if (fsf_req->adapter->adapter_features & FSF_FEATURE_MEASUREMENT_DATA) {
+		trace.flags |= ZFCP_BLK_LAT_VALID;
+		lat_inf = &fsf_req->qtcb->prefix.prot_status_qual.latency_info;
+		trace.channel_lat = lat_inf->channel_lat * ticks;
+		trace.fabric_lat = lat_inf->fabric_lat * ticks;
+	}
+	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)
+		trace.flags |= ZFCP_BLK_REQ_ERROR;
+	trace.inb_usage = fsf_req->qdio_inb_usage;
+	trace.outb_usage = fsf_req->qdio_outb_usage;
+
+	blk_add_driver_data(req->q, req, &trace, sizeof(trace));
+}
+#else
+static inline void zfcp_fsf_trace_latency(struct zfcp_fsf_req *fsf_req)
+{
+}
+#endif
+
 static void zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *req)
 {
 	struct scsi_cmnd *scpnt = req->data;
@@ -2114,6 +2146,8 @@ static void zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *req)
 	if (req->adapter->adapter_features & FSF_FEATURE_MEASUREMENT_DATA)
 		zfcp_fsf_req_latency(req);
 
+	zfcp_fsf_trace_latency(req);
+
 	if (unlikely(fcp_rsp_iu->validity.bits.fcp_rsp_len_valid)) {
 		if (fcp_rsp_info[3] == RSP_CODE_GOOD)
 			set_host_byte(scpnt, DID_OK);
diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h
index fd3a88777ac8..fa2a31780611 100644
--- a/drivers/s390/scsi/zfcp_fsf.h
+++ b/drivers/s390/scsi/zfcp_fsf.h
@@ -439,4 +439,16 @@ struct fsf_qtcb {
 	u8 log[FSF_QTCB_LOG_SIZE];
 } __attribute__ ((packed));
 
+struct zfcp_blk_drv_data {
+#define ZFCP_BLK_DRV_DATA_MAGIC			0x1
+	u32 magic;
+#define ZFCP_BLK_LAT_VALID			0x1
+#define ZFCP_BLK_REQ_ERROR			0x2
+	u16 flags;
+	u8 inb_usage;
+	u8 outb_usage;
+	u64 channel_lat;
+	u64 fabric_lat;
+} __attribute__ ((packed));
+
 #endif /* FSF_H */
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 3e05080e62d4..664752f90b20 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -115,6 +115,7 @@ static void zfcp_qdio_reqid_check(struct zfcp_adapter *adapter,
 	spin_unlock_irqrestore(&adapter->req_list_lock, flags);
 
 	fsf_req->sbal_response = sbal_idx;
+	fsf_req->qdio_inb_usage = atomic_read(&adapter->resp_q.count);
 	zfcp_fsf_req_complete(fsf_req);
 }
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d84f0469a016..218408eed1bb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1262,7 +1262,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
 
 /**
 * lookup_bdev - lookup a struct block_device by name
- * @pathname:	special file representing the block device
+ * @path:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index fbeb2f372a93..cfb0c80690aa 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -195,6 +195,14 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
 	return ERR_PTR(res);
 }
 
+static ssize_t part_partition_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct hd_struct *p = dev_to_part(dev);
+
+	return sprintf(buf, "%d\n", p->partno);
+}
+
 static ssize_t part_start_show(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
@@ -260,6 +268,7 @@ ssize_t part_fail_store(struct device *dev,
 }
 #endif
 
+static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
 static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
@@ -269,6 +278,7 @@ static struct device_attribute dev_attr_fail =
 #endif
 
 static struct attribute *part_attrs[] = {
+	&dev_attr_partition.attr,
 	&dev_attr_start.attr,
 	&dev_attr_size.attr,
 	&dev_attr_stat.attr,
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1beda208cbfb..1c91a176b9ae 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -79,6 +79,13 @@ struct bio {
 
 	unsigned int		bi_size;	/* residual I/O count */
 
+	/*
+	 * To keep track of the max segment size, we account for the
+	 * sizes of the first and last mergeable segments in this bio.
+	 */
+	unsigned int		bi_seg_front_size;
+	unsigned int		bi_seg_back_size;
+
 	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
 
 	unsigned int		bi_comp_cpu;	/* completion CPU */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3491d225268..b4fe68fe3a57 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,7 +865,6 @@ extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int);
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern void generic_unplug_device(struct request_queue *);
-extern void __generic_unplug_device(struct request_queue *);
 extern long nr_blockdev_pages(void);
 
 int blk_get_queue(struct request_queue *);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 3a31eb506164..bdf505d33e77 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -24,6 +24,7 @@ enum blktrace_cat {
 	BLK_TC_AHEAD	= 1 << 11,	/* readahead */
 	BLK_TC_META	= 1 << 12,	/* metadata */
 	BLK_TC_DISCARD	= 1 << 13,	/* discard requests */
+	BLK_TC_DRV_DATA	= 1 << 14,	/* binary per-driver data */
 
 	BLK_TC_END	= 1 << 15,	/* only 16-bits, reminder */
 };
@@ -51,6 +52,7 @@ enum blktrace_act {
 	__BLK_TA_BOUNCE,		/* bio was bounced */
 	__BLK_TA_REMAP,			/* bio was remapped */
 	__BLK_TA_ABORT,			/* request aborted */
+	__BLK_TA_DRV_DATA,		/* driver-specific binary data */
 };
 
 /*
@@ -82,6 +84,7 @@ enum blktrace_notify {
 #define BLK_TA_BOUNCE		(__BLK_TA_BOUNCE)
 #define BLK_TA_REMAP		(__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
 #define BLK_TA_ABORT		(__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_DRV_DATA		(__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
 
 #define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
 #define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
@@ -317,6 +320,34 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
 	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
 }
 
+/**
+ * blk_add_driver_data - Add binary message with driver-specific data
+ * @q:		queue the io is for
+ * @rq:		io request
+ * @data:	driver-specific data
+ * @len:	length of driver-specific data
+ *
+ * Description:
+ *     Some drivers might want to write driver-specific data per request.
+ *
+ **/
+static inline void blk_add_driver_data(struct request_queue *q,
+				       struct request *rq,
+				       void *data, size_t len)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (likely(!bt))
+		return;
+
+	if (blk_pc_request(rq))
+		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
+				rq->errors, len, data);
+	else
+		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+				0, BLK_TA_DRV_DATA, rq->errors, len, data);
+}
+
 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
@@ -330,6 +361,7 @@ extern int blk_trace_remove(struct request_queue *q);
 #define blk_add_trace_generic(q, rq, rw, what)		do { } while (0)
 #define blk_add_trace_pdu_int(q, what, bio, pdu)	do { } while (0)
 #define blk_add_trace_remap(q, bio, dev, f, t)		do {} while (0)
+#define blk_add_driver_data(q, rq, data, len)		do {} while (0)
 #define do_blk_trace_setup(q, name, dev, buts)		(-ENOTTY)
 #define blk_trace_setup(q, name, dev, arg)		(-ENOTTY)
 #define blk_trace_startstop(q, start)			(-ENOTTY)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 54b3623434ec..35a61dc60d51 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -11,6 +11,8 @@
 #include <linux/hardirq.h>
 #include <linux/sched.h>
 #include <linux/irqflags.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
@@ -273,6 +275,25 @@ extern void softirq_init(void);
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
+/* This is the worklist that queues up per-cpu softirq work.
+ *
+ * send_remote_softirq() adds work to these lists, and
+ * the softirq handler itself dequeues from them. The queues
+ * are protected by disabling local cpu interrupts and they must
+ * only be accessed by the local cpu that they are for.
+ */
+DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+
+/* Try to send a softirq to a remote cpu. If this cannot be done, the
+ * work will be queued to the local cpu.
+ */
+extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq);
+
+/* Like send_remote_softirq(), but the caller must disable local cpu interrupts
+ * and compute the current cpu, passed in as 'this_cpu'.
+ */
+extern void __send_remote_softirq(struct call_single_data *cp, int cpu,
+				  int this_cpu, int softirq);
 
 /* Tasklets --- multithreaded analogue of BHs.
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 66484d4a8459..2e4d58b26c06 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,6 +7,7 @@
 */
 
 #include <linux/errno.h>
+#include <linux/types.h>
 #include <linux/list.h>
 #include <linux/cpumask.h>
 
@@ -16,7 +17,8 @@ struct call_single_data {
 	struct list_head list;
 	void (*func) (void *info);
 	void *info;
-	unsigned int flags;
+	u16 flags;
+	u16 priv;
 };
 
 #ifdef CONFIG_SMP
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 37d67aa2d56f..83ba21a13bd4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -6,6 +6,8 @@
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *
+ *	Remote softirq infrastructure is by Jens Axboe.
 */
 
 #include <linux/module.h>
@@ -474,17 +476,144 @@ void tasklet_kill(struct tasklet_struct *t)
 
 EXPORT_SYMBOL(tasklet_kill);
 
+DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+EXPORT_PER_CPU_SYMBOL(softirq_work_list);
+
+static void __local_trigger(struct call_single_data *cp, int softirq)
+{
+	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
+
+	list_add_tail(&cp->list, head);
+
+	/* Trigger the softirq only if the list was previously empty. */
+	if (head->next == &cp->list)
+		raise_softirq_irqoff(softirq);
+}
+
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+static void remote_softirq_receive(void *data)
+{
+	struct call_single_data *cp = data;
+	unsigned long flags;
+	int softirq;
+
+	softirq = cp->priv;
+
+	local_irq_save(flags);
+	__local_trigger(cp, softirq);
+	local_irq_restore(flags);
+}
+
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+	if (cpu_online(cpu)) {
+		cp->func = remote_softirq_receive;
+		cp->info = cp;
+		cp->flags = 0;
+		cp->priv = softirq;
+
+		__smp_call_function_single(cpu, cp);
+		return 0;
+	}
+	return 1;
+}
+#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+	return 1;
+}
+#endif
+
+/**
+ * __send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @this_cpu: the currently executing cpu
+ * @softirq: the softirq for the work
+ *
+ * Attempt to schedule softirq work on a remote cpu.  If this cannot be
+ * done, the work is instead queued up on the local cpu.
+ *
+ * Interrupts must be disabled.
+ */
+void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
+{
+	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
+		__local_trigger(cp, softirq);
+}
+EXPORT_SYMBOL(__send_remote_softirq);
+
+/**
+ * send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @softirq: the softirq for the work
+ *
+ * Like __send_remote_softirq except that disabling interrupts and
+ * computing the current cpu is done for the caller.
+ */
+void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+	unsigned long flags;
+	int this_cpu;
+
+	local_irq_save(flags);
+	this_cpu = smp_processor_id();
+	__send_remote_softirq(cp, cpu, this_cpu, softirq);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(send_remote_softirq);
+
+static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
+					       unsigned long action, void *hcpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		int cpu = (unsigned long) hcpu;
+		int i;
+
+		local_irq_disable();
+		for (i = 0; i < NR_SOFTIRQS; i++) {
+			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
+			struct list_head *local_head;
+
+			if (list_empty(head))
+				continue;
+
+			local_head = &__get_cpu_var(softirq_work_list[i]);
+			list_splice_init(head, local_head);
+			raise_softirq_irqoff(i);
+		}
+		local_irq_enable();
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
+	.notifier_call	= remote_softirq_cpu_notify,
+};
+
 void __init softirq_init(void)
 {
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
+		int i;
+
 		per_cpu(tasklet_vec, cpu).tail =
 			&per_cpu(tasklet_vec, cpu).head;
 		per_cpu(tasklet_hi_vec, cpu).tail =
 			&per_cpu(tasklet_hi_vec, cpu).head;
+		for (i = 0; i < NR_SOFTIRQS; i++)
+			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
 	}
 
+	register_hotcpu_notifier(&remote_softirq_cpu_notifier);
+
 	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
 }
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 31d784dd80d0..b0f239e443bc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -652,6 +652,11 @@ config DEBUG_BLOCK_EXT_DEVT
 	depends on BLOCK
 	default n
 	help
+	  BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
+	  SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
+	  YOU ARE DOING. Distros, please enable this and fix whatever
+	  is broken.
+
 	  Conventionally, block device numbers are allocated from
 	  predetermined contiguous area. However, extended block area
 	  may introduce non-contiguous block device numbers. This
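Note: the zfcp hunks above show the intended blk_add_driver_data() pattern: pack a small magic-tagged struct once per request and attach it to the queue's trace stream, where a trace consumer can pick it up as a binary BLK_TA_DRV_DATA payload. A minimal sketch under the same API; "my_drv_data", "my_trace_request" and the magic value are hypothetical, for illustration only:

#include <linux/blkdev.h>
#include <linux/blktrace_api.h>

struct my_drv_data {
	u32 magic;		/* lets a trace consumer recognise the payload */
	u16 flags;		/* driver-defined status bits */
	u16 queue_depth;	/* example of device state worth correlating */
} __attribute__ ((packed));

static void my_trace_request(struct request *rq, u16 flags, u16 depth)
{
	struct my_drv_data trace = {
		.magic		= 0x4d59,	/* hypothetical magic value */
		.flags		= flags,
		.queue_depth	= depth,
	};

	/* Cheap no-op unless blktrace is active on this queue. */
	blk_add_driver_data(rq->q, rq, &trace, sizeof(trace));
}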