From e743eb1ecd5564b5ae0a4a76c1566f748a358839 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Aug 2017 08:25:38 -0600 Subject: block: remove unused syncfull/asyncfull queue flags We haven't used these in years, but somehow the definitions still remained. Kill them, and renumber the QUEUE_FLAG_ space. We had a hole in the beginning of the space, too. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 60 ++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 31 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..f45f157b2910 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -601,38 +601,36 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; -#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ -#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ -#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ -#define QUEUE_FLAG_DYING 5 /* queue being torn down */ -#define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */ -#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */ -#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ -#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */ +#define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ +#define QUEUE_FLAG_DYING 2 /* queue being torn down */ +#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */ +#define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ +#define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 8 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ -#define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ -#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ -#define QUEUE_FLAG_SECERASE 17 /* supports secure erase */ -#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ -#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ -#define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ -#define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ -#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ -#define QUEUE_FLAG_WC 23 /* Write back caching */ -#define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ -#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ -#define QUEUE_FLAG_DAX 26 /* device supports DAX */ -#define QUEUE_FLAG_STATS 27 /* track rq completion times */ -#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ -#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ -#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */ -#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */ +#define QUEUE_FLAG_IO_STAT 10 /* do IO stats */ +#define QUEUE_FLAG_DISCARD 11 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 12 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 13 /* Contributes to random pool */ +#define QUEUE_FLAG_SECERASE 14 /* supports secure erase */ +#define QUEUE_FLAG_SAME_FORCE 15 /* force complete on same CPU */ +#define QUEUE_FLAG_DEAD 16 /* queue tear-down finished */ +#define QUEUE_FLAG_INIT_DONE 17 /* queue is initialized */ +#define QUEUE_FLAG_NO_SG_MERGE 18 /* don't attempt to merge SG segments*/ +#define QUEUE_FLAG_POLL 19 /* IO polling enabled if set */ +#define QUEUE_FLAG_WC 20 /* Write back caching */ +#define QUEUE_FLAG_FUA 21 /* device supports FUA writes */ +#define QUEUE_FLAG_FLUSH_NQ 22 /* flush not queueuable */ +#define QUEUE_FLAG_DAX 23 /* device supports DAX */ +#define QUEUE_FLAG_STATS 24 /* track rq completion times */ +#define QUEUE_FLAG_POLL_STATS 25 /* collecting stats for hybrid polling */ +#define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ +#define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ +#define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From 50b4d485528d1dbe0bd249f2073140e3444f4a7b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 24 Aug 2017 01:57:56 +0200 Subject: bsg-lib: fix kernel panic resulting from missing allocation of reply-buffer Since we split the scsi_request out of struct request bsg fails to provide a reply-buffer for the drivers. This was done via the pointer for sense-data, that is not preallocated anymore. Failing to allocate/assign it results in illegal dereferences because LLDs use this pointer unquestioned. An example panic on s390x, using the zFCP driver, looks like this (I had debugging on, otherwise NULL-pointer dereferences wouldn't even panic on s390x): Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6403 Fault in home space mode while using kernel ASCE. AS:0000000001590007 R3:0000000000000024 Oops: 0038 ilc:2 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.12.0-bsg-regression+ #3 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) task: 0000000065cb0100 task.stack: 0000000065cb4000 Krnl PSW : 0704e00180000000 000003ff801e4156 (zfcp_fc_ct_els_job_handler+0x16/0x58 [zfcp]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000001 000000005fa9d0d0 000000005fa9d078 0000000000e16866 000003ff00000290 6b6b6b6b6b6b6b6b 0000000059f78f00 000000000000000f 00000000593a0958 00000000593a0958 0000000060d88800 000000005ddd4c38 0000000058b50100 07000000659cba08 000003ff801e8556 00000000659cb9a8 Krnl Code: 000003ff801e4146: e31020500004 lg %r1,80(%r2) 000003ff801e414c: 58402040 l %r4,64(%r2) #000003ff801e4150: e35020200004 lg %r5,32(%r2) >000003ff801e4156: 50405004 st %r4,4(%r5) 000003ff801e415a: e54c50080000 mvhi 8(%r5),0 000003ff801e4160: e33010280012 lt %r3,40(%r1) 000003ff801e4166: a718fffb lhi %r1,-5 000003ff801e416a: 1803 lr %r0,%r3 Call Trace: ([<000003ff801e8556>] zfcp_fsf_req_complete+0x726/0x768 [zfcp]) [<000003ff801ea82a>] zfcp_fsf_reqid_check+0x102/0x180 [zfcp] [<000003ff801eb980>] zfcp_qdio_int_resp+0x230/0x278 [zfcp] [<00000000009b91b6>] qdio_kick_handler+0x2ae/0x2c8 [<00000000009b9e3e>] __tiqdio_inbound_processing+0x406/0xc10 [<00000000001684c2>] tasklet_action+0x15a/0x1d8 [<0000000000bd28ec>] __do_softirq+0x3ec/0x848 [<00000000001675a4>] irq_exit+0x74/0xf8 [<000000000010dd6a>] do_IRQ+0xba/0xf0 [<0000000000bd19e8>] io_int_handler+0x104/0x2d4 [<00000000001033b6>] enabled_wait+0xb6/0x188 ([<000000000010339e>] enabled_wait+0x9e/0x188) [<000000000010396a>] arch_cpu_idle+0x32/0x50 [<0000000000bd0112>] default_idle_call+0x52/0x68 [<00000000001cd0fa>] do_idle+0x102/0x188 [<00000000001cd41e>] cpu_startup_entry+0x3e/0x48 [<0000000000118c64>] smp_start_secondary+0x11c/0x130 [<0000000000bd2016>] restart_int_handler+0x62/0x78 [<0000000000000000>] (null) INFO: lockdep is turned off. Last Breaking-Event-Address: [<000003ff801e41d6>] zfcp_fc_ct_job_handler+0x3e/0x48 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt This patch moves bsg-lib to allocate and setup struct bsg_job ahead of time, including the allocation of a buffer for the reply-data. This means, struct bsg_job is not allocated separately anymore, but as part of struct request allocation - similar to struct scsi_cmd. Reflect this in the function names that used to handle creation/destruction of struct bsg_job. Reported-by: Steffen Maier Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Benjamin Block Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request") Cc: #4.11+ Signed-off-by: Jens Axboe --- block/bsg-lib.c | 74 +++++++++++++++++++++++++++++-------------------- include/linux/blkdev.h | 1 - include/linux/bsg-lib.h | 2 ++ 3 files changed, 46 insertions(+), 31 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index c4513b23f57a..dd56d7460cb9 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -29,26 +29,25 @@ #include /** - * bsg_destroy_job - routine to teardown/delete a bsg job + * bsg_teardown_job - routine to teardown a bsg job * @job: bsg_job that is to be torn down */ -static void bsg_destroy_job(struct kref *kref) +static void bsg_teardown_job(struct kref *kref) { struct bsg_job *job = container_of(kref, struct bsg_job, kref); struct request *rq = job->req; - blk_end_request_all(rq, BLK_STS_OK); - put_device(job->dev); /* release reference for the request */ kfree(job->request_payload.sg_list); kfree(job->reply_payload.sg_list); - kfree(job); + + blk_end_request_all(rq, BLK_STS_OK); } void bsg_job_put(struct bsg_job *job) { - kref_put(&job->kref, bsg_destroy_job); + kref_put(&job->kref, bsg_teardown_job); } EXPORT_SYMBOL_GPL(bsg_job_put); @@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done); */ static void bsg_softirq_done(struct request *rq) { - struct bsg_job *job = rq->special; + struct bsg_job *job = blk_mq_rq_to_pdu(rq); bsg_job_put(job); } @@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) } /** - * bsg_create_job - create the bsg_job structure for the bsg request + * bsg_prepare_job - create the bsg_job structure for the bsg request * @dev: device that is being sent the bsg request * @req: BSG request that needs a job structure */ -static int bsg_create_job(struct device *dev, struct request *req) +static int bsg_prepare_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; - struct request_queue *q = req->q; struct scsi_request *rq = scsi_req(req); - struct bsg_job *job; + struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; - BUG_ON(req->special); - - job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); - if (!job) - return -ENOMEM; - - req->special = job; - job->req = req; - if (q->bsg_job_size) - job->dd_data = (void *)&job[1]; job->request = rq->cmd; job->request_len = rq->cmd_len; - job->reply = rq->sense; - job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer - * allocated */ + if (req->bio) { ret = bsg_map_buffer(&job->request_payload, req); if (ret) @@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q) { struct device *dev = q->queuedata; struct request *req; - struct bsg_job *job; int ret; if (!get_device(dev)) @@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q) break; spin_unlock_irq(q->queue_lock); - ret = bsg_create_job(dev, req); + ret = bsg_prepare_job(dev, req); if (ret) { scsi_req(req)->result = ret; blk_end_request_all(req, BLK_STS_OK); @@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q) continue; } - job = req->special; - ret = q->bsg_job_fn(job); + ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req)); spin_lock_irq(q->queue_lock); if (ret) break; @@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q) spin_lock_irq(q->queue_lock); } +static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + memset(job, 0, sizeof(*job)); + + scsi_req_init(sreq); + sreq->sense_len = SCSI_SENSE_BUFFERSIZE; + sreq->sense = kzalloc(sreq->sense_len, gfp); + if (!sreq->sense) + return -ENOMEM; + + job->req = req; + job->reply = sreq->sense; + job->reply_len = sreq->sense_len; + job->dd_data = job + 1; + + return 0; +} + +static void bsg_exit_rq(struct request_queue *q, struct request *req) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + kfree(sreq->sense); +} + /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to @@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, q = blk_alloc_queue(GFP_KERNEL); if (!q) return ERR_PTR(-ENOMEM); - q->cmd_size = sizeof(struct scsi_request); + q->cmd_size = sizeof(struct bsg_job) + dd_job_size; + q->init_rq_fn = bsg_init_rq; + q->exit_rq_fn = bsg_exit_rq; q->request_fn = bsg_request_fn; ret = blk_init_allocated_queue(q); @@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, goto out_cleanup_queue; q->queuedata = dev; - q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..2a5d52fa90f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -568,7 +568,6 @@ struct request_queue { #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; - int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index e34dde2da0ef..637a20cfb237 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -24,6 +24,7 @@ #define _BLK_BSG_ #include +#include struct request; struct device; @@ -37,6 +38,7 @@ struct bsg_buffer { }; struct bsg_job { + struct scsi_request sreq; struct device *dev; struct request *req; -- cgit v1.2.3 From 966a967116e699762dbf4af7f9e0d1955c25aa37 Mon Sep 17 00:00:00 2001 From: Ying Huang Date: Tue, 8 Aug 2017 12:30:00 +0800 Subject: smp: Avoid using two cache lines for struct call_single_data struct call_single_data is used in IPIs to transfer information between CPUs. Its size is bigger than sizeof(unsigned long) and less than cache line size. Currently it is not allocated with any explicit alignment requirements. This makes it possible for allocated call_single_data to cross two cache lines, which results in double the number of the cache lines that need to be transferred among CPUs. This can be fixed by requiring call_single_data to be aligned with the size of call_single_data. Currently the size of call_single_data is the power of 2. If we add new fields to call_single_data, we may need to add padding to make sure the size of new definition is the power of 2 as well. Fortunately, this is enforced by GCC, which will report bad sizes. To set alignment requirements of call_single_data to the size of call_single_data, a struct definition and a typedef is used. To test the effect of the patch, I used the vm-scalability multiple thread swap test case (swap-w-seq-mt). The test will create multiple threads and each thread will eat memory until all RAM and part of swap is used, so that huge number of IPIs are triggered when unmapping memory. In the test, the throughput of memory writing improves ~5% compared with misaligned call_single_data, because of faster IPIs. Suggested-by: Peter Zijlstra Signed-off-by: Huang, Ying [ Add call_single_data_t and align with size of call_single_data. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Aaron Lu Cc: Borislav Petkov Cc: Eric Dumazet Cc: Juergen Gross Cc: Linus Torvalds Cc: Michael Ellerman Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/87bmnqd6lz.fsf@yhuang-mobile.sh.intel.com Signed-off-by: Ingo Molnar --- arch/mips/kernel/smp.c | 6 ++-- block/blk-softirq.c | 2 +- drivers/block/null_blk.c | 2 +- drivers/cpuidle/coupled.c | 10 +++---- drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +- drivers/net/ethernet/cavium/liquidio/octeon_droq.h | 2 +- include/linux/blkdev.h | 2 +- include/linux/netdevice.h | 2 +- include/linux/smp.h | 8 ++++-- kernel/sched/sched.h | 2 +- kernel/smp.c | 32 ++++++++++++---------- kernel/up.c | 2 +- 12 files changed, 39 insertions(+), 33 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 6bace7695788..c7cbddfcdc3b 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one); #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static DEFINE_PER_CPU(atomic_t, tick_broadcast_count); -static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd); +static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd); void tick_broadcast(const struct cpumask *mask) { atomic_t *count; - struct call_single_data *csd; + call_single_data_t *csd; int cpu; for_each_cpu(cpu, mask) { @@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info) static int __init tick_broadcast_init(void) { - struct call_single_data *csd; + call_single_data_t *csd; int cpu; for (cpu = 0; cpu < NR_CPUS; cpu++) { diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 87b7df4851bf..07125e7941f4 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -60,7 +60,7 @@ static void trigger_softirq(void *data) static int raise_blk_irq(int cpu, struct request *rq) { if (cpu_online(cpu)) { - struct call_single_data *data = &rq->csd; + call_single_data_t *data = &rq->csd; data->func = trigger_softirq; data->info = rq; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 85c24cace973..81142ce781da 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -13,7 +13,7 @@ struct nullb_cmd { struct list_head list; struct llist_node ll_list; - struct call_single_data csd; + call_single_data_t csd; struct request *rq; struct bio *bio; unsigned int tag; diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 71e586d7df71..147f38ea0fcd 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -119,13 +119,13 @@ struct cpuidle_coupled { #define CPUIDLE_COUPLED_NOT_IDLE (-1) -static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); +static DEFINE_PER_CPU(call_single_data_t, cpuidle_coupled_poke_cb); /* * The cpuidle_coupled_poke_pending mask is used to avoid calling - * __smp_call_function_single with the per cpu call_single_data struct already + * __smp_call_function_single with the per cpu call_single_data_t struct already * in use. This prevents a deadlock where two cpus are waiting for each others - * call_single_data struct to be available + * call_single_data_t struct to be available */ static cpumask_t cpuidle_coupled_poke_pending; @@ -339,7 +339,7 @@ static void cpuidle_coupled_handle_poke(void *info) */ static void cpuidle_coupled_poke(int cpu) { - struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); + call_single_data_t *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) smp_call_function_single_async(cpu, csd); @@ -651,7 +651,7 @@ int cpuidle_coupled_register_device(struct cpuidle_device *dev) { int cpu; struct cpuidle_device *other_dev; - struct call_single_data *csd; + call_single_data_t *csd; struct cpuidle_coupled *coupled; if (cpumask_empty(&dev->coupled_cpus)) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 51583ae4b1eb..120b6e537b28 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2468,7 +2468,7 @@ static void liquidio_napi_drv_callback(void *arg) if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) { napi_schedule_irqoff(&droq->napi); } else { - struct call_single_data *csd = &droq->csd; + call_single_data_t *csd = &droq->csd; csd->func = napi_schedule_wrapper; csd->info = &droq->napi; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index 6efd139b894d..f91bc84d1719 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -328,7 +328,7 @@ struct octeon_droq { u32 cpu_id; - struct call_single_data csd; + call_single_data_t csd; }; #define OCT_DROQ_SIZE (sizeof(struct octeon_droq)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..006fa09a641e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -134,7 +134,7 @@ typedef __u32 __bitwise req_flags_t; struct request { struct list_head queuelist; union { - struct call_single_data csd; + call_single_data_t csd; u64 fifo_time; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 779b23595596..6557f320b66e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2774,7 +2774,7 @@ struct softnet_data { unsigned int input_queue_head ____cacheline_aligned_in_smp; /* Elements below can be accessed between CPUs for RPS/RFS */ - struct call_single_data csd ____cacheline_aligned_in_smp; + call_single_data_t csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsigned int cpu; unsigned int input_queue_tail; diff --git a/include/linux/smp.h b/include/linux/smp.h index 68123c1fe549..98b1fe027fc9 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -14,13 +14,17 @@ #include typedef void (*smp_call_func_t)(void *info); -struct call_single_data { +struct __call_single_data { struct llist_node llist; smp_call_func_t func; void *info; unsigned int flags; }; +/* Use __aligned() to avoid to use 2 cache lines for 1 csd */ +typedef struct __call_single_data call_single_data_t + __aligned(sizeof(struct __call_single_data)); + /* total number of cpus in this system (may exceed NR_CPUS) */ extern unsigned int total_cpus; @@ -48,7 +52,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int smp_call_function_single_async(int cpu, struct call_single_data *csd); +int smp_call_function_single_async(int cpu, call_single_data_t *csd); #ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index eeef1a3086d1..f29a7d2b57e1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -769,7 +769,7 @@ struct rq { #ifdef CONFIG_SCHED_HRTICK #ifdef CONFIG_SMP int hrtick_csd_pending; - struct call_single_data hrtick_csd; + call_single_data_t hrtick_csd; #endif struct hrtimer hrtick_timer; #endif diff --git a/kernel/smp.c b/kernel/smp.c index 3061483cb3ad..81cfca9b4cc3 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -28,7 +28,7 @@ enum { }; struct call_function_data { - struct call_single_data __percpu *csd; + call_single_data_t __percpu *csd; cpumask_var_t cpumask; cpumask_var_t cpumask_ipi; }; @@ -51,7 +51,7 @@ int smpcfd_prepare_cpu(unsigned int cpu) free_cpumask_var(cfd->cpumask); return -ENOMEM; } - cfd->csd = alloc_percpu(struct call_single_data); + cfd->csd = alloc_percpu(call_single_data_t); if (!cfd->csd) { free_cpumask_var(cfd->cpumask); free_cpumask_var(cfd->cpumask_ipi); @@ -103,12 +103,12 @@ void __init call_function_init(void) * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ -static __always_inline void csd_lock_wait(struct call_single_data *csd) +static __always_inline void csd_lock_wait(call_single_data_t *csd) { smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); } -static __always_inline void csd_lock(struct call_single_data *csd) +static __always_inline void csd_lock(call_single_data_t *csd) { csd_lock_wait(csd); csd->flags |= CSD_FLAG_LOCK; @@ -116,12 +116,12 @@ static __always_inline void csd_lock(struct call_single_data *csd) /* * prevent CPU from reordering the above assignment * to ->flags with any subsequent assignments to other - * fields of the specified call_single_data structure: + * fields of the specified call_single_data_t structure: */ smp_wmb(); } -static __always_inline void csd_unlock(struct call_single_data *csd) +static __always_inline void csd_unlock(call_single_data_t *csd) { WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); @@ -131,14 +131,14 @@ static __always_inline void csd_unlock(struct call_single_data *csd) smp_store_release(&csd->flags, 0); } -static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); +static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); /* - * Insert a previously allocated call_single_data element + * Insert a previously allocated call_single_data_t element * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. */ -static int generic_exec_single(int cpu, struct call_single_data *csd, +static int generic_exec_single(int cpu, call_single_data_t *csd, smp_call_func_t func, void *info) { if (cpu == smp_processor_id()) { @@ -210,7 +210,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) { struct llist_head *head; struct llist_node *entry; - struct call_single_data *csd, *csd_next; + call_single_data_t *csd, *csd_next; static bool warned; WARN_ON(!irqs_disabled()); @@ -268,8 +268,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int wait) { - struct call_single_data *csd; - struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS }; + call_single_data_t *csd; + call_single_data_t csd_stack = { + .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS, + }; int this_cpu; int err; @@ -321,7 +323,7 @@ EXPORT_SYMBOL(smp_call_function_single); * NOTE: Be careful, there is unfortunately no current debugging facility to * validate the correctness of this serialization. */ -int smp_call_function_single_async(int cpu, struct call_single_data *csd) +int smp_call_function_single_async(int cpu, call_single_data_t *csd) { int err = 0; @@ -444,7 +446,7 @@ void smp_call_function_many(const struct cpumask *mask, cpumask_clear(cfd->cpumask_ipi); for_each_cpu(cpu, cfd->cpumask) { - struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); + call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); csd_lock(csd); if (wait) @@ -460,7 +462,7 @@ void smp_call_function_many(const struct cpumask *mask, if (wait) { for_each_cpu(cpu, cfd->cpumask) { - struct call_single_data *csd; + call_single_data_t *csd; csd = per_cpu_ptr(cfd->csd, cpu); csd_lock_wait(csd); diff --git a/kernel/up.c b/kernel/up.c index ee81ac9af4ca..42c46bf3e0a5 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -23,7 +23,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -int smp_call_function_single_async(int cpu, struct call_single_data *csd) +int smp_call_function_single_async(int cpu, call_single_data_t *csd) { unsigned long flags; -- cgit v1.2.3