From b01a60be7a4a161ac0a11df30569d21a20795aef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Jul 2013 17:43:56 +0200 Subject: ssb: fix alignment of struct bcma_device_id The ARM OABI and EABI disagree on the alignment of structures with small members, so module init tools may interpret the ssb device table incorrectly, as shown by this warning when building the b43 device driver in an OABI kernel: FATAL: drivers/net/wireless/b43/b43: sizeof(struct ssb_device_id)=6 is not a modulo of the size of section __mod_ssb_device_table=88. Forcing the default (EABI) alignment on the structure makes this problem go away. Since the ssb_device_id may have the same problem, better fix both structures. Signed-off-by: Arnd Bergmann Cc: Russell King Cc: John W. Linville Cc: Michael Buesch Cc: Larry Finger Signed-off-by: John W. Linville --- include/linux/mod_devicetable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b62d4af6c667..45e921401b06 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -361,7 +361,8 @@ struct ssb_device_id { __u16 vendor; __u16 coreid; __u8 revision; -}; + __u8 __pad; +} __attribute__((packed, aligned(2))); #define SSB_DEVICE(_vendor, _coreid, _revision) \ { .vendor = _vendor, .coreid = _coreid, .revision = _revision, } #define SSB_DEVTABLE_END \ @@ -377,7 +378,7 @@ struct bcma_device_id { __u16 id; __u8 rev; __u8 class; -}; +} __attribute__((packed,aligned(2))); #define BCMA_CORE(_manuf, _id, _rev, _class) \ { .manuf = _manuf, .id = _id, .rev = _rev, .class = _class, } #define BCMA_CORETABLE_END \ -- cgit v1.2.3 From a1a8e1dc111d6f05e7164e851e58219d428359e1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 16 Jul 2013 15:28:00 +0100 Subject: iio:trigger: Fix use_count race condition When using more than one trigger consumer it can happen that multiple threads perform a read-modify-update cycle on 'use_count' concurrently. This can cause updates to be lost and use_count can get stuck at non-zero value, in which case the IIO core assumes that at least one thread is still running and will wait for it to finish before running any trigger handlers again. This effectively renders the trigger disabled and a reboot is necessary before it can be used again. To fix this make use_count an atomic variable. Also set it to the number of consumers before starting the first consumer, otherwise it might happen that use_count drops to 0 even though not all consumers have been run yet. Signed-off-by: Lars-Peter Clausen Tested-by: Denis Ciocca Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-trigger.c | 34 ++++++++++++++++++++++------------ include/linux/iio/trigger.h | 3 ++- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index ea8a4146620d..0dd9bb873130 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -127,12 +127,17 @@ static struct iio_trigger *iio_trigger_find_by_name(const char *name, void iio_trigger_poll(struct iio_trigger *trig, s64 time) { int i; - if (!trig->use_count) - for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) - if (trig->subirqs[i].enabled) { - trig->use_count++; + + if (!atomic_read(&trig->use_count)) { + atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER); + + for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) { + if (trig->subirqs[i].enabled) generic_handle_irq(trig->subirq_base + i); - } + else + iio_trigger_notify_done(trig); + } + } } EXPORT_SYMBOL(iio_trigger_poll); @@ -146,19 +151,24 @@ EXPORT_SYMBOL(iio_trigger_generic_data_rdy_poll); void iio_trigger_poll_chained(struct iio_trigger *trig, s64 time) { int i; - if (!trig->use_count) - for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) - if (trig->subirqs[i].enabled) { - trig->use_count++; + + if (!atomic_read(&trig->use_count)) { + atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER); + + for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) { + if (trig->subirqs[i].enabled) handle_nested_irq(trig->subirq_base + i); - } + else + iio_trigger_notify_done(trig); + } + } } EXPORT_SYMBOL(iio_trigger_poll_chained); void iio_trigger_notify_done(struct iio_trigger *trig) { - trig->use_count--; - if (trig->use_count == 0 && trig->ops && trig->ops->try_reenable) + if (atomic_dec_and_test(&trig->use_count) && trig->ops && + trig->ops->try_reenable) if (trig->ops->try_reenable(trig)) /* Missed an interrupt so launch new poll now */ iio_trigger_poll(trig, 0); diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 3869c525b052..369cf2cd5144 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -8,6 +8,7 @@ */ #include #include +#include #ifndef _IIO_TRIGGER_H_ #define _IIO_TRIGGER_H_ @@ -61,7 +62,7 @@ struct iio_trigger { struct list_head list; struct list_head alloc_list; - int use_count; + atomic_t use_count; struct irq_chip subirq_chip; int subirq_base; -- cgit v1.2.3 From b1451e546899bc8f450773b2af02e0cd000cf1fa Mon Sep 17 00:00:00 2001 From: "Patil, Rachna" Date: Sat, 20 Jul 2013 17:27:00 +0100 Subject: iio: ti_am335x_adc: Fix wrong samples received on 1st read Previously we tried to read data form ADC even before ADC sequencer finished sampling. This led to wrong samples. We now wait on ADC status register idle bit to be set. Signed-off-by: Patil, Rachna Signed-off-by: Zubair Lutfullah Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti_am335x_adc.c | 30 ++++++++++++++++++++++-------- include/linux/mfd/ti_am335x_tscadc.h | 16 ++++++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 0ad208a69c29..3ceac3e91dde 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -60,7 +60,6 @@ static void tiadc_step_config(struct tiadc_device *adc_dev) { unsigned int stepconfig; int i, steps; - u32 step_en; /* * There are 16 configurable steps and 8 analog input @@ -86,8 +85,7 @@ static void tiadc_step_config(struct tiadc_device *adc_dev) adc_dev->channel_step[i] = steps; steps++; } - step_en = get_adc_step_mask(adc_dev); - am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en); + } static const char * const chan_name_ain[] = { @@ -142,10 +140,22 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct tiadc_device *adc_dev = iio_priv(indio_dev); - int i; - unsigned int fifo1count, read; + int i, map_val; + unsigned int fifo1count, read, stepid; u32 step = UINT_MAX; bool found = false; + u32 step_en; + unsigned long timeout = jiffies + usecs_to_jiffies + (IDLE_TIMEOUT * adc_dev->channels); + step_en = get_adc_step_mask(adc_dev); + am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en); + + /* Wait for ADC sequencer to complete sampling */ + while (tiadc_readl(adc_dev, REG_ADCFSM) & SEQ_STATUS) { + if (time_after(jiffies, timeout)) + return -EAGAIN; + } + map_val = chan->channel + TOTAL_CHANNELS; /* * When the sub-system is first enabled, @@ -170,12 +180,16 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT); for (i = 0; i < fifo1count; i++) { read = tiadc_readl(adc_dev, REG_FIFO1); - if (read >> 16 == step) { - *val = read & 0xfff; + stepid = read & FIFOREAD_CHNLID_MASK; + stepid = stepid >> 0x10; + + if (stepid == map_val) { + read = read & FIFOREAD_DATA_MASK; found = true; + *val = read; } } - am335x_tsc_se_update(adc_dev->mfd_tscadc); + if (found == false) return -EBUSY; return IIO_VAL_INT; diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 8d73fe29796a..db1791bb997a 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -113,11 +113,27 @@ #define CNTRLREG_8WIRE CNTRLREG_AFE_CTRL(3) #define CNTRLREG_TSCENB BIT(7) +/* FIFO READ Register */ +#define FIFOREAD_DATA_MASK (0xfff << 0) +#define FIFOREAD_CHNLID_MASK (0xf << 16) + +/* Sequencer Status */ +#define SEQ_STATUS BIT(5) + #define ADC_CLK 3000000 #define MAX_CLK_DIV 7 #define TOTAL_STEPS 16 #define TOTAL_CHANNELS 8 +/* +* ADC runs at 3MHz, and it takes +* 15 cycles to latch one data output. +* Hence the idle time for ADC to +* process one sample data would be +* around 5 micro seconds. +*/ +#define IDLE_TIMEOUT 5 /* microsec */ + #define TSCADC_CELLS 2 struct ti_tscadc_dev { -- cgit v1.2.3 From 81913283c80be8c0b7e038c26e2a611ab38394f1 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 28 Jun 2013 05:44:22 -0300 Subject: [media] v4l2: added missing mutex.h include to v4l2-ctrls.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes following error: include/media/v4l2-ctrls.h:193:15: error: field ‘_lock’ has incomplete type include/media/v4l2-ctrls.h: In function ‘v4l2_ctrl_lock’: include/media/v4l2-ctrls.h:570:2: error: implicit declaration of function ‘mutex_lock’ [-Werror=implicit-function-declaration] include/media/v4l2-ctrls.h: In function ‘v4l2_ctrl_unlock’: include/media/v4l2-ctrls.h:579:2: error: implicit declaration of function ‘mutex_unlock’ [-Werror=implicit-function-declaration] Signed-off-by: Andrzej Hajda Signed-off-by: Kyungmin Park Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-ctrls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 7343a27fe819..47ada23345a1 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -22,6 +22,7 @@ #define _V4L2_CTRLS_H #include +#include #include /* forward references */ -- cgit v1.2.3 From 0699a73af3811b66b1ab5650575acee5eea841ab Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 22 Jul 2013 21:32:09 +0200 Subject: firewire: fix libdc1394/FlyCap2 iso event regression Commit 18d627113b83 (firewire: prevent dropping of completed iso packet header data) was intended to be an obvious bug fix, but libdc1394 and FlyCap2 depend on the old behaviour by ignoring all returned information and thus not noticing that not all packets have been received yet. The result was that the video frame buffers would be saved before they contained the correct data. Reintroduce the old behaviour for old clients. Tested-by: Stepan Salenikovich Tested-by: Josep Bosch Cc: # 3.4+ Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- drivers/firewire/core-cdev.c | 3 +++ drivers/firewire/ohci.c | 10 ++++++++-- include/linux/firewire.h | 1 + include/uapi/linux/firewire-cdev.h | 4 ++-- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 7ef316fdc4d9..ac1b43a04285 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -54,6 +54,7 @@ #define FW_CDEV_KERNEL_VERSION 5 #define FW_CDEV_VERSION_EVENT_REQUEST2 4 #define FW_CDEV_VERSION_ALLOCATE_REGION_END 4 +#define FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW 5 struct client { u32 version; @@ -1005,6 +1006,8 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg) a->channel, a->speed, a->header_size, cb, client); if (IS_ERR(context)) return PTR_ERR(context); + if (client->version < FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW) + context->drop_overflow_headers = true; /* We only support one context at this time. */ spin_lock_irq(&client->lock); diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 9e1db6490b9a..afb701ec90ca 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2749,8 +2749,11 @@ static void copy_iso_headers(struct iso_context *ctx, const u32 *dma_hdr) { u32 *ctx_hdr; - if (ctx->header_length + ctx->base.header_size > PAGE_SIZE) + if (ctx->header_length + ctx->base.header_size > PAGE_SIZE) { + if (ctx->base.drop_overflow_headers) + return; flush_iso_completions(ctx); + } ctx_hdr = ctx->header + ctx->header_length; ctx->last_timestamp = (u16)le32_to_cpu((__force __le32)dma_hdr[0]); @@ -2910,8 +2913,11 @@ static int handle_it_packet(struct context *context, sync_it_packet_for_cpu(context, d); - if (ctx->header_length + 4 > PAGE_SIZE) + if (ctx->header_length + 4 > PAGE_SIZE) { + if (ctx->base.drop_overflow_headers) + return 1; flush_iso_completions(ctx); + } ctx_hdr = ctx->header + ctx->header_length; ctx->last_timestamp = le16_to_cpu(last->res_count); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 3b0e820375ab..5d7782e42b8f 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -436,6 +436,7 @@ struct fw_iso_context { int type; int channel; int speed; + bool drop_overflow_headers; size_t header_size; union { fw_iso_callback_t sc; diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h index d50036953497..1db453e4b550 100644 --- a/include/uapi/linux/firewire-cdev.h +++ b/include/uapi/linux/firewire-cdev.h @@ -215,8 +215,8 @@ struct fw_cdev_event_request2 { * with the %FW_CDEV_ISO_INTERRUPT bit set, when explicitly requested with * %FW_CDEV_IOC_FLUSH_ISO, or when there have been so many completed packets * without the interrupt bit set that the kernel's internal buffer for @header - * is about to overflow. (In the last case, kernels with ABI version < 5 drop - * header data up to the next interrupt packet.) + * is about to overflow. (In the last case, ABI versions < 5 drop header data + * up to the next interrupt packet.) * * Isochronous transmit events (context type %FW_CDEV_ISO_CONTEXT_TRANSMIT): * -- cgit v1.2.3 From 148519120c6d1f19ad53349683aeae9f228b0b8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 27 Jul 2013 01:41:34 +0200 Subject: Revert "cpuidle: Quickly notice prediction failure for repeat mode" Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for repeat mode), because it has been identified as the source of a significant performance regression in v3.8 and later as explained by Jeremy Eder: We believe we've identified a particular commit to the cpuidle code that seems to be impacting performance of variety of workloads. The simplest way to reproduce is using netperf TCP_RR test, so we're using that, on a pair of Sandy Bridge based servers. We also have data from a large database setup where performance is also measurably/positively impacted, though that test data isn't easily share-able. Included below are test results from 3 test kernels: kernel reverts ----------------------------------------------------------- 1) vanilla upstream (no reverts) 2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c 3) test reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 e11538d1f03914eb92af5a1a378375c05ae8520c In summary, netperf TCP_RR numbers improve by approximately 4% after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. When 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency never seems to get above 40%. Taking that patch out gets C0 near 100% quite often, and performance increases. The below data are histograms representing the %c0 residency @ 1-second sample rates (using turbostat), while under netperf test. - If you look at the first 4 histograms, you can see %c0 residency almost entirely in the 30,40% bin. - The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4, shows %c0 in the 80,90,100% bins. Below each kernel name are netperf TCP_RR trans/s numbers for the particular kernel that can be disclosed publicly, comparing the 3 test kernels. We ran a 4th test with the vanilla kernel where we've also set /dev/cpu_dma_latency=0 to show overall impact boosting single-threaded TCP_RR performance over 11% above baseline. 3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0): TCP_RR trans/s 54323.78 ----------------------------------------------------------- 3.10-rc2 vanilla RX (no reverts) TCP_RR trans/s 48192.47 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 1]: * 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 49]: ************************************************* 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 perfteam2 RX (reverts commit e11538d1f03914eb92af5a1a378375c05ae8520c) TCP_RR trans/s 49698.69 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 2]: ** 40.0000 - 50.0000 [ 58]: ********************************************************** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 and e11538d1f03914eb92af5a1a378375c05ae8520c) TCP_RR trans/s 47766.95 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 27]: *************************** 40.0000 - 50.0000 [ 2]: ** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 2]: ** 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 28]: **************************** Sender: 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 1]: * 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 3]: *** 80.0000 - 90.0000 [ 7]: ******* 90.0000 - 100.0000 [ 38]: ************************************** These results demonstrate gaining back the tendency of the CPU to stay in more responsive, performant C-states (and thus yield measurably better performance), by reverting commit 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. Requested-by: Jeremy Eder Tested-by: Len Brown Cc: 3.8+ Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 73 +++------------------------------------- include/linux/tick.h | 6 ---- kernel/time/tick-sched.c | 9 ++--- 3 files changed, 6 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b69a87e22155..bc580b67a652 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -28,13 +28,6 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 -/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ -#define MAX_DEVIATION 60 - -static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); -static DEFINE_PER_CPU(int, hrtimer_status); -/* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; /* * Concepts and ideas behind the menu governor @@ -198,42 +191,17 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } -/* Cancel the hrtimer if it is not triggered yet */ -void menu_hrtimer_cancel(void) -{ - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); - - /* The timer is still not time out*/ - if (per_cpu(hrtimer_status, cpu)) { - hrtimer_cancel(hrtmr); - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - } -} -EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); - -/* Call back for hrtimer is triggered */ -static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) -{ - int cpu = smp_processor_id(); - - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - - return HRTIMER_NORESTART; -} - /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static u32 get_typical_interval(struct menu_device *data) +static void get_typical_interval(struct menu_device *data) { int i = 0, divisor = 0; uint64_t max = 0, avg = 0, stddev = 0; int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ - unsigned int ret = 0; again: @@ -274,16 +242,13 @@ again: if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) || stddev <= 20) { data->predicted_us = avg; - ret = 1; - return ret; + return; } else if ((divisor * 4) > INTERVALS * 3) { /* Exclude the max interval */ thresh = max - 1; goto again; } - - return ret; } /** @@ -298,9 +263,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int i; int multiplier; struct timespec t; - int repeat = 0, low_predicted = 0; - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); @@ -335,7 +297,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - repeat = get_typical_interval(data); + get_typical_interval(data); /* * We want to default to C1 (hlt), not to busy polling @@ -356,10 +318,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) { - low_predicted = 1; + if (s->target_residency > data->predicted_us) continue; - } if (s->exit_latency > latency_req) continue; if (s->exit_latency * multiplier > data->predicted_us) @@ -369,28 +329,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->exit_us = s->exit_latency; } - /* not deepest C-state chosen for low predicted residency */ - if (low_predicted) { - unsigned int timer_us = 0; - - /* - * Set a timer to detect whether this sleep is much - * longer than repeat mode predicted. If the timer - * triggers, the code will evaluate whether to put - * the CPU into a deeper C-state. - * The timer is cancelled on CPU wakeup. - */ - timer_us = 2 * (data->predicted_us + MAX_DEVIATION); - - if (repeat && (4 * timer_us < data->expected_us)) { - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In repeat case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; - } - } - return data->last_state_idx; } @@ -481,9 +419,6 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); - struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); - hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->function = menu_hrtimer_notify; memset(data, 0, sizeof(struct menu_device)); diff --git a/include/linux/tick.h b/include/linux/tick.h index 9180f4b85e6d..62bd8b72873c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -174,10 +174,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk) { } #endif -# ifdef CONFIG_CPU_IDLE_GOV_MENU -extern void menu_hrtimer_cancel(void); -# else -static inline void menu_hrtimer_cancel(void) {} -# endif /* CONFIG_CPU_IDLE_GOV_MENU */ - #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e80183f4a6c4..e77edc97e036 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -827,13 +827,10 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); - if (ts->inidle) { - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); + if (ts->inidle) __tick_nohz_idle_enter(ts); - } else { + else tick_nohz_full_stop_tick(ts); - } } /** @@ -931,8 +928,6 @@ void tick_nohz_idle_exit(void) ts->inidle = 0; - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); if (ts->idle_active || ts->tick_stopped) now = ktime_get(); -- cgit v1.2.3 From 2b44c4db2e2f1765d35163a861d301038e0c8a75 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 24 Jul 2013 17:41:33 -0700 Subject: freezer: set PF_SUSPEND_TASK flag on tasks that call freeze_processes Calling freeze_processes sets a global flag that will cause any process that calls try_to_freeze to enter the refrigerator. It skips sending a signal to the current task, but if the current task ever hits try_to_freeze, all threads will be frozen and the system will deadlock. Set a new flag, PF_SUSPEND_TASK, on the task that calls freeze_processes. The flag notifies the freezer that the thread is involved in suspend and should not be frozen. Also add a WARN_ON in thaw_processes if the caller does not have the PF_SUSPEND_TASK flag set to catch if a different task calls thaw_processes than the one that called freeze_processes, leaving a task with PF_SUSPEND_TASK permanently set on it. Threads that spawn off a task with PF_SUSPEND_TASK set (which swsusp does) will also have PF_SUSPEND_TASK set, preventing them from freezing while they are helping with suspend, but they need to be dead by the time suspend is triggered, otherwise they may run when userspace is expected to be frozen. Add a WARN_ON in thaw_processes if more than one thread has the PF_SUSPEND_TASK flag set. Reported-and-tested-by: Michael Leun Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- include/linux/sched.h | 1 + kernel/freezer.c | 2 +- kernel/power/process.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 50d04b92ceda..d722490da030 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1628,6 +1628,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/kernel/freezer.c b/kernel/freezer.c index 8b2afc1c9df0..b462fa197517 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -33,7 +33,7 @@ static DEFINE_SPINLOCK(freezer_lock); */ bool freezing_slow_path(struct task_struct *p) { - if (p->flags & PF_NOFREEZE) + if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK)) return false; if (pm_nosig_freezing || cgroup_freezing(p)) diff --git a/kernel/power/process.c b/kernel/power/process.c index fc0df8486449..06ec8869dbf1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -109,6 +109,8 @@ static int try_to_freeze_tasks(bool user_only) /** * freeze_processes - Signal user space processes to enter the refrigerator. + * The current thread will not be frozen. The same process that calls + * freeze_processes must later call thaw_processes. * * On success, returns 0. On failure, -errno and system is fully thawed. */ @@ -120,6 +122,9 @@ int freeze_processes(void) if (error) return error; + /* Make sure this task doesn't get frozen */ + current->flags |= PF_SUSPEND_TASK; + if (!pm_freezing) atomic_inc(&system_freezing_cnt); @@ -168,6 +173,7 @@ int freeze_kernel_threads(void) void thaw_processes(void) { struct task_struct *g, *p; + struct task_struct *curr = current; if (pm_freezing) atomic_dec(&system_freezing_cnt); @@ -182,10 +188,15 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { + /* No other threads should have PF_SUSPEND_TASK set */ + WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK)); __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); + WARN_ON(!(curr->flags & PF_SUSPEND_TASK)); + curr->flags &= ~PF_SUSPEND_TASK; + usermodehelper_enable(); schedule(); -- cgit v1.2.3 From a838834b2f7cbc09b6319a1fc332c03e4d665b20 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Jul 2013 16:43:55 -0400 Subject: drm: fix 64 bit drm fixed point helpers Sign bit wasn't handled properly and a small typo. Thanks to Christian for helping me sort this out. Signed-off-by: Alex Deucher --- include/drm/drm_fixed.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index f5e1168c7647..d639049a613d 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -84,12 +84,12 @@ static inline int drm_fixp2int(int64_t a) return ((s64)a) >> DRM_FIXED_POINT; } -static inline s64 drm_fixp_msbset(int64_t a) +static inline unsigned drm_fixp_msbset(int64_t a) { unsigned shift, sign = (a >> 63) & 1; for (shift = 62; shift > 0; --shift) - if ((a >> shift) != sign) + if (((a >> shift) & 1) != sign) return shift; return 0; @@ -100,9 +100,9 @@ static inline s64 drm_fixp_mul(s64 a, s64 b) unsigned shift = drm_fixp_msbset(a) + drm_fixp_msbset(b); s64 result; - if (shift > 63) { - shift = shift - 63; - a >>= shift >> 1; + if (shift > 61) { + shift = shift - 61; + a >>= (shift >> 1) + (shift & 1); b >>= shift >> 1; } else shift = 0; @@ -120,7 +120,7 @@ static inline s64 drm_fixp_mul(s64 a, s64 b) static inline s64 drm_fixp_div(s64 a, s64 b) { - unsigned shift = 63 - drm_fixp_msbset(a); + unsigned shift = 62 - drm_fixp_msbset(a); s64 result; a <<= shift; @@ -154,7 +154,7 @@ static inline s64 drm_fixp_exp(s64 x) } if (x < 0) - sum = drm_fixp_div(1, sum); + sum = drm_fixp_div(DRM_FIXED_ONE, sum); return sum; } -- cgit v1.2.3 From 9ea7187c53f63e31f2d1b2b1e474e31808565009 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 31 Jul 2013 01:19:43 +0200 Subject: NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD Loading a firmware into a target is typically called firmware download, not firmware upload. So we rename the netlink API to NFC_CMD_FW_DOWNLOAD in order to avoid any terminology confusion from userspace. Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 2 +- include/net/nfc/nfc.h | 4 ++-- include/uapi/linux/nfc.h | 6 +++--- net/nfc/core.c | 20 ++++++++++---------- net/nfc/hci/core.c | 8 ++++---- net/nfc/netlink.c | 12 ++++++------ net/nfc/nfc.h | 6 +++--- 7 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 0af851c3b038..b64b7bce4b94 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -59,7 +59,7 @@ struct nfc_hci_ops { struct nfc_target *target); int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, struct sk_buff *skb); - int (*fw_upload)(struct nfc_hci_dev *hdev, const char *firmware_name); + int (*fw_download)(struct nfc_hci_dev *hdev, const char *firmware_name); int (*discover_se)(struct nfc_hci_dev *dev); int (*enable_se)(struct nfc_hci_dev *dev, u32 se_idx); int (*disable_se)(struct nfc_hci_dev *dev, u32 se_idx); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 0e353f1658bb..5f286b726bb6 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -68,7 +68,7 @@ struct nfc_ops { void *cb_context); int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb); int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); - int (*fw_upload)(struct nfc_dev *dev, const char *firmware_name); + int (*fw_download)(struct nfc_dev *dev, const char *firmware_name); /* Secure Element API */ int (*discover_se)(struct nfc_dev *dev); @@ -127,7 +127,7 @@ struct nfc_dev { int targets_generation; struct device dev; bool dev_up; - bool fw_upload_in_progress; + bool fw_download_in_progress; u8 rf_mode; bool polling; struct nfc_target *active_target; diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index caed0f324d5f..8137dd8d2adf 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -69,8 +69,8 @@ * starting a poll from a device which has a secure element enabled means * we want to do SE based card emulation. * @NFC_CMD_DISABLE_SE: Disable the physical link to a specific secure element. - * @NFC_CMD_FW_UPLOAD: Request to Load/flash firmware, or event to inform that - * some firmware was loaded + * @NFC_CMD_FW_DOWNLOAD: Request to Load/flash firmware, or event to inform + * that some firmware was loaded */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -94,7 +94,7 @@ enum nfc_commands { NFC_CMD_DISABLE_SE, NFC_CMD_LLC_SDREQ, NFC_EVENT_LLC_SDRES, - NFC_CMD_FW_UPLOAD, + NFC_CMD_FW_DOWNLOAD, NFC_EVENT_SE_ADDED, NFC_EVENT_SE_REMOVED, /* private: internal use only */ diff --git a/net/nfc/core.c b/net/nfc/core.c index dc96a83aa6ab..1d074dd1650f 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -44,7 +44,7 @@ DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); -int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) { int rc = 0; @@ -62,28 +62,28 @@ int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) goto error; } - if (!dev->ops->fw_upload) { + if (!dev->ops->fw_download) { rc = -EOPNOTSUPP; goto error; } - dev->fw_upload_in_progress = true; - rc = dev->ops->fw_upload(dev, firmware_name); + dev->fw_download_in_progress = true; + rc = dev->ops->fw_download(dev, firmware_name); if (rc) - dev->fw_upload_in_progress = false; + dev->fw_download_in_progress = false; error: device_unlock(&dev->dev); return rc; } -int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name) { - dev->fw_upload_in_progress = false; + dev->fw_download_in_progress = false; - return nfc_genl_fw_upload_done(dev, firmware_name); + return nfc_genl_fw_download_done(dev, firmware_name); } -EXPORT_SYMBOL(nfc_fw_upload_done); +EXPORT_SYMBOL(nfc_fw_download_done); /** * nfc_dev_up - turn on the NFC device @@ -110,7 +110,7 @@ int nfc_dev_up(struct nfc_dev *dev) goto error; } - if (dev->fw_upload_in_progress) { + if (dev->fw_download_in_progress) { rc = -EBUSY; goto error; } diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 7b1c186736eb..fe66908401f5 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -809,14 +809,14 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) } } -static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name) +static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (!hdev->ops->fw_upload) + if (!hdev->ops->fw_download) return -ENOTSUPP; - return hdev->ops->fw_upload(hdev, firmware_name); + return hdev->ops->fw_download(hdev, firmware_name); } static struct nfc_ops hci_nfc_ops = { @@ -831,7 +831,7 @@ static struct nfc_ops hci_nfc_ops = { .im_transceive = hci_transceive, .tm_send = hci_tm_send, .check_presence = hci_check_presence, - .fw_upload = hci_fw_upload, + .fw_download = hci_fw_download, .discover_se = hci_discover_se, .enable_se = hci_enable_se, .disable_se = hci_disable_se, diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index b05ad909778f..f16fd59d4160 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1089,7 +1089,7 @@ exit: return rc; } -static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) +static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; @@ -1108,13 +1108,13 @@ static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], sizeof(firmware_name)); - rc = nfc_fw_upload(dev, firmware_name); + rc = nfc_fw_download(dev, firmware_name); nfc_put_device(dev); return rc; } -int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name) { struct sk_buff *msg; void *hdr; @@ -1124,7 +1124,7 @@ int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, - NFC_CMD_FW_UPLOAD); + NFC_CMD_FW_DOWNLOAD); if (!hdr) goto free_msg; @@ -1251,8 +1251,8 @@ static struct genl_ops nfc_genl_ops[] = { .policy = nfc_genl_policy, }, { - .cmd = NFC_CMD_FW_UPLOAD, - .doit = nfc_genl_fw_upload, + .cmd = NFC_CMD_FW_DOWNLOAD, + .doit = nfc_genl_fw_download, .policy = nfc_genl_policy, }, { diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index ee85a1fc1b24..820a7850c36a 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -123,10 +123,10 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter) class_dev_iter_exit(iter); } -int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name); -int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name); +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name); -int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name); int nfc_dev_up(struct nfc_dev *dev); -- cgit v1.2.3 From 2816c551c796ec14620325b2c9ed75b9979d3125 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 29 Jul 2013 19:50:33 +0200 Subject: tracing: trace_remove_event_call() should fail if call/file is in use Change trace_remove_event_call(call) to return the error if this call is active. This is what the callers assume but can't verify outside of the tracing locks. Both trace_kprobe.c/trace_uprobe.c need the additional changes, unregister_trace_probe() should abort if trace_remove_event_call() fails. The caller is going to free this call/file so we must ensure that nobody can use them after trace_remove_event_call() succeeds. debugfs should be fine after the previous changes and event_remove() does TRACE_REG_UNREGISTER, but still there are 2 reasons why we need the additional checks: - There could be a perf_event(s) attached to this tp_event, so the patch checks ->perf_refcount. - TRACE_REG_UNREGISTER can be suppressed by FTRACE_EVENT_FL_SOFT_MODE, so we simply check FTRACE_EVENT_FL_ENABLED protected by event_mutex. Link: http://lkml.kernel.org/r/20130729175033.GB26284@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace_events.c | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4372658c73ae..f98ab063e95e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -332,7 +332,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); extern int trace_add_event_call(struct ftrace_event_call *call); -extern void trace_remove_event_call(struct ftrace_event_call *call); +extern int trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a67c913e2f9f..ec04836273c0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1713,16 +1713,47 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) destroy_preds(call); } +static int probe_remove_event_call(struct ftrace_event_call *call) +{ + struct trace_array *tr; + struct ftrace_event_file *file; + +#ifdef CONFIG_PERF_EVENTS + if (call->perf_refcount) + return -EBUSY; +#endif + do_for_each_event_file(tr, file) { + if (file->event_call != call) + continue; + /* + * We can't rely on ftrace_event_enable_disable(enable => 0) + * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress + * TRACE_REG_UNREGISTER. + */ + if (file->flags & FTRACE_EVENT_FL_ENABLED) + return -EBUSY; + break; + } while_for_each_event_file(); + + __trace_remove_event_call(call); + + return 0; +} + /* Remove an event_call */ -void trace_remove_event_call(struct ftrace_event_call *call) +int trace_remove_event_call(struct ftrace_event_call *call) { + int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); - __trace_remove_event_call(call); + ret = probe_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + + return ret; } #define for_each_event(event, start, end) \ -- cgit v1.2.3 From cd23b14b654769db83c9684ae1ba32c0e066670f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 18 Jul 2013 15:31:08 +0300 Subject: mlx5_core: Implement new initialization sequence Introduce enbale_hca and disable_hca commands to signify when the driver starts or ceases to operate on the device. In addition the driver will use boot and init pages count; boot pages is required to allow firmware to complete boot commands and the other to complete init hca. Command interface revision is bumped to 4 to enforce using supported firmware. This patch breaks compatibility with old versions of firmware (< 4); however, the first GA firmware we will publish will support version 4 so this should not be a problem. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 8 ++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 69 +++++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 20 +++++-- include/linux/mlx5/device.h | 20 +++++++ include/linux/mlx5/driver.h | 4 +- 5 files changed, 106 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 40374063c01e..c571de85d0f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -46,7 +46,7 @@ #include "mlx5_core.h" enum { - CMD_IF_REV = 3, + CMD_IF_REV = 4, }; enum { @@ -282,6 +282,12 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_TEARDOWN_HCA: return "TEARDOWN_HCA"; + case MLX5_CMD_OP_ENABLE_HCA: + return "MLX5_CMD_OP_ENABLE_HCA"; + + case MLX5_CMD_OP_DISABLE_HCA: + return "MLX5_CMD_OP_DISABLE_HCA"; + case MLX5_CMD_OP_QUERY_PAGES: return "QUERY_PAGES"; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 12242de2b0e3..b47739b0b5f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -249,6 +249,44 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } +static int mlx5_core_enable_hca(struct mlx5_core_dev *dev) +{ + int err; + struct mlx5_enable_hca_mbox_in in; + struct mlx5_enable_hca_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return 0; +} + +static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) +{ + int err; + struct mlx5_disable_hca_mbox_in in; + struct mlx5_disable_hca_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return 0; +} + int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) { struct mlx5_priv *priv = &dev->priv; @@ -304,28 +342,41 @@ int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) } mlx5_pagealloc_init(dev); + + err = mlx5_core_enable_hca(dev); + if (err) { + dev_err(&pdev->dev, "enable hca failed\n"); + goto err_pagealloc_cleanup; + } + + err = mlx5_satisfy_startup_pages(dev, 1); + if (err) { + dev_err(&pdev->dev, "failed to allocate boot pages\n"); + goto err_disable_hca; + } + err = set_hca_ctrl(dev); if (err) { dev_err(&pdev->dev, "set_hca_ctrl failed\n"); - goto err_pagealloc_cleanup; + goto reclaim_boot_pages; } err = handle_hca_cap(dev); if (err) { dev_err(&pdev->dev, "handle_hca_cap failed\n"); - goto err_pagealloc_cleanup; + goto reclaim_boot_pages; } - err = mlx5_satisfy_startup_pages(dev); + err = mlx5_satisfy_startup_pages(dev, 0); if (err) { - dev_err(&pdev->dev, "failed to allocate startup pages\n"); - goto err_pagealloc_cleanup; + dev_err(&pdev->dev, "failed to allocate init pages\n"); + goto reclaim_boot_pages; } err = mlx5_pagealloc_start(dev); if (err) { dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); - goto err_reclaim_pages; + goto reclaim_boot_pages; } err = mlx5_cmd_init_hca(dev); @@ -396,9 +447,12 @@ err_stop_poll: err_pagealloc_stop: mlx5_pagealloc_stop(dev); -err_reclaim_pages: +reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev); + err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); @@ -434,6 +488,7 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev) mlx5_cmd_teardown_hca(dev); mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev); mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); iounmap(dev->iseg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index f0bf46339b28..4a3e137931a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -64,7 +64,7 @@ struct mlx5_query_pages_inbox { struct mlx5_query_pages_outbox { struct mlx5_outbox_hdr hdr; - u8 reserved[2]; + __be16 num_boot_pages; __be16 func_id; __be16 init_pages; __be16 num_pages; @@ -146,7 +146,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr) } static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, - s16 *pages, s16 *init_pages) + s16 *pages, s16 *init_pages, u16 *boot_pages) { struct mlx5_query_pages_inbox in; struct mlx5_query_pages_outbox out; @@ -164,8 +164,13 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, if (pages) *pages = be16_to_cpu(out.num_pages); + if (init_pages) *init_pages = be16_to_cpu(out.init_pages); + + if (boot_pages) + *boot_pages = be16_to_cpu(out.num_boot_pages); + *func_id = be16_to_cpu(out.func_id); return err; @@ -357,19 +362,22 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, queue_work(dev->priv.pg_wq, &req->work); } -int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev) +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) { + u16 uninitialized_var(boot_pages); s16 uninitialized_var(init_pages); u16 uninitialized_var(func_id); int err; - err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages); + err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages, + &boot_pages); if (err) return err; - mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id); - return give_pages(dev, func_id, init_pages, 0); + mlx5_core_dbg(dev, "requested %d init pages and %d boot pages for func_id 0x%x\n", + init_pages, boot_pages, func_id); + return give_pages(dev, func_id, boot ? boot_pages : init_pages, 0); } static int optimal_reclaimed_pages(void) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8de8d8f22384..737685e9e852 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -690,6 +690,26 @@ struct mlx5_query_cq_mbox_out { __be64 pas[0]; }; +struct mlx5_enable_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_enable_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_disable_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_disable_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + struct mlx5_eq_context { u8 status; u8 ec_oi; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f22e4419839b..2aa258b0ced1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -101,6 +101,8 @@ enum { MLX5_CMD_OP_QUERY_ADAPTER = 0x101, MLX5_CMD_OP_INIT_HCA = 0x102, MLX5_CMD_OP_TEARDOWN_HCA = 0x103, + MLX5_CMD_OP_ENABLE_HCA = 0x104, + MLX5_CMD_OP_DISABLE_HCA = 0x105, MLX5_CMD_OP_QUERY_PAGES = 0x107, MLX5_CMD_OP_MANAGE_PAGES = 0x108, MLX5_CMD_OP_SET_HCA_CAP = 0x109, @@ -690,7 +692,7 @@ int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s16 npages); -int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev); +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -- cgit v1.2.3 From 22f2020f84c6da2dd0acb2dce12e39e59ff7c8be Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:48 -0700 Subject: vmpressure: change vmpressure::sr_lock to spinlock There is nothing that can sleep inside critical sections protected by this lock and those sections are really small so there doesn't make much sense to use mutex for them. Change the log to a spinlock Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Reviewed-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmpressure.h | 2 +- mm/vmpressure.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 76be077340ea..2081680e015d 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -12,7 +12,7 @@ struct vmpressure { unsigned long scanned; unsigned long reclaimed; /* The lock is used to keep the scanned/reclaimed above in sync. */ - struct mutex sr_lock; + struct spinlock sr_lock; /* The list of vmpressure_event structs. */ struct list_head events; diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 736a6011c2c8..f4ee6a190a4d 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -180,12 +180,12 @@ static void vmpressure_work_fn(struct work_struct *work) if (!vmpr->scanned) return; - mutex_lock(&vmpr->sr_lock); + spin_lock(&vmpr->sr_lock); scanned = vmpr->scanned; reclaimed = vmpr->reclaimed; vmpr->scanned = 0; vmpr->reclaimed = 0; - mutex_unlock(&vmpr->sr_lock); + spin_unlock(&vmpr->sr_lock); do { if (vmpressure_event(vmpr, scanned, reclaimed)) @@ -240,11 +240,11 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, if (!scanned) return; - mutex_lock(&vmpr->sr_lock); + spin_lock(&vmpr->sr_lock); vmpr->scanned += scanned; vmpr->reclaimed += reclaimed; scanned = vmpr->scanned; - mutex_unlock(&vmpr->sr_lock); + spin_unlock(&vmpr->sr_lock); if (scanned < vmpressure_win || work_pending(&vmpr->work)) return; @@ -367,7 +367,7 @@ void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, */ void vmpressure_init(struct vmpressure *vmpr) { - mutex_init(&vmpr->sr_lock); + spin_lock_init(&vmpr->sr_lock); mutex_init(&vmpr->events_lock); INIT_LIST_HEAD(&vmpr->events); INIT_WORK(&vmpr->work, vmpressure_work_fn); -- cgit v1.2.3 From 33cb876e947b9ddda8dca3fb99234b743a597ef9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:51 -0700 Subject: vmpressure: make sure there are no events queued after memcg is offlined vmpressure is called synchronously from reclaim where the target_memcg is guaranteed to be alive but the eventfd is signaled from the work queue context. This means that memcg (along with vmpressure structure which is embedded into it) might go away while the work item is pending which would result in use-after-release bug. We have two possible ways how to fix this. Either vmpressure pins memcg before it schedules vmpr->work and unpin it in vmpressure_work_fn or explicitely flush the work item from the css_offline context (as suggested by Tejun). This patch implements the later one and it introduces vmpressure_cleanup which flushes the vmpressure work queue item item. It hooks into mem_cgroup_css_offline after the memcg itself is cleaned up. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmpressure.h | 1 + mm/memcontrol.c | 1 + mm/vmpressure.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 2081680e015d..7dc17e2456de 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -30,6 +30,7 @@ extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); extern void vmpressure_init(struct vmpressure *vmpr); +extern void vmpressure_cleanup(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 00a7a664b9c1..c290a1cf3862 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6335,6 +6335,7 @@ static void mem_cgroup_css_offline(struct cgroup *cont) mem_cgroup_invalidate_reclaim_iterators(memcg); mem_cgroup_reparent_charges(memcg); mem_cgroup_destroy_all_caches(memcg); + vmpressure_cleanup(&memcg->vmpressure); } static void mem_cgroup_css_free(struct cgroup *cont) diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 192f9731931d..0c1e37d829fa 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -372,3 +372,19 @@ void vmpressure_init(struct vmpressure *vmpr) INIT_LIST_HEAD(&vmpr->events); INIT_WORK(&vmpr->work, vmpressure_work_fn); } + +/** + * vmpressure_cleanup() - shuts down vmpressure control structure + * @vmpr: Structure to be cleaned up + * + * This function should be called before the structure in which it is + * embedded is cleaned up. + */ +void vmpressure_cleanup(struct vmpressure *vmpr) +{ + /* + * Make sure there is no pending work before eventfd infrastructure + * goes away. + */ + flush_work(&vmpr->work); +} -- cgit v1.2.3 From d9d10a30964504af834d8d250a0c76d4ae91eb1e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 30 Jul 2013 10:31:00 -0700 Subject: ndisc: Add missing inline to ndisc_addr_option_pad Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/ndisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 949d77528f2f..6fea32340ae8 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -119,7 +119,7 @@ extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may * also need a pad of 2. */ -static int ndisc_addr_option_pad(unsigned short type) +static inline int ndisc_addr_option_pad(unsigned short type) { switch (type) { case ARPHRD_INFINIBAND: return 2; -- cgit v1.2.3 From 2ac3ac8f86f2fe065d746d9a9abaca867adec577 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Thu, 1 Aug 2013 10:04:14 +0200 Subject: ipv6: prevent fib6_run_gc() contention On a high-traffic router with many processors and many IPv6 dst entries, soft lockup in fib6_run_gc() can occur when number of entries reaches gc_thresh. This happens because fib6_run_gc() uses fib6_gc_lock to allow only one thread to run the garbage collector but ip6_dst_gc() doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() returns. On a system with many entries, this can take some time so that in the meantime, other threads pass the tests in ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for the lock. They then have to run the garbage collector one after another which blocks them for quite long. Resolve this by replacing special value ~0UL of expire parameter to fib6_run_gc() by explicit "force" parameter to choose between spin_lock_bh() and spin_trylock_bh() and call fib6_run_gc() with force=false if gc_thresh is reached but not max_size. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 +- net/ipv6/ip6_fib.c | 19 ++++++++----------- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 4 ++-- 4 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 2a601e7da1bf..48ec25a7fcb6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -300,7 +300,7 @@ extern void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); extern void fib6_run_gc(unsigned long expires, - struct net *net); + struct net *net, bool force); extern void fib6_gc_cleanup(void); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5fc9c7a68d8d..d872553ca933 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1632,19 +1632,16 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long expires, struct net *net) +void fib6_run_gc(unsigned long expires, struct net *net, bool force) { - if (expires != ~0UL) { + if (force) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = expires ? (int)expires : - net->ipv6.sysctl.ip6_rt_gc_interval; - } else { - if (!spin_trylock_bh(&fib6_gc_lock)) { - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); - return; - } - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; + } else if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); + return; } + gc_args.timeout = expires ? (int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = icmp6_dst_gc(); @@ -1661,7 +1658,7 @@ void fib6_run_gc(unsigned long expires, struct net *net) static void fib6_gc_timer_cb(unsigned long arg) { - fib6_run_gc(0, (struct net *)arg); + fib6_run_gc(0, (struct net *)arg, true); } static int __net_init fib6_net_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 24c03396e008..79aa9652ed86 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1576,7 +1576,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); idev = in6_dev_get(dev); if (!idev) break; @@ -1586,7 +1586,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a8c891aa2464..824c424f9648 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1326,7 +1326,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) @@ -2827,7 +2827,7 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } -- cgit v1.2.3 From dfcefb0be1231982784df2152213103ad33c1cfd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:24 +0800 Subject: net: fix a compile error when CONFIG_NET_LL_RX_POLL is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_NET_LL_RX_POLL is not set, I got: net/socket.c: In function ‘sock_poll’: net/socket.c:1165:4: error: implicit declaration of function ‘sk_busy_loop’ [-Werror=implicit-function-declaration] Fix this by adding a nop when !CONFIG_NET_LL_RX_POLL. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/busy_poll.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index a14339c2985f..6cd8848fec68 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -181,5 +181,10 @@ static inline bool busy_loop_timeout(unsigned long end_time) return true; } +static inline bool sk_busy_loop(struct sock *sk, int nonblock) +{ + return false; +} + #endif /* CONFIG_NET_LL_RX_POLL */ #endif /* _LINUX_NET_BUSY_POLL_H */ -- cgit v1.2.3 From e0d1095ae3405404d247afb00233ef837d58da83 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:25 +0800 Subject: net: rename CONFIG_NET_LL_RX_POLL to CONFIG_NET_RX_BUSY_POLL Eliezer renames several *ll_poll to *busy_poll, but forgets CONFIG_NET_LL_RX_POLL, so in case of confusion, rename it too. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 8 ++++---- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 12 ++++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 10 +++++----- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 2 +- include/net/busy_poll.h | 6 +++--- include/net/sock.h | 2 +- net/Kconfig | 2 +- net/core/skbuff.c | 2 +- net/core/sock.c | 6 +++--- net/core/sysctl_net_core.c | 2 +- net/socket.c | 2 +- 18 files changed, 41 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 1c15043aaee4..d569f2a424d5 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -52,7 +52,7 @@ Default: 64 busy_read ---------------- -Low latency busy poll timeout for socket reads. (needs CONFIG_NET_LL_RX_POLL) +Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL) Approximate time in us to busy loop waiting for packets on the device queue. This sets the default value of the SO_BUSY_POLL socket option. Can be set or overridden per socket by setting socket option SO_BUSY_POLL, @@ -63,7 +63,7 @@ Default: 0 (off) busy_poll ---------------- -Low latency busy poll timeout for poll and select. (needs CONFIG_NET_LL_RX_POLL) +Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL) Approximate time in us to busy loop waiting for events. Recommended value depends on the number of sockets you poll on. For several sockets 50, for several hundreds 100. diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index dedbd76c033e..d80e34b8285f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -486,7 +486,7 @@ struct bnx2x_fastpath { struct napi_struct napi; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define BNX2X_FP_STATE_IDLE 0 #define BNX2X_FP_STATE_NAPI (1 << 0) /* NAPI owns this FP */ @@ -498,7 +498,7 @@ struct bnx2x_fastpath { #define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD) /* protect state */ spinlock_t lock; -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ union host_hc_status_block status_blk; /* chip independent shortcuts into sb structure */ @@ -572,7 +572,7 @@ struct bnx2x_fastpath { #define bnx2x_fp_stats(bp, fp) (&((bp)->fp_stats[(fp)->index])) #define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats)) -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp) { spin_lock_init(&fp->lock); @@ -680,7 +680,7 @@ static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) { return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ /* Use 2500 as a mini-jumbo MTU for FCoE */ #define BNX2X_FCOE_MINI_JUMBO_MTU 2500 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ee350bde1818..f2d1ff10054b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3117,7 +3117,7 @@ int bnx2x_poll(struct napi_struct *napi, int budget) return work_done; } -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ int bnx2x_low_latency_recv(struct napi_struct *napi) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index e5da07858a2f..e06186c305d8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12026,7 +12026,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_fcoe_get_wwn = bnx2x_fcoe_get_wwn, #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = bnx2x_low_latency_recv, #endif }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 7be725cdfea8..a6494e5daffe 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -54,7 +54,7 @@ #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL #define LL_EXTENDED_STATS #endif /* common prefix used by pr_<> macros */ @@ -366,7 +366,7 @@ struct ixgbe_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define IXGBE_QV_STATE_IDLE 0 #define IXGBE_QV_STATE_NAPI 1 /* NAPI owns this QV */ @@ -377,12 +377,12 @@ struct ixgbe_q_vector { #define IXGBE_QV_YIELD (IXGBE_QV_STATE_NAPI_YIELD | IXGBE_QV_STATE_POLL_YIELD) #define IXGBE_QV_USER_PEND (IXGBE_QV_STATE_POLL | IXGBE_QV_STATE_POLL_YIELD) spinlock_t lock; -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ /* for dynamic allocation of rings associated with this q_vector */ struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp; }; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector) { @@ -462,7 +462,7 @@ static inline bool ixgbe_qv_ll_polling(struct ixgbe_q_vector *q_vector) WARN_ON(!(q_vector->state & IXGBE_QV_LOCKED)); return q_vector->state & IXGBE_QV_USER_PEND; } -#else /* CONFIG_NET_LL_RX_POLL */ +#else /* CONFIG_NET_RX_BUSY_POLL */ static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector) { } @@ -491,7 +491,7 @@ static inline bool ixgbe_qv_ll_polling(struct ixgbe_q_vector *q_vector) { return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_IXGBE_HWMON diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bad8f14b1941..be4b1fb3d0d2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1998,7 +1998,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, return total_rx_packets; } -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int ixgbe_low_latency_recv(struct napi_struct *napi) { @@ -2030,7 +2030,7 @@ static int ixgbe_low_latency_recv(struct napi_struct *napi) return found; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ /** * ixgbe_configure_msix - Configure MSI-X hardware @@ -7227,7 +7227,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = ixgbe_low_latency_recv, #endif #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 727874f575ce..a28cd801a236 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -223,7 +223,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return (priv->stats_bitmap ? bit_count : NUM_ALL_STATS) + (priv->tx_ring_num * 2) + -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL (priv->rx_ring_num * 5); #else (priv->rx_ring_num * 2); @@ -276,7 +276,7 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i].packets; data[index++] = priv->rx_ring[i].bytes; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL data[index++] = priv->rx_ring[i].yields; data[index++] = priv->rx_ring[i].misses; data[index++] = priv->rx_ring[i].cleaned; @@ -344,7 +344,7 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_napi_yield", i); sprintf(data + (index++) * ETH_GSTRING_LEN, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5eac871399d8..fa37b7a61213 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -68,7 +68,7 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int mlx4_en_low_latency_recv(struct napi_struct *napi) { @@ -94,7 +94,7 @@ static int mlx4_en_low_latency_recv(struct napi_struct *napi) return done; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_RFS_ACCEL @@ -2140,7 +2140,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = mlx4_en_low_latency_recv, #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 35fb60e2320c..5e0aa569306a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -292,7 +292,7 @@ struct mlx4_en_rx_ring { void *rx_info; unsigned long bytes; unsigned long packets; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned long yields; unsigned long misses; unsigned long cleaned; @@ -318,7 +318,7 @@ struct mlx4_en_cq { struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define MLX4_EN_CQ_STATE_IDLE 0 #define MLX4_EN_CQ_STATE_NAPI 1 /* NAPI owns this CQ */ @@ -329,7 +329,7 @@ struct mlx4_en_cq { #define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD) #define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) spinlock_t poll_lock; /* protects from LLS/napi conflicts */ -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ }; struct mlx4_en_port_profile { @@ -580,7 +580,7 @@ struct mlx4_mac_entry { struct rcu_head rcu; }; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) { spin_lock_init(&cq->poll_lock); @@ -687,7 +687,7 @@ static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) { return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0741a1e919a5..9a4156845e93 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -973,7 +973,7 @@ struct net_device_ops { gfp_t gfp); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL int (*ndo_busy_poll)(struct napi_struct *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5afefa01a13c..3b71a4e83642 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -501,7 +501,7 @@ struct sk_buff { /* 7/9 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); -#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL +#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL union { unsigned int napi_id; dma_cookie_t dma_cookie; diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 6cd8848fec68..f18b91966d3d 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -27,7 +27,7 @@ #include #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; @@ -146,7 +146,7 @@ static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) sk->sk_napi_id = skb->napi_id; } -#else /* CONFIG_NET_LL_RX_POLL */ +#else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { return 0; @@ -186,5 +186,5 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 95a5a2c6925a..31d5cfbb51ec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -327,7 +327,7 @@ struct sock { #ifdef CONFIG_RPS __u32 sk_rxhash; #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_napi_id; unsigned int sk_ll_usec; #endif diff --git a/net/Kconfig b/net/Kconfig index 37702491abe9..2b406608a1a4 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -244,7 +244,7 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis -config NET_LL_RX_POLL +config NET_RX_BUSY_POLL boolean default y diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3df4d4ccf440..2c3d0f53d198 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -740,7 +740,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_copy_secmark(new, old); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL new->napi_id = old->napi_id; #endif } diff --git a/net/core/sock.c b/net/core/sock.c index 548d716c5f62..2c097c5a35dd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -900,7 +900,7 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) @@ -1170,7 +1170,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: v.val = sk->sk_ll_usec; break; @@ -2292,7 +2292,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; sk->sk_ll_usec = sysctl_net_busy_read; #endif diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 660968616637..b59b6804fd98 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -298,7 +298,7 @@ static struct ctl_table net_core_table[] = { .proc_handler = flow_limit_table_len_sysctl }, #endif /* CONFIG_NET_FLOW_LIMIT */ -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL { .procname = "busy_poll", .data = &sysctl_net_busy_poll, diff --git a/net/socket.c b/net/socket.c index 829b460acb87..b2d7c629eeb9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -106,7 +106,7 @@ #include #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -- cgit v1.2.3 From ed5467da0e369e65b247b99eb6403cb79172bcda Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Fri, 2 Aug 2013 21:16:43 +0400 Subject: tracing: Fix fields of struct trace_iterator that are zeroed by mistake tracing_read_pipe zeros all fields bellow "seq". The declaration contains a comment about that, but it doesn't help. The first field is "snapshot", it's true when current open file is snapshot. Looks obvious, that it should not be zeroed. The second field is "started". It was converted from cpumask_t to cpumask_var_t (v2.6.28-4983-g4462344), in other words it was converted from cpumask to pointer on cpumask. Currently the reference on "started" memory is lost after the first read from tracing_read_pipe and a proper object will never be freed. The "started" is never dereferenced for trace_pipe, because trace_pipe can't have the TRACE_FILE_ANNOTATE options. Link: http://lkml.kernel.org/r/1375463803-3085183-1-git-send-email-avagin@openvz.org Cc: stable@vger.kernel.org # 2.6.30 Signed-off-by: Andrew Vagin Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 10 ++++++---- kernel/trace/trace.c | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f98ab063e95e..120d57a1c3a5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -78,6 +78,11 @@ struct trace_iterator { /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; + cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; + /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; @@ -90,10 +95,7 @@ struct trace_iterator { loff_t pos; long idx; - cpumask_var_t started; - - /* it's true when current open file is snapshot */ - bool snapshot; + /* All new field here will be zeroed out in pipe_read */ }; enum trace_iter_flags { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 882ec1dd1515..f5b35a5e852f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4151,6 +4151,7 @@ waitagain: memset(&iter->seq, 0, sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); + cpumask_clear(iter->started); iter->pos = -1; trace_event_read_lock(); -- cgit v1.2.3 From e67bc51e574ffe3c4bc1e09cab7658b1e780b4ce Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Fri, 2 Aug 2013 14:47:29 -0400 Subject: tracing: Fix trace_dump_stack() proto when CONFIG_TRACING is not set When CONFIG_TRACING is not enabled, the stub prototype for trace_dump_stack() is incorrect. It has (void) when it should be (int). Link: http://lkml.kernel.org/r/CAPhKKr_H=ukFnBL4WgDOVT5ay2xeF-Ho+CA0DWZX0E2JW-=vSQ@mail.gmail.com Signed-off-by: Dhaval Giani Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3bef14c6586b..482ad2d84a32 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -629,7 +629,7 @@ extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } -static inline void trace_dump_stack(void) { } +static inline void trace_dump_stack(int skip) { } static inline void tracing_on(void) { } static inline void tracing_off(void) { } -- cgit v1.2.3 From 007ccfcf89401e764c33965b739310d86a94626d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Aug 2013 14:32:54 +0200 Subject: ACPI: Drop physical_node_id_bitmap from struct acpi_device The physical_node_id_bitmap in struct acpi_device is only used for looking up the first currently unused dependent phyiscal node ID by acpi_bind_one(). It is not really necessary, however, because acpi_bind_one() walks the entire physical_node_list of the given device object for sanity checking anyway and if that list is always sorted by node_id, it is straightforward to find the first gap between the currently used node IDs and use that number as the ID of the new list node. This also removes the artificial limit of the maximum number of dependent physical devices per ACPI device object, which now depends only on the capacity of unsigend int. As a result, it fixes a regression introduced by commit e2ff394 (ACPI / memhotplug: Bind removable memory blocks to ACPI device nodes) that caused acpi_memory_enable_device() to fail when the number of 128 MB blocks within one removable memory module was greater than 32. Reported-and-tested-by: Yasuaki Ishimatsu Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani Reviewed-by: Yasuaki Ishimatsu --- drivers/acpi/glue.c | 34 +++++++++++++++++++--------------- include/acpi/acpi_bus.h | 8 ++------ 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index f68095756fb7..17e15d11bd39 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -31,6 +31,7 @@ static LIST_HEAD(bus_type_list); static DECLARE_RWSEM(bus_type_sem); #define PHYSICAL_NODE_STRING "physical_node" +#define PHYSICAL_NODE_NAME_SIZE (sizeof(PHYSICAL_NODE_STRING) + 10) int register_acpi_bus_type(struct acpi_bus_type *type) { @@ -112,7 +113,9 @@ int acpi_bind_one(struct device *dev, acpi_handle handle) struct acpi_device *acpi_dev; acpi_status status; struct acpi_device_physical_node *physical_node, *pn; - char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2]; + char physical_node_name[PHYSICAL_NODE_NAME_SIZE]; + struct list_head *physnode_list; + unsigned int node_id; int retval = -EINVAL; if (ACPI_HANDLE(dev)) { @@ -139,25 +142,27 @@ int acpi_bind_one(struct device *dev, acpi_handle handle) mutex_lock(&acpi_dev->physical_node_lock); - /* Sanity check. */ - list_for_each_entry(pn, &acpi_dev->physical_node_list, node) + /* + * Keep the list sorted by node_id so that the IDs of removed nodes can + * be recycled easily. + */ + physnode_list = &acpi_dev->physical_node_list; + node_id = 0; + list_for_each_entry(pn, &acpi_dev->physical_node_list, node) { + /* Sanity check. */ if (pn->dev == dev) { dev_warn(dev, "Already associated with ACPI node\n"); goto err_free; } - - /* allocate physical node id according to physical_node_id_bitmap */ - physical_node->node_id = - find_first_zero_bit(acpi_dev->physical_node_id_bitmap, - ACPI_MAX_PHYSICAL_NODE); - if (physical_node->node_id >= ACPI_MAX_PHYSICAL_NODE) { - retval = -ENOSPC; - goto err_free; + if (pn->node_id == node_id) { + physnode_list = &pn->node; + node_id++; + } } - set_bit(physical_node->node_id, acpi_dev->physical_node_id_bitmap); + physical_node->node_id = node_id; physical_node->dev = dev; - list_add_tail(&physical_node->node, &acpi_dev->physical_node_list); + list_add(&physical_node->node, physnode_list); acpi_dev->physical_node_count++; mutex_unlock(&acpi_dev->physical_node_lock); @@ -208,7 +213,7 @@ int acpi_unbind_one(struct device *dev) mutex_lock(&acpi_dev->physical_node_lock); list_for_each_safe(node, next, &acpi_dev->physical_node_list) { - char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2]; + char physical_node_name[PHYSICAL_NODE_NAME_SIZE]; entry = list_entry(node, struct acpi_device_physical_node, node); @@ -216,7 +221,6 @@ int acpi_unbind_one(struct device *dev) continue; list_del(node); - clear_bit(entry->node_id, acpi_dev->physical_node_id_bitmap); acpi_dev->physical_node_count--; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 56e6b68c8d2f..5026aaa35133 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -274,15 +274,12 @@ struct acpi_device_wakeup { }; struct acpi_device_physical_node { - u8 node_id; + unsigned int node_id; struct list_head node; struct device *dev; bool put_online:1; }; -/* set maximum of physical nodes to 32 for expansibility */ -#define ACPI_MAX_PHYSICAL_NODE 32 - /* Device */ struct acpi_device { int device_type; @@ -302,10 +299,9 @@ struct acpi_device { struct acpi_driver *driver; void *driver_data; struct device dev; - u8 physical_node_count; + unsigned int physical_node_count; struct list_head physical_node_list; struct mutex physical_node_lock; - DECLARE_BITMAP(physical_node_id_bitmap, ACPI_MAX_PHYSICAL_NODE); struct list_head power_dependent; void (*remove)(struct acpi_device *); }; -- cgit v1.2.3 From 49ccc142f9cbc33fdda18e8fa90c1c5b4a79c0ad Mon Sep 17 00:00:00 2001 From: Mateusz Krawczuk Date: Tue, 6 Aug 2013 18:34:40 +0200 Subject: regmap: Add missing header for !CONFIG_REGMAP stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regmap.h requires linux/err.h if CONFIG_REGMAP is not defined. Without it I get error. CC drivers/media/platform/exynos4-is/fimc-reg.o In file included from drivers/media/platform/exynos4-is/fimc-reg.c:14:0: include/linux/regmap.h: In function ‘regmap_write’: include/linux/regmap.h:525:10: error: ‘EINVAL’ undeclared (first use in this function) include/linux/regmap.h:525:10: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Mateusz Krawczuk Signed-off-by: Kyungmin Park Signed-off-by: Mark Brown Cc: stable@kernel.org --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 75981d0b57dc..580a5320cc96 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -15,6 +15,7 @@ #include #include +#include struct module; struct device; -- cgit v1.2.3 From 60f75b8e97daf4a39790a20d962cb861b9220af5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Aug 2013 22:55:00 +0200 Subject: ACPI: Try harder to resolve _ADR collisions for bridges In theory, under a given ACPI namespace node there should be only one child device object with _ADR whose value matches a given bus address exactly. In practice, however, there are systems in which multiple child device objects under a given parent have _ADR matching exactly the same address. In those cases we use _STA to determine which of the multiple matching devices is enabled, since some systems are known to indicate which ACPI device object to associate with the given physical (usually PCI) device this way. Unfortunately, as it turns out, there are systems in which many device objects under the same parent have _ADR matching exactly the same bus address and none of them has _STA, in which case they all should be regarded as enabled according to the spec. Still, if those device objects are supposed to represent bridges (e.g. this is the case for device objects corresponding to PCIe ports), we can try harder and skip the ones that have no child device objects in the ACPI namespace. With luck, we can avoid using device objects that we are not expected to use this way. Although this only works for bridges whose children also have ACPI namespace representation, it is sufficient to address graphics adapter detection issues on some systems, so rework the code finding a matching device ACPI handle for a given bus address to implement this idea. Introduce a new function, acpi_find_child(), taking three arguments: the ACPI handle of the device's parent, a bus address suitable for the device's bus type and a bool indicating if the device is a bridge and make it work as outlined above. Reimplement the function currently used for this purpose, acpi_get_child(), as a call to acpi_find_child() with the last argument set to 'false' and make the PCI subsystem use acpi_find_child() with the bridge information passed as the last argument to it. [Lan Tianyu notices that it is not sufficient to use pci_is_bridge() for that, because the device's subordinate pointer hasn't been set yet at this point, so use hdr_type instead.] This change fixes a regression introduced inadvertently by commit 33f767d (ACPI: Rework acpi_get_child() to be more efficient) which overlooked the fact that for acpi_walk_namespace() "post-order" means "after all children have been visited" rather than "on the way back", so for device objects without children and for namespace walks of depth 1, as in the acpi_get_child() case, the "post-order" callbacks ordering is actually the same as the ordering of "pre-order" ones. Since that commit changed the namespace walk in acpi_get_child() to terminate after finding the first matching object instead of going through all of them and returning the last one, it effectively changed the result returned by that function in some rare cases and that led to problems (the switch from a "pre-order" to a "post-order" callback was supposed to prevent that from happening, but it was ineffective). As it turns out, the systems where the change made by commit 33f767d actually matters are those where there are multiple ACPI device objects representing the same PCIe port (which effectively is a bridge). Moreover, only one of them, and the one we are expected to use, has child device objects in the ACPI namespace, so the regression can be addressed as described above. References: https://bugzilla.kernel.org/show_bug.cgi?id=60561 Reported-by: Peter Wu Tested-by: Vladimir Lalov Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Cc: 3.9+ # 3.9+ --- drivers/acpi/glue.c | 99 ++++++++++++++++++++++++++++++++++++++++--------- drivers/pci/pci-acpi.c | 15 ++++++-- include/acpi/acpi_bus.h | 6 ++- 3 files changed, 98 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 17e15d11bd39..408f6b2a5fa8 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -79,34 +79,99 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) return ret; } -static acpi_status do_acpi_find_child(acpi_handle handle, u32 lvl_not_used, - void *addr_p, void **ret_p) +static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used, + void *not_used, void **ret_p) { - unsigned long long addr, sta; - acpi_status status; + struct acpi_device *adev = NULL; - status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr); - if (ACPI_SUCCESS(status) && addr == *((u64 *)addr_p)) { + acpi_bus_get_device(handle, &adev); + if (adev) { *ret_p = handle; - status = acpi_bus_get_status_handle(handle, &sta); - if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_ENABLED)) - return AE_CTRL_TERMINATE; + return AE_CTRL_TERMINATE; } return AE_OK; } -acpi_handle acpi_get_child(acpi_handle parent, u64 address) +static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge) { - void *ret = NULL; + unsigned long long sta; + acpi_status status; + + status = acpi_bus_get_status_handle(handle, &sta); + if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) + return false; + + if (is_bridge) { + void *test = NULL; + + /* Check if this object has at least one child device. */ + acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, + acpi_dev_present, NULL, NULL, &test); + return !!test; + } + return true; +} + +struct find_child_context { + u64 addr; + bool is_bridge; + acpi_handle ret; + bool ret_checked; +}; + +static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used, + void *data, void **not_used) +{ + struct find_child_context *context = data; + unsigned long long addr; + acpi_status status; - if (!parent) - return NULL; + status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr); + if (ACPI_FAILURE(status) || addr != context->addr) + return AE_OK; - acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, 1, NULL, - do_acpi_find_child, &address, &ret); - return (acpi_handle)ret; + if (!context->ret) { + /* This is the first matching object. Save its handle. */ + context->ret = handle; + return AE_OK; + } + /* + * There is more than one matching object with the same _ADR value. + * That really is unexpected, so we are kind of beyond the scope of the + * spec here. We have to choose which one to return, though. + * + * First, check if the previously found object is good enough and return + * its handle if so. Second, check the same for the object that we've + * just found. + */ + if (!context->ret_checked) { + if (acpi_extra_checks_passed(context->ret, context->is_bridge)) + return AE_CTRL_TERMINATE; + else + context->ret_checked = true; + } + if (acpi_extra_checks_passed(handle, context->is_bridge)) { + context->ret = handle; + return AE_CTRL_TERMINATE; + } + return AE_OK; +} + +acpi_handle acpi_find_child(acpi_handle parent, u64 addr, bool is_bridge) +{ + if (parent) { + struct find_child_context context = { + .addr = addr, + .is_bridge = is_bridge, + }; + + acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, 1, do_find_child, + NULL, &context, NULL); + return context.ret; + } + return NULL; } -EXPORT_SYMBOL(acpi_get_child); +EXPORT_SYMBOL_GPL(acpi_find_child); int acpi_bind_one(struct device *dev, acpi_handle handle) { diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index dbdc5f7e2b29..01e264fb50e0 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -317,13 +317,20 @@ void acpi_pci_remove_bus(struct pci_bus *bus) /* ACPI bus type */ static int acpi_pci_find_device(struct device *dev, acpi_handle *handle) { - struct pci_dev * pci_dev; - u64 addr; + struct pci_dev *pci_dev = to_pci_dev(dev); + bool is_bridge; + u64 addr; - pci_dev = to_pci_dev(dev); + /* + * pci_is_bridge() is not suitable here, because pci_dev->subordinate + * is set only after acpi_pci_find_device() has been called for the + * given device. + */ + is_bridge = pci_dev->hdr_type == PCI_HEADER_TYPE_BRIDGE + || pci_dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; /* Please ref to ACPI spec for the syntax of _ADR */ addr = (PCI_SLOT(pci_dev->devfn) << 16) | PCI_FUNC(pci_dev->devfn); - *handle = acpi_get_child(DEVICE_ACPI_HANDLE(dev->parent), addr); + *handle = acpi_find_child(ACPI_HANDLE(dev->parent), addr, is_bridge); if (!*handle) return -ENODEV; return 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 5026aaa35133..94383a70c1a3 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -441,7 +441,11 @@ struct acpi_pci_root { }; /* helper */ -acpi_handle acpi_get_child(acpi_handle, u64); +acpi_handle acpi_find_child(acpi_handle, u64, bool); +static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr) +{ + return acpi_find_child(handle, addr, false); +} int acpi_is_root_bridge(acpi_handle); struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev)) -- cgit v1.2.3 From 786615bc1ce84150ded80daea6bd9f6297f48e73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 16:04:47 -0400 Subject: SUNRPC: If the rpcbind channel is disconnected, fail the call to unregister If rpcbind causes our connection to the AF_LOCAL socket to close after we've registered a service, then we want to be careful about reconnecting since the mount namespace may have changed. By simply refusing to reconnect the AF_LOCAL socket in the case of unregister, we avoid the need to somehow save the mount namespace. While this may lead to some services not unregistering properly, it should be safe. Signed-off-by: Trond Myklebust Cc: Nix Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 4 ++++ net/sunrpc/netns.h | 1 + net/sunrpc/rpcb_clnt.c | 40 +++++++++++++++++++++++++++------------- 4 files changed, 33 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6d870353674a..1821445708d6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -121,6 +121,7 @@ struct rpc_task_setup { #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ +#define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 74f6a704e374..ecbc4e3d83ad 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1660,6 +1660,10 @@ call_connect(struct rpc_task *task) task->tk_action = call_connect_status; if (task->tk_status < 0) return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; + } xprt_connect(task); } } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 74d948f5d5a1..779742cfc1ff 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -23,6 +23,7 @@ struct sunrpc_net { struct rpc_clnt *rpcb_local_clnt4; spinlock_t rpcb_clnt_lock; unsigned int rpcb_users; + unsigned int rpcb_is_af_local : 1; struct mutex gssp_lock; wait_queue_head_t gssp_wq; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b0f723227157..1891a1022c17 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net) } static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, - struct rpc_clnt *clnt4) + struct rpc_clnt *clnt4, + bool is_af_local) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* Protected by rpcb_create_local_mutex */ sn->rpcb_local_clnt = clnt; sn->rpcb_local_clnt4 = clnt4; + sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " @@ -271,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, true); out: return result; @@ -323,7 +325,7 @@ static int rpcb_create_local_net(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, false); out: return result; @@ -384,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, return rpc_create(&args); } -static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) +static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set) { - int result, error = 0; + int flags = RPC_TASK_NOCONNECT; + int error, result = 0; + if (is_set || !sn->rpcb_is_af_local) + flags = RPC_TASK_SOFTCONN; msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); + error = rpc_call_sync(clnt, msg, flags); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -447,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short .rpc_argp = &map, }; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + bool is_set = false; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; + is_set = true; + } - return rpcb_register_call(sn->rpcb_local_clnt, &msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set); } /* @@ -469,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -479,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -497,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -507,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -527,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(sn->rpcb_local_clnt4, msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false); } /** -- cgit v1.2.3 From 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Aug 2013 18:55:32 +0200 Subject: userns: limit the maximum depth of user_namespace->parent chain Ensure that user_namespace->parent chain can't grow too much. Currently we use the hardroded 32 as limit. Reported-by: Andy Lutomirski Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 1 + kernel/user_namespace.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b6b215f13b45..14105c26a836 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -23,6 +23,7 @@ struct user_namespace { struct uid_gid_map projid_map; atomic_t count; struct user_namespace *parent; + int level; kuid_t owner; kgid_t group; unsigned int proc_inum; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 6e50a44610ee..9064b919a406 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -62,6 +62,9 @@ int create_user_ns(struct cred *new) kgid_t group = new->egid; int ret; + if (parent_ns->level > 32) + return -EUSERS; + /* * Verify that we can not violate the policy of which files * may be accessed that is specified by the root directory, @@ -92,6 +95,7 @@ int create_user_ns(struct cred *new) atomic_set(&ns->count, 1); /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; + ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; -- cgit v1.2.3