From d752b2696072ed52fd5afab08b601e2220a3b87e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 25 Jun 2013 16:50:08 +0200 Subject: drbd: Allow online change of al-stripes and al-stripe-size Allow to change the AL layout with an resize operation. For that the reisze command gets two new fields: al_stripes and al_stripe_size. In order to make the operation crash save: 1) Lock out all IO and MD-IO 2) Write the super block with MDF_PRIMARY_IND clear 3) write the bitmap to the new location (all zeros, since we allow only while connected) 4) Initialize the new AL-area 5) Write the super block with the restored MDF_PRIMARY_IND. 6) Unfreeze all IO Since the AL-layout has no influence on the protocol, this operation needs to be beforemed on both sides of a resource (if intended). Signed-off-by: Andreas Gruenbacher Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- include/linux/drbd.h | 6 +++++- include/linux/drbd_genl.h | 2 ++ include/linux/drbd_limits.h | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1b4d4ee1168f..de7d74ab3de6 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -177,7 +177,11 @@ enum drbd_ret_code { ERR_NEED_APV_100 = 163, ERR_NEED_ALLOW_TWO_PRI = 164, ERR_MD_UNCLEAN = 165, - + ERR_MD_LAYOUT_CONNECTED = 166, + ERR_MD_LAYOUT_TOO_BIG = 167, + ERR_MD_LAYOUT_TOO_SMALL = 168, + ERR_MD_LAYOUT_NO_FIT = 169, + ERR_IMPLICIT_SHRINK = 170, /* insert new ones above this line */ AFTER_LAST_ERR_CODE }; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index d0d8fac8a6e4..e8c44572b8cb 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -181,6 +181,8 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) + __u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF) + __u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF) ) GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 1fedf2b17cc8..17e50bb00521 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -215,4 +215,13 @@ #define DRBD_ALWAYS_ASBP_DEF 0 #define DRBD_USE_RLE_DEF 1 +#define DRBD_AL_STRIPES_MIN 1 +#define DRBD_AL_STRIPES_MAX 1024 +#define DRBD_AL_STRIPES_DEF 1 +#define DRBD_AL_STRIPES_SCALE '1' + +#define DRBD_AL_STRIPE_SIZE_MIN 4 +#define DRBD_AL_STRIPE_SIZE_MAX 16777216 +#define DRBD_AL_STRIPE_SIZE_DEF 32 +#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */ #endif -- cgit v1.2.3 From add0c59d802e6118e51e21244c3871be35164e4b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jul 2013 16:17:39 -0700 Subject: cgroup: remove bcache_subsys_id which got added stealthily cafe563591 ("bcache: A block layer cache") added a new cgroup subsystem bcache_subsys without proper review and ack. bcache_subsys seems to use cgroup for group stats and per-group cache_mode configuration. This is very much the type of usage that we don't want to allow. Fortunately, CONFIG_CGROUP_BCACHE which enables bcache_subsys is currently commented out, so this shouldn't have any upstream users. Let's nip in the bud. While at it, clarify in cgroup_subsys.h that no new subsystem should be added without explicit acks from cgroup maintainers. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: cgroups@vger.kernel.org Cc: Kent Overstreet Cc: Jens Axboe Cc: linux-bcache@vger.kernel.org --- include/linux/cgroup_subsys.h | 45 +++++++------------------------------------ 1 file changed, 7 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 6e7ec64b69ab..b613ffd402d1 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -1,86 +1,55 @@ -/* Add subsystem definitions of the form SUBSYS() in this - * file. Surround each one by a line of comment markers so that - * patches don't collide +/* + * List of cgroup subsystems. + * + * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ - -/* */ - -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG) SUBSYS(debug) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED) SUBSYS(cpu_cgroup) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT) SUBSYS(cpuacct) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_MEMCG) SUBSYS(mem_cgroup) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE) SUBSYS(devices) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER) SUBSYS(freezer) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP) SUBSYS(net_cls) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP) SUBSYS(blkio) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF) SUBSYS(perf) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP) SUBSYS(net_prio) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB) SUBSYS(hugetlb) #endif - -/* */ - -#ifdef CONFIG_CGROUP_BCACHE -SUBSYS(bcache) -#endif - -/* */ +/* + * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. + */ -- cgit v1.2.3 From 913ffdb54366f94eec65c656cae8c6e00e1ab1b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 11 Jul 2013 16:34:48 -0700 Subject: cgroup: replace task_cgroup_path_from_hierarchy() with task_cgroup_path() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit task_cgroup_path_from_hierarchy() was added for the planned new users and none of the currently planned users wants to know about multiple hierarchies. This patch drops the multiple hierarchy part and makes it always return the path in the first non-dummy hierarchy. As unified hierarchy will always have id 1, this is guaranteed to return the path for the unified hierarchy if mounted; otherwise, it will return the path from the hierarchy which happens to occupy the lowest hierarchy id, which will usually be the first hierarchy mounted after boot. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Lennart Poettering Cc: Kay Sievers Cc: Jan Kaluža --- include/linux/cgroup.h | 3 +-- kernel/cgroup.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index fd097ecfcd97..21cfaff7e002 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -540,8 +540,7 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); -int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, - char *buf, size_t buflen); +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroup_task_count(const struct cgroup *cgrp); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e5583d10a325..afb8d53ca6c7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1846,36 +1846,43 @@ out: EXPORT_SYMBOL_GPL(cgroup_path); /** - * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy + * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task - * @hierarchy_id: the hierarchy to look up @task's cgroup from * @buf: the buffer to write the path into * @buflen: the length of the buffer * - * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and - * copy its path into @buf. This function grabs cgroup_mutex and shouldn't - * be used inside locks used by cgroup controller callbacks. + * Determine @task's cgroup on the first (the one with the lowest non-zero + * hierarchy_id) cgroup hierarchy and copy its path into @buf. This + * function grabs cgroup_mutex and shouldn't be used inside locks used by + * cgroup controller callbacks. + * + * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short. */ -int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, - char *buf, size_t buflen) +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) { struct cgroupfs_root *root; - struct cgroup *cgrp = NULL; - int ret = -ENOENT; + struct cgroup *cgrp; + int hierarchy_id = 1, ret = 0; + + if (buflen < 2) + return -ENAMETOOLONG; mutex_lock(&cgroup_mutex); - root = idr_find(&cgroup_hierarchy_idr, hierarchy_id); + root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); + if (root) { cgrp = task_cgroup_from_root(task, root); ret = cgroup_path(cgrp, buf, buflen); + } else { + /* if no hierarchy exists, everyone is in "/" */ + memcpy(buf, "/", 2); } mutex_unlock(&cgroup_mutex); - return ret; } -EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy); +EXPORT_SYMBOL_GPL(task_cgroup_path); /* * Control Group taskset -- cgit v1.2.3 From ceac9b9214df539ca814a784c2af94f554bc78d4 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 26 Jun 2013 15:08:48 +0200 Subject: ARM i.MX6Q: Fix IOMUXC GPR1 defines for ENET_CLK_SEL and IPU1/2_MUX Signed-off-by: Philipp Zabel Signed-off-by: Shawn Guo --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index dab34a1deb2c..b1521e82fecf 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -103,15 +103,15 @@ #define IMX6Q_GPR1_EXC_MON_MASK BIT(22) #define IMX6Q_GPR1_EXC_MON_OKAY 0x0 #define IMX6Q_GPR1_EXC_MON_SLVE BIT(22) -#define IMX6Q_GPR1_MIPI_IPU2_SEL_MASK BIT(21) -#define IMX6Q_GPR1_MIPI_IPU2_SEL_GASKET 0x0 -#define IMX6Q_GPR1_MIPI_IPU2_SEL_IOMUX BIT(21) -#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK BIT(20) -#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET 0x0 -#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX BIT(20) -#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK BIT(19) +#define IMX6Q_GPR1_ENET_CLK_SEL_MASK BIT(21) +#define IMX6Q_GPR1_ENET_CLK_SEL_PAD 0 +#define IMX6Q_GPR1_ENET_CLK_SEL_ANATOP BIT(21) +#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK BIT(20) #define IMX6Q_GPR1_MIPI_IPU2_MUX_GASKET 0x0 -#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX BIT(19) +#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX BIT(20) +#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK BIT(19) +#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET 0x0 +#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX BIT(19) #define IMX6Q_GPR1_PCIE_TEST_PD BIT(18) #define IMX6Q_GPR1_IPU_VPU_MUX_MASK BIT(17) #define IMX6Q_GPR1_IPU_VPU_MUX_IPU1 0x0 -- cgit v1.2.3 From 36ff66db3fb5642906e46e73ca9cf92f1c5974ff Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 27 Jun 2013 15:27:07 -0400 Subject: USB: move the definition of USB_MAXCHILDREN The USB_MAXCHILDREN symbol is used in include/uapi/linux/usb/ch11.h, a user-mode header, even though it is defined in include/linux/usb.h, which is kernel-only. This causes compile-time errors when user programs try to #include linux/usb/ch11.h. This patch fixes the problem by moving the definition of USB_MAXCHILDREN into ch11.h. It also gets rid of unneeded parentheses. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 11 ----------- include/uapi/linux/usb/ch11.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index a232b7ece1f6..0eec2689b955 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -367,17 +367,6 @@ struct usb_bus { /* ----------------------------------------------------------------------- */ -/* This is arbitrary. - * From USB 2.0 spec Table 11-13, offset 7, a hub can - * have up to 255 ports. The most yet reported is 10. - * - * Current Wireless USB host hardware (Intel i1480 for example) allows - * up to 22 devices to connect. Upcoming hardware might raise that - * limit. Because the arrays need to add a bit for hub status data, we - * do 31, so plus one evens out to four bytes. - */ -#define USB_MAXCHILDREN (31) - struct usb_tt; enum usb_device_removable { diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h index 7692dc69ccf7..331499d597fa 100644 --- a/include/uapi/linux/usb/ch11.h +++ b/include/uapi/linux/usb/ch11.h @@ -11,6 +11,17 @@ #include /* __u8 etc */ +/* This is arbitrary. + * From USB 2.0 spec Table 11-13, offset 7, a hub can + * have up to 255 ports. The most yet reported is 10. + * + * Current Wireless USB host hardware (Intel i1480 for example) allows + * up to 22 devices to connect. Upcoming hardware might raise that + * limit. Because the arrays need to add a bit for hub status data, we + * use 31, so plus one evens out to four bytes. + */ +#define USB_MAXCHILDREN 31 + /* * Hub request types */ -- cgit v1.2.3 From e69f61862ab833e9b8d3c15b6ce07fd69f3bfecc Mon Sep 17 00:00:00 2001 From: Yacine Belkadi Date: Fri, 12 Jul 2013 20:45:47 +0200 Subject: sched: Fix some kernel-doc warnings When building the htmldocs (in verbose mode), scripts/kernel-doc reports the follwing type of warnings: Warning(kernel/sched/core.c:936): No description found for return value of 'task_curr' ... Fix those by: - adding the missing descriptions - using "Return" sections for the descriptions Signed-off-by: Yacine Belkadi Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1373654747-2389-1-git-send-email-yacine.belkadi.1@gmail.com [ While at it, fix the cpupri_set() explanation. ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++ kernel/sched/core.c | 82 ++++++++++++++++++++++++++++++++++++++------------- kernel/sched/cpupri.c | 4 +-- kernel/sched/fair.c | 9 ++++-- 4 files changed, 76 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 50d04b92ceda..82300247974c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1532,6 +1532,8 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) * Test if a process is not yet dead (at most zombie state) * If pid_alive fails, then pointers within the task structure * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. */ static inline int pid_alive(struct task_struct *p) { @@ -1543,6 +1545,8 @@ static inline int pid_alive(struct task_struct *p) * @tsk: Task structure to be checked. * * Check if a task structure is the first user space task the kernel created. + * + * Return: 1 if the task structure is init. 0 otherwise. */ static inline int is_global_init(struct task_struct *tsk) { @@ -1893,6 +1897,8 @@ extern struct task_struct *idle_task(int cpu); /** * is_idle_task - is the specified task an idle task? * @p: the task in question. + * + * Return: 1 if @p is an idle task. 0 otherwise. */ static inline bool is_idle_task(const struct task_struct *p) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0d8eb4525e76..4c3967f91e20 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -933,6 +933,8 @@ static int effective_prio(struct task_struct *p) /** * task_curr - is this task currently executing on a CPU? * @p: the task in question. + * + * Return: 1 if the task is currently executing. 0 otherwise. */ inline int task_curr(const struct task_struct *p) { @@ -1482,7 +1484,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) * the simpler "current->state = TASK_RUNNING" to mark yourself * runnable without the overhead of this. * - * Returns %true if @p was woken up, %false if it was already running + * Return: %true if @p was woken up, %false if it was already running. * or @state didn't match @p's state. */ static int @@ -1577,8 +1579,9 @@ out: * @p: The process to be woken up. * * Attempt to wake up the nominated process and move it to the set of runnable - * processes. Returns 1 if the process was woken up, 0 if it was already - * running. + * processes. + * + * Return: 1 if the process was woken up, 0 if it was already running. * * It may be assumed that this function implies a write memory barrier before * changing the task state if and only if any tasks are woken up. @@ -2191,6 +2194,8 @@ void scheduler_tick(void) * This makes sure that uptime, CFS vruntime, load * balancing, etc... continue to move forward, even * with a very low granularity. + * + * Return: Maximum deferment in nanoseconds. */ u64 scheduler_tick_max_deferment(void) { @@ -2796,8 +2801,8 @@ EXPORT_SYMBOL(wait_for_completion); * specified timeout to expire. The timeout is in jiffies. It is not * interruptible. * - * The return value is 0 if timed out, and positive (at least 1, or number of - * jiffies left till timeout) if completed. + * Return: 0 if timed out, and positive (at least 1, or number of jiffies left + * till timeout) if completed. */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) @@ -2829,8 +2834,8 @@ EXPORT_SYMBOL(wait_for_completion_io); * specified timeout to expire. The timeout is in jiffies. It is not * interruptible. The caller is accounted as waiting for IO. * - * The return value is 0 if timed out, and positive (at least 1, or number of - * jiffies left till timeout) if completed. + * Return: 0 if timed out, and positive (at least 1, or number of jiffies left + * till timeout) if completed. */ unsigned long __sched wait_for_completion_io_timeout(struct completion *x, unsigned long timeout) @@ -2846,7 +2851,7 @@ EXPORT_SYMBOL(wait_for_completion_io_timeout); * This waits for completion of a specific task to be signaled. It is * interruptible. * - * The return value is -ERESTARTSYS if interrupted, 0 if completed. + * Return: -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_interruptible(struct completion *x) { @@ -2865,8 +2870,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. It is interruptible. The timeout is in jiffies. * - * The return value is -ERESTARTSYS if interrupted, 0 if timed out, - * positive (at least 1, or number of jiffies left till timeout) if completed. + * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1, + * or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_interruptible_timeout(struct completion *x, @@ -2883,7 +2888,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); * This waits to be signaled for completion of a specific task. It can be * interrupted by a kill signal. * - * The return value is -ERESTARTSYS if interrupted, 0 if completed. + * Return: -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_killable(struct completion *x) { @@ -2903,8 +2908,8 @@ EXPORT_SYMBOL(wait_for_completion_killable); * signaled or for a specified timeout to expire. It can be * interrupted by a kill signal. The timeout is in jiffies. * - * The return value is -ERESTARTSYS if interrupted, 0 if timed out, - * positive (at least 1, or number of jiffies left till timeout) if completed. + * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1, + * or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_killable_timeout(struct completion *x, @@ -2918,7 +2923,7 @@ EXPORT_SYMBOL(wait_for_completion_killable_timeout); * try_wait_for_completion - try to decrement a completion without blocking * @x: completion structure * - * Returns: 0 if a decrement cannot be done without blocking + * Return: 0 if a decrement cannot be done without blocking * 1 if a decrement succeeded. * * If a completion is being used as a counting completion, @@ -2945,7 +2950,7 @@ EXPORT_SYMBOL(try_wait_for_completion); * completion_done - Test to see if a completion has any waiters * @x: completion structure * - * Returns: 0 if there are waiters (wait_for_completion() in progress) + * Return: 0 if there are waiters (wait_for_completion() in progress) * 1 if there are no waiters. * */ @@ -3182,7 +3187,7 @@ SYSCALL_DEFINE1(nice, int, increment) * task_prio - return the priority value of a given task. * @p: the task in question. * - * This is the priority value as seen by users in /proc. + * Return: The priority value as seen by users in /proc. * RT tasks are offset by -200. Normal tasks are centered * around 0, value goes from -16 to +15. */ @@ -3194,6 +3199,8 @@ int task_prio(const struct task_struct *p) /** * task_nice - return the nice value of a given task. * @p: the task in question. + * + * Return: The nice value [ -20 ... 0 ... 19 ]. */ int task_nice(const struct task_struct *p) { @@ -3204,6 +3211,8 @@ EXPORT_SYMBOL(task_nice); /** * idle_cpu - is a given cpu idle currently? * @cpu: the processor in question. + * + * Return: 1 if the CPU is currently idle. 0 otherwise. */ int idle_cpu(int cpu) { @@ -3226,6 +3235,8 @@ int idle_cpu(int cpu) /** * idle_task - return the idle task for a given cpu. * @cpu: the processor in question. + * + * Return: The idle task for the cpu @cpu. */ struct task_struct *idle_task(int cpu) { @@ -3235,6 +3246,8 @@ struct task_struct *idle_task(int cpu) /** * find_process_by_pid - find a process with a matching PID value. * @pid: the pid in question. + * + * The task of @pid, if found. %NULL otherwise. */ static struct task_struct *find_process_by_pid(pid_t pid) { @@ -3432,6 +3445,8 @@ recheck: * @policy: new policy. * @param: structure containing the new RT priority. * + * Return: 0 on success. An error code otherwise. + * * NOTE that the task may be already dead. */ int sched_setscheduler(struct task_struct *p, int policy, @@ -3451,6 +3466,8 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); * current context has permission. For example, this is needed in * stop_machine(): we create temporary high priority worker threads, * but our caller might not have that capability. + * + * Return: 0 on success. An error code otherwise. */ int sched_setscheduler_nocheck(struct task_struct *p, int policy, const struct sched_param *param) @@ -3485,6 +3502,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @pid: the pid in question. * @policy: new policy. * @param: structure containing the new RT priority. + * + * Return: 0 on success. An error code otherwise. */ SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) @@ -3500,6 +3519,8 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, * sys_sched_setparam - set/change the RT priority of a thread * @pid: the pid in question. * @param: structure containing the new RT priority. + * + * Return: 0 on success. An error code otherwise. */ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) { @@ -3509,6 +3530,9 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) /** * sys_sched_getscheduler - get the policy (scheduling class) of a thread * @pid: the pid in question. + * + * Return: On success, the policy of the thread. Otherwise, a negative error + * code. */ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) { @@ -3535,6 +3559,9 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) * sys_sched_getparam - get the RT priority of a thread * @pid: the pid in question. * @param: structure containing the RT priority. + * + * Return: On success, 0 and the RT priority is in @param. Otherwise, an error + * code. */ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) { @@ -3659,6 +3686,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, * @pid: pid of the process * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to the new cpu mask + * + * Return: 0 on success. An error code otherwise. */ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, unsigned long __user *, user_mask_ptr) @@ -3710,6 +3739,8 @@ out_unlock: * @pid: pid of the process * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask + * + * Return: 0 on success. An error code otherwise. */ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, unsigned long __user *, user_mask_ptr) @@ -3744,6 +3775,8 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, * * This function yields the current CPU to other tasks. If there are no * other threads running on this CPU then this function will return. + * + * Return: 0. */ SYSCALL_DEFINE0(sched_yield) { @@ -3869,7 +3902,7 @@ EXPORT_SYMBOL(yield); * It's the caller's job to ensure that the target task struct * can't go away on us before we can do any checks. * - * Returns: + * Return: * true (>0) if we indeed boosted the target task. * false (0) if we failed to boost the target. * -ESRCH if there's no task to yield to. @@ -3972,8 +4005,9 @@ long __sched io_schedule_timeout(long timeout) * sys_sched_get_priority_max - return maximum RT priority. * @policy: scheduling class. * - * this syscall returns the maximum rt_priority that can be used - * by a given scheduling class. + * Return: On success, this syscall returns the maximum + * rt_priority that can be used by a given scheduling class. + * On failure, a negative error code is returned. */ SYSCALL_DEFINE1(sched_get_priority_max, int, policy) { @@ -3997,8 +4031,9 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy) * sys_sched_get_priority_min - return minimum RT priority. * @policy: scheduling class. * - * this syscall returns the minimum rt_priority that can be used - * by a given scheduling class. + * Return: On success, this syscall returns the minimum + * rt_priority that can be used by a given scheduling class. + * On failure, a negative error code is returned. */ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) { @@ -4024,6 +4059,9 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. + * + * Return: On success, 0 and the timeslice is in @interval. Otherwise, + * an error code. */ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, struct timespec __user *, interval) @@ -6632,6 +6670,8 @@ void normalize_rt_tasks(void) * @cpu: the processor in question. * * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! + * + * Return: The current task for @cpu. */ struct task_struct *curr_task(int cpu) { diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 1095e878a46f..8b836b376d91 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -62,7 +62,7 @@ static int convert_prio(int prio) * any discrepancies created by racing against the uncertainty of the current * priority configuration. * - * Returns: (int)bool - CPUs were found + * Return: (int)bool - CPUs were found */ int cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask) @@ -203,7 +203,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) * cpupri_init - initialize the cpupri structure * @cp: The cpupri context * - * Returns: -ENOMEM if memory fails. + * Return: -ENOMEM on memory allocation failure. */ int cpupri_init(struct cpupri *cp) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f77f9c527449..98d135584b4b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4280,6 +4280,8 @@ struct sg_lb_stats { * get_sd_load_idx - Obtain the load index for a given sched domain. * @sd: The sched_domain whose load_idx is to be obtained. * @idle: The Idle status of the CPU for whose sd load_icx is obtained. + * + * Return: The load index. */ static inline int get_sd_load_idx(struct sched_domain *sd, enum cpu_idle_type idle) @@ -4574,6 +4576,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, * * Determine if @sg is a busier group than the previously selected * busiest group. + * + * Return: %true if @sg is a busier group than the previously selected + * busiest group. %false otherwise. */ static bool update_sd_pick_busiest(struct lb_env *env, struct sd_lb_stats *sds, @@ -4691,7 +4696,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, * assuming lower CPU number will be equivalent to lower a SMT thread * number. * - * Returns 1 when packing is required and a task should be moved to + * Return: 1 when packing is required and a task should be moved to * this CPU. The amount of the imbalance is returned in *imbalance. * * @env: The load balancing environment. @@ -4869,7 +4874,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * @balance: Pointer to a variable indicating if this_cpu * is the appropriate cpu to perform load balancing at this_level. * - * Returns: - the busiest group if imbalance exists. + * Return: - The busiest group if imbalance exists. * - If no imbalance and user has opted for power-savings balance, * return the least loaded group whose CPUs can be * put to idle by rebalancing its tasks onto our group. -- cgit v1.2.3 From b01a60be7a4a161ac0a11df30569d21a20795aef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Jul 2013 17:43:56 +0200 Subject: ssb: fix alignment of struct bcma_device_id The ARM OABI and EABI disagree on the alignment of structures with small members, so module init tools may interpret the ssb device table incorrectly, as shown by this warning when building the b43 device driver in an OABI kernel: FATAL: drivers/net/wireless/b43/b43: sizeof(struct ssb_device_id)=6 is not a modulo of the size of section __mod_ssb_device_table=88. Forcing the default (EABI) alignment on the structure makes this problem go away. Since the ssb_device_id may have the same problem, better fix both structures. Signed-off-by: Arnd Bergmann Cc: Russell King Cc: John W. Linville Cc: Michael Buesch Cc: Larry Finger Signed-off-by: John W. Linville --- include/linux/mod_devicetable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b62d4af6c667..45e921401b06 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -361,7 +361,8 @@ struct ssb_device_id { __u16 vendor; __u16 coreid; __u8 revision; -}; + __u8 __pad; +} __attribute__((packed, aligned(2))); #define SSB_DEVICE(_vendor, _coreid, _revision) \ { .vendor = _vendor, .coreid = _coreid, .revision = _revision, } #define SSB_DEVTABLE_END \ @@ -377,7 +378,7 @@ struct bcma_device_id { __u16 id; __u8 rev; __u8 class; -}; +} __attribute__((packed,aligned(2))); #define BCMA_CORE(_manuf, _id, _rev, _class) \ { .manuf = _manuf, .id = _id, .rev = _rev, .class = _class, } #define BCMA_CORETABLE_END \ -- cgit v1.2.3 From a1a8e1dc111d6f05e7164e851e58219d428359e1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 16 Jul 2013 15:28:00 +0100 Subject: iio:trigger: Fix use_count race condition When using more than one trigger consumer it can happen that multiple threads perform a read-modify-update cycle on 'use_count' concurrently. This can cause updates to be lost and use_count can get stuck at non-zero value, in which case the IIO core assumes that at least one thread is still running and will wait for it to finish before running any trigger handlers again. This effectively renders the trigger disabled and a reboot is necessary before it can be used again. To fix this make use_count an atomic variable. Also set it to the number of consumers before starting the first consumer, otherwise it might happen that use_count drops to 0 even though not all consumers have been run yet. Signed-off-by: Lars-Peter Clausen Tested-by: Denis Ciocca Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-trigger.c | 34 ++++++++++++++++++++++------------ include/linux/iio/trigger.h | 3 ++- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index ea8a4146620d..0dd9bb873130 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -127,12 +127,17 @@ static struct iio_trigger *iio_trigger_find_by_name(const char *name, void iio_trigger_poll(struct iio_trigger *trig, s64 time) { int i; - if (!trig->use_count) - for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) - if (trig->subirqs[i].enabled) { - trig->use_count++; + + if (!atomic_read(&trig->use_count)) { + atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER); + + for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) { + if (trig->subirqs[i].enabled) generic_handle_irq(trig->subirq_base + i); - } + else + iio_trigger_notify_done(trig); + } + } } EXPORT_SYMBOL(iio_trigger_poll); @@ -146,19 +151,24 @@ EXPORT_SYMBOL(iio_trigger_generic_data_rdy_poll); void iio_trigger_poll_chained(struct iio_trigger *trig, s64 time) { int i; - if (!trig->use_count) - for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) - if (trig->subirqs[i].enabled) { - trig->use_count++; + + if (!atomic_read(&trig->use_count)) { + atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER); + + for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) { + if (trig->subirqs[i].enabled) handle_nested_irq(trig->subirq_base + i); - } + else + iio_trigger_notify_done(trig); + } + } } EXPORT_SYMBOL(iio_trigger_poll_chained); void iio_trigger_notify_done(struct iio_trigger *trig) { - trig->use_count--; - if (trig->use_count == 0 && trig->ops && trig->ops->try_reenable) + if (atomic_dec_and_test(&trig->use_count) && trig->ops && + trig->ops->try_reenable) if (trig->ops->try_reenable(trig)) /* Missed an interrupt so launch new poll now */ iio_trigger_poll(trig, 0); diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 3869c525b052..369cf2cd5144 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -8,6 +8,7 @@ */ #include #include +#include #ifndef _IIO_TRIGGER_H_ #define _IIO_TRIGGER_H_ @@ -61,7 +62,7 @@ struct iio_trigger { struct list_head list; struct list_head alloc_list; - int use_count; + atomic_t use_count; struct irq_chip subirq_chip; int subirq_base; -- cgit v1.2.3 From b1451e546899bc8f450773b2af02e0cd000cf1fa Mon Sep 17 00:00:00 2001 From: "Patil, Rachna" Date: Sat, 20 Jul 2013 17:27:00 +0100 Subject: iio: ti_am335x_adc: Fix wrong samples received on 1st read Previously we tried to read data form ADC even before ADC sequencer finished sampling. This led to wrong samples. We now wait on ADC status register idle bit to be set. Signed-off-by: Patil, Rachna Signed-off-by: Zubair Lutfullah Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti_am335x_adc.c | 30 ++++++++++++++++++++++-------- include/linux/mfd/ti_am335x_tscadc.h | 16 ++++++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 0ad208a69c29..3ceac3e91dde 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -60,7 +60,6 @@ static void tiadc_step_config(struct tiadc_device *adc_dev) { unsigned int stepconfig; int i, steps; - u32 step_en; /* * There are 16 configurable steps and 8 analog input @@ -86,8 +85,7 @@ static void tiadc_step_config(struct tiadc_device *adc_dev) adc_dev->channel_step[i] = steps; steps++; } - step_en = get_adc_step_mask(adc_dev); - am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en); + } static const char * const chan_name_ain[] = { @@ -142,10 +140,22 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct tiadc_device *adc_dev = iio_priv(indio_dev); - int i; - unsigned int fifo1count, read; + int i, map_val; + unsigned int fifo1count, read, stepid; u32 step = UINT_MAX; bool found = false; + u32 step_en; + unsigned long timeout = jiffies + usecs_to_jiffies + (IDLE_TIMEOUT * adc_dev->channels); + step_en = get_adc_step_mask(adc_dev); + am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en); + + /* Wait for ADC sequencer to complete sampling */ + while (tiadc_readl(adc_dev, REG_ADCFSM) & SEQ_STATUS) { + if (time_after(jiffies, timeout)) + return -EAGAIN; + } + map_val = chan->channel + TOTAL_CHANNELS; /* * When the sub-system is first enabled, @@ -170,12 +180,16 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT); for (i = 0; i < fifo1count; i++) { read = tiadc_readl(adc_dev, REG_FIFO1); - if (read >> 16 == step) { - *val = read & 0xfff; + stepid = read & FIFOREAD_CHNLID_MASK; + stepid = stepid >> 0x10; + + if (stepid == map_val) { + read = read & FIFOREAD_DATA_MASK; found = true; + *val = read; } } - am335x_tsc_se_update(adc_dev->mfd_tscadc); + if (found == false) return -EBUSY; return IIO_VAL_INT; diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 8d73fe29796a..db1791bb997a 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -113,11 +113,27 @@ #define CNTRLREG_8WIRE CNTRLREG_AFE_CTRL(3) #define CNTRLREG_TSCENB BIT(7) +/* FIFO READ Register */ +#define FIFOREAD_DATA_MASK (0xfff << 0) +#define FIFOREAD_CHNLID_MASK (0xf << 16) + +/* Sequencer Status */ +#define SEQ_STATUS BIT(5) + #define ADC_CLK 3000000 #define MAX_CLK_DIV 7 #define TOTAL_STEPS 16 #define TOTAL_CHANNELS 8 +/* +* ADC runs at 3MHz, and it takes +* 15 cycles to latch one data output. +* Hence the idle time for ADC to +* process one sample data would be +* around 5 micro seconds. +*/ +#define IDLE_TIMEOUT 5 /* microsec */ + #define TSCADC_CELLS 2 struct ti_tscadc_dev { -- cgit v1.2.3 From ab116a4df4942c78c189d9b0744dd940ab9e00b9 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 10 Jul 2013 11:09:12 +0900 Subject: dmaengine: shdma: fix a build failure on platforms with no DMA support On platforms with no support for the shdma dmaengine driver build is currently failing with drivers/built-in.o: In function `sh_mobile_sdhi_probe': drivers/mmc/host/sh_mobile_sdhi.c:170: undefined reference to`shdma_chan_filter' Fix the breakage by defining shdma_chan_filter to NULL in such configurations. Signed-off-by: Guennadi Liakhovetski [horms+renesas@verge.net.au: Apply change to shdma-base.h instead of sh_dma.h] Signed-off-by: Simon Horman Signed-off-by: Olof Johansson --- include/linux/shdma-base.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index 382cf710ca9a..5b1c9848124c 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -124,6 +124,10 @@ void shdma_chan_remove(struct shdma_chan *schan); int shdma_init(struct device *dev, struct shdma_dev *sdev, int chan_num); void shdma_cleanup(struct shdma_dev *sdev); +#if IS_ENABLED(CONFIG_SH_DMAE_BASE) bool shdma_chan_filter(struct dma_chan *chan, void *arg); +#else +#define shdma_chan_filter NULL +#endif #endif -- cgit v1.2.3 From a829abf8daa2dcf8223a9284b76d221e61130e13 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Jul 2013 17:51:20 +0200 Subject: ARM: pxa: propagate errors from regulator_enable() to pxamci The em_x270_mci_setpower() and em_x270_usb_hub_init() functions call regulator_enable(), which may return an error that must be checked. This changes the em_x270_usb_hub_init() function to bail out if it fails, and changes the pxamci_platform_data->setpower callback so that the a failed em_x270_mci_setpower call can be propagated by the pxamci driver into the mmc core. Signed-off-by: Arnd Bergmann Cc: Mike Rapoport Cc: Paul Gortmaker Cc: Mark Brown Cc: Haojian Zhuang Acked-by: Chris Ball [olof: fixed order of regulator_enable() and test in em_x270_usb_hub_init] Signed-off-by: Olof Johansson --- arch/arm/mach-pxa/em-x270.c | 17 +++++++++++++---- arch/arm/mach-pxa/mainstone.c | 3 ++- arch/arm/mach-pxa/pcm990-baseboard.c | 3 ++- arch/arm/mach-pxa/poodle.c | 4 +++- arch/arm/mach-pxa/spitz.c | 4 +++- arch/arm/mach-pxa/stargate2.c | 3 ++- drivers/mmc/host/pxamci.c | 2 +- include/linux/platform_data/mmc-pxamci.h | 2 +- 8 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index f6726bb4eb95..3a3362fa793e 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -477,16 +477,24 @@ static int em_x270_usb_hub_init(void) /* USB Hub power-on and reset */ gpio_direction_output(usb_hub_reset, 1); gpio_direction_output(GPIO9_USB_VBUS_EN, 0); - regulator_enable(em_x270_usb_ldo); + err = regulator_enable(em_x270_usb_ldo); + if (err) + goto err_free_rst_gpio; + gpio_set_value(usb_hub_reset, 0); gpio_set_value(usb_hub_reset, 1); regulator_disable(em_x270_usb_ldo); - regulator_enable(em_x270_usb_ldo); + err = regulator_enable(em_x270_usb_ldo); + if (err) + goto err_free_rst_gpio; + gpio_set_value(usb_hub_reset, 0); gpio_set_value(GPIO9_USB_VBUS_EN, 1); return 0; +err_free_rst_gpio: + gpio_free(usb_hub_reset); err_free_vbus_gpio: gpio_free(GPIO9_USB_VBUS_EN); err_free_usb_ldo: @@ -592,7 +600,7 @@ err_irq: return err; } -static void em_x270_mci_setpower(struct device *dev, unsigned int vdd) +static int em_x270_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -600,10 +608,11 @@ static void em_x270_mci_setpower(struct device *dev, unsigned int vdd) int vdd_uV = (2000 + (vdd - __ffs(MMC_VDD_20_21)) * 100) * 1000; regulator_set_voltage(em_x270_sdio_ldo, vdd_uV, vdd_uV); - regulator_enable(em_x270_sdio_ldo); + return regulator_enable(em_x270_sdio_ldo); } else { regulator_disable(em_x270_sdio_ldo); } + return 0; } static void em_x270_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index d2c652318376..dd70343c8708 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -408,7 +408,7 @@ static int mainstone_mci_init(struct device *dev, irq_handler_t mstone_detect_in return err; } -static void mainstone_mci_setpower(struct device *dev, unsigned int vdd) +static int mainstone_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -420,6 +420,7 @@ static void mainstone_mci_setpower(struct device *dev, unsigned int vdd) printk(KERN_DEBUG "%s: off\n", __func__); MST_MSCWR1 &= ~MST_MSCWR1_MMC_ON; } + return 0; } static void mainstone_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c index fb7f1d1627dc..13e5b00eae90 100644 --- a/arch/arm/mach-pxa/pcm990-baseboard.c +++ b/arch/arm/mach-pxa/pcm990-baseboard.c @@ -335,7 +335,7 @@ static int pcm990_mci_init(struct device *dev, irq_handler_t mci_detect_int, return err; } -static void pcm990_mci_setpower(struct device *dev, unsigned int vdd) +static int pcm990_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data *p_d = dev->platform_data; u8 val; @@ -348,6 +348,7 @@ static void pcm990_mci_setpower(struct device *dev, unsigned int vdd) val &= ~PCM990_CTRL_MMC2PWR; pcm990_cpld_writeb(PCM990_CTRL_MMC2PWR, PCM990_CTRL_REG5); + return 0; } static void pcm990_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 711d37e26bd8..aedf053a1de5 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -258,7 +258,7 @@ err_free_2: return err; } -static void poodle_mci_setpower(struct device *dev, unsigned int vdd) +static int poodle_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -270,6 +270,8 @@ static void poodle_mci_setpower(struct device *dev, unsigned int vdd) gpio_set_value(POODLE_GPIO_SD_PWR1, 0); gpio_set_value(POODLE_GPIO_SD_PWR, 0); } + + return 0; } static void poodle_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 2125df0444e7..4c29173026e8 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -598,7 +598,7 @@ static inline void spitz_spi_init(void) {} * NOTE: The card detect interrupt isn't debounced so we delay it by 250ms to * give the card a chance to fully insert/eject. */ -static void spitz_mci_setpower(struct device *dev, unsigned int vdd) +static int spitz_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -606,6 +606,8 @@ static void spitz_mci_setpower(struct device *dev, unsigned int vdd) spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, SCOOP_CPR_SD_3V); else spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, 0x0); + + return 0; } static struct pxamci_platform_data spitz_mci_platform_data = { diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index 88fde43c948c..62aea3e835f3 100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -734,9 +734,10 @@ static int stargate2_mci_init(struct device *dev, * * Very simple control. Either it is on or off and is controlled by * a gpio pin */ -static void stargate2_mci_setpower(struct device *dev, unsigned int vdd) +static int stargate2_mci_setpower(struct device *dev, unsigned int vdd) { gpio_set_value(SG2_SD_POWER_ENABLE, !!vdd); + return 0; } static void stargate2_mci_exit(struct device *dev, void *data) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 847b1996ce8e..2c5a91bb8ec3 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -128,7 +128,7 @@ static inline int pxamci_set_power(struct pxamci_host *host, !!on ^ host->pdata->gpio_power_invert); } if (!host->vcc && host->pdata && host->pdata->setpower) - host->pdata->setpower(mmc_dev(host->mmc), vdd); + return host->pdata->setpower(mmc_dev(host->mmc), vdd); return 0; } diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h index 9eb515bb799d..1706b3597ce0 100644 --- a/include/linux/platform_data/mmc-pxamci.h +++ b/include/linux/platform_data/mmc-pxamci.h @@ -12,7 +12,7 @@ struct pxamci_platform_data { unsigned long detect_delay_ms; /* delay in millisecond before detecting cards after interrupt */ int (*init)(struct device *, irq_handler_t , void *); int (*get_ro)(struct device *); - void (*setpower)(struct device *, unsigned int); + int (*setpower)(struct device *, unsigned int); void (*exit)(struct device *, void *); int gpio_card_detect; /* gpio detecting card insertion */ int gpio_card_ro; /* gpio detecting read only toggle */ -- cgit v1.2.3 From 88d84ac97378c2f1d5fec9af1e8b7d9a662d6b00 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 19 Jul 2013 12:28:25 +0200 Subject: EDAC: Fix lockdep splat Fix the following: BUG: key ffff88043bdd0330 not in .data! ------------[ cut here ]------------ WARNING: at kernel/lockdep.c:2987 lockdep_init_map+0x565/0x5a0() DEBUG_LOCKS_WARN_ON(1) Modules linked in: glue_helper sb_edac(+) edac_core snd acpi_cpufreq lrw gf128mul ablk_helper iTCO_wdt evdev i2c_i801 dcdbas button cryptd pcspkr iTCO_vendor_support usb_common lpc_ich mfd_core soundcore mperf processor microcode CPU: 2 PID: 599 Comm: modprobe Not tainted 3.10.0 #1 Hardware name: Dell Inc. Precision T3600/0PTTT9, BIOS A08 01/24/2013 0000000000000009 ffff880439a1d920 ffffffff8160a9a9 ffff880439a1d958 ffffffff8103d9e0 ffff88043af4a510 ffffffff81a16e11 0000000000000000 ffff88043bdd0330 0000000000000000 ffff880439a1d9b8 ffffffff8103dacc Call Trace: dump_stack warn_slowpath_common warn_slowpath_fmt lockdep_init_map ? trace_hardirqs_on_caller ? trace_hardirqs_on debug_mutex_init __mutex_init bus_register edac_create_sysfs_mci_device edac_mc_add_mc sbridge_probe pci_device_probe driver_probe_device __driver_attach ? driver_probe_device bus_for_each_dev driver_attach bus_add_driver driver_register __pci_register_driver ? 0xffffffffa0010fff sbridge_init ? 0xffffffffa0010fff do_one_initcall load_module ? unset_module_init_ro_nx SyS_init_module tracesys ---[ end trace d24a70b0d3ddf733 ]--- EDAC MC0: Giving out device to 'sbridge_edac.c' 'Sandy Bridge Socket#0': DEV 0000:3f:0e.0 EDAC sbridge: Driver loaded. What happens is that bus_register needs a statically allocated lock_key because the last is handed in to lockdep. However, struct mem_ctl_info embeds struct bus_type (the whole struct, not a pointer to it) and the whole thing gets dynamically allocated. Fix this by using a statically allocated struct bus_type for the MC bus. Signed-off-by: Borislav Petkov Acked-by: Mauro Carvalho Chehab Cc: Markus Trippelsdorf Cc: stable@kernel.org # v3.10 Signed-off-by: Tony Luck --- drivers/edac/edac_mc.c | 9 +++++++++ drivers/edac/edac_mc_sysfs.c | 28 +++++++++++++++------------- drivers/edac/i5100_edac.c | 2 +- include/linux/edac.h | 7 ++++++- 4 files changed, 31 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 27e86d938262..89e109022d78 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -48,6 +48,8 @@ static LIST_HEAD(mc_devices); */ static void const *edac_mc_owner; +static struct bus_type mc_bus[EDAC_MAX_MCS]; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -723,6 +725,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) int ret = -EINVAL; edac_dbg(0, "\n"); + if (mci->mc_idx >= EDAC_MAX_MCS) { + pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx); + return -ENODEV; + } + #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -762,6 +769,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) /* set load time so that error rate can be tracked */ mci->start_time = jiffies; + mci->bus = &mc_bus[mci->mc_idx]; + if (edac_create_sysfs_mci_device(mci)) { edac_mc_printk(mci, KERN_WARNING, "failed to create sysfs device\n"); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index ef15a7e613bc..e7c32c4f7837 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -370,7 +370,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, return -ENODEV; csrow->dev.type = &csrow_attr_type; - csrow->dev.bus = &mci->bus; + csrow->dev.bus = mci->bus; device_initialize(&csrow->dev); csrow->dev.parent = &mci->dev; csrow->mci = mci; @@ -605,7 +605,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, dimm->mci = mci; dimm->dev.type = &dimm_attr_type; - dimm->dev.bus = &mci->bus; + dimm->dev.bus = mci->bus; device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; @@ -975,11 +975,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. */ - mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus.name) + mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!mci->bus->name) return -ENOMEM; - edac_dbg(0, "creating bus %s\n", mci->bus.name); - err = bus_register(&mci->bus); + + edac_dbg(0, "creating bus %s\n", mci->bus->name); + + err = bus_register(mci->bus); if (err < 0) return err; @@ -988,7 +990,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) device_initialize(&mci->dev); mci->dev.parent = mci_pdev; - mci->dev.bus = &mci->bus; + mci->dev.bus = mci->bus; dev_set_name(&mci->dev, "mc%d", mci->mc_idx); dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); @@ -997,8 +999,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1064,8 +1066,8 @@ fail: } fail2: device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1098,8 +1100,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci) { edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); } static void mc_attr_release(struct device *dev) diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 1b635178cc44..157b934e8ce3 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -974,7 +974,7 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci) if (!i5100_debugfs) return -ENODEV; - priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs); if (!priv->debugfs) return -ENOMEM; diff --git a/include/linux/edac.h b/include/linux/edac.h index 0b763276f619..5c6d7fbaf89e 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -622,7 +622,7 @@ struct edac_raw_error_desc { */ struct mem_ctl_info { struct device dev; - struct bus_type bus; + struct bus_type *bus; struct list_head link; /* for global list of mem_ctl_info structs */ @@ -742,4 +742,9 @@ struct mem_ctl_info { #endif }; +/* + * Maximum number of memory controllers in the coherent fabric. + */ +#define EDAC_MAX_MCS 16 + #endif -- cgit v1.2.3 From e70308ec0e4bff344fcfdf160de40e1150552c5f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 24 Jul 2013 17:04:16 +1000 Subject: Revert "crypto: crct10dif - Wrap crc_t10dif function all to use crypto transform framework" This reverts commits 67822649d7305caf3dd50ed46c27b99c94eff996 39761214eefc6b070f29402aa1165f24d789b3f7 0b95a7f85718adcbba36407ef88bba0a7379ed03 31d939625a9a20b1badd2d4e6bf6fd39fa523405 2d31e518a42828df7877bca23a958627d60408bc Unfortunately this change broke boot on some systems that used an initrd which does not include the newly created crct10dif modules. As these modules are required by sd_mod under certain configurations this is a serious problem. Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 - arch/x86/crypto/crct10dif-pcl-asm_64.S | 643 -------------------------------- arch/x86/crypto/crct10dif-pclmul_glue.c | 151 -------- crypto/Kconfig | 19 - crypto/Makefile | 1 - crypto/crct10dif.c | 178 --------- crypto/tcrypt.c | 8 - crypto/testmgr.c | 10 - crypto/testmgr.h | 33 -- include/linux/crc-t10dif.h | 4 - lib/Kconfig | 2 - lib/crc-t10dif.c | 73 ++-- 12 files changed, 43 insertions(+), 1081 deletions(-) delete mode 100644 arch/x86/crypto/crct10dif-pcl-asm_64.S delete mode 100644 arch/x86/crypto/crct10dif-pclmul_glue.c delete mode 100644 crypto/crct10dif.c (limited to 'include/linux') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 7d6ba9db1be9..6c63c358a7e6 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -27,7 +27,6 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o -obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) @@ -82,4 +81,3 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o -crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S deleted file mode 100644 index 35e97569d05f..000000000000 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ /dev/null @@ -1,643 +0,0 @@ -######################################################################## -# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -# -# Copyright (c) 2013, Intel Corporation -# -# Authors: -# Erdinc Ozturk -# Vinodh Gopal -# James Guilford -# Tim Chen -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# OpenIB.org BSD license below: -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# * Neither the name of the Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -######################################################################## -# Function API: -# UINT16 crc_t10dif_pcl( -# UINT16 init_crc, //initial CRC value, 16 bits -# const unsigned char *buf, //buffer pointer to calculate CRC on -# UINT64 len //buffer length in bytes (64-bit data) -# ); -# -# Reference paper titled "Fast CRC Computation for Generic -# Polynomials Using PCLMULQDQ Instruction" -# URL: http://www.intel.com/content/dam/www/public/us/en/documents -# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -# -# - -#include - -.text - -#define arg1 %rdi -#define arg2 %rsi -#define arg3 %rdx - -#define arg1_low32 %edi - -ENTRY(crc_t10dif_pcl) -.align 16 - - # adjust the 16-bit initial_crc value, scale it to 32 bits - shl $16, arg1_low32 - - # Allocate Stack Space - mov %rsp, %rcx - sub $16*2, %rsp - # align stack to 16 byte boundary - and $~(0x10 - 1), %rsp - - # check if smaller than 256 - cmp $256, arg3 - - # for sizes less than 128, we can't fold 64B at a time... - jl _less_than_128 - - - # load the initial crc value - movd arg1_low32, %xmm10 # initial crc - - # crc value does not need to be byte-reflected, but it needs - # to be moved to the high part of the register. - # because data will be byte-reflected and will align with - # initial crc at correct place. - pslldq $12, %xmm10 - - movdqa SHUF_MASK(%rip), %xmm11 - # receive the initial 64B data, xor the initial crc value - movdqu 16*0(arg2), %xmm0 - movdqu 16*1(arg2), %xmm1 - movdqu 16*2(arg2), %xmm2 - movdqu 16*3(arg2), %xmm3 - movdqu 16*4(arg2), %xmm4 - movdqu 16*5(arg2), %xmm5 - movdqu 16*6(arg2), %xmm6 - movdqu 16*7(arg2), %xmm7 - - pshufb %xmm11, %xmm0 - # XOR the initial_crc value - pxor %xmm10, %xmm0 - pshufb %xmm11, %xmm1 - pshufb %xmm11, %xmm2 - pshufb %xmm11, %xmm3 - pshufb %xmm11, %xmm4 - pshufb %xmm11, %xmm5 - pshufb %xmm11, %xmm6 - pshufb %xmm11, %xmm7 - - movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 - #imm value of pclmulqdq instruction - #will determine which constant to use - - ################################################################# - # we subtract 256 instead of 128 to save one instruction from the loop - sub $256, arg3 - - # at this section of the code, there is 64*x+y (0<=y<64) bytes of - # buffer. The _fold_64_B_loop will fold 64B at a time - # until we have 64+y Bytes of buffer - - - # fold 64B at a time. This section of the code folds 4 xmm - # registers in parallel -_fold_64_B_loop: - - # update the buffer pointer - add $128, arg2 # buf += 64# - - movdqu 16*0(arg2), %xmm9 - movdqu 16*1(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm0, %xmm8 - movdqa %xmm1, %xmm13 - pclmulqdq $0x0 , %xmm10, %xmm0 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0 , %xmm10, %xmm1 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm0 - xorps %xmm8 , %xmm0 - pxor %xmm12, %xmm1 - xorps %xmm13, %xmm1 - - movdqu 16*2(arg2), %xmm9 - movdqu 16*3(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm2, %xmm8 - movdqa %xmm3, %xmm13 - pclmulqdq $0x0, %xmm10, %xmm2 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0, %xmm10, %xmm3 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm2 - xorps %xmm8 , %xmm2 - pxor %xmm12, %xmm3 - xorps %xmm13, %xmm3 - - movdqu 16*4(arg2), %xmm9 - movdqu 16*5(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm4, %xmm8 - movdqa %xmm5, %xmm13 - pclmulqdq $0x0, %xmm10, %xmm4 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0, %xmm10, %xmm5 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm4 - xorps %xmm8 , %xmm4 - pxor %xmm12, %xmm5 - xorps %xmm13, %xmm5 - - movdqu 16*6(arg2), %xmm9 - movdqu 16*7(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm6 , %xmm8 - movdqa %xmm7 , %xmm13 - pclmulqdq $0x0 , %xmm10, %xmm6 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0 , %xmm10, %xmm7 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm6 - xorps %xmm8 , %xmm6 - pxor %xmm12, %xmm7 - xorps %xmm13, %xmm7 - - sub $128, arg3 - - # check if there is another 64B in the buffer to be able to fold - jge _fold_64_B_loop - ################################################################## - - - add $128, arg2 - # at this point, the buffer pointer is pointing at the last y Bytes - # of the buffer the 64B of folded data is in 4 of the xmm - # registers: xmm0, xmm1, xmm2, xmm3 - - - # fold the 8 xmm registers to 1 xmm register with different constants - - movdqa rk9(%rip), %xmm10 - movdqa %xmm0, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm0 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm0, %xmm7 - - movdqa rk11(%rip), %xmm10 - movdqa %xmm1, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm1 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm1, %xmm7 - - movdqa rk13(%rip), %xmm10 - movdqa %xmm2, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm2 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm2, %xmm7 - - movdqa rk15(%rip), %xmm10 - movdqa %xmm3, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm3 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm3, %xmm7 - - movdqa rk17(%rip), %xmm10 - movdqa %xmm4, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm4 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm4, %xmm7 - - movdqa rk19(%rip), %xmm10 - movdqa %xmm5, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm5 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm5, %xmm7 - - movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2 - #imm value of pclmulqdq instruction - #will determine which constant to use - movdqa %xmm6, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm6 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm6, %xmm7 - - - # instead of 64, we add 48 to the loop counter to save 1 instruction - # from the loop instead of a cmp instruction, we use the negative - # flag with the jl instruction - add $128-16, arg3 - jl _final_reduction_for_128 - - # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 - # and the rest is in memory. We can fold 16 bytes at a time if y>=16 - # continue folding 16B at a time - -_16B_reduction_loop: - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm7 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - movdqu (arg2), %xmm0 - pshufb %xmm11, %xmm0 - pxor %xmm0 , %xmm7 - add $16, arg2 - sub $16, arg3 - # instead of a cmp instruction, we utilize the flags with the - # jge instruction equivalent of: cmp arg3, 16-16 - # check if there is any more 16B in the buffer to be able to fold - jge _16B_reduction_loop - - #now we have 16+z bytes left to reduce, where 0<= z < 16. - #first, we reduce the data in the xmm7 register - - -_final_reduction_for_128: - # check if any more data to fold. If not, compute the CRC of - # the final 128 bits - add $16, arg3 - je _128_done - - # here we are getting data that is less than 16 bytes. - # since we know that there was data before the pointer, we can - # offset the input pointer before the actual point, to receive - # exactly 16 bytes. after that the registers need to be adjusted. -_get_last_two_xmms: - movdqa %xmm7, %xmm2 - - movdqu -16(arg2, arg3), %xmm1 - pshufb %xmm11, %xmm1 - - # get rid of the extra data that was loaded before - # load the shift constant - lea pshufb_shf_table+16(%rip), %rax - sub arg3, %rax - movdqu (%rax), %xmm0 - - # shift xmm2 to the left by arg3 bytes - pshufb %xmm0, %xmm2 - - # shift xmm7 to the right by 16-arg3 bytes - pxor mask1(%rip), %xmm0 - pshufb %xmm0, %xmm7 - pblendvb %xmm2, %xmm1 #xmm0 is implicit - - # fold 16 Bytes - movdqa %xmm1, %xmm2 - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm7 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm2, %xmm7 - -_128_done: - # compute crc of a 128-bit value - movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10 - movdqa %xmm7, %xmm0 - - #64b fold - pclmulqdq $0x1, %xmm10, %xmm7 - pslldq $8 , %xmm0 - pxor %xmm0, %xmm7 - - #32b fold - movdqa %xmm7, %xmm0 - - pand mask2(%rip), %xmm0 - - psrldq $12, %xmm7 - pclmulqdq $0x10, %xmm10, %xmm7 - pxor %xmm0, %xmm7 - - #barrett reduction -_barrett: - movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10 - movdqa %xmm7, %xmm0 - pclmulqdq $0x01, %xmm10, %xmm7 - pslldq $4, %xmm7 - pclmulqdq $0x11, %xmm10, %xmm7 - - pslldq $4, %xmm7 - pxor %xmm0, %xmm7 - pextrd $1, %xmm7, %eax - -_cleanup: - # scale the result back to 16 bits - shr $16, %eax - mov %rcx, %rsp - ret - -######################################################################## - -.align 16 -_less_than_128: - - # check if there is enough buffer to be able to fold 16B at a time - cmp $32, arg3 - jl _less_than_32 - movdqa SHUF_MASK(%rip), %xmm11 - - # now if there is, load the constants - movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 - - movd arg1_low32, %xmm0 # get the initial crc value - pslldq $12, %xmm0 # align it to its correct place - movdqu (arg2), %xmm7 # load the plaintext - pshufb %xmm11, %xmm7 # byte-reflect the plaintext - pxor %xmm0, %xmm7 - - - # update the buffer pointer - add $16, arg2 - - # update the counter. subtract 32 instead of 16 to save one - # instruction from the loop - sub $32, arg3 - - jmp _16B_reduction_loop - - -.align 16 -_less_than_32: - # mov initial crc to the return value. this is necessary for - # zero-length buffers. - mov arg1_low32, %eax - test arg3, arg3 - je _cleanup - - movdqa SHUF_MASK(%rip), %xmm11 - - movd arg1_low32, %xmm0 # get the initial crc value - pslldq $12, %xmm0 # align it to its correct place - - cmp $16, arg3 - je _exact_16_left - jl _less_than_16_left - - movdqu (arg2), %xmm7 # load the plaintext - pshufb %xmm11, %xmm7 # byte-reflect the plaintext - pxor %xmm0 , %xmm7 # xor the initial crc value - add $16, arg2 - sub $16, arg3 - movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 - jmp _get_last_two_xmms - - -.align 16 -_less_than_16_left: - # use stack space to load data less than 16 bytes, zero-out - # the 16B in memory first. - - pxor %xmm1, %xmm1 - mov %rsp, %r11 - movdqa %xmm1, (%r11) - - cmp $4, arg3 - jl _only_less_than_4 - - # backup the counter value - mov arg3, %r9 - cmp $8, arg3 - jl _less_than_8_left - - # load 8 Bytes - mov (arg2), %rax - mov %rax, (%r11) - add $8, %r11 - sub $8, arg3 - add $8, arg2 -_less_than_8_left: - - cmp $4, arg3 - jl _less_than_4_left - - # load 4 Bytes - mov (arg2), %eax - mov %eax, (%r11) - add $4, %r11 - sub $4, arg3 - add $4, arg2 -_less_than_4_left: - - cmp $2, arg3 - jl _less_than_2_left - - # load 2 Bytes - mov (arg2), %ax - mov %ax, (%r11) - add $2, %r11 - sub $2, arg3 - add $2, arg2 -_less_than_2_left: - cmp $1, arg3 - jl _zero_left - - # load 1 Byte - mov (arg2), %al - mov %al, (%r11) -_zero_left: - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - # shl r9, 4 - lea pshufb_shf_table+16(%rip), %rax - sub %r9, %rax - movdqu (%rax), %xmm0 - pxor mask1(%rip), %xmm0 - - pshufb %xmm0, %xmm7 - jmp _128_done - -.align 16 -_exact_16_left: - movdqu (arg2), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - jmp _128_done - -_only_less_than_4: - cmp $3, arg3 - jl _only_less_than_3 - - # load 3 Bytes - mov (arg2), %al - mov %al, (%r11) - - mov 1(arg2), %al - mov %al, 1(%r11) - - mov 2(arg2), %al - mov %al, 2(%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $5, %xmm7 - - jmp _barrett -_only_less_than_3: - cmp $2, arg3 - jl _only_less_than_2 - - # load 2 Bytes - mov (arg2), %al - mov %al, (%r11) - - mov 1(arg2), %al - mov %al, 1(%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $6, %xmm7 - - jmp _barrett -_only_less_than_2: - - # load 1 Byte - mov (arg2), %al - mov %al, (%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $7, %xmm7 - - jmp _barrett - -ENDPROC(crc_t10dif_pcl) - -.data - -# precomputed constants -# these constants are precomputed from the poly: -# 0x8bb70000 (0x8bb7 scaled to 32 bits) -.align 16 -# Q = 0x18BB70000 -# rk1 = 2^(32*3) mod Q << 32 -# rk2 = 2^(32*5) mod Q << 32 -# rk3 = 2^(32*15) mod Q << 32 -# rk4 = 2^(32*17) mod Q << 32 -# rk5 = 2^(32*3) mod Q << 32 -# rk6 = 2^(32*2) mod Q << 32 -# rk7 = floor(2^64/Q) -# rk8 = Q -rk1: -.quad 0x2d56000000000000 -rk2: -.quad 0x06df000000000000 -rk3: -.quad 0x9d9d000000000000 -rk4: -.quad 0x7cf5000000000000 -rk5: -.quad 0x2d56000000000000 -rk6: -.quad 0x1368000000000000 -rk7: -.quad 0x00000001f65a57f8 -rk8: -.quad 0x000000018bb70000 - -rk9: -.quad 0xceae000000000000 -rk10: -.quad 0xbfd6000000000000 -rk11: -.quad 0x1e16000000000000 -rk12: -.quad 0x713c000000000000 -rk13: -.quad 0xf7f9000000000000 -rk14: -.quad 0x80a6000000000000 -rk15: -.quad 0x044c000000000000 -rk16: -.quad 0xe658000000000000 -rk17: -.quad 0xad18000000000000 -rk18: -.quad 0xa497000000000000 -rk19: -.quad 0x6ee3000000000000 -rk20: -.quad 0xe7b5000000000000 - - - -mask1: -.octa 0x80808080808080808080808080808080 -mask2: -.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF - -SHUF_MASK: -.octa 0x000102030405060708090A0B0C0D0E0F - -pshufb_shf_table: -# use these values for shift constants for the pshufb instruction -# different alignments result in values as shown: -# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 -# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 -# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 -# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 -# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 -# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 -# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 -# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 -# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 -# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 -# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 -# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 -# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 -# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 -# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 -.octa 0x8f8e8d8c8b8a89888786858483828100 -.octa 0x000e0d0c0b0a09080706050403020100 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c deleted file mode 100644 index 7845d7fd54c0..000000000000 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions - * - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, - size_t len); - -struct chksum_desc_ctx { - __u16 crc; -}; - -/* - * Steps through buffer one byte at at time, calculates reflected - * crc using table. - */ - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = 0; - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - if (irq_fpu_usable()) { - kernel_fpu_begin(); - ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); - kernel_fpu_end(); - } else - ctx->crc = crc_t10dif_generic(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__u16 *)out = ctx->crc; - return 0; -} - -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - if (irq_fpu_usable()) { - kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); - kernel_fpu_end(); - } else - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); -} - -static struct shash_alg alg = { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crct10dif", - .cra_driver_name = "crct10dif-pclmul", - .cra_priority = 200, - .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static const struct x86_cpu_id crct10dif_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); - -static int __init crct10dif_intel_mod_init(void) -{ - if (!x86_match_cpu(crct10dif_cpu_id)) - return -ENODEV; - - return crypto_register_shash(&alg); -} - -static void __exit crct10dif_intel_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crct10dif_intel_mod_init); -module_exit(crct10dif_intel_mod_fini); - -MODULE_AUTHOR("Tim Chen "); -MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS("crct10dif"); -MODULE_ALIAS("crct10dif-pclmul"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 904ffe838567..2754f2bf5d91 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -376,25 +376,6 @@ config CRYPTO_CRC32_PCLMUL which will enable any routine to use the CRC-32-IEEE 802.3 checksum and gain better performance as compared with the table implementation. -config CRYPTO_CRCT10DIF - tristate "CRCT10DIF algorithm" - select CRYPTO_HASH - help - CRC T10 Data Integrity Field computation is being cast as - a crypto transform. This allows for faster crc t10 diff - transforms to be used if they are available. - -config CRYPTO_CRCT10DIF_PCLMUL - tristate "CRCT10DIF PCLMULQDQ hardware acceleration" - depends on X86 && 64BIT && CRC_T10DIF - select CRYPTO_HASH - help - For x86_64 processors with SSE4.2 and PCLMULQDQ supported, - CRC T10 DIF PCLMULQDQ computation can be hardware - accelerated PCLMULQDQ instruction. This option will create - 'crct10dif-plcmul' module, which is faster when computing the - crct10dif checksum as compared with the generic table implementation. - config CRYPTO_GHASH tristate "GHASH digest algorithm" select CRYPTO_GF128MUL diff --git a/crypto/Makefile b/crypto/Makefile index 62af87df8729..a8e9b0fefbe9 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -83,7 +83,6 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_CRC32) += crc32.o -obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_842) += 842.o diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c deleted file mode 100644 index 92aca96d6b98..000000000000 --- a/crypto/crct10dif.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform - * - * Copyright (c) 2007 Oracle Corporation. All rights reserved. - * Written by Martin K. Petersen - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include -#include -#include -#include -#include -#include -#include - -struct chksum_desc_ctx { - __u16 crc; -}; - -/* Table generated using the following polynomium: - * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 - * gt: 0x8bb7 - */ -static const __u16 t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len) -{ - unsigned int i; - - for (i = 0 ; i < len ; i++) - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; - - return crc; -} -EXPORT_SYMBOL(crc_t10dif_generic); - -/* - * Steps through buffer one byte at at time, calculates reflected - * crc using table. - */ - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = 0; - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc_t10dif_generic(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__u16 *)out = ctx->crc; - return 0; -} - -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); -} - -static struct shash_alg alg = { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crct10dif", - .cra_driver_name = "crct10dif-generic", - .cra_priority = 100, - .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init crct10dif_mod_init(void) -{ - int ret; - - ret = crypto_register_shash(&alg); - return ret; -} - -static void __exit crct10dif_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crct10dif_mod_init); -module_exit(crct10dif_mod_fini); - -MODULE_AUTHOR("Tim Chen "); -MODULE_DESCRIPTION("T10 DIF CRC calculation."); -MODULE_LICENSE("GPL"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 25a5934f0e50..66d254ce0d11 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1174,10 +1174,6 @@ static int do_test(int m) ret += tcrypt_test("ghash"); break; - case 47: - ret += tcrypt_test("crct10dif"); - break; - case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -1502,10 +1498,6 @@ static int do_test(int m) test_hash_speed("crc32c", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; - case 320: - test_hash_speed("crct10dif", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - case 399: break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 2f00607039e2..ecddf921a9db 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2045,16 +2045,6 @@ static const struct alg_test_desc alg_test_descs[] = { .count = CRC32C_TEST_VECTORS } } - }, { - .alg = "crct10dif", - .test = alg_test_hash, - .fips_allowed = 1, - .suite = { - .hash = { - .vecs = crct10dif_tv_template, - .count = CRCT10DIF_TEST_VECTORS - } - } }, { .alg = "cryptd(__driver-cbc-aes-aesni)", .test = alg_test_null, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 7d44aa3d6b44..1e701bc075b9 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -450,39 +450,6 @@ static struct hash_testvec rmd320_tv_template[] = { } }; -#define CRCT10DIF_TEST_VECTORS 3 -static struct hash_testvec crct10dif_tv_template[] = { - { - .plaintext = "abc", - .psize = 3, -#ifdef __LITTLE_ENDIAN - .digest = "\x3b\x44", -#else - .digest = "\x44\x3b", -#endif - }, { - .plaintext = "1234567890123456789012345678901234567890" - "123456789012345678901234567890123456789", - .psize = 79, -#ifdef __LITTLE_ENDIAN - .digest = "\x70\x4b", -#else - .digest = "\x4b\x70", -#endif - }, { - .plaintext = - "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd", - .psize = 56, -#ifdef __LITTLE_ENDIAN - .digest = "\xe3\x9c", -#else - .digest = "\x9c\xe3", -#endif - .np = 2, - .tap = { 28, 28 } - } -}; - /* * SHA1 test vectors from from FIPS PUB 180-1 * Long vector from CAVS 5.0 diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index b3cb71f0d3b0..a9c96d865ee7 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -3,10 +3,6 @@ #include -#define CRC_T10DIF_DIGEST_SIZE 2 -#define CRC_T10DIF_BLOCK_SIZE 1 - -__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); __u16 crc_t10dif(unsigned char const *, size_t); #endif diff --git a/lib/Kconfig b/lib/Kconfig index 339d5a4292f8..fe01d418b09a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -63,8 +63,6 @@ config CRC16 config CRC_T10DIF tristate "CRC calculation for the T10 Data Integrity Field" - select CRYPTO - select CRYPTO_CRCT10DIF help This option is only needed if a module that's not in the kernel tree needs to calculate CRC checks for use with the diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index fe3428c07b47..fbbd66ed86cd 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -11,44 +11,57 @@ #include #include #include -#include -#include -#include -static struct crypto_shash *crct10dif_tfm; +/* Table generated using the following polynomium: + * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 + * gt: 0x8bb7 + */ +static const __u16 t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; __u16 crc_t10dif(const unsigned char *buffer, size_t len) { - struct { - struct shash_desc shash; - char ctx[2]; - } desc; - int err; - - desc.shash.tfm = crct10dif_tfm; - desc.shash.flags = 0; - *(__u16 *)desc.ctx = 0; + __u16 crc = 0; + unsigned int i; - err = crypto_shash_update(&desc.shash, buffer, len); - BUG_ON(err); + for (i = 0 ; i < len ; i++) + crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; - return *(__u16 *)desc.ctx; + return crc; } EXPORT_SYMBOL(crc_t10dif); -static int __init crc_t10dif_mod_init(void) -{ - crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); - return PTR_RET(crct10dif_tfm); -} - -static void __exit crc_t10dif_mod_fini(void) -{ - crypto_free_shash(crct10dif_tfm); -} - -module_init(crc_t10dif_mod_init); -module_exit(crc_t10dif_mod_fini); - MODULE_DESCRIPTION("T10 DIF CRC calculation"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 6a6c21ef487be47b300a0b24cd6afeb69d8b9a1a Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Wed, 24 Jul 2013 14:15:28 +0800 Subject: ARM: imx6q: update the sata bits definitions of gpr13 Replace the SATA_PHY_# by the more readable definitons. tj: Being routed through libata branch to enable implementation of ahci_imx. Signed-off-by: Richard Zhu Acked-by: Shawn Guo Signed-off-by: Tejun Heo --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 121 +++++++++++++++++++--------- 1 file changed, 84 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index dab34a1deb2c..e235251f1ba9 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -279,41 +279,88 @@ #define IMX6Q_GPR13_CAN2_STOP_REQ BIT(29) #define IMX6Q_GPR13_CAN1_STOP_REQ BIT(28) #define IMX6Q_GPR13_ENET_STOP_REQ BIT(27) -#define IMX6Q_GPR13_SATA_PHY_8_MASK (0x7 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_0_5_DB (0x0 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_1_0_DB (0x1 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_1_5_DB (0x2 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_2_0_DB (0x3 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_2_5_DB (0x4 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_3_0_DB (0x5 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_3_5_DB (0x6 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_4_0_DB (0x7 << 24) -#define IMX6Q_GPR13_SATA_PHY_7_MASK (0x1f << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA1I (0x10 << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA1M (0x10 << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA1X (0x1a << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA2I (0x12 << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA2M (0x12 << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA2X (0x1a << 19) -#define IMX6Q_GPR13_SATA_PHY_6_MASK (0x7 << 16) -#define IMX6Q_GPR13_SATA_SPEED_MASK BIT(15) -#define IMX6Q_GPR13_SATA_SPEED_1P5G 0x0 -#define IMX6Q_GPR13_SATA_SPEED_3P0G BIT(15) -#define IMX6Q_GPR13_SATA_PHY_5 BIT(14) -#define IMX6Q_GPR13_SATA_PHY_4_MASK (0x7 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_16_16 (0x0 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_14_16 (0x1 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_12_16 (0x2 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_10_16 (0x3 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_9_16 (0x4 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_8_16 (0x5 << 11) -#define IMX6Q_GPR13_SATA_PHY_3_MASK (0xf << 7) -#define IMX6Q_GPR13_SATA_PHY_3_OFF 0x7 -#define IMX6Q_GPR13_SATA_PHY_2_MASK (0x1f << 2) -#define IMX6Q_GPR13_SATA_PHY_2_OFF 0x2 -#define IMX6Q_GPR13_SATA_PHY_1_MASK (0x3 << 0) -#define IMX6Q_GPR13_SATA_PHY_1_FAST (0x0 << 0) -#define IMX6Q_GPR13_SATA_PHY_1_MED (0x1 << 0) -#define IMX6Q_GPR13_SATA_PHY_1_SLOW (0x2 << 0) - +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK (0x7 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_0_5_DB (0x0 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_0_DB (0x1 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_5_DB (0x2 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_0_DB (0x3 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_5_DB (0x4 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB (0x5 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_5_DB (0x6 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_4_0_DB (0x7 << 24) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK (0x1f << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1I (0x10 << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1M (0x10 << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1X (0x1a << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2I (0x12 << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M (0x12 << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2X (0x1a << 19) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK (0x7 << 16) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_1F (0x0 << 16) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_2F (0x1 << 16) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_4F (0x2 << 16) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F (0x3 << 16) +#define IMX6Q_GPR13_SATA_SPD_MODE_MASK BIT(15) +#define IMX6Q_GPR13_SATA_SPD_MODE_1P5G 0x0 +#define IMX6Q_GPR13_SATA_SPD_MODE_3P0G BIT(15) +#define IMX6Q_GPR13_SATA_MPLL_SS_EN BIT(14) +#define IMX6Q_GPR13_SATA_TX_ATTEN_MASK (0x7 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_16_16 (0x0 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_14_16 (0x1 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_12_16 (0x2 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_10_16 (0x3 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_9_16 (0x4 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_8_16 (0x5 << 11) +#define IMX6Q_GPR13_SATA_TX_BOOST_MASK (0xf << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_0_00_DB (0x0 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_0_37_DB (0x1 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_0_74_DB (0x2 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_1_11_DB (0x3 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_1_48_DB (0x4 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_1_85_DB (0x5 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_2_22_DB (0x6 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_2_59_DB (0x7 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_2_96_DB (0x8 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB (0x9 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_3_70_DB (0xa << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_4_07_DB (0xb << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_4_44_DB (0xc << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_4_81_DB (0xd << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_5_28_DB (0xe << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_5_75_DB (0xf << 7) +#define IMX6Q_GPR13_SATA_TX_LVL_MASK (0x1f << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_937_V (0x00 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_947_V (0x01 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_957_V (0x02 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_966_V (0x03 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_976_V (0x04 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_986_V (0x05 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_996_V (0x06 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_005_V (0x07 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_015_V (0x08 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_025_V (0x09 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_035_V (0x0a << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_045_V (0x0b << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_054_V (0x0c << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_064_V (0x0d << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_074_V (0x0e << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_084_V (0x0f << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_094_V (0x10 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_104_V (0x11 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_113_V (0x12 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_123_V (0x13 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_133_V (0x14 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_143_V (0x15 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_152_V (0x16 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_162_V (0x17 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_172_V (0x18 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_182_V (0x19 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_191_V (0x1a << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_201_V (0x1b << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_211_V (0x1c << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_221_V (0x1d << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_230_V (0x1e << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_240_V (0x1f << 2) +#define IMX6Q_GPR13_SATA_MPLL_CLK_EN BIT(1) +#define IMX6Q_GPR13_SATA_TX_EDGE_RATE BIT(0) #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */ -- cgit v1.2.3 From 8e5c2b776ae4c35f54547c017e0a943429f5748a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Jul 2013 21:43:39 +0200 Subject: Revert "ACPI / video / i915: No ACPI backlight if firmware expects Windows 8" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We attempted to address a regression introduced by commit a57f7f9 (ACPICA: Add Windows8/Server2012 string for _OSI method.) after which ACPI video backlight support doesn't work on a number of systems, because the relevant AML methods in the ACPI tables in their BIOSes become useless after the BIOS has been told that the OS is compatible with Windows 8. That problem is tracked by the bug entry at: https://bugzilla.kernel.org/show_bug.cgi?id=51231 Commit 8c5bd7a (ACPI / video / i915: No ACPI backlight if firmware expects Windows 8) introduced for this purpose essentially prevented the ACPI backlight support from being used if the BIOS had been told that the OS was compatible with Windows 8 and the i915 driver was loaded, in which case the backlight would always be handled by i915. Unfortunately, however, that turned out to cause problems with backlight to appear on multiple systems with symptoms indicating that i915 was unable to control the backlight on those systems as expected. For this reason, revert commit 8c5bd7a, but leave the function acpi_video_backlight_quirks() introduced by it, because another commit on top of it uses that function. References: https://lkml.org/lkml/2013/7/21/119 References: https://lkml.org/lkml/2013/7/22/261 References: https://lkml.org/lkml/2013/7/23/429 References: https://lkml.org/lkml/2013/7/23/459 References: https://lkml.org/lkml/2013/7/23/81 References: https://lkml.org/lkml/2013/7/24/27 Reported-and-tested-by: James Hogan Reported-and-tested-by: Kamal Mostafa Reported-and-tested-by: Jörg Otte Reported-and-tested-by: Steven Newbury Reported-by: Martin Steigerwald Reported-by: Kalle Valo Tested-by: Joerg Platte Signed-off-by: Rafael J. Wysocki --- drivers/acpi/internal.h | 2 -- drivers/acpi/video.c | 67 +++++------------------------------------ drivers/acpi/video_detect.c | 15 +-------- drivers/gpu/drm/i915/i915_dma.c | 2 +- include/acpi/video.h | 11 +------ include/linux/acpi.h | 1 - 6 files changed, 11 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 227aca77ee1e..5da44e81dd4d 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -169,10 +169,8 @@ int acpi_create_platform_device(struct acpi_device *adev, -------------------------------------------------------------------------- */ #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) bool acpi_video_backlight_quirks(void); -bool acpi_video_verify_backlight_support(void); #else static inline bool acpi_video_backlight_quirks(void) { return false; } -static inline bool acpi_video_verify_backlight_support(void) { return false; } #endif #endif /* _ACPI_INTERNAL_H_ */ diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 6dd237e79b4f..0ec434d2586d 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -911,7 +911,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) if (acpi_video_init_brightness(device)) return; - if (acpi_video_verify_backlight_support()) { + if (acpi_video_backlight_support()) { struct backlight_properties props; struct pci_dev *pdev; acpi_handle acpi_parent; @@ -1366,8 +1366,8 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event) unsigned long long level_current, level_next; int result = -EINVAL; - /* no warning message if acpi_backlight=vendor or a quirk is used */ - if (!acpi_video_verify_backlight_support()) + /* no warning message if acpi_backlight=vendor is used */ + if (!acpi_video_backlight_support()) return 0; if (!device->brightness) @@ -1875,46 +1875,6 @@ static int acpi_video_bus_remove(struct acpi_device *device) return 0; } -static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl, - void *context, void **rv) -{ - struct acpi_device *acpi_dev; - struct acpi_video_bus *video; - struct acpi_video_device *dev, *next; - - if (acpi_bus_get_device(handle, &acpi_dev)) - return AE_OK; - - if (acpi_match_device_ids(acpi_dev, video_device_ids)) - return AE_OK; - - video = acpi_driver_data(acpi_dev); - if (!video) - return AE_OK; - - acpi_video_bus_stop_devices(video); - mutex_lock(&video->device_list_lock); - list_for_each_entry_safe(dev, next, &video->video_device_list, entry) { - if (dev->backlight) { - backlight_device_unregister(dev->backlight); - dev->backlight = NULL; - kfree(dev->brightness->levels); - kfree(dev->brightness); - } - if (dev->cooling_dev) { - sysfs_remove_link(&dev->dev->dev.kobj, - "thermal_cooling"); - sysfs_remove_link(&dev->cooling_dev->device.kobj, - "device"); - thermal_cooling_device_unregister(dev->cooling_dev); - dev->cooling_dev = NULL; - } - } - mutex_unlock(&video->device_list_lock); - acpi_video_bus_start_devices(video); - return AE_OK; -} - static int __init is_i740(struct pci_dev *dev) { if (dev->device == 0x00D1) @@ -1946,25 +1906,14 @@ static int __init intel_opregion_present(void) return opregion; } -int __acpi_video_register(bool backlight_quirks) +int acpi_video_register(void) { - bool no_backlight; - int result; - - no_backlight = backlight_quirks ? acpi_video_backlight_quirks() : false; - + int result = 0; if (register_count) { /* - * If acpi_video_register() has been called already, don't try - * to register acpi_video_bus, but unregister backlight devices - * if no backlight support is requested. + * if the function of acpi_video_register is already called, + * don't register the acpi_vide_bus again and return no error. */ - if (no_backlight) - acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, - video_unregister_backlight, - NULL, NULL, NULL); - return 0; } @@ -1980,7 +1929,7 @@ int __acpi_video_register(bool backlight_quirks) return 0; } -EXPORT_SYMBOL(__acpi_video_register); +EXPORT_SYMBOL(acpi_video_register); void acpi_video_unregister(void) { diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 826e52def080..c3397748ba46 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -238,12 +238,7 @@ static void acpi_video_caps_check(void) bool acpi_video_backlight_quirks(void) { - if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) { - acpi_video_caps_check(); - acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT; - return true; - } - return false; + return acpi_gbl_osi_data >= ACPI_OSI_WIN_8; } EXPORT_SYMBOL(acpi_video_backlight_quirks); @@ -291,14 +286,6 @@ int acpi_video_backlight_support(void) } EXPORT_SYMBOL(acpi_video_backlight_support); -/* For the ACPI video driver use only. */ -bool acpi_video_verify_backlight_support(void) -{ - return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ? - false : acpi_video_backlight_support(); -} -EXPORT_SYMBOL(acpi_video_verify_backlight_support); - /* * Use acpi_backlight=vendor/video to force that backlight switching * is processed by vendor specific acpi drivers or video.ko driver. diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index cf188ab7051a..adb319b53ecd 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1648,7 +1648,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (INTEL_INFO(dev)->num_pipes) { /* Must be done after probing outputs */ intel_opregion_init(dev); - acpi_video_register_with_quirks(); + acpi_video_register(); } if (IS_GEN5(dev)) diff --git a/include/acpi/video.h b/include/acpi/video.h index b26dc4fb7ba8..61109f2609fc 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -17,21 +17,12 @@ struct acpi_device; #define ACPI_VIDEO_DISPLAY_LEGACY_TV 0x0200 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE) -extern int __acpi_video_register(bool backlight_quirks); -static inline int acpi_video_register(void) -{ - return __acpi_video_register(false); -} -static inline int acpi_video_register_with_quirks(void) -{ - return __acpi_video_register(true); -} +extern int acpi_video_register(void); extern void acpi_video_unregister(void); extern int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, void **edid); #else static inline int acpi_video_register(void) { return 0; } -static inline int acpi_video_register_with_quirks(void) { return 0; } static inline void acpi_video_unregister(void) { return; } static inline int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, void **edid) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6ad72f92469c..353ba256f368 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -191,7 +191,6 @@ extern bool wmi_has_guid(const char *guid); #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 -#define ACPI_VIDEO_SKIP_BACKLIGHT 0x1000 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) -- cgit v1.2.3 From 0699a73af3811b66b1ab5650575acee5eea841ab Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 22 Jul 2013 21:32:09 +0200 Subject: firewire: fix libdc1394/FlyCap2 iso event regression Commit 18d627113b83 (firewire: prevent dropping of completed iso packet header data) was intended to be an obvious bug fix, but libdc1394 and FlyCap2 depend on the old behaviour by ignoring all returned information and thus not noticing that not all packets have been received yet. The result was that the video frame buffers would be saved before they contained the correct data. Reintroduce the old behaviour for old clients. Tested-by: Stepan Salenikovich Tested-by: Josep Bosch Cc: # 3.4+ Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- drivers/firewire/core-cdev.c | 3 +++ drivers/firewire/ohci.c | 10 ++++++++-- include/linux/firewire.h | 1 + include/uapi/linux/firewire-cdev.h | 4 ++-- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 7ef316fdc4d9..ac1b43a04285 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -54,6 +54,7 @@ #define FW_CDEV_KERNEL_VERSION 5 #define FW_CDEV_VERSION_EVENT_REQUEST2 4 #define FW_CDEV_VERSION_ALLOCATE_REGION_END 4 +#define FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW 5 struct client { u32 version; @@ -1005,6 +1006,8 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg) a->channel, a->speed, a->header_size, cb, client); if (IS_ERR(context)) return PTR_ERR(context); + if (client->version < FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW) + context->drop_overflow_headers = true; /* We only support one context at this time. */ spin_lock_irq(&client->lock); diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 9e1db6490b9a..afb701ec90ca 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2749,8 +2749,11 @@ static void copy_iso_headers(struct iso_context *ctx, const u32 *dma_hdr) { u32 *ctx_hdr; - if (ctx->header_length + ctx->base.header_size > PAGE_SIZE) + if (ctx->header_length + ctx->base.header_size > PAGE_SIZE) { + if (ctx->base.drop_overflow_headers) + return; flush_iso_completions(ctx); + } ctx_hdr = ctx->header + ctx->header_length; ctx->last_timestamp = (u16)le32_to_cpu((__force __le32)dma_hdr[0]); @@ -2910,8 +2913,11 @@ static int handle_it_packet(struct context *context, sync_it_packet_for_cpu(context, d); - if (ctx->header_length + 4 > PAGE_SIZE) + if (ctx->header_length + 4 > PAGE_SIZE) { + if (ctx->base.drop_overflow_headers) + return 1; flush_iso_completions(ctx); + } ctx_hdr = ctx->header + ctx->header_length; ctx->last_timestamp = le16_to_cpu(last->res_count); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 3b0e820375ab..5d7782e42b8f 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -436,6 +436,7 @@ struct fw_iso_context { int type; int channel; int speed; + bool drop_overflow_headers; size_t header_size; union { fw_iso_callback_t sc; diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h index d50036953497..1db453e4b550 100644 --- a/include/uapi/linux/firewire-cdev.h +++ b/include/uapi/linux/firewire-cdev.h @@ -215,8 +215,8 @@ struct fw_cdev_event_request2 { * with the %FW_CDEV_ISO_INTERRUPT bit set, when explicitly requested with * %FW_CDEV_IOC_FLUSH_ISO, or when there have been so many completed packets * without the interrupt bit set that the kernel's internal buffer for @header - * is about to overflow. (In the last case, kernels with ABI version < 5 drop - * header data up to the next interrupt packet.) + * is about to overflow. (In the last case, ABI versions < 5 drop header data + * up to the next interrupt packet.) * * Isochronous transmit events (context type %FW_CDEV_ISO_CONTEXT_TRANSMIT): * -- cgit v1.2.3 From 148519120c6d1f19ad53349683aeae9f228b0b8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 27 Jul 2013 01:41:34 +0200 Subject: Revert "cpuidle: Quickly notice prediction failure for repeat mode" Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for repeat mode), because it has been identified as the source of a significant performance regression in v3.8 and later as explained by Jeremy Eder: We believe we've identified a particular commit to the cpuidle code that seems to be impacting performance of variety of workloads. The simplest way to reproduce is using netperf TCP_RR test, so we're using that, on a pair of Sandy Bridge based servers. We also have data from a large database setup where performance is also measurably/positively impacted, though that test data isn't easily share-able. Included below are test results from 3 test kernels: kernel reverts ----------------------------------------------------------- 1) vanilla upstream (no reverts) 2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c 3) test reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 e11538d1f03914eb92af5a1a378375c05ae8520c In summary, netperf TCP_RR numbers improve by approximately 4% after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. When 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency never seems to get above 40%. Taking that patch out gets C0 near 100% quite often, and performance increases. The below data are histograms representing the %c0 residency @ 1-second sample rates (using turbostat), while under netperf test. - If you look at the first 4 histograms, you can see %c0 residency almost entirely in the 30,40% bin. - The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4, shows %c0 in the 80,90,100% bins. Below each kernel name are netperf TCP_RR trans/s numbers for the particular kernel that can be disclosed publicly, comparing the 3 test kernels. We ran a 4th test with the vanilla kernel where we've also set /dev/cpu_dma_latency=0 to show overall impact boosting single-threaded TCP_RR performance over 11% above baseline. 3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0): TCP_RR trans/s 54323.78 ----------------------------------------------------------- 3.10-rc2 vanilla RX (no reverts) TCP_RR trans/s 48192.47 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 1]: * 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 49]: ************************************************* 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 perfteam2 RX (reverts commit e11538d1f03914eb92af5a1a378375c05ae8520c) TCP_RR trans/s 49698.69 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 2]: ** 40.0000 - 50.0000 [ 58]: ********************************************************** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 and e11538d1f03914eb92af5a1a378375c05ae8520c) TCP_RR trans/s 47766.95 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 27]: *************************** 40.0000 - 50.0000 [ 2]: ** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 2]: ** 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 28]: **************************** Sender: 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 1]: * 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 3]: *** 80.0000 - 90.0000 [ 7]: ******* 90.0000 - 100.0000 [ 38]: ************************************** These results demonstrate gaining back the tendency of the CPU to stay in more responsive, performant C-states (and thus yield measurably better performance), by reverting commit 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. Requested-by: Jeremy Eder Tested-by: Len Brown Cc: 3.8+ Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 73 +++------------------------------------- include/linux/tick.h | 6 ---- kernel/time/tick-sched.c | 9 ++--- 3 files changed, 6 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b69a87e22155..bc580b67a652 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -28,13 +28,6 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 -/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ -#define MAX_DEVIATION 60 - -static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); -static DEFINE_PER_CPU(int, hrtimer_status); -/* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; /* * Concepts and ideas behind the menu governor @@ -198,42 +191,17 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } -/* Cancel the hrtimer if it is not triggered yet */ -void menu_hrtimer_cancel(void) -{ - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); - - /* The timer is still not time out*/ - if (per_cpu(hrtimer_status, cpu)) { - hrtimer_cancel(hrtmr); - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - } -} -EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); - -/* Call back for hrtimer is triggered */ -static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) -{ - int cpu = smp_processor_id(); - - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - - return HRTIMER_NORESTART; -} - /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static u32 get_typical_interval(struct menu_device *data) +static void get_typical_interval(struct menu_device *data) { int i = 0, divisor = 0; uint64_t max = 0, avg = 0, stddev = 0; int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ - unsigned int ret = 0; again: @@ -274,16 +242,13 @@ again: if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) || stddev <= 20) { data->predicted_us = avg; - ret = 1; - return ret; + return; } else if ((divisor * 4) > INTERVALS * 3) { /* Exclude the max interval */ thresh = max - 1; goto again; } - - return ret; } /** @@ -298,9 +263,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int i; int multiplier; struct timespec t; - int repeat = 0, low_predicted = 0; - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); @@ -335,7 +297,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - repeat = get_typical_interval(data); + get_typical_interval(data); /* * We want to default to C1 (hlt), not to busy polling @@ -356,10 +318,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) { - low_predicted = 1; + if (s->target_residency > data->predicted_us) continue; - } if (s->exit_latency > latency_req) continue; if (s->exit_latency * multiplier > data->predicted_us) @@ -369,28 +329,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->exit_us = s->exit_latency; } - /* not deepest C-state chosen for low predicted residency */ - if (low_predicted) { - unsigned int timer_us = 0; - - /* - * Set a timer to detect whether this sleep is much - * longer than repeat mode predicted. If the timer - * triggers, the code will evaluate whether to put - * the CPU into a deeper C-state. - * The timer is cancelled on CPU wakeup. - */ - timer_us = 2 * (data->predicted_us + MAX_DEVIATION); - - if (repeat && (4 * timer_us < data->expected_us)) { - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In repeat case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; - } - } - return data->last_state_idx; } @@ -481,9 +419,6 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); - struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); - hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->function = menu_hrtimer_notify; memset(data, 0, sizeof(struct menu_device)); diff --git a/include/linux/tick.h b/include/linux/tick.h index 9180f4b85e6d..62bd8b72873c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -174,10 +174,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk) { } #endif -# ifdef CONFIG_CPU_IDLE_GOV_MENU -extern void menu_hrtimer_cancel(void); -# else -static inline void menu_hrtimer_cancel(void) {} -# endif /* CONFIG_CPU_IDLE_GOV_MENU */ - #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e80183f4a6c4..e77edc97e036 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -827,13 +827,10 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); - if (ts->inidle) { - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); + if (ts->inidle) __tick_nohz_idle_enter(ts); - } else { + else tick_nohz_full_stop_tick(ts); - } } /** @@ -931,8 +928,6 @@ void tick_nohz_idle_exit(void) ts->inidle = 0; - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); if (ts->idle_active || ts->tick_stopped) now = ktime_get(); -- cgit v1.2.3 From 2b44c4db2e2f1765d35163a861d301038e0c8a75 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 24 Jul 2013 17:41:33 -0700 Subject: freezer: set PF_SUSPEND_TASK flag on tasks that call freeze_processes Calling freeze_processes sets a global flag that will cause any process that calls try_to_freeze to enter the refrigerator. It skips sending a signal to the current task, but if the current task ever hits try_to_freeze, all threads will be frozen and the system will deadlock. Set a new flag, PF_SUSPEND_TASK, on the task that calls freeze_processes. The flag notifies the freezer that the thread is involved in suspend and should not be frozen. Also add a WARN_ON in thaw_processes if the caller does not have the PF_SUSPEND_TASK flag set to catch if a different task calls thaw_processes than the one that called freeze_processes, leaving a task with PF_SUSPEND_TASK permanently set on it. Threads that spawn off a task with PF_SUSPEND_TASK set (which swsusp does) will also have PF_SUSPEND_TASK set, preventing them from freezing while they are helping with suspend, but they need to be dead by the time suspend is triggered, otherwise they may run when userspace is expected to be frozen. Add a WARN_ON in thaw_processes if more than one thread has the PF_SUSPEND_TASK flag set. Reported-and-tested-by: Michael Leun Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- include/linux/sched.h | 1 + kernel/freezer.c | 2 +- kernel/power/process.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 50d04b92ceda..d722490da030 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1628,6 +1628,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/kernel/freezer.c b/kernel/freezer.c index 8b2afc1c9df0..b462fa197517 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -33,7 +33,7 @@ static DEFINE_SPINLOCK(freezer_lock); */ bool freezing_slow_path(struct task_struct *p) { - if (p->flags & PF_NOFREEZE) + if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK)) return false; if (pm_nosig_freezing || cgroup_freezing(p)) diff --git a/kernel/power/process.c b/kernel/power/process.c index fc0df8486449..06ec8869dbf1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -109,6 +109,8 @@ static int try_to_freeze_tasks(bool user_only) /** * freeze_processes - Signal user space processes to enter the refrigerator. + * The current thread will not be frozen. The same process that calls + * freeze_processes must later call thaw_processes. * * On success, returns 0. On failure, -errno and system is fully thawed. */ @@ -120,6 +122,9 @@ int freeze_processes(void) if (error) return error; + /* Make sure this task doesn't get frozen */ + current->flags |= PF_SUSPEND_TASK; + if (!pm_freezing) atomic_inc(&system_freezing_cnt); @@ -168,6 +173,7 @@ int freeze_kernel_threads(void) void thaw_processes(void) { struct task_struct *g, *p; + struct task_struct *curr = current; if (pm_freezing) atomic_dec(&system_freezing_cnt); @@ -182,10 +188,15 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { + /* No other threads should have PF_SUSPEND_TASK set */ + WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK)); __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); + WARN_ON(!(curr->flags & PF_SUSPEND_TASK)); + curr->flags &= ~PF_SUSPEND_TASK; + usermodehelper_enable(); schedule(); -- cgit v1.2.3 From 2816c551c796ec14620325b2c9ed75b9979d3125 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 29 Jul 2013 19:50:33 +0200 Subject: tracing: trace_remove_event_call() should fail if call/file is in use Change trace_remove_event_call(call) to return the error if this call is active. This is what the callers assume but can't verify outside of the tracing locks. Both trace_kprobe.c/trace_uprobe.c need the additional changes, unregister_trace_probe() should abort if trace_remove_event_call() fails. The caller is going to free this call/file so we must ensure that nobody can use them after trace_remove_event_call() succeeds. debugfs should be fine after the previous changes and event_remove() does TRACE_REG_UNREGISTER, but still there are 2 reasons why we need the additional checks: - There could be a perf_event(s) attached to this tp_event, so the patch checks ->perf_refcount. - TRACE_REG_UNREGISTER can be suppressed by FTRACE_EVENT_FL_SOFT_MODE, so we simply check FTRACE_EVENT_FL_ENABLED protected by event_mutex. Link: http://lkml.kernel.org/r/20130729175033.GB26284@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace_events.c | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4372658c73ae..f98ab063e95e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -332,7 +332,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); extern int trace_add_event_call(struct ftrace_event_call *call); -extern void trace_remove_event_call(struct ftrace_event_call *call); +extern int trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a67c913e2f9f..ec04836273c0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1713,16 +1713,47 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) destroy_preds(call); } +static int probe_remove_event_call(struct ftrace_event_call *call) +{ + struct trace_array *tr; + struct ftrace_event_file *file; + +#ifdef CONFIG_PERF_EVENTS + if (call->perf_refcount) + return -EBUSY; +#endif + do_for_each_event_file(tr, file) { + if (file->event_call != call) + continue; + /* + * We can't rely on ftrace_event_enable_disable(enable => 0) + * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress + * TRACE_REG_UNREGISTER. + */ + if (file->flags & FTRACE_EVENT_FL_ENABLED) + return -EBUSY; + break; + } while_for_each_event_file(); + + __trace_remove_event_call(call); + + return 0; +} + /* Remove an event_call */ -void trace_remove_event_call(struct ftrace_event_call *call) +int trace_remove_event_call(struct ftrace_event_call *call) { + int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); - __trace_remove_event_call(call); + ret = probe_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + + return ret; } #define for_each_event(event, start, end) \ -- cgit v1.2.3 From cd23b14b654769db83c9684ae1ba32c0e066670f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 18 Jul 2013 15:31:08 +0300 Subject: mlx5_core: Implement new initialization sequence Introduce enbale_hca and disable_hca commands to signify when the driver starts or ceases to operate on the device. In addition the driver will use boot and init pages count; boot pages is required to allow firmware to complete boot commands and the other to complete init hca. Command interface revision is bumped to 4 to enforce using supported firmware. This patch breaks compatibility with old versions of firmware (< 4); however, the first GA firmware we will publish will support version 4 so this should not be a problem. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 8 ++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 69 +++++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 20 +++++-- include/linux/mlx5/device.h | 20 +++++++ include/linux/mlx5/driver.h | 4 +- 5 files changed, 106 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 40374063c01e..c571de85d0f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -46,7 +46,7 @@ #include "mlx5_core.h" enum { - CMD_IF_REV = 3, + CMD_IF_REV = 4, }; enum { @@ -282,6 +282,12 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_TEARDOWN_HCA: return "TEARDOWN_HCA"; + case MLX5_CMD_OP_ENABLE_HCA: + return "MLX5_CMD_OP_ENABLE_HCA"; + + case MLX5_CMD_OP_DISABLE_HCA: + return "MLX5_CMD_OP_DISABLE_HCA"; + case MLX5_CMD_OP_QUERY_PAGES: return "QUERY_PAGES"; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 12242de2b0e3..b47739b0b5f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -249,6 +249,44 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } +static int mlx5_core_enable_hca(struct mlx5_core_dev *dev) +{ + int err; + struct mlx5_enable_hca_mbox_in in; + struct mlx5_enable_hca_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return 0; +} + +static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) +{ + int err; + struct mlx5_disable_hca_mbox_in in; + struct mlx5_disable_hca_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return 0; +} + int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) { struct mlx5_priv *priv = &dev->priv; @@ -304,28 +342,41 @@ int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) } mlx5_pagealloc_init(dev); + + err = mlx5_core_enable_hca(dev); + if (err) { + dev_err(&pdev->dev, "enable hca failed\n"); + goto err_pagealloc_cleanup; + } + + err = mlx5_satisfy_startup_pages(dev, 1); + if (err) { + dev_err(&pdev->dev, "failed to allocate boot pages\n"); + goto err_disable_hca; + } + err = set_hca_ctrl(dev); if (err) { dev_err(&pdev->dev, "set_hca_ctrl failed\n"); - goto err_pagealloc_cleanup; + goto reclaim_boot_pages; } err = handle_hca_cap(dev); if (err) { dev_err(&pdev->dev, "handle_hca_cap failed\n"); - goto err_pagealloc_cleanup; + goto reclaim_boot_pages; } - err = mlx5_satisfy_startup_pages(dev); + err = mlx5_satisfy_startup_pages(dev, 0); if (err) { - dev_err(&pdev->dev, "failed to allocate startup pages\n"); - goto err_pagealloc_cleanup; + dev_err(&pdev->dev, "failed to allocate init pages\n"); + goto reclaim_boot_pages; } err = mlx5_pagealloc_start(dev); if (err) { dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); - goto err_reclaim_pages; + goto reclaim_boot_pages; } err = mlx5_cmd_init_hca(dev); @@ -396,9 +447,12 @@ err_stop_poll: err_pagealloc_stop: mlx5_pagealloc_stop(dev); -err_reclaim_pages: +reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev); + err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); @@ -434,6 +488,7 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev) mlx5_cmd_teardown_hca(dev); mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev); mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); iounmap(dev->iseg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index f0bf46339b28..4a3e137931a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -64,7 +64,7 @@ struct mlx5_query_pages_inbox { struct mlx5_query_pages_outbox { struct mlx5_outbox_hdr hdr; - u8 reserved[2]; + __be16 num_boot_pages; __be16 func_id; __be16 init_pages; __be16 num_pages; @@ -146,7 +146,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr) } static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, - s16 *pages, s16 *init_pages) + s16 *pages, s16 *init_pages, u16 *boot_pages) { struct mlx5_query_pages_inbox in; struct mlx5_query_pages_outbox out; @@ -164,8 +164,13 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, if (pages) *pages = be16_to_cpu(out.num_pages); + if (init_pages) *init_pages = be16_to_cpu(out.init_pages); + + if (boot_pages) + *boot_pages = be16_to_cpu(out.num_boot_pages); + *func_id = be16_to_cpu(out.func_id); return err; @@ -357,19 +362,22 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, queue_work(dev->priv.pg_wq, &req->work); } -int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev) +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) { + u16 uninitialized_var(boot_pages); s16 uninitialized_var(init_pages); u16 uninitialized_var(func_id); int err; - err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages); + err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages, + &boot_pages); if (err) return err; - mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id); - return give_pages(dev, func_id, init_pages, 0); + mlx5_core_dbg(dev, "requested %d init pages and %d boot pages for func_id 0x%x\n", + init_pages, boot_pages, func_id); + return give_pages(dev, func_id, boot ? boot_pages : init_pages, 0); } static int optimal_reclaimed_pages(void) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8de8d8f22384..737685e9e852 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -690,6 +690,26 @@ struct mlx5_query_cq_mbox_out { __be64 pas[0]; }; +struct mlx5_enable_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_enable_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_disable_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_disable_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + struct mlx5_eq_context { u8 status; u8 ec_oi; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f22e4419839b..2aa258b0ced1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -101,6 +101,8 @@ enum { MLX5_CMD_OP_QUERY_ADAPTER = 0x101, MLX5_CMD_OP_INIT_HCA = 0x102, MLX5_CMD_OP_TEARDOWN_HCA = 0x103, + MLX5_CMD_OP_ENABLE_HCA = 0x104, + MLX5_CMD_OP_DISABLE_HCA = 0x105, MLX5_CMD_OP_QUERY_PAGES = 0x107, MLX5_CMD_OP_MANAGE_PAGES = 0x108, MLX5_CMD_OP_SET_HCA_CAP = 0x109, @@ -690,7 +692,7 @@ int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s16 npages); -int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev); +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -- cgit v1.2.3 From 22f2020f84c6da2dd0acb2dce12e39e59ff7c8be Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:48 -0700 Subject: vmpressure: change vmpressure::sr_lock to spinlock There is nothing that can sleep inside critical sections protected by this lock and those sections are really small so there doesn't make much sense to use mutex for them. Change the log to a spinlock Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Reviewed-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmpressure.h | 2 +- mm/vmpressure.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 76be077340ea..2081680e015d 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -12,7 +12,7 @@ struct vmpressure { unsigned long scanned; unsigned long reclaimed; /* The lock is used to keep the scanned/reclaimed above in sync. */ - struct mutex sr_lock; + struct spinlock sr_lock; /* The list of vmpressure_event structs. */ struct list_head events; diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 736a6011c2c8..f4ee6a190a4d 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -180,12 +180,12 @@ static void vmpressure_work_fn(struct work_struct *work) if (!vmpr->scanned) return; - mutex_lock(&vmpr->sr_lock); + spin_lock(&vmpr->sr_lock); scanned = vmpr->scanned; reclaimed = vmpr->reclaimed; vmpr->scanned = 0; vmpr->reclaimed = 0; - mutex_unlock(&vmpr->sr_lock); + spin_unlock(&vmpr->sr_lock); do { if (vmpressure_event(vmpr, scanned, reclaimed)) @@ -240,11 +240,11 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, if (!scanned) return; - mutex_lock(&vmpr->sr_lock); + spin_lock(&vmpr->sr_lock); vmpr->scanned += scanned; vmpr->reclaimed += reclaimed; scanned = vmpr->scanned; - mutex_unlock(&vmpr->sr_lock); + spin_unlock(&vmpr->sr_lock); if (scanned < vmpressure_win || work_pending(&vmpr->work)) return; @@ -367,7 +367,7 @@ void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, */ void vmpressure_init(struct vmpressure *vmpr) { - mutex_init(&vmpr->sr_lock); + spin_lock_init(&vmpr->sr_lock); mutex_init(&vmpr->events_lock); INIT_LIST_HEAD(&vmpr->events); INIT_WORK(&vmpr->work, vmpressure_work_fn); -- cgit v1.2.3 From 33cb876e947b9ddda8dca3fb99234b743a597ef9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:51 -0700 Subject: vmpressure: make sure there are no events queued after memcg is offlined vmpressure is called synchronously from reclaim where the target_memcg is guaranteed to be alive but the eventfd is signaled from the work queue context. This means that memcg (along with vmpressure structure which is embedded into it) might go away while the work item is pending which would result in use-after-release bug. We have two possible ways how to fix this. Either vmpressure pins memcg before it schedules vmpr->work and unpin it in vmpressure_work_fn or explicitely flush the work item from the css_offline context (as suggested by Tejun). This patch implements the later one and it introduces vmpressure_cleanup which flushes the vmpressure work queue item item. It hooks into mem_cgroup_css_offline after the memcg itself is cleaned up. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmpressure.h | 1 + mm/memcontrol.c | 1 + mm/vmpressure.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 2081680e015d..7dc17e2456de 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -30,6 +30,7 @@ extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); extern void vmpressure_init(struct vmpressure *vmpr); +extern void vmpressure_cleanup(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 00a7a664b9c1..c290a1cf3862 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6335,6 +6335,7 @@ static void mem_cgroup_css_offline(struct cgroup *cont) mem_cgroup_invalidate_reclaim_iterators(memcg); mem_cgroup_reparent_charges(memcg); mem_cgroup_destroy_all_caches(memcg); + vmpressure_cleanup(&memcg->vmpressure); } static void mem_cgroup_css_free(struct cgroup *cont) diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 192f9731931d..0c1e37d829fa 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -372,3 +372,19 @@ void vmpressure_init(struct vmpressure *vmpr) INIT_LIST_HEAD(&vmpr->events); INIT_WORK(&vmpr->work, vmpressure_work_fn); } + +/** + * vmpressure_cleanup() - shuts down vmpressure control structure + * @vmpr: Structure to be cleaned up + * + * This function should be called before the structure in which it is + * embedded is cleaned up. + */ +void vmpressure_cleanup(struct vmpressure *vmpr) +{ + /* + * Make sure there is no pending work before eventfd infrastructure + * goes away. + */ + flush_work(&vmpr->work); +} -- cgit v1.2.3 From e0d1095ae3405404d247afb00233ef837d58da83 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:25 +0800 Subject: net: rename CONFIG_NET_LL_RX_POLL to CONFIG_NET_RX_BUSY_POLL Eliezer renames several *ll_poll to *busy_poll, but forgets CONFIG_NET_LL_RX_POLL, so in case of confusion, rename it too. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 8 ++++---- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 12 ++++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 10 +++++----- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 2 +- include/net/busy_poll.h | 6 +++--- include/net/sock.h | 2 +- net/Kconfig | 2 +- net/core/skbuff.c | 2 +- net/core/sock.c | 6 +++--- net/core/sysctl_net_core.c | 2 +- net/socket.c | 2 +- 18 files changed, 41 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 1c15043aaee4..d569f2a424d5 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -52,7 +52,7 @@ Default: 64 busy_read ---------------- -Low latency busy poll timeout for socket reads. (needs CONFIG_NET_LL_RX_POLL) +Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL) Approximate time in us to busy loop waiting for packets on the device queue. This sets the default value of the SO_BUSY_POLL socket option. Can be set or overridden per socket by setting socket option SO_BUSY_POLL, @@ -63,7 +63,7 @@ Default: 0 (off) busy_poll ---------------- -Low latency busy poll timeout for poll and select. (needs CONFIG_NET_LL_RX_POLL) +Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL) Approximate time in us to busy loop waiting for events. Recommended value depends on the number of sockets you poll on. For several sockets 50, for several hundreds 100. diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index dedbd76c033e..d80e34b8285f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -486,7 +486,7 @@ struct bnx2x_fastpath { struct napi_struct napi; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define BNX2X_FP_STATE_IDLE 0 #define BNX2X_FP_STATE_NAPI (1 << 0) /* NAPI owns this FP */ @@ -498,7 +498,7 @@ struct bnx2x_fastpath { #define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD) /* protect state */ spinlock_t lock; -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ union host_hc_status_block status_blk; /* chip independent shortcuts into sb structure */ @@ -572,7 +572,7 @@ struct bnx2x_fastpath { #define bnx2x_fp_stats(bp, fp) (&((bp)->fp_stats[(fp)->index])) #define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats)) -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp) { spin_lock_init(&fp->lock); @@ -680,7 +680,7 @@ static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) { return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ /* Use 2500 as a mini-jumbo MTU for FCoE */ #define BNX2X_FCOE_MINI_JUMBO_MTU 2500 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ee350bde1818..f2d1ff10054b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3117,7 +3117,7 @@ int bnx2x_poll(struct napi_struct *napi, int budget) return work_done; } -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ int bnx2x_low_latency_recv(struct napi_struct *napi) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index e5da07858a2f..e06186c305d8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12026,7 +12026,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_fcoe_get_wwn = bnx2x_fcoe_get_wwn, #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = bnx2x_low_latency_recv, #endif }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 7be725cdfea8..a6494e5daffe 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -54,7 +54,7 @@ #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL #define LL_EXTENDED_STATS #endif /* common prefix used by pr_<> macros */ @@ -366,7 +366,7 @@ struct ixgbe_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define IXGBE_QV_STATE_IDLE 0 #define IXGBE_QV_STATE_NAPI 1 /* NAPI owns this QV */ @@ -377,12 +377,12 @@ struct ixgbe_q_vector { #define IXGBE_QV_YIELD (IXGBE_QV_STATE_NAPI_YIELD | IXGBE_QV_STATE_POLL_YIELD) #define IXGBE_QV_USER_PEND (IXGBE_QV_STATE_POLL | IXGBE_QV_STATE_POLL_YIELD) spinlock_t lock; -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ /* for dynamic allocation of rings associated with this q_vector */ struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp; }; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector) { @@ -462,7 +462,7 @@ static inline bool ixgbe_qv_ll_polling(struct ixgbe_q_vector *q_vector) WARN_ON(!(q_vector->state & IXGBE_QV_LOCKED)); return q_vector->state & IXGBE_QV_USER_PEND; } -#else /* CONFIG_NET_LL_RX_POLL */ +#else /* CONFIG_NET_RX_BUSY_POLL */ static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector) { } @@ -491,7 +491,7 @@ static inline bool ixgbe_qv_ll_polling(struct ixgbe_q_vector *q_vector) { return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_IXGBE_HWMON diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bad8f14b1941..be4b1fb3d0d2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1998,7 +1998,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, return total_rx_packets; } -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int ixgbe_low_latency_recv(struct napi_struct *napi) { @@ -2030,7 +2030,7 @@ static int ixgbe_low_latency_recv(struct napi_struct *napi) return found; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ /** * ixgbe_configure_msix - Configure MSI-X hardware @@ -7227,7 +7227,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = ixgbe_low_latency_recv, #endif #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 727874f575ce..a28cd801a236 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -223,7 +223,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return (priv->stats_bitmap ? bit_count : NUM_ALL_STATS) + (priv->tx_ring_num * 2) + -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL (priv->rx_ring_num * 5); #else (priv->rx_ring_num * 2); @@ -276,7 +276,7 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i].packets; data[index++] = priv->rx_ring[i].bytes; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL data[index++] = priv->rx_ring[i].yields; data[index++] = priv->rx_ring[i].misses; data[index++] = priv->rx_ring[i].cleaned; @@ -344,7 +344,7 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_napi_yield", i); sprintf(data + (index++) * ETH_GSTRING_LEN, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5eac871399d8..fa37b7a61213 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -68,7 +68,7 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int mlx4_en_low_latency_recv(struct napi_struct *napi) { @@ -94,7 +94,7 @@ static int mlx4_en_low_latency_recv(struct napi_struct *napi) return done; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_RFS_ACCEL @@ -2140,7 +2140,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = mlx4_en_low_latency_recv, #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 35fb60e2320c..5e0aa569306a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -292,7 +292,7 @@ struct mlx4_en_rx_ring { void *rx_info; unsigned long bytes; unsigned long packets; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned long yields; unsigned long misses; unsigned long cleaned; @@ -318,7 +318,7 @@ struct mlx4_en_cq { struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define MLX4_EN_CQ_STATE_IDLE 0 #define MLX4_EN_CQ_STATE_NAPI 1 /* NAPI owns this CQ */ @@ -329,7 +329,7 @@ struct mlx4_en_cq { #define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD) #define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) spinlock_t poll_lock; /* protects from LLS/napi conflicts */ -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ }; struct mlx4_en_port_profile { @@ -580,7 +580,7 @@ struct mlx4_mac_entry { struct rcu_head rcu; }; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) { spin_lock_init(&cq->poll_lock); @@ -687,7 +687,7 @@ static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) { return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0741a1e919a5..9a4156845e93 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -973,7 +973,7 @@ struct net_device_ops { gfp_t gfp); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL int (*ndo_busy_poll)(struct napi_struct *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5afefa01a13c..3b71a4e83642 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -501,7 +501,7 @@ struct sk_buff { /* 7/9 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); -#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL +#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL union { unsigned int napi_id; dma_cookie_t dma_cookie; diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 6cd8848fec68..f18b91966d3d 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -27,7 +27,7 @@ #include #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; @@ -146,7 +146,7 @@ static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) sk->sk_napi_id = skb->napi_id; } -#else /* CONFIG_NET_LL_RX_POLL */ +#else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { return 0; @@ -186,5 +186,5 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 95a5a2c6925a..31d5cfbb51ec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -327,7 +327,7 @@ struct sock { #ifdef CONFIG_RPS __u32 sk_rxhash; #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_napi_id; unsigned int sk_ll_usec; #endif diff --git a/net/Kconfig b/net/Kconfig index 37702491abe9..2b406608a1a4 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -244,7 +244,7 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis -config NET_LL_RX_POLL +config NET_RX_BUSY_POLL boolean default y diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3df4d4ccf440..2c3d0f53d198 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -740,7 +740,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_copy_secmark(new, old); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL new->napi_id = old->napi_id; #endif } diff --git a/net/core/sock.c b/net/core/sock.c index 548d716c5f62..2c097c5a35dd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -900,7 +900,7 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) @@ -1170,7 +1170,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: v.val = sk->sk_ll_usec; break; @@ -2292,7 +2292,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; sk->sk_ll_usec = sysctl_net_busy_read; #endif diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 660968616637..b59b6804fd98 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -298,7 +298,7 @@ static struct ctl_table net_core_table[] = { .proc_handler = flow_limit_table_len_sysctl }, #endif /* CONFIG_NET_FLOW_LIMIT */ -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL { .procname = "busy_poll", .data = &sysctl_net_busy_poll, diff --git a/net/socket.c b/net/socket.c index 829b460acb87..b2d7c629eeb9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -106,7 +106,7 @@ #include #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -- cgit v1.2.3 From ed5467da0e369e65b247b99eb6403cb79172bcda Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Fri, 2 Aug 2013 21:16:43 +0400 Subject: tracing: Fix fields of struct trace_iterator that are zeroed by mistake tracing_read_pipe zeros all fields bellow "seq". The declaration contains a comment about that, but it doesn't help. The first field is "snapshot", it's true when current open file is snapshot. Looks obvious, that it should not be zeroed. The second field is "started". It was converted from cpumask_t to cpumask_var_t (v2.6.28-4983-g4462344), in other words it was converted from cpumask to pointer on cpumask. Currently the reference on "started" memory is lost after the first read from tracing_read_pipe and a proper object will never be freed. The "started" is never dereferenced for trace_pipe, because trace_pipe can't have the TRACE_FILE_ANNOTATE options. Link: http://lkml.kernel.org/r/1375463803-3085183-1-git-send-email-avagin@openvz.org Cc: stable@vger.kernel.org # 2.6.30 Signed-off-by: Andrew Vagin Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 10 ++++++---- kernel/trace/trace.c | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f98ab063e95e..120d57a1c3a5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -78,6 +78,11 @@ struct trace_iterator { /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; + cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; + /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; @@ -90,10 +95,7 @@ struct trace_iterator { loff_t pos; long idx; - cpumask_var_t started; - - /* it's true when current open file is snapshot */ - bool snapshot; + /* All new field here will be zeroed out in pipe_read */ }; enum trace_iter_flags { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 882ec1dd1515..f5b35a5e852f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4151,6 +4151,7 @@ waitagain: memset(&iter->seq, 0, sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); + cpumask_clear(iter->started); iter->pos = -1; trace_event_read_lock(); -- cgit v1.2.3 From e67bc51e574ffe3c4bc1e09cab7658b1e780b4ce Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Fri, 2 Aug 2013 14:47:29 -0400 Subject: tracing: Fix trace_dump_stack() proto when CONFIG_TRACING is not set When CONFIG_TRACING is not enabled, the stub prototype for trace_dump_stack() is incorrect. It has (void) when it should be (int). Link: http://lkml.kernel.org/r/CAPhKKr_H=ukFnBL4WgDOVT5ay2xeF-Ho+CA0DWZX0E2JW-=vSQ@mail.gmail.com Signed-off-by: Dhaval Giani Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3bef14c6586b..482ad2d84a32 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -629,7 +629,7 @@ extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } -static inline void trace_dump_stack(void) { } +static inline void trace_dump_stack(int skip) { } static inline void tracing_on(void) { } static inline void tracing_off(void) { } -- cgit v1.2.3 From 7d46daba8dd5df1aa45724518a041ef7163d3ad5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 5 Aug 2013 16:05:32 +0300 Subject: mlx5: remove health handler plugin Remove this code, per Dave Miller's request, since it is not being used anywhere in the kernel. Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 29 +----------------------- include/linux/mlx5/driver.h | 3 --- 2 files changed, 1 insertion(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 748f10a155c4..3e6670c4a7cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -55,33 +55,9 @@ enum { }; static DEFINE_SPINLOCK(health_lock); - static LIST_HEAD(health_list); static struct work_struct health_work; -static health_handler_t reg_handler; -int mlx5_register_health_report_handler(health_handler_t handler) -{ - spin_lock_irq(&health_lock); - if (reg_handler) { - spin_unlock_irq(&health_lock); - return -EEXIST; - } - reg_handler = handler; - spin_unlock_irq(&health_lock); - - return 0; -} -EXPORT_SYMBOL(mlx5_register_health_report_handler); - -void mlx5_unregister_health_report_handler(void) -{ - spin_lock_irq(&health_lock); - reg_handler = NULL; - spin_unlock_irq(&health_lock); -} -EXPORT_SYMBOL(mlx5_unregister_health_report_handler); - static void health_care(struct work_struct *work) { struct mlx5_core_health *health, *n; @@ -98,11 +74,8 @@ static void health_care(struct work_struct *work) priv = container_of(health, struct mlx5_priv, health); dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); + /* nothing yet */ spin_lock_irq(&health_lock); - if (reg_handler) - reg_handler(dev->pdev, health->health, - sizeof(health->health)); - list_del_init(&health->list); spin_unlock_irq(&health_lock); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2aa258b0ced1..611e65e76b00 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -731,9 +731,6 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); -typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size); -int mlx5_register_health_report_handler(health_handler_t handler); -void mlx5_unregister_health_report_handler(void); const char *mlx5_command_str(int command); int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 49ccc142f9cbc33fdda18e8fa90c1c5b4a79c0ad Mon Sep 17 00:00:00 2001 From: Mateusz Krawczuk Date: Tue, 6 Aug 2013 18:34:40 +0200 Subject: regmap: Add missing header for !CONFIG_REGMAP stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regmap.h requires linux/err.h if CONFIG_REGMAP is not defined. Without it I get error. CC drivers/media/platform/exynos4-is/fimc-reg.o In file included from drivers/media/platform/exynos4-is/fimc-reg.c:14:0: include/linux/regmap.h: In function ‘regmap_write’: include/linux/regmap.h:525:10: error: ‘EINVAL’ undeclared (first use in this function) include/linux/regmap.h:525:10: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Mateusz Krawczuk Signed-off-by: Kyungmin Park Signed-off-by: Mark Brown Cc: stable@kernel.org --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 75981d0b57dc..580a5320cc96 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -15,6 +15,7 @@ #include #include +#include struct module; struct device; -- cgit v1.2.3 From 786615bc1ce84150ded80daea6bd9f6297f48e73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 16:04:47 -0400 Subject: SUNRPC: If the rpcbind channel is disconnected, fail the call to unregister If rpcbind causes our connection to the AF_LOCAL socket to close after we've registered a service, then we want to be careful about reconnecting since the mount namespace may have changed. By simply refusing to reconnect the AF_LOCAL socket in the case of unregister, we avoid the need to somehow save the mount namespace. While this may lead to some services not unregistering properly, it should be safe. Signed-off-by: Trond Myklebust Cc: Nix Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 4 ++++ net/sunrpc/netns.h | 1 + net/sunrpc/rpcb_clnt.c | 40 +++++++++++++++++++++++++++------------- 4 files changed, 33 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6d870353674a..1821445708d6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -121,6 +121,7 @@ struct rpc_task_setup { #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ +#define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 74f6a704e374..ecbc4e3d83ad 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1660,6 +1660,10 @@ call_connect(struct rpc_task *task) task->tk_action = call_connect_status; if (task->tk_status < 0) return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; + } xprt_connect(task); } } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 74d948f5d5a1..779742cfc1ff 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -23,6 +23,7 @@ struct sunrpc_net { struct rpc_clnt *rpcb_local_clnt4; spinlock_t rpcb_clnt_lock; unsigned int rpcb_users; + unsigned int rpcb_is_af_local : 1; struct mutex gssp_lock; wait_queue_head_t gssp_wq; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b0f723227157..1891a1022c17 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net) } static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, - struct rpc_clnt *clnt4) + struct rpc_clnt *clnt4, + bool is_af_local) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* Protected by rpcb_create_local_mutex */ sn->rpcb_local_clnt = clnt; sn->rpcb_local_clnt4 = clnt4; + sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " @@ -271,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, true); out: return result; @@ -323,7 +325,7 @@ static int rpcb_create_local_net(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, false); out: return result; @@ -384,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, return rpc_create(&args); } -static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) +static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set) { - int result, error = 0; + int flags = RPC_TASK_NOCONNECT; + int error, result = 0; + if (is_set || !sn->rpcb_is_af_local) + flags = RPC_TASK_SOFTCONN; msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); + error = rpc_call_sync(clnt, msg, flags); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -447,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short .rpc_argp = &map, }; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + bool is_set = false; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; + is_set = true; + } - return rpcb_register_call(sn->rpcb_local_clnt, &msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set); } /* @@ -469,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -479,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -497,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -507,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -527,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(sn->rpcb_local_clnt4, msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false); } /** -- cgit v1.2.3 From 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Aug 2013 18:55:32 +0200 Subject: userns: limit the maximum depth of user_namespace->parent chain Ensure that user_namespace->parent chain can't grow too much. Currently we use the hardroded 32 as limit. Reported-by: Andy Lutomirski Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 1 + kernel/user_namespace.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b6b215f13b45..14105c26a836 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -23,6 +23,7 @@ struct user_namespace { struct uid_gid_map projid_map; atomic_t count; struct user_namespace *parent; + int level; kuid_t owner; kgid_t group; unsigned int proc_inum; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 6e50a44610ee..9064b919a406 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -62,6 +62,9 @@ int create_user_ns(struct cred *new) kgid_t group = new->egid; int ret; + if (parent_ns->level > 32) + return -EUSERS; + /* * Verify that we can not violate the policy of which files * may be accessed that is specified by the root directory, @@ -92,6 +95,7 @@ int create_user_ns(struct cred *new) atomic_set(&ns->count, 1); /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; + ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; -- cgit v1.2.3 From e0acd0a68ec7dbf6b7a81a87a867ebd7ac9b76c4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 12 Aug 2013 18:14:00 +0200 Subject: sched: fix the theoretical signal_wake_up() vs schedule() race This is only theoretical, but after try_to_wake_up(p) was changed to check p->state under p->pi_lock the code like __set_current_state(TASK_INTERRUPTIBLE); schedule(); can miss a signal. This is the special case of wait-for-condition, it relies on try_to_wake_up/schedule interaction and thus it does not need mb() between __set_current_state() and if(signal_pending). However, this __set_current_state() can move into the critical section protected by rq->lock, now that try_to_wake_up() takes another lock we need to ensure that it can't be reordered with "if (signal_pending(current))" check inside that section. The patch is actually one-liner, it simply adds smp_wmb() before spin_lock_irq(rq->lock). This is what try_to_wake_up() already does by the same reason. We turn this wmb() into the new helper, smp_mb__before_spinlock(), for better documentation and to allow the architectures to change the default implementation. While at it, kill smp_mb__after_lock(), it has no callers. Perhaps we can also add smp_mb__before/after_spinunlock() for prepare_to_wait(). Signed-off-by: Oleg Nesterov Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- arch/x86/include/asm/spinlock.h | 4 ---- include/linux/spinlock.h | 14 +++++++++++--- kernel/sched/core.c | 14 +++++++++++++- 3 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 33692eaabab5..e3ddd7db723f 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -233,8 +233,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() -/* The {read|write|spin}_lock() on x86 are full memory barriers. */ -static inline void smp_mb__after_lock(void) { } -#define ARCH_HAS_SMP_MB_AFTER_LOCK - #endif /* _ASM_X86_SPINLOCK_H */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 7d537ced949a..75f34949d9ab 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -117,9 +117,17 @@ do { \ #endif /*arch_spin_is_contended*/ #endif -/* The lock does not imply full memory barrier. */ -#ifndef ARCH_HAS_SMP_MB_AFTER_LOCK -static inline void smp_mb__after_lock(void) { smp_mb(); } +/* + * Despite its name it doesn't necessarily has to be a full barrier. + * It should only guarantee that a STORE before the critical section + * can not be reordered with a LOAD inside this section. + * spin_lock() is the one-way barrier, this LOAD can not escape out + * of the region. So the default implementation simply ensures that + * a STORE can not move into the critical section, smp_wmb() should + * serialize it with another STORE done by spin_lock(). + */ +#ifndef smp_mb__before_spinlock +#define smp_mb__before_spinlock() smp_wmb() #endif /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b7c32cb7bfeb..ef51b0ef4bdc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1491,7 +1491,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) unsigned long flags; int cpu, success = 0; - smp_wmb(); + /* + * If we are going to wake up a thread waiting for CONDITION we + * need to ensure that CONDITION=1 done by the caller can not be + * reordered with p->state check below. This pairs with mb() in + * set_current_state() the waiting thread does. + */ + smp_mb__before_spinlock(); raw_spin_lock_irqsave(&p->pi_lock, flags); if (!(p->state & state)) goto out; @@ -2394,6 +2400,12 @@ need_resched: if (sched_feat(HRTICK)) hrtick_clear(rq); + /* + * Make sure that signal_pending_state()->signal_pending() below + * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) + * done by the caller to avoid the race with signal_wake_up(). + */ + smp_mb__before_spinlock(); raw_spin_lock_irq(&rq->lock); switch_count = &prev->nivcsw; -- cgit v1.2.3 From 179ef71cbc085252e3fe6b8159263a7ed1d88ea4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 13 Aug 2013 16:00:49 -0700 Subject: mm: save soft-dirty bits on swapped pages Andy Lutomirski reported that if a page with _PAGE_SOFT_DIRTY bit set get swapped out, the bit is getting lost and no longer available when pte read back. To resolve this we introduce _PTE_SWP_SOFT_DIRTY bit which is saved in pte entry for the page being swapped out. When such page is to be read back from a swap cache we check for bit presence and if it's there we clear it and restore the former _PAGE_SOFT_DIRTY bit back. One of the problem was to find a place in pte entry where we can save the _PTE_SWP_SOFT_DIRTY bit while page is in swap. The _PAGE_PSE was chosen for that, it doesn't intersect with swap entry format stored in pte. Reported-by: Andy Lutomirski Signed-off-by: Cyrill Gorcunov Acked-by: Pavel Emelyanov Cc: Matt Mackall Cc: Xiao Guangrong Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Cc: Peter Zijlstra Cc: "Aneesh Kumar K.V" Reviewed-by: Minchan Kim Reviewed-by: Wanpeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 15 +++++++++++++++ arch/x86/include/asm/pgtable_types.h | 13 +++++++++++++ fs/proc/task_mmu.c | 21 +++++++++++++++------ include/asm-generic/pgtable.h | 15 +++++++++++++++ include/linux/swapops.h | 2 ++ mm/memory.c | 2 ++ mm/rmap.c | 6 +++++- mm/swapfile.c | 19 +++++++++++++++++-- 8 files changed, 84 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 7dc305a46058..bd0518a7f197 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -314,6 +314,21 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); } +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); +} + +static inline int pte_swp_soft_dirty(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; +} + +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); +} + /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index c98ac63aae48..5e8442f178f9 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -67,6 +67,19 @@ #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) #endif +/* + * Tracking soft dirty bit when a page goes to a swap is tricky. + * We need a bit which can be stored in pte _and_ not conflict + * with swap entry format. On x86 bits 6 and 7 are *not* involved + * into swap entry computation, but bit 6 is used for nonlinear + * file mapping, so we borrow bit 7 for soft dirty tracking. + */ +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE +#else +#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #else diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dbf61f6174f0..e2d9bdce5e7e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -730,8 +730,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, * of how soft-dirty works. */ pte_t ptent = *pte; - ptent = pte_wrprotect(ptent); - ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + + if (pte_present(ptent)) { + ptent = pte_wrprotect(ptent); + ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + } else if (is_swap_pte(ptent)) { + ptent = pte_swp_clear_soft_dirty(ptent); + } + set_pte_at(vma->vm_mm, addr, pte, ptent); #endif } @@ -752,14 +758,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; - if (!pte_present(ptent)) - continue; if (cp->type == CLEAR_REFS_SOFT_DIRTY) { clear_soft_dirty(vma, addr, pte); continue; } + if (!pte_present(ptent)) + continue; + page = vm_normal_page(vma, addr, ptent); if (!page) continue; @@ -930,8 +937,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, flags = PM_PRESENT; page = vm_normal_page(vma, addr, pte); } else if (is_swap_pte(pte)) { - swp_entry_t entry = pte_to_swp_entry(pte); - + swp_entry_t entry; + if (pte_swp_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; + entry = pte_to_swp_entry(pte); frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags = PM_SWAP; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 2f47ade1b567..2a7e0d10ad9a 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -417,6 +417,21 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { return pmd; } + +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +{ + return pte; +} + +static inline int pte_swp_soft_dirty(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) +{ + return pte; +} #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c5fd30d2a415..8d4fa82bfb91 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -67,6 +67,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte) swp_entry_t arch_entry; BUG_ON(pte_file(pte)); + if (pte_swp_soft_dirty(pte)) + pte = pte_swp_clear_soft_dirty(pte); arch_entry = __pte_to_swp_entry(pte); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } diff --git a/mm/memory.c b/mm/memory.c index 1ce2e2a734fc..e98ecad2b9c8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3115,6 +3115,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, exclusive = 1; } flush_icache_page(vma, page); + if (pte_swp_soft_dirty(orig_pte)) + pte = pte_mksoft_dirty(pte); set_pte_at(mm, address, page_table, pte); if (page == swapcache) do_page_add_anon_rmap(page, vma, address, exclusive); diff --git a/mm/rmap.c b/mm/rmap.c index cd356df4f71a..83325b80142b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1236,6 +1236,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_entry_to_pte(make_hwpoison_entry(page))); } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(page) }; + pte_t swp_pte; if (PageSwapCache(page)) { /* @@ -1264,7 +1265,10 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION); entry = make_migration_entry(page, pte_write(pteval)); } - set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); + swp_pte = swp_entry_to_pte(entry); + if (pte_soft_dirty(pteval)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + set_pte_at(mm, address, pte, swp_pte); BUG_ON(pte_file(*pte)); } else if (IS_ENABLED(CONFIG_MIGRATION) && (TTU_ACTION(flags) == TTU_MIGRATION)) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 36af6eeaa67e..6cf2e60983b7 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -866,6 +866,21 @@ unsigned int count_swap_pages(int type, int free) } #endif /* CONFIG_HIBERNATION */ +static inline int maybe_same_pte(pte_t pte, pte_t swp_pte) +{ +#ifdef CONFIG_MEM_SOFT_DIRTY + /* + * When pte keeps soft dirty bit the pte generated + * from swap entry does not has it, still it's same + * pte from logical point of view. + */ + pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte); + return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty); +#else + return pte_same(pte, swp_pte); +#endif +} + /* * No need to decide whether this PTE shares the swap entry with others, * just let do_wp_page work it out if a write is requested later - to @@ -892,7 +907,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, } pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { + if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) { mem_cgroup_cancel_charge_swapin(memcg); ret = 0; goto out; @@ -947,7 +962,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, * swapoff spends a _lot_ of time in this loop! * Test inline before going to call unuse_pte. */ - if (unlikely(pte_same(*pte, swp_pte))) { + if (unlikely(maybe_same_pte(*pte, swp_pte))) { pte_unmap(pte); ret = unuse_pte(vma, pmd, addr, entry, page); if (ret) -- cgit v1.2.3 From dfa9771a7c4784bafd0673bc7abcee3813088b77 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 13 Aug 2013 16:00:53 -0700 Subject: microblaze: fix clone syscall Fix inadvertent breakage in the clone syscall ABI for Microblaze that was introduced in commit f3268edbe6fe ("microblaze: switch to generic fork/vfork/clone"). The Microblaze syscall ABI for clone takes the parent tid address in the 4th argument; the third argument slot is used for the stack size. The incorrectly-used CLONE_BACKWARDS type assigned parent tid to the 3rd slot. This commit restores the original ABI so that existing userspace libc code will work correctly. All kernel versions from v3.8-rc1 were affected. Signed-off-by: Michal Simek Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 6 ++++++ arch/microblaze/Kconfig | 2 +- include/linux/syscalls.h | 5 +++++ kernel/fork.c | 6 ++++++ 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 8d2ae24b9f4a..1feb169274fe 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -407,6 +407,12 @@ config CLONE_BACKWARDS2 help Architecture has the first two arguments of clone(2) swapped. +config CLONE_BACKWARDS3 + bool + help + Architecture has tls passed as the 3rd argument of clone(2), + not the 5th one. + config ODD_RT_SIGACTION bool help diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index d22a4ecffff4..4fab52294d98 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -28,7 +28,7 @@ config MICROBLAZE select GENERIC_CLOCKEVENTS select GENERIC_IDLE_POLL_SETUP select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS + select CLONE_BACKWARDS3 config SWAP def_bool n diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4147d700a293..84662ecc7b51 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -802,9 +802,14 @@ asmlinkage long sys_vfork(void); asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, int __user *); #else +#ifdef CONFIG_CLONE_BACKWARDS3 +asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, + int __user *, int); +#else asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int __user *, int); #endif +#endif asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, diff --git a/kernel/fork.c b/kernel/fork.c index 403d2bb8a968..e23bb19e2a3e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1679,6 +1679,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val) +#elif defined(CONFIG_CLONE_BACKWARDS3) +SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, + int, stack_size, + int __user *, parent_tidptr, + int __user *, child_tidptr, + int, tls_val) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, -- cgit v1.2.3 From df54d6fa54275ce59660453e29d1228c2b45a826 Mon Sep 17 00:00:00 2001 From: Radu Caragea Date: Tue, 13 Aug 2013 16:00:59 -0700 Subject: x86 get_unmapped_area(): use proper mmap base for bottom-up direction When the stack is set to unlimited, the bottomup direction is used for mmap-ings but the mmap_base is not used and thus effectively renders ASLR for mmapings along with PIE useless. Cc: Michel Lespinasse Cc: Oleg Nesterov Reviewed-by: Rik van Riel Acked-by: Ingo Molnar Cc: Adrian Sendroiu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/sys_x86_64.c | 2 +- arch/x86/mm/mmap.c | 2 +- include/linux/sched.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index dbded5aedb81..48f8375e4c6b 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = new_begin; } } else { - *begin = TASK_UNMAPPED_BASE; + *begin = mmap_legacy_base(); *end = TASK_SIZE; } } diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 62c29a5bfe26..f63778cb2363 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -98,7 +98,7 @@ static unsigned long mmap_base(void) * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 * does, but not when emulating X86_32 */ -static unsigned long mmap_legacy_base(void) +unsigned long mmap_legacy_base(void) { if (mmap_is_ia32()) return TASK_UNMAPPED_BASE; diff --git a/include/linux/sched.h b/include/linux/sched.h index d722490da030..923dd6ea4a0e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -314,6 +314,7 @@ struct nsproxy; struct user_namespace; #ifdef CONFIG_MMU +extern unsigned long mmap_legacy_base(void); extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, -- cgit v1.2.3 From 3f0fa9a808f98fa10a18ba2a73f13d65fda990fb Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 14 Aug 2013 16:05:02 -0700 Subject: regmap: Add another missing header for !CONFIG_REGMAP stubs The use of WARN_ON() needs the definitions from bug.h, without it you can get: include/linux/regmap.h: In function 'regmap_write': include/linux/regmap.h:525:2: error: implicit declaration of function 'WARN_ONCE' [-Werror=implicit-function-declaration] Signed-off-by: Kevin Hilman Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 580a5320cc96..6d91fcb4c5cb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -16,6 +16,7 @@ #include #include #include +#include struct module; struct device; -- cgit v1.2.3 From 0a324f3189ed9c78b1aaf48d88e93cb18643c655 Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Wed, 14 Aug 2013 17:46:48 +0300 Subject: net/mlx5_core: Support MANAGE_PAGES and QUERY_PAGES firmware command changes In the previous QUERY_PAGES command version we used one command to get the required amount of boot, init and post init pages. The new version uses the op_mod field to specify whether the query is for the required amount of boot, init or post init pages. In addition the output field size for the required amount of pages increased from 16 to 32 bits. In MANAGE_PAGES command the input_num_entries and output_num_entries fields sizes changed from 16 to 32 bits and the PAS tables offset changed to 0x10. In the pages request event the num_pages field also changed to 32 bits. In the HCA-capabilities-layout the size and location of max_qp_mcg field has been changed to support 24 bits. This patch isn't compatible with firmware versions < 5; however, it turns out that the first GA firmware we will publish will not support previous versions so this should be OK. Signed-off-by: Moshe Lazer Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 2 +- .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 58 ++++++++++------------ include/linux/mlx5/device.h | 22 ++++---- include/linux/mlx5/driver.h | 4 +- 6 files changed, 41 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c571de85d0f9..5472cbd34028 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -46,7 +46,7 @@ #include "mlx5_core.h" enum { - CMD_IF_REV = 4, + CMD_IF_REV = 5, }; enum { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c02cbcfd0fb8..443cc4d7b024 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -268,7 +268,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) case MLX5_EVENT_TYPE_PAGE_REQUEST: { u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); - s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages); + s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages); mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages); mlx5_core_req_pages_handler(dev, func_id, npages); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 72a5222447f5..f012658b6a92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -113,7 +113,7 @@ int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f; caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f; caps->log_max_mcg = out->hca_cap.log_max_mcg; - caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg); + caps->max_qp_mcg = be32_to_cpu(out->hca_cap.max_qp_mcg) & 0xffffff; caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f); caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f); caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 4a3e137931a3..3a2408d44820 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -43,10 +43,16 @@ enum { MLX5_PAGES_TAKE = 2 }; +enum { + MLX5_BOOT_PAGES = 1, + MLX5_INIT_PAGES = 2, + MLX5_POST_INIT_PAGES = 3 +}; + struct mlx5_pages_req { struct mlx5_core_dev *dev; u32 func_id; - s16 npages; + s32 npages; struct work_struct work; }; @@ -64,27 +70,23 @@ struct mlx5_query_pages_inbox { struct mlx5_query_pages_outbox { struct mlx5_outbox_hdr hdr; - __be16 num_boot_pages; + __be16 rsvd; __be16 func_id; - __be16 init_pages; - __be16 num_pages; + __be32 num_pages; }; struct mlx5_manage_pages_inbox { struct mlx5_inbox_hdr hdr; - __be16 rsvd0; + __be16 rsvd; __be16 func_id; - __be16 rsvd1; - __be16 num_entries; - u8 rsvd2[16]; + __be32 num_entries; __be64 pas[0]; }; struct mlx5_manage_pages_outbox { struct mlx5_outbox_hdr hdr; - u8 rsvd0[2]; - __be16 num_entries; - u8 rsvd1[20]; + __be32 num_entries; + u8 rsvd[4]; __be64 pas[0]; }; @@ -146,7 +148,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr) } static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, - s16 *pages, s16 *init_pages, u16 *boot_pages) + s32 *npages, int boot) { struct mlx5_query_pages_inbox in; struct mlx5_query_pages_outbox out; @@ -155,6 +157,8 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, memset(&in, 0, sizeof(in)); memset(&out, 0, sizeof(out)); in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES); + in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; @@ -162,15 +166,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, if (out.hdr.status) return mlx5_cmd_status_to_err(&out.hdr); - if (pages) - *pages = be16_to_cpu(out.num_pages); - - if (init_pages) - *init_pages = be16_to_cpu(out.init_pages); - - if (boot_pages) - *boot_pages = be16_to_cpu(out.num_boot_pages); - + *npages = be32_to_cpu(out.num_pages); *func_id = be16_to_cpu(out.func_id); return err; @@ -224,7 +220,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE); in->func_id = cpu_to_be16(func_id); - in->num_entries = cpu_to_be16(npages); + in->num_entries = cpu_to_be32(npages); err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); mlx5_core_dbg(dev, "err %d\n", err); if (err) { @@ -292,7 +288,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE); in.func_id = cpu_to_be16(func_id); - in.num_entries = cpu_to_be16(npages); + in.num_entries = cpu_to_be32(npages); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); if (err) { @@ -306,7 +302,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, goto out_free; } - num_claimed = be16_to_cpu(out->num_entries); + num_claimed = be32_to_cpu(out->num_entries); if (nclaimed) *nclaimed = num_claimed; @@ -345,7 +341,7 @@ static void pages_work_handler(struct work_struct *work) } void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s16 npages) + s32 npages) { struct mlx5_pages_req *req; @@ -364,20 +360,18 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) { - u16 uninitialized_var(boot_pages); - s16 uninitialized_var(init_pages); u16 uninitialized_var(func_id); + s32 uninitialized_var(npages); int err; - err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages, - &boot_pages); + err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot); if (err) return err; + mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", + npages, boot ? "boot" : "init", func_id); - mlx5_core_dbg(dev, "requested %d init pages and %d boot pages for func_id 0x%x\n", - init_pages, boot_pages, func_id); - return give_pages(dev, func_id, boot ? boot_pages : init_pages, 0); + return give_pages(dev, func_id, npages, 0); } static int optimal_reclaimed_pages(void) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 737685e9e852..68029b30c3dc 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -309,21 +309,20 @@ struct mlx5_hca_cap { __be16 max_desc_sz_rq; u8 rsvd21[2]; __be16 max_desc_sz_sq_dc; - u8 rsvd22[4]; - __be16 max_qp_mcg; - u8 rsvd23; + __be32 max_qp_mcg; + u8 rsvd22[3]; u8 log_max_mcg; - u8 rsvd24; + u8 rsvd23; u8 log_max_pd; - u8 rsvd25; + u8 rsvd24; u8 log_max_xrcd; - u8 rsvd26[42]; + u8 rsvd25[42]; __be16 log_uar_page_sz; - u8 rsvd27[28]; + u8 rsvd26[28]; u8 log_msx_atomic_size_qp; - u8 rsvd28[2]; + u8 rsvd27[2]; u8 log_msx_atomic_size_dc; - u8 rsvd29[76]; + u8 rsvd28[76]; }; @@ -472,9 +471,8 @@ struct mlx5_eqe_cmd { struct mlx5_eqe_page_req { u8 rsvd0[2]; __be16 func_id; - u8 rsvd1[2]; - __be16 num_pages; - __be32 rsvd2[5]; + __be32 num_pages; + __be32 rsvd1[5]; }; union ev_data { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 611e65e76b00..8888381fc150 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -358,7 +358,7 @@ struct mlx5_caps { u32 reserved_lkey; u8 local_ca_ack_delay; u8 log_max_mcg; - u16 max_qp_mcg; + u32 max_qp_mcg; int min_page_sz; }; @@ -691,7 +691,7 @@ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s16 npages); + s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); -- cgit v1.2.3 From f46078cfcd77fa5165bf849f5e568a7ac5fa569c Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Aug 2013 13:30:07 +0200 Subject: ipv6: drop packets with multiple fragmentation headers It is not allowed for an ipv6 packet to contain multiple fragmentation headers. So discard packets which were already reassembled by fragmentation logic and send back a parameter problem icmp. The updates for RFC 6980 will come in later, I have to do a bit more research here. Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + net/ipv6/reassembly.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 850e95bc766c..b8b7dc755752 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -101,6 +101,7 @@ struct inet6_skb_parm { #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 #define IP6SKB_ROUTERALERT 8 +#define IP6SKB_FRAGMENTED 16 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 790d9f4b8b0b..1aeb473b2cc6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_hdr(head)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; + IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct net *net = dev_net(skb_dst(skb)->dev); int evicted; + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) + goto fail_hdr; + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ @@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; return 1; } -- cgit v1.2.3 From d79ff142624e1be080ad8d09101f7004d79c36e1 Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Thu, 22 Aug 2013 17:45:36 +0200 Subject: [SCSI] zfcp: fix lock imbalance by reworking request queue locking This patch adds wait_event_interruptible_lock_irq_timeout(), which is a straight-forward descendant of wait_event_interruptible_timeout() and wait_event_interruptible_lock_irq(). The zfcp driver used to call wait_event_interruptible_timeout() in combination with some intricate and error-prone locking. Using wait_event_interruptible_lock_irq_timeout() as a replacement nicely cleans up that locking. This rework removes a situation that resulted in a locking imbalance in zfcp_qdio_sbal_get(): BUG: workqueue leaked lock or atomic: events/1/0xffffff00/10 last function: zfcp_fc_wka_port_offline+0x0/0xa0 [zfcp] It was introduced by commit c2af7545aaff3495d9bf9a7608c52f0af86fb194 "[SCSI] zfcp: Do not wait for SBALs on stopped queue", which had a new code path related to ZFCP_STATUS_ADAPTER_QDIOUP that took an early exit without a required lock being held. The problem occured when a special, non-SCSI I/O request was being submitted in process context, when the adapter's queues had been torn down. In this case the bug surfaced when the Fibre Channel port connection for a well-known address was closed during a concurrent adapter shut-down procedure, which is a rare constellation. This patch also fixes these warnings from the sparse tool (make C=1): drivers/s390/scsi/zfcp_qdio.c:224:12: warning: context imbalance in 'zfcp_qdio_sbal_check' - wrong count at exit drivers/s390/scsi/zfcp_qdio.c:244:5: warning: context imbalance in 'zfcp_qdio_sbal_get' - unexpected unlock Last but not least, we get rid of that crappy lock-unlock-lock sequence at the beginning of the critical section. It is okay to call zfcp_erp_adapter_reopen() with req_q_lock held. Reported-by: Mikulas Patocka Reported-by: Heiko Carstens Signed-off-by: Martin Peschke Cc: stable@vger.kernel.org #2.6.35+ Signed-off-by: Steffen Maier Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_qdio.c | 8 ++---- include/linux/wait.h | 57 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 665e3cfaaf85..de0598eaacd2 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -224,11 +224,9 @@ int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req, static int zfcp_qdio_sbal_check(struct zfcp_qdio *qdio) { - spin_lock_irq(&qdio->req_q_lock); if (atomic_read(&qdio->req_q_free) || !(atomic_read(&qdio->adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)) return 1; - spin_unlock_irq(&qdio->req_q_lock); return 0; } @@ -246,9 +244,8 @@ int zfcp_qdio_sbal_get(struct zfcp_qdio *qdio) { long ret; - spin_unlock_irq(&qdio->req_q_lock); - ret = wait_event_interruptible_timeout(qdio->req_q_wq, - zfcp_qdio_sbal_check(qdio), 5 * HZ); + ret = wait_event_interruptible_lock_irq_timeout(qdio->req_q_wq, + zfcp_qdio_sbal_check(qdio), qdio->req_q_lock, 5 * HZ); if (!(atomic_read(&qdio->adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)) return -EIO; @@ -262,7 +259,6 @@ int zfcp_qdio_sbal_get(struct zfcp_qdio *qdio) zfcp_erp_adapter_reopen(qdio->adapter, 0, "qdsbg_1"); } - spin_lock_irq(&qdio->req_q_lock); return -EIO; } diff --git a/include/linux/wait.h b/include/linux/wait.h index f487a4750b7f..a67fc1635592 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -811,6 +811,63 @@ do { \ __ret; \ }) +#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \ + lock, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + ret = -ERESTARTSYS; \ + break; \ + } \ + spin_unlock_irq(&lock); \ + ret = schedule_timeout(ret); \ + spin_lock_irq(&lock); \ + if (!ret) \ + break; \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses. + * The condition is checked under the lock. This is expected + * to be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + * + * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it + * was interrupted by a signal, and the remaining jiffies otherwise + * if the condition evaluated to true before the timeout elapsed. + */ +#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \ + timeout) \ +({ \ + int __ret = timeout; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq_timeout( \ + wq, condition, lock, __ret); \ + __ret; \ +}) + /* * These are the old interfaces to sleep waiting for an event. -- cgit v1.2.3 From 5ea80f76a56605a190a7ea16846c82aa63dbd0aa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 22 Aug 2013 09:13:06 -0700 Subject: Revert "x86 get_unmapped_area(): use proper mmap base for bottom-up direction" This reverts commit df54d6fa54275ce59660453e29d1228c2b45a826. The commit isn't necessarily wrong, but because it recalculates the random mmap_base every time, it seems to confuse user memory allocators that expect contiguous mmap allocations even when the mmap address isn't specified. In particular, the MATLAB Java runtime seems to be unhappy. See https://bugzilla.kernel.org/show_bug.cgi?id=60774 So we'll want to apply the random offset only once, and Radu has a patch for that. Revert this older commit in order to apply the other one. Reported-by: Jeff Shorey Cc: Radu Caragea Cc: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/sys_x86_64.c | 2 +- arch/x86/mm/mmap.c | 2 +- include/linux/sched.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 48f8375e4c6b..dbded5aedb81 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = new_begin; } } else { - *begin = mmap_legacy_base(); + *begin = TASK_UNMAPPED_BASE; *end = TASK_SIZE; } } diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index f63778cb2363..62c29a5bfe26 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -98,7 +98,7 @@ static unsigned long mmap_base(void) * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 * does, but not when emulating X86_32 */ -unsigned long mmap_legacy_base(void) +static unsigned long mmap_legacy_base(void) { if (mmap_is_ia32()) return TASK_UNMAPPED_BASE; diff --git a/include/linux/sched.h b/include/linux/sched.h index e9995eb5985c..078066daffd4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -314,7 +314,6 @@ struct nsproxy; struct user_namespace; #ifdef CONFIG_MMU -extern unsigned long mmap_legacy_base(void); extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, -- cgit v1.2.3 From 41aacc1eea645c99edbe8fbcf78a97dc9b862adc Mon Sep 17 00:00:00 2001 From: Radu Caragea Date: Wed, 21 Aug 2013 20:55:59 +0300 Subject: x86 get_unmapped_area: Access mmap_legacy_base through mm_struct member This is the updated version of df54d6fa5427 ("x86 get_unmapped_area(): use proper mmap base for bottom-up direction") that only randomizes the mmap base address once. Signed-off-by: Radu Caragea Reported-and-tested-by: Jeff Shorey Cc: Andrew Morton Cc: Michel Lespinasse Cc: Oleg Nesterov Cc: Rik van Riel Cc: Ingo Molnar Cc: Adrian Sendroiu Cc: Greg KH Cc: Kamal Mostafa Signed-off-by: Linus Torvalds --- arch/x86/kernel/sys_x86_64.c | 2 +- arch/x86/mm/mmap.c | 6 ++++-- include/linux/mm_types.h | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index dbded5aedb81..30277e27431a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = new_begin; } } else { - *begin = TASK_UNMAPPED_BASE; + *begin = current->mm->mmap_legacy_base; *end = TASK_SIZE; } } diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 62c29a5bfe26..25e7e1372bb2 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -112,11 +112,13 @@ static unsigned long mmap_legacy_base(void) */ void arch_pick_mmap_layout(struct mm_struct *mm) { + mm->mmap_legacy_base = mmap_legacy_base(); + mm->mmap_base = mmap_base(); + if (mmap_is_legacy()) { - mm->mmap_base = mmap_legacy_base(); + mm->mmap_base = mm->mmap_legacy_base; mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fb425aa16c01..faf4b7c1ad12 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -332,6 +332,7 @@ struct mm_struct { unsigned long pgoff, unsigned long flags); #endif unsigned long mmap_base; /* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ unsigned long task_size; /* size of task vm space */ unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; -- cgit v1.2.3 From 4a5a8aa6c966eafc106543bd955ae388230420e5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 21 Aug 2013 21:09:47 -0700 Subject: ipv4: expose IPV4_DEVCONF IP sends device configuration (see inet_fill_link_af) as an array in the netlink information, but the indices in that array are not exposed to userspace through any current santized header file. It was available back in 2.6.32 (in /usr/include/linux/sysctl.h) but was broken by: commit 02291680ffba92e5b5865bc0c5e7d1f3056b80ec Author: Eric W. Biederman Date: Sun Feb 14 03:25:51 2010 +0000 net ipv4: Decouple ipv4 interface parameters from binary sysctl numbers Eric was solving the sysctl problem but then the indices were re-exposed by a later addition of devconf support for IPV4 commit 9f0f7272ac9506f4c8c05cc597b7e376b0b9f3e4 Author: Thomas Graf Date: Tue Nov 16 04:32:48 2010 +0000 ipv4: AF_INET link address family Putting them in /usr/include/linux/ip.h seemed the logical match for the DEVCONF_ definitions for IPV6 in /usr/include/linux/ip6.h Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 34 +--------------------------------- include/uapi/linux/ip.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index b99cd23f3474..79640e015a86 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -5,45 +5,13 @@ #include #include +#include #include #include #include #include #include -enum -{ - IPV4_DEVCONF_FORWARDING=1, - IPV4_DEVCONF_MC_FORWARDING, - IPV4_DEVCONF_PROXY_ARP, - IPV4_DEVCONF_ACCEPT_REDIRECTS, - IPV4_DEVCONF_SECURE_REDIRECTS, - IPV4_DEVCONF_SEND_REDIRECTS, - IPV4_DEVCONF_SHARED_MEDIA, - IPV4_DEVCONF_RP_FILTER, - IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE, - IPV4_DEVCONF_BOOTP_RELAY, - IPV4_DEVCONF_LOG_MARTIANS, - IPV4_DEVCONF_TAG, - IPV4_DEVCONF_ARPFILTER, - IPV4_DEVCONF_MEDIUM_ID, - IPV4_DEVCONF_NOXFRM, - IPV4_DEVCONF_NOPOLICY, - IPV4_DEVCONF_FORCE_IGMP_VERSION, - IPV4_DEVCONF_ARP_ANNOUNCE, - IPV4_DEVCONF_ARP_IGNORE, - IPV4_DEVCONF_PROMOTE_SECONDARIES, - IPV4_DEVCONF_ARP_ACCEPT, - IPV4_DEVCONF_ARP_NOTIFY, - IPV4_DEVCONF_ACCEPT_LOCAL, - IPV4_DEVCONF_SRC_VMARK, - IPV4_DEVCONF_PROXY_ARP_PVLAN, - IPV4_DEVCONF_ROUTE_LOCALNET, - __IPV4_DEVCONF_MAX -}; - -#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1) - struct ipv4_devconf { void *sysctl; int data[IPV4_DEVCONF_MAX]; diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 6cf06bfd841b..2fee45bdec0a 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -133,4 +133,38 @@ struct ip_beet_phdr { __u8 reserved; }; +/* index values for the variables in ipv4_devconf */ +enum +{ + IPV4_DEVCONF_FORWARDING=1, + IPV4_DEVCONF_MC_FORWARDING, + IPV4_DEVCONF_PROXY_ARP, + IPV4_DEVCONF_ACCEPT_REDIRECTS, + IPV4_DEVCONF_SECURE_REDIRECTS, + IPV4_DEVCONF_SEND_REDIRECTS, + IPV4_DEVCONF_SHARED_MEDIA, + IPV4_DEVCONF_RP_FILTER, + IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE, + IPV4_DEVCONF_BOOTP_RELAY, + IPV4_DEVCONF_LOG_MARTIANS, + IPV4_DEVCONF_TAG, + IPV4_DEVCONF_ARPFILTER, + IPV4_DEVCONF_MEDIUM_ID, + IPV4_DEVCONF_NOXFRM, + IPV4_DEVCONF_NOPOLICY, + IPV4_DEVCONF_FORCE_IGMP_VERSION, + IPV4_DEVCONF_ARP_ANNOUNCE, + IPV4_DEVCONF_ARP_IGNORE, + IPV4_DEVCONF_PROMOTE_SECONDARIES, + IPV4_DEVCONF_ARP_ACCEPT, + IPV4_DEVCONF_ARP_NOTIFY, + IPV4_DEVCONF_ACCEPT_LOCAL, + IPV4_DEVCONF_SRC_VMARK, + IPV4_DEVCONF_PROXY_ARP_PVLAN, + IPV4_DEVCONF_ROUTE_LOCALNET, + __IPV4_DEVCONF_MAX +}; + +#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1) + #endif /* _UAPI_LINUX_IP_H */ -- cgit v1.2.3 From 118b23022512eb2f41ce42db70dc0568d00be4ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 24 Aug 2013 12:08:17 -0400 Subject: cope with potentially long ->d_dname() output for shmem/hugetlb dynamic_dname() is both too much and too little for those - the output may be well in excess of 64 bytes dynamic_dname() assumes to be enough (thanks to ashmem feeding really long names to shmem_file_setup()) and vsnprintf() is an overkill for those guys. Signed-off-by: Al Viro --- fs/dcache.c | 11 +++++++++++ fs/hugetlbfs/inode.c | 8 +------- include/linux/dcache.h | 1 + mm/shmem.c | 8 +------- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 87bdb5329c3c..83cfb834db03 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2724,6 +2724,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, return memcpy(buffer, temp, sz); } +char *simple_dname(struct dentry *dentry, char *buffer, int buflen) +{ + char *end = buffer + buflen; + /* these dentries are never renamed, so d_lock is not needed */ + if (prepend(&end, &buflen, " (deleted)", 11) || + prepend_name(&end, &buflen, &dentry->d_name) || + prepend(&end, &buflen, "/", 1)) + end = ERR_PTR(-ENAMETOOLONG); + return end; +} + /* * Write full pathname from the root of the filesystem into the buffer. */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 34423978b170..d19b30ababf1 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -926,14 +926,8 @@ static int get_hstate_idx(int page_size_log) return h - hstates; } -static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", - dentry->d_name.name); -} - static struct dentry_operations anon_ops = { - .d_dname = hugetlb_dname + .d_dname = simple_dname }; /* diff --git a/include/linux/dcache.h b/include/linux/dcache.h index b90337c9d468..4a12532da8c4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -336,6 +336,7 @@ extern int d_validate(struct dentry *, struct dentry *); * helper function for dentry_operations.d_dname() members */ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); +extern char *simple_dname(struct dentry *, char *, int); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); diff --git a/mm/shmem.c b/mm/shmem.c index 8335dbd3fc35..e43dc555069d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2909,14 +2909,8 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); /* common code */ -static char *shmem_dname(struct dentry *dentry, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", - dentry->d_name.name); -} - static struct dentry_operations anon_ops = { - .d_dname = shmem_dname + .d_dname = simple_dname }; /** -- cgit v1.2.3 From c2b1df2eb42978073ec27c99cc199d20ae48b849 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Aug 2013 11:39:16 -0700 Subject: Rename nsproxy.pid_ns to nsproxy.pid_ns_for_children nsproxy.pid_ns is *not* the task's pid namespace. The name should clarify that. This makes it more obvious that setns on a pid namespace is weird -- it won't change the pid namespace shown in procfs. Signed-off-by: Andy Lutomirski Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/nsproxy.h | 6 +++++- kernel/fork.c | 5 +++-- kernel/nsproxy.c | 27 ++++++++++++++------------- kernel/pid_namespace.c | 4 ++-- 4 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 10e5947491c7..b4ec59d159ac 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -14,6 +14,10 @@ struct fs_struct; * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. * + * The pid namespace is an exception -- it's accessed using + * task_active_pid_ns. The pid namespace here is the + * namespace that children will use. + * * 'count' is the number of tasks holding a reference. * The count for each namespace, then, will be the number * of nsproxies pointing to it, not the number of tasks. @@ -27,7 +31,7 @@ struct nsproxy { struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; - struct pid_namespace *pid_ns; + struct pid_namespace *pid_ns_for_children; struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff --git a/kernel/fork.c b/kernel/fork.c index e23bb19e2a3e..bf46287c91a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1177,7 +1177,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, * don't allow the creation of threads. */ if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && - (task_active_pid_ns(current) != current->nsproxy->pid_ns)) + (task_active_pid_ns(current) != + current->nsproxy->pid_ns_for_children)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); @@ -1351,7 +1352,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (pid != &init_struct_pid) { retval = -ENOMEM; - pid = alloc_pid(p->nsproxy->pid_ns); + pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (!pid) goto bad_fork_cleanup_io; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 364ceab15f0c..997cbb951a3b 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -29,15 +29,15 @@ static struct kmem_cache *nsproxy_cachep; struct nsproxy init_nsproxy = { - .count = ATOMIC_INIT(1), - .uts_ns = &init_uts_ns, + .count = ATOMIC_INIT(1), + .uts_ns = &init_uts_ns, #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) - .ipc_ns = &init_ipc_ns, + .ipc_ns = &init_ipc_ns, #endif - .mnt_ns = NULL, - .pid_ns = &init_pid_ns, + .mnt_ns = NULL, + .pid_ns_for_children = &init_pid_ns, #ifdef CONFIG_NET - .net_ns = &init_net, + .net_ns = &init_net, #endif }; @@ -85,9 +85,10 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns); - if (IS_ERR(new_nsp->pid_ns)) { - err = PTR_ERR(new_nsp->pid_ns); + new_nsp->pid_ns_for_children = + copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children); + if (IS_ERR(new_nsp->pid_ns_for_children)) { + err = PTR_ERR(new_nsp->pid_ns_for_children); goto out_pid; } @@ -100,8 +101,8 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, return new_nsp; out_net: - if (new_nsp->pid_ns) - put_pid_ns(new_nsp->pid_ns); + if (new_nsp->pid_ns_for_children) + put_pid_ns(new_nsp->pid_ns_for_children); out_pid: if (new_nsp->ipc_ns) put_ipc_ns(new_nsp->ipc_ns); @@ -174,8 +175,8 @@ void free_nsproxy(struct nsproxy *ns) put_uts_ns(ns->uts_ns); if (ns->ipc_ns) put_ipc_ns(ns->ipc_ns); - if (ns->pid_ns) - put_pid_ns(ns->pid_ns); + if (ns->pid_ns_for_children) + put_pid_ns(ns->pid_ns_for_children); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6917e8edb48e..601bb361c235 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -349,8 +349,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) if (ancestor != active) return -EINVAL; - put_pid_ns(nsproxy->pid_ns); - nsproxy->pid_ns = get_pid_ns(new); + put_pid_ns(nsproxy->pid_ns_for_children); + nsproxy->pid_ns_for_children = get_pid_ns(new); return 0; } -- cgit v1.2.3 From 0f8f2aaaab0b0f9c13635cb02e7d19bdaa9aa1bb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:13:26 -0700 Subject: Add new lockref infrastructure reference implementation This introduces a new "lockref" structure that supports the concept of lockless updates of reference counts that still honor an attached spinlock. NOTE! This reference implementation is not the optimized lockless version, rather it is the fallback implementation using standard spinlocks. The actual optimized versions will be merged into 3.12, but I wanted to get the infrastructure in place and document the new interfaces. [ Also note that this particular commit is drastically cut-down minimal version of the original patch by Waiman. In order to properly credit the original author I'm marking Waiman as the author here, but in the end this patch bears little resemblance to the patch by Waiman. So blame any errors on me editing things down to the point where I can introduce the infrastructure before the merge window for 3.12 actually opens. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/linux/lockref.h (limited to 'include/linux') diff --git a/include/linux/lockref.h b/include/linux/lockref.h new file mode 100644 index 000000000000..01233e01627a --- /dev/null +++ b/include/linux/lockref.h @@ -0,0 +1,71 @@ +#ifndef __LINUX_LOCKREF_H +#define __LINUX_LOCKREF_H + +/* + * Locked reference counts. + * + * These are different from just plain atomic refcounts in that they + * are atomic with respect to the spinlock that goes with them. In + * particular, there can be implementations that don't actually get + * the spinlock for the common decrement/increment operations, but they + * still have to check that the operation is done semantically as if + * the spinlock had been taken (using a cmpxchg operation that covers + * both the lock and the count word, or using memory transactions, for + * example). + */ + +#include + +struct lockref { + spinlock_t lock; + unsigned int count; +}; + +/** + * lockref_get - Increments reference count unconditionally + * @lockcnt: pointer to lockref structure + * + * This operation is only valid if you already hold a reference + * to the object, so you know the count cannot be zero. + */ +static inline void lockref_get(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + lockref->count++; + spin_unlock(&lockref->lock); +} + +/** + * lockref_get_not_zero - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count is 0 + */ +static inline int lockref_get_not_zero(struct lockref *lockref) +{ + int retval = 0; + + spin_lock(&lockref->lock); + if (lockref->count) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} + +/** + * lockref_put_or_lock - decrements count unless count <= 1 before decrement + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken + */ +static inline int lockref_put_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (lockref->count <= 1) + return 0; + lockref->count--; + spin_unlock(&lockref->lock); + return 1; +} + +#endif /* __LINUX_LOCKREF_H */ -- cgit v1.2.3 From 98474236f72e5a8b89c14cd7c74f0bb77a4b1a99 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:24:59 -0700 Subject: vfs: make the dentry cache use the lockref infrastructure This just replaces the dentry count/lock combination with the lockref structure that contains both a count and a spinlock, and does the mechanical conversion to use the lockref infrastructure. There are no semantic changes here, it's purely syntactic. The reference lockref implementation uses the spinlock exactly the same way that the old dcache code did, and the bulk of this patch is just expanding the internal "d_count" use in the dcache code to use "d_lockref.count" instead. This is purely preparation for the real change to make the reference count updates be lockless during the 3.12 merge window. [ As with the previous commit, this is a rewritten version of a concept originally from Waiman, so credit goes to him, blame for any errors goes to me. Waiman's patch had some semantic differences for taking advantage of the lockless update in dget_parent(), while this patch is intentionally a pure search-and-replace change with no semantic changes. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- fs/dcache.c | 57 ++++++++++++++++++++------------------------------ fs/namei.c | 6 +++--- include/linux/dcache.h | 19 ++++++++--------- 3 files changed, 35 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 83cfb834db03..b949af850cd6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -229,7 +229,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { - BUG_ON(dentry->d_count); + BUG_ON(dentry->d_lockref.count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -467,7 +467,7 @@ relock: } if (ref) - dentry->d_count--; + dentry->d_lockref.count--; /* * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. @@ -513,15 +513,10 @@ void dput(struct dentry *dentry) return; repeat: - if (dentry->d_count == 1) + if (dentry->d_lockref.count == 1) might_sleep(); - spin_lock(&dentry->d_lock); - BUG_ON(!dentry->d_count); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) return; - } if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) @@ -535,7 +530,7 @@ repeat: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - dentry->d_count--; + dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); return; @@ -590,7 +585,7 @@ int d_invalidate(struct dentry * dentry) * We also need to leave mountpoints alone, * directory or not. */ - if (dentry->d_count > 1 && dentry->d_inode) { + if (dentry->d_lockref.count > 1 && dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { spin_unlock(&dentry->d_lock); return -EBUSY; @@ -606,14 +601,12 @@ EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ static inline void __dget_dlock(struct dentry *dentry) { - dentry->d_count++; + dentry->d_lockref.count++; } static inline void __dget(struct dentry *dentry) { - spin_lock(&dentry->d_lock); - __dget_dlock(dentry); - spin_unlock(&dentry->d_lock); + lockref_get(&dentry->d_lockref); } struct dentry *dget_parent(struct dentry *dentry) @@ -634,8 +627,8 @@ repeat: goto repeat; } rcu_read_unlock(); - BUG_ON(!ret->d_count); - ret->d_count++; + BUG_ON(!ret->d_lockref.count); + ret->d_lockref.count++; spin_unlock(&ret->d_lock); return ret; } @@ -718,7 +711,7 @@ restart: spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!dentry->d_count) { + if (!dentry->d_lockref.count) { __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -763,12 +756,8 @@ static void try_prune_one_dentry(struct dentry *dentry) /* Prune ancestors. */ dentry = parent; while (dentry) { - spin_lock(&dentry->d_lock); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) return; - } dentry = dentry_kill(dentry, 1); } } @@ -793,7 +782,7 @@ static void shrink_dentry_list(struct list_head *list) * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; @@ -913,7 +902,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry_lru_del(dentry); __d_shrink(dentry); - if (dentry->d_count != 0) { + if (dentry->d_lockref.count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -922,7 +911,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - dentry->d_count, + dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); @@ -933,7 +922,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) list_del(&dentry->d_u.d_child); } else { parent = dentry->d_parent; - parent->d_count--; + parent->d_lockref.count--; list_del(&dentry->d_u.d_child); } @@ -981,7 +970,7 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - dentry->d_count--; + dentry->d_lockref.count--; shrink_dcache_for_umount_subtree(dentry); while (!hlist_bl_empty(&sb->s_anon)) { @@ -1147,7 +1136,7 @@ resume: * loop in shrink_dcache_parent() might not make any progress * and loop forever. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { dentry_lru_del(dentry); } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { dentry_lru_move_list(dentry, dispose); @@ -1269,7 +1258,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) smp_wmb(); dentry->d_name.name = dname; - dentry->d_count = 1; + dentry->d_lockref.count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); @@ -1970,7 +1959,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) goto next; } - dentry->d_count++; + dentry->d_lockref.count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -2069,7 +2058,7 @@ again: spin_lock(&dentry->d_lock); inode = dentry->d_inode; isdir = S_ISDIR(inode->i_mode); - if (dentry->d_count == 1) { + if (dentry->d_lockref.count == 1) { if (!spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); cpu_relax(); @@ -2948,7 +2937,7 @@ resume: } if (!(dentry->d_flags & DCACHE_GENOCIDE)) { dentry->d_flags |= DCACHE_GENOCIDE; - dentry->d_count--; + dentry->d_lockref.count--; } spin_unlock(&dentry->d_lock); } @@ -2956,7 +2945,7 @@ resume: struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; - this_parent->d_count--; + this_parent->d_lockref.count--; } this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) diff --git a/fs/namei.c b/fs/namei.c index 8b61d103a8a7..7720fbd5277b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -536,8 +536,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) * a reference at this point. */ BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); - BUG_ON(!parent->d_count); - parent->d_count++; + BUG_ON(!parent->d_lockref.count); + parent->d_lockref.count++; spin_unlock(&dentry->d_lock); } spin_unlock(&parent->d_lock); @@ -3327,7 +3327,7 @@ void dentry_unhash(struct dentry *dentry) { shrink_dcache_parent(dentry); spin_lock(&dentry->d_lock); - if (dentry->d_count == 1) + if (dentry->d_lockref.count == 1) __d_drop(dentry); spin_unlock(&dentry->d_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4a12532da8c4..efdc94434c30 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -9,6 +9,7 @@ #include #include #include +#include struct nameidata; struct path; @@ -100,6 +101,8 @@ extern unsigned int full_name_hash(const unsigned char *, unsigned int); # endif #endif +#define d_lock d_lockref.lock + struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ @@ -112,8 +115,7 @@ struct dentry { unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ /* Ref lookup also touches following */ - unsigned int d_count; /* protected by d_lock */ - spinlock_t d_lock; /* per dentry lock */ + struct lockref d_lockref; /* per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ @@ -318,7 +320,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) assert_spin_locked(&dentry->d_lock); if (!read_seqcount_retry(&dentry->d_seq, seq)) { ret = 1; - dentry->d_count++; + dentry->d_lockref.count++; } return ret; @@ -326,7 +328,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) static inline unsigned d_count(const struct dentry *dentry) { - return dentry->d_count; + return dentry->d_lockref.count; } /* validate "insecure" dentry pointer */ @@ -357,17 +359,14 @@ extern char *dentry_path(struct dentry *, char *, int); static inline struct dentry *dget_dlock(struct dentry *dentry) { if (dentry) - dentry->d_count++; + dentry->d_lockref.count++; return dentry; } static inline struct dentry *dget(struct dentry *dentry) { - if (dentry) { - spin_lock(&dentry->d_lock); - dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - } + if (dentry) + lockref_get(&dentry->d_lockref); return dentry; } -- cgit v1.2.3