From 37182d1bd3264cf9c0dce3408bee48af0755de7e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 24 Jul 2006 15:30:28 -0700
Subject: [NET]: Remove CONFIG_HAVE_ARCH_DEV_ALLOC_SKB

skbuff.h has an #ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB to allow
architectures to reimplement __dev_alloc_skb.  It's not set on any
architecture, and now that we have an architecture-overrideable
NET_SKB_PAD there is no point at all in having one either.

Signed-off-by: Christoph Hellwig
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0bf31b83578c..f98757976647 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1066,7 +1066,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
         kfree_skb(skb);
 }
 
-#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB
 /**
  * __dev_alloc_skb - allocate an skbuff for sending
  * @length: length to allocate
@@ -1087,9 +1086,6 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
         skb_reserve(skb, NET_SKB_PAD);
     return skb;
 }
-#else
-extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask);
-#endif
 
 /**
  * dev_alloc_skb - allocate an skbuff for sending
-- 
cgit v1.2.3

From b4e54de8d34afe7fcf08bfe91070d9dfeae6ed27 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 24 Jul 2006 15:31:14 -0700
Subject: [NET]: Correct dev_alloc_skb kerneldoc

dev_alloc_skb is designated for RX descriptors, not TX.  (Some drivers
use it for the latter anyway, but that's a different story.)

Signed-off-by: Christoph Hellwig
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f98757976647..4307e764ef0a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1067,7 +1067,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 }
 
 /**
- * __dev_alloc_skb - allocate an skbuff for sending
+ * __dev_alloc_skb - allocate an skbuff for receiving
  * @length: length to allocate
  * @gfp_mask: get_free_pages mask, passed to alloc_skb
  *
@@ -1088,7 +1088,7 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 }
 
 /**
- * dev_alloc_skb - allocate an skbuff for sending
+ * dev_alloc_skb - allocate an skbuff for receiving
  * @length: length to allocate
  *
  * Allocate a new &sk_buff and assign it a usage count of one. The
-- 
cgit v1.2.3
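The kerneldoc fix matters because dev_alloc_skb() is the RX-path allocator: a driver grabs a buffer for an incoming frame, and the NET_SKB_PAD headroom reserved by __dev_alloc_skb() leaves room for headers to be pushed later without a reallocation. The usual receive pattern looks roughly like the sketch below; the driver, buffer and length names are placeholders, not code from the tree.

    #include <linux/skbuff.h>
    #include <linux/netdevice.h>
    #include <linux/etherdevice.h>

    /* RX sketch: allocate, copy the received frame, hand it to the stack.
     * "my_dev", "hw_buf" and "pkt_len" are hypothetical names. */
    static void my_rx_one_frame(struct net_device *my_dev,
                                const void *hw_buf, unsigned int pkt_len)
    {
        /* GFP_ATOMIC allocation with NET_SKB_PAD headroom already reserved */
        struct sk_buff *skb = dev_alloc_skb(pkt_len);

        if (!skb)
            return;                 /* drop on allocation failure */

        memcpy(skb_put(skb, pkt_len), hw_buf, pkt_len);
        skb->dev = my_dev;
        skb->protocol = eth_type_trans(skb, my_dev);
        netif_rx(skb);              /* queue the frame for the network stack */
    }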
From 10ea6ac895418bd0d23900e3330daa6ba0836d26 Mon Sep 17 00:00:00 2001
From: Patrick McHardy
Date: Mon, 24 Jul 2006 22:54:55 -0700
Subject: [NETFILTER]: bridge netfilter: add deferred output hooks to feature-removal-schedule

Add the bridge netfilter deferred output hooks to feature-removal-schedule.txt
and disable them by default.  Until their removal they will be activated by
the physdev match when needed.

Signed-off-by: Patrick McHardy
Signed-off-by: David S. Miller
---
 Documentation/feature-removal-schedule.txt | 16 ++++++++++++++++
 include/linux/netfilter_bridge.h           |  2 ++
 net/bridge/br_netfilter.c                  |  5 +++++
 net/netfilter/xt_physdev.c                 | 15 +++++++++++++++
 4 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 9d3a0775a11d..87851efb0228 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -258,3 +258,19 @@ Why: These drivers never compiled since they were added to the kernel
 Who: Jean Delvare
 
 ---------------------------
+
+What:   Bridge netfilter deferred IPv4/IPv6 output hook calling
+When:   January 2007
+Why:    The deferred output hooks are a layering violation causing unusual
+        and broken behaviour on bridge devices. Examples of things they
+        break include QoS classification using the MARK or CLASSIFY targets,
+        the IPsec policy match and connection tracking with VLANs on a
+        bridge. Their only use is to enable bridge output port filtering
+        within iptables with the physdev match, which can also be done by
+        combining iptables and ebtables using netfilter marks. Until it
+        is removed the hook deferral is disabled by default and is only
+        enabled when needed.
+
+Who:    Patrick McHardy
+
+---------------------------
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 87764022cc67..31f02ba036ce 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -79,6 +79,8 @@ struct bridge_skb_cb {
         __u32 ipv4;
     } daddr;
 };
+
+extern int brnf_deferred_hooks;
 #endif /* CONFIG_BRIDGE_NETFILTER */
 
 #endif /* __KERNEL__ */
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index cbc8a389a0a8..05b3de888243 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -61,6 +61,9 @@ static int brnf_filter_vlan_tagged = 1;
 #define brnf_filter_vlan_tagged 1
 #endif
 
+int brnf_deferred_hooks;
+EXPORT_SYMBOL_GPL(brnf_deferred_hooks);
+
 static __be16 inline vlan_proto(const struct sk_buff *skb)
 {
     return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
@@ -890,6 +893,8 @@ static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb,
             return NF_ACCEPT;
         else if (ip->version == 6 && !brnf_call_ip6tables)
             return NF_ACCEPT;
+        else if (!brnf_deferred_hooks)
+            return NF_ACCEPT;
 #endif
         if (hook == NF_IP_POST_ROUTING)
             return NF_ACCEPT;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 5fe4c9df17f5..a9f4f6f3c628 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -113,6 +113,21 @@ checkentry(const char *tablename,
     if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
         info->bitmask & ~XT_PHYSDEV_OP_MASK)
         return 0;
+    if (brnf_deferred_hooks == 0 &&
+        info->bitmask & XT_PHYSDEV_OP_OUT &&
+        (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
+         info->invert & XT_PHYSDEV_OP_BRIDGED) &&
+        hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
+                     (1 << NF_IP_POST_ROUTING))) {
+        printk(KERN_WARNING "physdev match: using --physdev-out in the "
+               "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
+               "traffic is deprecated and breaks other things, it will "
+               "be removed in January 2007. See Documentation/"
+               "feature-removal-schedule.txt for details. This doesn't "
+               "affect you in case you're using it for purely bridged "
+               "traffic.\n");
+        brnf_deferred_hooks = 1;
+    }
     return 1;
 }
-- 
cgit v1.2.3
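The exported brnf_deferred_hooks flag is the switch: code that still needs the deferred bridge output hooks turns it on at rule-configuration time, exactly as the physdev match does above. A hypothetical module that depends on the same behaviour could follow the pattern below; everything except brnf_deferred_hooks itself is a made-up name, and CONFIG_BRIDGE_NETFILTER must be enabled for the symbol to exist.

    #include <linux/netfilter_bridge.h>

    /* Hypothetical configuration-time helper of a module that still relies
     * on the deferred bridge output hooks: flip the flag once, when the
     * first rule needing it is loaded (same pattern as xt_physdev). */
    static void my_module_enable_deferred_hooks(void)
    {
        if (!brnf_deferred_hooks)
            brnf_deferred_hooks = 1;
    }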
From 153d7f3fcae7ed4e19328549aa9467acdfbced10 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Wed, 26 Jul 2006 15:40:07 +0200
Subject: [PATCH] Reorganize the cpufreq cpu hotplug locking to not be totally bizarre

The patch below moves the cpu hotplugging higher up in the cpufreq
layering; this is needed to avoid recursive taking of the cpu hotplug
lock and to otherwise untangle the mess.

The new rules are:
1. You must do lock_cpu_hotplug() around the following functions:
     __cpufreq_driver_target
     __cpufreq_governor (for the CPUFREQ_GOV_LIMITS operation only)
     __cpufreq_set_policy
2. Governor methods (.governor) must NOT take the lock_cpu_hotplug()
   lock in any way; they are called with the lock already taken.
3. If your governor spawns a thread that does things, like calling
   __cpufreq_driver_target, your thread must honor rule #1.
4. The policy lock and other cpufreq internal locks nest within the
   lock_cpu_hotplug() lock.

I'm not entirely happy about how the __cpufreq_governor rule ended up
(a conditional locking rule depending on the argument), but basically
all callers pass this as a constant, so it's not too horrible.

The patch also removes the cpufreq_governor() function, since during
the locking audit it turned out to be entirely unused (so no need to
fix it).

The patch works on my testbox, but it could use more testing (otoh...
it can't be much worse than the current code).

Signed-off-by: Arjan van de Ven
Signed-off-by: Linus Torvalds
---
 drivers/cpufreq/cpufreq.c              | 40 +++++++++++++++-------------------
 drivers/cpufreq/cpufreq_conservative.c |  2 --
 drivers/cpufreq/cpufreq_ondemand.c     |  4 ++--
 drivers/cpufreq/cpufreq_userspace.c    |  3 +++
 include/linux/cpufreq.h                |  3 ---
 5 files changed, 23 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8d328186f774..bc1088d9b379 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -364,10 +364,12 @@ static ssize_t store_##file_name                                    \
     if (ret != 1)                                                   \
         return -EINVAL;                                             \
                                                                     \
+    lock_cpu_hotplug();                                             \
     mutex_lock(&policy->lock);                                      \
     ret = __cpufreq_set_policy(policy, &new_policy);                \
     policy->user_policy.object = policy->object;                    \
     mutex_unlock(&policy->lock);                                    \
+    unlock_cpu_hotplug();                                           \
                                                                     \
     return ret ? ret : count;                                       \
 }
@@ -1197,20 +1199,18 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
  *********************************************************************/
 
 
+/* Must be called with lock_cpu_hotplug held */
 int __cpufreq_driver_target(struct cpufreq_policy *policy,
                 unsigned int target_freq,
                 unsigned int relation)
 {
     int retval = -EINVAL;
 
-    lock_cpu_hotplug();
     dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
         target_freq, relation);
     if (cpu_online(policy->cpu) && cpufreq_driver->target)
         retval = cpufreq_driver->target(policy, target_freq, relation);
-    unlock_cpu_hotplug();
-
     return retval;
 }
 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
@@ -1225,17 +1225,23 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
     if (!policy)
         return -EINVAL;
 
+    lock_cpu_hotplug();
     mutex_lock(&policy->lock);
 
     ret = __cpufreq_driver_target(policy, target_freq, relation);
 
     mutex_unlock(&policy->lock);
+    unlock_cpu_hotplug();
 
     cpufreq_cpu_put(policy);
 
     return ret;
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
 
+/*
+ * Locking: Must be called with the lock_cpu_hotplug() lock held
+ * when "event" is CPUFREQ_GOV_LIMITS
+ */
 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
 {
@@ -1257,24 +1263,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
 }
 
 
-int cpufreq_governor(unsigned int cpu, unsigned int event)
-{
-    int ret = 0;
-    struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-
-    if (!policy)
-        return -EINVAL;
-
-    mutex_lock(&policy->lock);
-    ret = __cpufreq_governor(policy, event);
-    mutex_unlock(&policy->lock);
-
-    cpufreq_cpu_put(policy);
-    return ret;
-}
-EXPORT_SYMBOL_GPL(cpufreq_governor);
-
-
 int cpufreq_register_governor(struct cpufreq_governor *governor)
 {
     struct cpufreq_governor *t;
@@ -1342,6 +1330,9 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
 EXPORT_SYMBOL(cpufreq_get_policy);
 
 
+/*
+ * Locking: Must be called with the lock_cpu_hotplug() lock held
+ */
 static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_policy *policy)
 {
     int ret = 0;
@@ -1436,6 +1427,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy)
     if (!data)
         return -EINVAL;
 
+    lock_cpu_hotplug();
+
     /* lock this CPU */
     mutex_lock(&data->lock);
 
@@ -1446,6 +1439,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy)
     data->user_policy.governor = data->governor;
 
     mutex_unlock(&data->lock);
+
+    unlock_cpu_hotplug();
     cpufreq_cpu_put(data);
 
     return ret;
@@ -1469,6 +1464,7 @@ int cpufreq_update_policy(unsigned int cpu)
     if (!data)
         return -ENODEV;
 
+    lock_cpu_hotplug();
     mutex_lock(&data->lock);
 
     dprintk("updating policy for CPU %u\n", cpu);
@@ -1494,7 +1490,7 @@ int cpufreq_update_policy(unsigned int cpu)
     ret = __cpufreq_set_policy(data, &policy);
 
     mutex_unlock(&data->lock);
-
+    unlock_cpu_hotplug();
     cpufreq_cpu_put(data);
     return ret;
 }
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index b3ebc8f01975..c4c578defabf 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -525,7 +525,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
         break;
 
     case CPUFREQ_GOV_LIMITS:
-        lock_cpu_hotplug();
         mutex_lock(&dbs_mutex);
         if (policy->max < this_dbs_info->cur_policy->cur)
             __cpufreq_driver_target(
@@ -536,7 +535,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                     this_dbs_info->cur_policy,
                     policy->min, CPUFREQ_RELATION_L);
         mutex_unlock(&dbs_mutex);
-        unlock_cpu_hotplug();
         break;
     }
     return 0;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 178f0c547eb7..52cf1f021825 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -309,7 +309,9 @@ static void do_dbs_timer(void *data)
     if (!dbs_info->enable)
         return;
 
+    lock_cpu_hotplug();
     dbs_check_cpu(dbs_info);
+    unlock_cpu_hotplug();
     queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
             usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
 }
@@ -412,7 +414,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
         break;
 
     case CPUFREQ_GOV_LIMITS:
-        lock_cpu_hotplug();
         mutex_lock(&dbs_mutex);
         if (policy->max < this_dbs_info->cur_policy->cur)
             __cpufreq_driver_target(this_dbs_info->cur_policy,
@@ -423,7 +424,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                     policy->min, CPUFREQ_RELATION_L);
         mutex_unlock(&dbs_mutex);
-        unlock_cpu_hotplug();
         break;
     }
     return 0;
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index 44ae5e5b94cf..a06c204589cd 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -70,6 +71,7 @@ static int cpufreq_set(unsigned int freq, struct cpufreq_policy *policy)
     dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
 
+    lock_cpu_hotplug();
     mutex_lock(&userspace_mutex);
     if (!cpu_is_managed[policy->cpu])
         goto err;
@@ -92,6 +94,7 @@ static int cpufreq_set(unsigned int freq, struct cpufreq_policy *policy)
 
  err:
     mutex_unlock(&userspace_mutex);
+    unlock_cpu_hotplug();
     return ret;
 }
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 35e137636b0b..4ea39fee99c7 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -172,9 +172,6 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
                    unsigned int relation);
 
 
-/* pass an event to the cpufreq governor */
-int cpufreq_governor(unsigned int cpu, unsigned int event);
-
 int cpufreq_register_governor(struct cpufreq_governor *governor);
 void cpufreq_unregister_governor(struct cpufreq_governor *governor);
-- 
cgit v1.2.3
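Under the new rules the hotplug lock is taken by the callers, not inside the cpufreq core or the governors. The sketch below shows roughly what rules #1 and #3 look like from a governor-owned worker, and what rule #2 means for the ->governor callback itself. The governor and its names are hypothetical, and the governor's own serialization (its private mutex) is omitted for brevity.

    #include <linux/cpufreq.h>
    #include <linux/cpu.h>

    /* Rule #1/#3: a thread spawned by the governor takes the hotplug lock
     * itself around __cpufreq_driver_target(). */
    static void my_governor_worker(struct cpufreq_policy *policy,
                                   unsigned int new_freq)
    {
        lock_cpu_hotplug();
        __cpufreq_driver_target(policy, new_freq, CPUFREQ_RELATION_L);
        unlock_cpu_hotplug();
    }

    /* Rule #2: the ->governor callback must NOT touch lock_cpu_hotplug();
     * for CPUFREQ_GOV_LIMITS it is already called with the lock held
     * (see cpufreq_set_policy() above). */
    static int my_governor(struct cpufreq_policy *policy, unsigned int event)
    {
        if (event == CPUFREQ_GOV_LIMITS)
            __cpufreq_driver_target(policy, policy->max,
                                    CPUFREQ_RELATION_H);
        return 0;
    }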
From 361934849e9c0418950bedf667732f36337d88b9 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 28 Jul 2006 08:54:59 +0200
Subject: [PATCH] ide: option to disable cache flushes for buggy drives

Some drives claim they support cache flushing, but get seriously
confused if you try.  Add this option to be able to boot with barriers
enabled by default.

Signed-off-by: Jens Axboe
---
 drivers/ide/ide-disk.c | 2 +-
 drivers/ide/ide.c      | 5 ++++-
 include/linux/ide.h    | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index f712e4cfd9dc..7cf3eb023521 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -776,7 +776,7 @@ static void update_ordered(ide_drive_t *drive)
          * not available so we don't need to recheck that.
          */
         capacity = idedisk_capacity(drive);
-        barrier = ide_id_has_flush_cache(id) &&
+        barrier = ide_id_has_flush_cache(id) && !drive->noflush &&
             (drive->addressing == 0 || capacity <= (1ULL << 28) ||
              ide_id_has_flush_cache_ext(id));
 
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 05fbd9298db7..defd4b4bd374 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -1539,7 +1539,7 @@ static int __init ide_setup(char *s)
         const char *hd_words[] = {
             "none", "noprobe", "nowerr", "cdrom", "serialize",
             "autotune", "noautotune", "minus8", "swapdata", "bswap",
-            "minus11", "remap", "remap63", "scsi", NULL };
+            "noflush", "remap", "remap63", "scsi", NULL };
         unit = s[2] - 'a';
         hw   = unit / MAX_DRIVES;
         unit = unit % MAX_DRIVES;
@@ -1578,6 +1578,9 @@ static int __init ide_setup(char *s)
             case -10: /* "bswap" */
                 drive->bswap = 1;
                 goto done;
+            case -11: /* noflush */
+                drive->noflush = 1;
+                goto done;
             case -12: /* "remap" */
                 drive->remap_0_to_1 = 1;
                 goto done;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index dc7abef10965..99620451d958 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -571,6 +571,7 @@ typedef struct ide_drive_s {
     u8  waiting_for_dma;    /* dma currently in progress */
     u8  unmask;             /* okay to unmask other irqs */
     u8  bswap;              /* byte swap data */
+    u8  noflush;            /* don't attempt flushes */
     u8  dsc_overlap;        /* DSC overlap */
     u8  nice1;              /* give potential excess bandwidth */
 
-- 
cgit v1.2.3
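With this in place a drive that lies about cache-flush support can be excluded on the kernel command line. Based on the ide_setup() parsing above, the per-drive syntax should be of the form

    hdb=noflush

which sets drive->noflush for that one device, so update_ordered() will no longer use cache flushes (and hence barriers) for it, while barriers stay enabled by default for well-behaved drives.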
From e3f2ddeac718c768fdac4b7fe69d465172f788a8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sat, 29 Jul 2006 05:17:57 +0200
Subject: [PATCH] pi-futex: robust-futex exit

Fix robust PI-futexes to be properly unlocked on unexpected exit.

For this to work the kernel has to know whether a futex is a PI or a
non-PI one, because the semantics are different.  Since the space in the
relevant glibc data structures is extremely scarce, the best solution is
to encode the 'PI' information in bit 0 of the robust list pointer.
Existing (non-PI) glibc robust futexes have this bit always zero, so the
ABI is kept.  New glibc with PI-robust-futexes will set this bit.

Further fixes from Thomas Gleixner.

Signed-off-by: Ingo Molnar
Signed-off-by: Ulrich Drepper
Signed-off-by: Thomas Gleixner
Signed-off-by: Linus Torvalds
---
 include/linux/futex.h |  3 +-
 kernel/futex.c        | 91 +++++++++++++++++++++++++++++++++++----------------
 kernel/futex_compat.c | 34 ++++++++++++++-----
 3 files changed, 89 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 34c3a215f2cd..d097b5b72bc6 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -96,7 +96,8 @@ struct robust_list_head {
 long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
               u32 __user *uaddr2, u32 val2, u32 val3);
 
-extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr);
+extern int
+handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
 
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
diff --git a/kernel/futex.c b/kernel/futex.c
index f59003b1d8f9..dda2049692a2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -495,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
     }
 
     /*
-     * We are the first waiter - try to look up the real owner and
-     * attach the new pi_state to it:
+     * We are the first waiter - try to look up the real owner and attach
+     * the new pi_state to it, but bail out when the owner died bit is set
+     * and TID = 0:
      */
     pid = uval & FUTEX_TID_MASK;
+    if (!pid && (uval & FUTEX_OWNER_DIED))
+        return -ESRCH;
     p = futex_find_get_task(pid);
     if (!p)
         return -ESRCH;
@@ -579,16 +582,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
      * kept enabled while there is PI state around. We must also
      * preserve the owner died bit.)
      */
-    newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
-
-    inc_preempt_count();
-    curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-    dec_preempt_count();
+    if (!(uval & FUTEX_OWNER_DIED)) {
+        newval = FUTEX_WAITERS | new_owner->pid;
 
-    if (curval == -EFAULT)
-        return -EFAULT;
-    if (curval != uval)
-        return -EINVAL;
+        inc_preempt_count();
+        curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+        dec_preempt_count();
+        if (curval == -EFAULT)
+            return -EFAULT;
+        if (curval != uval)
+            return -EINVAL;
+    }
 
     spin_lock_irq(&pi_state->owner->pi_lock);
     WARN_ON(list_empty(&pi_state->list));
@@ -1443,9 +1447,11 @@ retry_locked:
      * again. If it succeeds then we can return without waking
      * anyone else up:
      */
-    inc_preempt_count();
-    uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-    dec_preempt_count();
+    if (!(uval & FUTEX_OWNER_DIED)) {
+        inc_preempt_count();
+        uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+        dec_preempt_count();
+    }
 
     if (unlikely(uval == -EFAULT))
         goto pi_faulted;
@@ -1478,9 +1484,11 @@ retry_locked:
     /*
      * No waiters - kernel unlocks the futex:
      */
-    ret = unlock_futex_pi(uaddr, uval);
-    if (ret == -EFAULT)
-        goto pi_faulted;
+    if (!(uval & FUTEX_OWNER_DIED)) {
+        ret = unlock_futex_pi(uaddr, uval);
+        if (ret == -EFAULT)
+            goto pi_faulted;
+    }
 
 out_unlock:
     spin_unlock(&hb->lock);
@@ -1699,9 +1707,9 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-    u32 uval, nval;
+    u32 uval, nval, mval;
 
 retry:
     if (get_user(uval, uaddr))
@@ -1718,20 +1726,44 @@ retry:
          * thread-death.) The rest of the cleanup is done in
          * userspace.
          */
-        nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-                                             uval | FUTEX_OWNER_DIED);
+        mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+        nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
         if (nval == -EFAULT)
             return -1;
 
         if (nval != uval)
             goto retry;
 
-        if (uval & FUTEX_WAITERS)
-            futex_wake(uaddr, 1);
+        /*
+         * Wake robust non-PI futexes here. The wakeup of
+         * PI futexes happens in exit_pi_state():
+         */
+        if (!pi) {
+            if (uval & FUTEX_WAITERS)
+                futex_wake(uaddr, 1);
+        }
     }
     return 0;
 }
 
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+                                     struct robust_list __user **head, int *pi)
+{
+    unsigned long uentry;
+
+    if (get_user(uentry, (unsigned long *)head))
+        return -EFAULT;
+
+    *entry = (void *)(uentry & ~1UL);
+    *pi = uentry & 1;
+
+    return 0;
+}
+
 /*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
@@ -1742,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr)
 {
     struct robust_list_head __user *head = curr->robust_list;
     struct robust_list __user *entry, *pending;
-    unsigned int limit = ROBUST_LIST_LIMIT;
+    unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
     unsigned long futex_offset;
 
     /*
      * Fetch the list head (which was registered earlier, via
      * sys_set_robust_list()):
      */
-    if (get_user(entry, &head->list.next))
+    if (fetch_robust_entry(&entry, &head->list.next, &pi))
         return;
     /*
      * Fetch the relative futex offset:
@@ -1760,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr)
      * Fetch any possibly pending lock-add first, and handle it
      * if it exists:
      */
-    if (get_user(pending, &head->list_op_pending))
+    if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
         return;
+
     if (pending)
-        handle_futex_death((void *)pending + futex_offset, curr);
+        handle_futex_death((void *)pending + futex_offset, curr, pip);
 
     while (entry != &head->list) {
         /*
@@ -1772,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr)
          */
         if (entry != pending)
             if (handle_futex_death((void *)entry + futex_offset,
-                                   curr))
+                                   curr, pi))
                 return;
         /*
          * Fetch the next entry in the list:
          */
-        if (get_user(entry, &entry->next))
+        if (fetch_robust_entry(&entry, &entry->next, &pi))
             return;
         /*
          * Avoid excessively long or circular lists:
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d1d92b441fb7..d1aab1a452cc 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -12,6 +12,23 @@
 #include
 
 
+
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+                   compat_uptr_t *head, int *pi)
+{
+    if (get_user(*uentry, head))
+        return -EFAULT;
+
+    *entry = compat_ptr((*uentry) & ~1);
+    *pi = (unsigned int)(*uentry) & 1;
+
+    return 0;
+}
+
 /*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr)
 {
     struct compat_robust_list_head __user *head = curr->compat_robust_list;
     struct robust_list __user *entry, *pending;
+    unsigned int limit = ROBUST_LIST_LIMIT, pi;
     compat_uptr_t uentry, upending;
-    unsigned int limit = ROBUST_LIST_LIMIT;
     compat_long_t futex_offset;
 
     /*
      * Fetch the list head (which was registered earlier, via
      * sys_set_robust_list()):
      */
-    if (get_user(uentry, &head->list.next))
+    if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
         return;
-    entry = compat_ptr(uentry);
     /*
      * Fetch the relative futex offset:
      */
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr)
      * Fetch any possibly pending lock-add first, and handle it
      * if it exists:
      */
-    if (get_user(upending, &head->list_op_pending))
+    if (fetch_robust_entry(&upending, &pending,
+                           &head->list_op_pending, &pi))
         return;
-    pending = compat_ptr(upending);
     if (upending)
-        handle_futex_death((void *)pending + futex_offset, curr);
+        handle_futex_death((void *)pending + futex_offset, curr, pi);
 
     while (compat_ptr(uentry) != &head->list) {
         /*
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr)
          */
         if (entry != pending)
             if (handle_futex_death((void *)entry + futex_offset,
-                                   curr))
+                                   curr, pi))
                 return;
 
         /*
          * Fetch the next entry in the list:
          */
-        if (get_user(uentry, (compat_uptr_t *)&entry->next))
+        if (fetch_robust_entry(&uentry, &entry,
+                               (compat_uptr_t *)&entry->next, &pi))
             return;
-        entry = compat_ptr(uentry);
         /*
          * Avoid excessively long or circular lists:
          */
-- 
cgit v1.2.3
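The bit-0 trick above is the whole user/kernel contract: the entry pointers user space places in its robust list are word-aligned, so the low bit is free to mean "this entry is a PI futex", and fetch_robust_entry() masks it off before the pointer is used. A small user-space flavoured sketch of the encode/decode step follows; the helper names are made up and this is illustrative only, not glibc's actual code.

    #include <stdint.h>

    /* Encode: a word-aligned robust-list entry address, with bit 0
     * carrying the "this is a PI futex" flag (new glibc sets it). */
    static inline uintptr_t robust_ptr_encode(void *entry, int is_pi)
    {
        return (uintptr_t)entry | (is_pi ? 1u : 0u);
    }

    /* Decode: the same masking the kernel's fetch_robust_entry()
     * performs before dereferencing the pointer. */
    static inline void *robust_ptr_decode(uintptr_t word, int *is_pi)
    {
        *is_pi = (int)(word & 1);
        return (void *)(word & ~(uintptr_t)1);
    }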