From f6d5b33125c4fa63c16f7f54c533338c9695d82c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 29 Mar 2011 18:00:27 -0700 Subject: RTC: Fix early irqs caused by calling rtc_set_alarm too early When we register an rtc device at boot, we read the alarm value in hardware and set the rtc device's aie_timer to that value. The initial method to do this was to simply call rtc_set_alarm() with the value read from hardware. However, this may cause problems as rtc_set_alarm may enable interupts, and the RTC alarm might fire, which can cause invalid pointer dereferencing since the RTC registration is not complete. This patch solves the issue by initializing the rtc_device.aie_timer y hand via rtc_initialize_alarm(). This avoids any calls to the RTC hardware which might enable interrupts too early. CC: Thomas Gleixner CC: Alessandro Zummo Reported-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk Signed-off-by: John Stultz --- include/linux/rtc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 2ca7e8a78060..877ece45426f 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -228,6 +228,8 @@ extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); extern int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); +extern int rtc_initialize_alarm(struct rtc_device *rtc, + struct rtc_wkalrm *alrm); extern void rtc_update_irq(struct rtc_device *rtc, unsigned long num, unsigned long events); -- cgit v1.2.3 From 2f9f28b212a2bd4948c8ceaaec33ce0123632129 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 4 Apr 2011 15:19:25 +0200 Subject: netfilter: ipset: references are protected by rwlock instead of mutex The timeout variant of the list:set type must reference the member sets. However, its garbage collector runs at timer interrupt so the mutex protection of the references is a no go. Therefore the reference protection is converted to rwlock. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set.h | 2 +- include/linux/netfilter/ipset/ip_set_ahash.h | 3 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 3 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 3 +- net/netfilter/ipset/ip_set_bitmap_port.c | 3 +- net/netfilter/ipset/ip_set_core.c | 109 ++++++++++++++++----------- net/netfilter/ipset/ip_set_list_set.c | 6 +- 7 files changed, 73 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ec333d83f3b4..5a262e3ae715 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -293,7 +293,7 @@ struct ip_set { /* Lock protecting the set data */ rwlock_t lock; /* References to the set */ - atomic_t ref; + u32 ref; /* The core set type */ struct ip_set_type *type; /* The type variant doing the real job */ diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h index ec9d9bea1e37..a0196ac79051 100644 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -515,8 +515,7 @@ type_pf_head(struct ip_set *set, struct sk_buff *skb) if (h->netmask != HOST_MASK) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask); #endif - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)); if (with_timeout(h->timeout)) NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout)); diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index bca96990218d..a113ff066928 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -338,8 +338,7 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); if (map->netmask != 32) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 5e790172deff..00a33242e90c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -434,8 +434,7 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + (map->last_ip - map->first_ip + 1) * map->dsize)); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 165f09b1a9cb..6b38eb8f6ed8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -320,8 +320,7 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d6b48230a540..e88ac3c3ed07 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -26,6 +26,7 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ +static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ static struct ip_set **ip_set_list; /* all individual sets */ static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ @@ -301,13 +302,18 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); static inline void __ip_set_get(ip_set_id_t index) { - atomic_inc(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + ip_set_list[index]->ref++; + write_unlock_bh(&ip_set_ref_lock); } static inline void __ip_set_put(ip_set_id_t index) { - atomic_dec(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + BUG_ON(ip_set_list[index]->ref == 0); + ip_set_list[index]->ref--; + write_unlock_bh(&ip_set_ref_lock); } /* @@ -324,7 +330,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -356,7 +362,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -378,7 +384,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -397,7 +403,6 @@ EXPORT_SYMBOL_GPL(ip_set_del); * Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * - * The nfnl mutex must already be activated. */ ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set) @@ -423,15 +428,12 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * - * The nfnl mutex must already be activated. */ void ip_set_put_byindex(ip_set_id_t index) { - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + if (ip_set_list[index] != NULL) __ip_set_put(index); - } } EXPORT_SYMBOL_GPL(ip_set_put_byindex); @@ -441,7 +443,6 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * can't be destroyed. The set cannot be renamed due to * the referencing either. * - * The nfnl mutex must already be activated. */ const char * ip_set_name_byindex(ip_set_id_t index) @@ -449,7 +450,7 @@ ip_set_name_byindex(ip_set_id_t index) const struct ip_set *set = ip_set_list[index]; BUG_ON(set == NULL); - BUG_ON(atomic_read(&set->ref) == 0); + BUG_ON(set->ref == 0); /* Referenced, so it's safe */ return set->name; @@ -515,10 +516,7 @@ void ip_set_nfnl_put(ip_set_id_t index) { nfnl_lock(); - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); - __ip_set_put(index); - } + ip_set_put_byindex(index); nfnl_unlock(); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -526,7 +524,7 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put); /* * Communication protocol with userspace over netlink. * - * We already locked by nfnl_lock. + * The commands are serialized by the nfnl mutex. */ static inline bool @@ -657,7 +655,6 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, return -ENOMEM; rwlock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); - atomic_set(&set->ref, 0); set->family = family; /* @@ -690,8 +687,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* * Here, we have a valid, constructed set and we are protected - * by nfnl_lock. Find the first free index in ip_set_list and - * check clashing. + * by the nfnl mutex. Find the first free index in ip_set_list + * and check clashing. */ if ((ret = find_free_id(set->name, &index, &clash)) != 0) { /* If this is the same set and requested, ignore error */ @@ -751,31 +748,51 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[]) { ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* References are protected by the nfnl mutex */ + /* Commands are serialized and references are + * protected by the ip_set_ref_lock. + * External systems (i.e. xt_set) must call + * ip_set_put|get_nfnl_* functions, that way we + * can safely check references here. + * + * list:set timer can only decrement the reference + * counter, so if it's already zero, we can proceed + * without holding the lock. + */ + read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL && - (atomic_read(&ip_set_list[i]->ref))) - return -IPSET_ERR_BUSY; + if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { + ret = IPSET_ERR_BUSY; + goto out; + } } + read_unlock_bh(&ip_set_ref_lock); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL) ip_set_destroy_set(i); } } else { i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (i == IPSET_INVALID_ID) - return -ENOENT; - else if (atomic_read(&ip_set_list[i]->ref)) - return -IPSET_ERR_BUSY; + if (i == IPSET_INVALID_ID) { + ret = -ENOENT; + goto out; + } else if (ip_set_list[i]->ref) { + ret = -IPSET_ERR_BUSY; + goto out; + } + read_unlock_bh(&ip_set_ref_lock); ip_set_destroy_set(i); } return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Flush sets */ @@ -834,6 +851,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set; const char *name2; ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -843,25 +861,33 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; - if (atomic_read(&set->ref) != 0) - return -IPSET_ERR_REFERENCED; + + read_lock_bh(&ip_set_ref_lock); + if (set->ref != 0) { + ret = -IPSET_ERR_REFERENCED; + goto out; + } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && - STREQ(ip_set_list[i]->name, name2)) - return -IPSET_ERR_EXIST_SETNAME2; + STREQ(ip_set_list[i]->name, name2)) { + ret = -IPSET_ERR_EXIST_SETNAME2; + goto out; + } } strncpy(set->name, name2, IPSET_MAXNAMELEN); - return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Swap two sets so that name/index points to the other. * References and set names are also swapped. * - * We are protected by the nfnl mutex and references are - * manipulated only by holding the mutex. The kernel interfaces + * The commands are serialized by the nfnl mutex and references are + * protected by the ip_set_ref_lock. The kernel interfaces * do not hold the mutex but the pointer settings are atomic * so the ip_set_list always contains valid pointers to the sets. */ @@ -874,7 +900,6 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - u32 from_ref; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -899,17 +924,15 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, from->type->family == to->type->family)) return -IPSET_ERR_TYPE_MISMATCH; - /* No magic here: ref munging protected by the nfnl_lock */ strncpy(from_name, from->name, IPSET_MAXNAMELEN); - from_ref = atomic_read(&from->ref); - strncpy(from->name, to->name, IPSET_MAXNAMELEN); - atomic_set(&from->ref, atomic_read(&to->ref)); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - atomic_set(&to->ref, from_ref); + write_lock_bh(&ip_set_ref_lock); + swap(from->ref, to->ref); ip_set_list[from_id] = to; ip_set_list[to_id] = from; + write_unlock_bh(&ip_set_ref_lock); return 0; } @@ -926,7 +949,7 @@ ip_set_dump_done(struct netlink_callback *cb) { if (cb->args[2]) { pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); - __ip_set_put((ip_set_id_t) cb->args[1]); + ip_set_put_byindex((ip_set_id_t) cb->args[1]); } return 0; } @@ -1068,7 +1091,7 @@ release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { pr_debug("release set %s\n", ip_set_list[index]->name); - __ip_set_put(index); + ip_set_put_byindex(index); } /* If we dump all sets, continue with dumping last ones */ diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index f4a46c0d25f3..e9159e99fc4b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -366,8 +366,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); if (with_timeout(map->timeout)) NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->size * map->dsize)); ipset_nest_end(skb, nested); @@ -457,8 +456,7 @@ list_set_gc(unsigned long ul_set) struct list_set *map = set->data; struct set_telem *e; u32 i; - - /* nfnl_lock should be called */ + write_lock_bh(&set->lock); for (i = 0; i < map->size; i++) { e = list_set_telem(map, i); -- cgit v1.2.3 From a09d19779f3ffac6e16821accc2c1cc4df1b643a Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 4 Apr 2011 15:25:18 +0200 Subject: IPVS: fix NULL ptr dereference in ip_vs_ctl.c ip_vs_genl_dump_daemons() ipvsadm -ln --daemon will trigger a Null pointer exception because ip_vs_genl_dump_daemons() uses skb_net() instead of skb_sknet(). To prevent others from NULL ptr a check is made in ip_vs.h skb_net(). Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 30b49ed72f0d..4d1b71ae82ba 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -52,7 +52,7 @@ static inline struct net *skb_net(const struct sk_buff *skb) */ if (likely(skb->dev && skb->dev->nd_net)) return dev_net(skb->dev); - if (skb_dst(skb)->dev) + if (skb_dst(skb) && skb_dst(skb)->dev) return dev_net(skb_dst(skb)->dev); WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", __func__, __LINE__); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 33733c8872e7..ae47090bf45f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3120,7 +3120,7 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb_net(skb); + struct net *net = skb_sknet(skb); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); -- cgit v1.2.3 From 31ad3dd64e689bc79dd819f8f134b9b025240eb8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Apr 2011 16:56:29 +0200 Subject: netfilter: af_info: add network namespace parameter to route hook This is required to eventually replace the rt6_lookup call in xt_addrtype.c with nf_afinfo->route(). Signed-off-by: Florian Westphal Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 3 ++- net/ipv4/netfilter.c | 5 +++-- net/ipv6/netfilter.c | 5 +++-- net/netfilter/nf_conntrack_h323_main.c | 8 ++++---- net/netfilter/xt_TCPMSS.c | 2 +- 5 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index eeec00abb664..20ed4528e850 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -270,7 +270,8 @@ struct nf_afinfo { unsigned int dataoff, unsigned int len, u_int8_t protocol); - int (*route)(struct dst_entry **dst, struct flowi *fl); + int (*route)(struct net *net, struct dst_entry **dst, + struct flowi *fl); void (*saveroute)(const struct sk_buff *skb, struct nf_queue_entry *entry); int (*reroute)(struct sk_buff *skb, diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f3c0b549b8e1..f1035f056503 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -221,9 +221,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, return csum; } -static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) +static int nf_ip_route(struct net *net, struct dst_entry **dst, + struct flowi *fl) { - struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); + struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); if (IS_ERR(rt)) return PTR_ERR(rt); *dst = &rt->dst; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 39aaca2b4fd2..e008b9b4a779 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -90,9 +90,10 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) +static int nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl) { - *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6); + *dst = ip6_route_output(net, NULL, &fl->u.ip6); return (*dst)->error; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 533a183e6661..39a453895b4d 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -731,9 +731,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; - if (!afinfo->route((struct dst_entry **)&rt1, + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, flowi4_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi4_to_flowi(&fl2))) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) @@ -755,9 +755,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); ipv6_addr_copy(&fl2.daddr, &dst->in6); - if (!afinfo->route((struct dst_entry **)&rt1, + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, flowi6_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2))) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 6e6b46cb1db9..8690125e3b18 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); + ai->route(&init_net, (struct dst_entry **)&rt, &fl); rcu_read_unlock(); if (rt != NULL) { -- cgit v1.2.3 From 0fae2e7740aca7e384c5f337f458897e7e337d58 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Apr 2011 17:00:54 +0200 Subject: netfilter: af_info: add 'strict' parameter to limit lookup to .oif ipv6 fib lookup can set RT6_LOOKUP_F_IFACE flag to restrict search to an interface, but this flag cannot be set via struct flowi. Also, it cannot be set via ip6_route_output: this function uses the passed sock struct to determine if this flag is required (by testing for nonzero sk_bound_dev_if). Work around this by passing in an artificial struct sk in case 'strict' argument is true. This is required to replace the rt6_lookup call in xt_addrtype.c with nf_afinfo->route(). Signed-off-by: Florian Westphal Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 2 +- net/ipv4/netfilter.c | 2 +- net/ipv6/netfilter.c | 12 ++++++++++-- net/netfilter/nf_conntrack_h323_main.c | 8 ++++---- net/netfilter/xt_TCPMSS.c | 2 +- 5 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 20ed4528e850..7fa95df60146 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -271,7 +271,7 @@ struct nf_afinfo { unsigned int len, u_int8_t protocol); int (*route)(struct net *net, struct dst_entry **dst, - struct flowi *fl); + struct flowi *fl, bool strict); void (*saveroute)(const struct sk_buff *skb, struct nf_queue_entry *entry); int (*reroute)(struct sk_buff *skb, diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f1035f056503..4614babdc45f 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -222,7 +222,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, } static int nf_ip_route(struct net *net, struct dst_entry **dst, - struct flowi *fl) + struct flowi *fl, bool strict __always_unused) { struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); if (IS_ERR(rt)) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index e008b9b4a779..28bc1f644b7b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -91,9 +91,17 @@ static int nf_ip6_reroute(struct sk_buff *skb, } static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl) + struct flowi *fl, bool strict) { - *dst = ip6_route_output(net, NULL, &fl->u.ip6); + static const struct ipv6_pinfo fake_pinfo; + static const struct inet_sock fake_sk = { + /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */ + .sk.sk_bound_dev_if = 1, + .pinet6 = (struct ipv6_pinfo *) &fake_pinfo, + }; + const void *sk = strict ? &fake_sk : NULL; + + *dst = ip6_route_output(net, sk, &fl->u.ip6); return (*dst)->error; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 39a453895b4d..18b2ce5c8ced 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -732,9 +732,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, - flowi4_to_flowi(&fl1))) { + flowi4_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, - flowi4_to_flowi(&fl2))) { + flowi4_to_flowi(&fl2), false)) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; @@ -756,9 +756,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); ipv6_addr_copy(&fl2.daddr, &dst->in6); if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, - flowi6_to_flowi(&fl1))) { + flowi6_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, - flowi6_to_flowi(&fl2))) { + flowi6_to_flowi(&fl2), false)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8690125e3b18..9e63b43faeed 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route(&init_net, (struct dst_entry **)&rt, &fl); + ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); rcu_read_unlock(); if (rt != NULL) { -- cgit v1.2.3 From 5312c3f60ba49073081b2dc421f9f3c22dd43d99 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Apr 2011 13:52:34 +0200 Subject: mac80211: fix comment regarding aggregation buf_size The description for buf_size was misleading and just said you couldn't TX larger aggregates, but of course you can't TX aggregates in a way that would exceed the window either, which is possible even if the aggregates are shorter than that. Expand the description, thanks to Emmanuel for explaining this to me. Cc: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cefe1b37c493..965f1b16e53a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1753,8 +1753,19 @@ enum ieee80211_ampdu_mlme_action { * that TX/RX_STOP can pass NULL for this parameter. * The @buf_size parameter is only valid when the action is set to * %IEEE80211_AMPDU_TX_OPERATIONAL and indicates the peer's reorder - * buffer size (number of subframes) for this session -- aggregates - * containing more subframes than this may not be transmitted to the peer. + * buffer size (number of subframes) for this session -- the driver + * may neither send aggregates containing more subframes than this + * nor send aggregates in a way that lost frames would exceed the + * buffer size. If just limiting the aggregate size, this would be + * possible with a buf_size of 8: + * - TX: 1.....7 + * - RX: 2....7 (lost frame #1) + * - TX: 8..1... + * which is invalid since #1 was now re-transmitted well past the + * buffer size of 8. Correct ways to retransmit #1 would be: + * - TX: 1 or 18 or 81 + * Even "189" would be wrong since 1 could be lost again. + * * Returns a negative error code on failure. * The callback can sleep. * -- cgit v1.2.3 From 34206f267120c839a479d0237db907fa062e7b0f Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 5 Apr 2011 07:08:41 +0000 Subject: can: mcp251x: Allow pass IRQ flags through platform data. When an interrupt occurs, the INT pin is driven low by the MCP251x controller (falling edge) but in some cases the INT pin can be connected to the MPU through a transistor or level translator which inverts this signal. In this case interrupt should be configured in rising edge. This patch adds support to pass the IRQ flags via mcp251x_platform_data. Signed-off-by: Enric Balletbo i Serra Acked-by: Wolfgang Grandegger Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- drivers/net/can/mcp251x.c | 3 ++- include/linux/can/platform/mcp251x.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index 7513c4523ac4..330140ee266d 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -931,7 +931,8 @@ static int mcp251x_open(struct net_device *net) priv->tx_len = 0; ret = request_threaded_irq(spi->irq, NULL, mcp251x_can_ist, - IRQF_TRIGGER_FALLING, DEVICE_NAME, priv); + pdata->irq_flags ? pdata->irq_flags : IRQF_TRIGGER_FALLING, + DEVICE_NAME, priv); if (ret) { dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq); if (pdata->transceiver_enable) diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h index 8e20540043f5..089fe43211a4 100644 --- a/include/linux/can/platform/mcp251x.h +++ b/include/linux/can/platform/mcp251x.h @@ -12,6 +12,7 @@ /** * struct mcp251x_platform_data - MCP251X SPI CAN controller platform data * @oscillator_frequency: - oscillator frequency in Hz + * @irq_flags: - IRQF configuration flags * @board_specific_setup: - called before probing the chip (power,reset) * @transceiver_enable: - called to power on/off the transceiver * @power_enable: - called to power on/off the mcp *and* the @@ -24,6 +25,7 @@ struct mcp251x_platform_data { unsigned long oscillator_frequency; + unsigned long irq_flags; int (*board_specific_setup)(struct spi_device *spi); int (*transceiver_enable)(int enable); int (*power_enable) (int enable); -- cgit v1.2.3 From 1b86a58f9d7ce4fe2377687f378fbfb53bdc9b6c Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 7 Apr 2011 14:04:08 -0700 Subject: ipv4: Fix "Set rt->rt_iif more sanely on output routes." Commit 1018b5c01636c7c6bda31a719bda34fc631db29a ("Set rt->rt_iif more sanely on output routes.") breaks rt_is_{output,input}_route. This became the cause to return "IP_PKTINFO's ->ipi_ifindex == 0". To fix it, this does: 1) Add "int rt_route_iif;" to struct rtable 2) For input routes, always set rt_route_iif to same value as rt_iif 3) For output routes, always set rt_route_iif to zero. Set rt_iif as it is done currently. 4) Change rt_is_{output,input}_route() to test rt_route_iif Signed-off-by: OGAWA Hirofumi Signed-off-by: David S. Miller --- include/net/route.h | 5 +++-- net/ipv4/route.c | 8 ++++++-- net/ipv4/xfrm4_policy.c | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index f88429cad52a..8fce0621cad1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -64,6 +64,7 @@ struct rtable { __be32 rt_dst; /* Path destination */ __be32 rt_src; /* Path source */ + int rt_route_iif; int rt_iif; int rt_oif; __u32 rt_mark; @@ -80,12 +81,12 @@ struct rtable { static inline bool rt_is_input_route(struct rtable *rt) { - return rt->rt_iif != 0; + return rt->rt_route_iif != 0; } static inline bool rt_is_output_route(struct rtable *rt) { - return rt->rt_iif == 0; + return rt->rt_route_iif == 0; } struct ip_rt_acct { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4b0c81180804..1628be530314 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1891,6 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif + rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->dst.dev = init_net.loopback_dev; dev_hold(rth->dst.dev); @@ -2026,6 +2027,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_key_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; + rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->dst.dev = (out_dev)->dev; dev_hold(rth->dst.dev); @@ -2202,6 +2204,7 @@ local_input: #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif + rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->dst.dev = net->loopback_dev; dev_hold(rth->dst.dev); @@ -2401,7 +2404,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_mark = oldflp4->flowi4_mark; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; - rth->rt_iif = 0; + rth->rt_route_iif = 0; + rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; /* get references to the devices that are to be hold by the routing cache entry */ rth->dst.dev = dev_out; @@ -2716,6 +2720,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_key_dst = ort->rt_key_dst; rt->rt_key_src = ort->rt_key_src; rt->rt_tos = ort->rt_tos; + rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; @@ -2725,7 +2730,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; - rt->rt_iif = ort->rt_iif; rt->rt_gateway = ort->rt_gateway; rt->rt_spec_dst = ort->rt_spec_dst; rt->peer = ort->peer; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 13e0e7f659ff..d20a05e970d8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -74,6 +74,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, rt->rt_key_dst = fl4->daddr; rt->rt_key_src = fl4->saddr; rt->rt_tos = fl4->flowi4_tos; + rt->rt_route_iif = fl4->flowi4_iif; rt->rt_iif = fl4->flowi4_iif; rt->rt_oif = fl4->flowi4_oif; rt->rt_mark = fl4->flowi4_mark; -- cgit v1.2.3 From 1f112cee07b314e244ee9e71d9c1e6950dc13327 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 11 Apr 2011 22:54:42 +0200 Subject: PM / Hibernate: Introduce CONFIG_HIBERNATE_CALLBACKS Xen save/restore is going to use hibernate device callbacks for quiescing devices and putting them back to normal operations and it would need to select CONFIG_HIBERNATION for this purpose. However, that also would cause the hibernate interfaces for user space to be enabled, which might confuse user space, because the Xen kernels don't support hibernation. Moreover, it would be wasteful, as it would make the Xen kernels include a substantial amount of code that they would never use. To address this issue introduce new power management Kconfig option CONFIG_HIBERNATE_CALLBACKS, such that it will only select the code that is necessary for the hibernate device callbacks to work and make CONFIG_HIBERNATION select it. Then, Xen save/restore will be able to select CONFIG_HIBERNATE_CALLBACKS without dragging the entire hibernate code along with it. Signed-off-by: Rafael J. Wysocki Tested-by: Shriram Rajagopalan --- arch/powerpc/kernel/ibmebus.c | 6 +++--- drivers/amba/bus.c | 6 +++--- drivers/base/platform.c | 6 +++--- drivers/base/power/main.c | 8 ++++---- drivers/pci/pci-driver.c | 6 +++--- drivers/xen/manage.c | 6 +++--- include/linux/suspend.h | 11 +++-------- kernel/power/Kconfig | 6 +++++- 8 files changed, 27 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index c00d4ca1ee15..28581f1ad2c0 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -527,7 +527,7 @@ static int ibmebus_bus_pm_resume_noirq(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int ibmebus_bus_pm_freeze(struct device *dev) { @@ -665,7 +665,7 @@ static int ibmebus_bus_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define ibmebus_bus_pm_freeze NULL #define ibmebus_bus_pm_thaw NULL @@ -676,7 +676,7 @@ static int ibmebus_bus_pm_restore_noirq(struct device *dev) #define ibmebus_bus_pm_poweroff_noirq NULL #define ibmebus_bus_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { .prepare = ibmebus_bus_pm_prepare, diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 821040503154..7025593a58c8 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -214,7 +214,7 @@ static int amba_pm_resume_noirq(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int amba_pm_freeze(struct device *dev) { @@ -352,7 +352,7 @@ static int amba_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define amba_pm_freeze NULL #define amba_pm_thaw NULL @@ -363,7 +363,7 @@ static int amba_pm_restore_noirq(struct device *dev) #define amba_pm_poweroff_noirq NULL #define amba_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ #ifdef CONFIG_PM diff --git a/drivers/base/platform.c b/drivers/base/platform.c index f051cfff18af..92792313d24a 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -771,7 +771,7 @@ int __weak platform_pm_resume_noirq(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int platform_pm_freeze(struct device *dev) { @@ -909,7 +909,7 @@ static int platform_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define platform_pm_freeze NULL #define platform_pm_thaw NULL @@ -920,7 +920,7 @@ static int platform_pm_restore_noirq(struct device *dev) #define platform_pm_poweroff_noirq NULL #define platform_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ #ifdef CONFIG_PM_RUNTIME diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 052dc53eef38..fbc5b6e7c591 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -233,7 +233,7 @@ static int pm_op(struct device *dev, } break; #endif /* CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: if (ops->freeze) { @@ -260,7 +260,7 @@ static int pm_op(struct device *dev, suspend_report_result(ops->restore, error); } break; -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATE_CALLBACKS */ default: error = -EINVAL; } @@ -308,7 +308,7 @@ static int pm_noirq_op(struct device *dev, } break; #endif /* CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: if (ops->freeze_noirq) { @@ -335,7 +335,7 @@ static int pm_noirq_op(struct device *dev, suspend_report_result(ops->restore_noirq, error); } break; -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATE_CALLBACKS */ default: error = -EINVAL; } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d86ea8b01137..135df164a4c1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -781,7 +781,7 @@ static int pci_pm_resume(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int pci_pm_freeze(struct device *dev) { @@ -970,7 +970,7 @@ static int pci_pm_restore(struct device *dev) return error; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define pci_pm_freeze NULL #define pci_pm_freeze_noirq NULL @@ -981,7 +981,7 @@ static int pci_pm_restore(struct device *dev) #define pci_pm_restore NULL #define pci_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ #ifdef CONFIG_PM_RUNTIME diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 95143dd6904d..1ac94125bf93 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -61,7 +61,7 @@ static void xen_post_suspend(int cancelled) xen_mm_unpin_all(); } -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int xen_suspend(void *data) { struct suspend_info *si = data; @@ -173,7 +173,7 @@ out: #endif shutting_down = SHUTDOWN_INVALID; } -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATE_CALLBACKS */ struct shutdown_handler { const char *command; @@ -202,7 +202,7 @@ static void shutdown_handler(struct xenbus_watch *watch, { "poweroff", do_poweroff }, { "halt", do_poweroff }, { "reboot", do_reboot }, -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS { "suspend", do_suspend }, #endif {NULL, NULL}, diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5a89e3612875..083ffea7ba18 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -249,6 +249,8 @@ extern void hibernation_set_ops(const struct platform_hibernation_ops *ops); extern int hibernate(void); extern bool system_entering_hibernation(void); #else /* CONFIG_HIBERNATION */ +static inline void register_nosave_region(unsigned long b, unsigned long e) {} +static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} @@ -297,14 +299,7 @@ static inline bool pm_wakeup_pending(void) { return false; } extern struct mutex pm_mutex; -#ifndef CONFIG_HIBERNATION -static inline void register_nosave_region(unsigned long b, unsigned long e) -{ -} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) -{ -} - +#ifndef CONFIG_HIBERNATE_CALLBACKS static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 4603f08dc47b..049791468d37 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -18,9 +18,13 @@ config SUSPEND_FREEZER Turning OFF this setting is NOT recommended! If in doubt, say Y. +config HIBERNATE_CALLBACKS + bool + config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on SWAP && ARCH_HIBERNATION_POSSIBLE + select HIBERNATE_CALLBACKS select LZO_COMPRESS select LZO_DECOMPRESS ---help--- @@ -85,7 +89,7 @@ config PM_STD_PARTITION config PM_SLEEP def_bool y - depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE + depends on SUSPEND || HIBERNATE_CALLBACKS || XEN_SAVE_RESTORE config PM_SLEEP_SMP def_bool y -- cgit v1.2.3 From d9c97833179036408e53ef5f3f5c7eaf781769bc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:06:33 +0200 Subject: block: remove block_unplug_timer() trace point We no longer have an unplug timer running, so no point in keeping the trace point. Signed-off-by: Jens Axboe --- include/trace/events/block.h | 14 -------------- kernel/trace/blktrace.c | 17 ----------------- 2 files changed, 31 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 78f18adb49c8..43a985390bb6 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -418,20 +418,6 @@ DECLARE_EVENT_CLASS(block_unplug, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -/** - * block_unplug_timer - timed release of operations requests in queue to device driver - * @q: request queue to unplug - * - * Unplug the request queue @q because a timer expired and allow block - * operation requests to be sent to the device driver. - */ -DEFINE_EVENT(block_unplug, block_unplug_timer, - - TP_PROTO(struct request_queue *q), - - TP_ARGS(q) -); - /** * block_unplug_io - release of operations requests in request queue * @q: request queue to unplug diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7aa40f8e182d..824708cbfb7b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -863,19 +863,6 @@ static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) } } -static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q) -{ - struct blk_trace *bt = q->blk_trace; - - if (bt) { - unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; - __be64 rpdu = cpu_to_be64(pdu); - - __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, - sizeof(rpdu), &rpdu); - } -} - static void blk_add_trace_split(void *ignore, struct request_queue *q, struct bio *bio, unsigned int pdu) @@ -1015,8 +1002,6 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); - ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); - WARN_ON(ret); ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); WARN_ON(ret); ret = register_trace_block_split(blk_add_trace_split, NULL); @@ -1033,7 +1018,6 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); - unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); @@ -1348,7 +1332,6 @@ static const struct { [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, - [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, -- cgit v1.2.3 From 94b5eb28b41cc79d9713696e0005ae167b5afd1b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:12:19 +0200 Subject: block: fixup block IO unplug trace call It was removed with the on-stack plugging, readd it and track the depth of requests added when flushing the plug. Signed-off-by: Jens Axboe --- block/blk-core.c | 15 +++++++++++++-- include/trace/events/block.h | 11 ++++++----- kernel/trace/blktrace.c | 6 +++--- 3 files changed, 22 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index eeaca0998df5..d20ce1e849c8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2668,12 +2668,19 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) return !(rqa->q <= rqb->q); } +static void queue_unplugged(struct request_queue *q, unsigned int depth) +{ + trace_block_unplug_io(q, depth); + __blk_run_queue(q, false); +} + static void flush_plug_list(struct blk_plug *plug) { struct request_queue *q; unsigned long flags; struct request *rq; LIST_HEAD(list); + unsigned int depth; BUG_ON(plug->magic != PLUG_MAGIC); @@ -2688,6 +2695,7 @@ static void flush_plug_list(struct blk_plug *plug) } q = NULL; + depth = 0; local_irq_save(flags); while (!list_empty(&list)) { rq = list_entry_rq(list.next); @@ -2696,10 +2704,11 @@ static void flush_plug_list(struct blk_plug *plug) BUG_ON(!rq->q); if (rq->q != q) { if (q) { - __blk_run_queue(q, false); + queue_unplugged(q, depth); spin_unlock(q->queue_lock); } q = rq->q; + depth = 0; spin_lock(q->queue_lock); } rq->cmd_flags &= ~REQ_ON_PLUG; @@ -2711,10 +2720,12 @@ static void flush_plug_list(struct blk_plug *plug) __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); else __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); + + depth++; } if (q) { - __blk_run_queue(q, false); + queue_unplugged(q, depth); spin_unlock(q->queue_lock); } diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 43a985390bb6..006e60b58306 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -401,9 +401,9 @@ TRACE_EVENT(block_plug, DECLARE_EVENT_CLASS(block_unplug, - TP_PROTO(struct request_queue *q), + TP_PROTO(struct request_queue *q, unsigned int depth), - TP_ARGS(q), + TP_ARGS(q, depth), TP_STRUCT__entry( __field( int, nr_rq ) @@ -411,7 +411,7 @@ DECLARE_EVENT_CLASS(block_unplug, ), TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + __entry->nr_rq = depth; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -421,15 +421,16 @@ DECLARE_EVENT_CLASS(block_unplug, /** * block_unplug_io - release of operations requests in request queue * @q: request queue to unplug + * @depth: number of requests just added to the queue * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ DEFINE_EVENT(block_unplug, block_unplug_io, - TP_PROTO(struct request_queue *q), + TP_PROTO(struct request_queue *q, unsigned int depth), - TP_ARGS(q) + TP_ARGS(q, depth) ); /** diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 824708cbfb7b..3e3970d53d14 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -850,13 +850,13 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); } -static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) +static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q, + unsigned int depth) { struct blk_trace *bt = q->blk_trace; if (bt) { - unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; - __be64 rpdu = cpu_to_be64(pdu); + __be64 rpdu = cpu_to_be64(depth); __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, sizeof(rpdu), &rpdu); -- cgit v1.2.3 From f75664570d8b75469cc468f23c2b27220984983b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:17:31 +0200 Subject: block: add callback function for unplug notification MD would like to know when a queue is unplugged, so it can flush it's bitmap writes. Add such a callback. Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +++ block/blk-settings.c | 16 ++++++++++++++++ include/linux/blkdev.h | 3 +++ 3 files changed, 22 insertions(+) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 0c0ea10e61ea..76850fc9cf23 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2672,6 +2672,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth) { trace_block_unplug_io(q, depth); __blk_run_queue(q, false); + + if (q->unplugged_fn) + q->unplugged_fn(q); } static void flush_plug_list(struct blk_plug *plug) diff --git a/block/blk-settings.c b/block/blk-settings.c index 1fa769293597..eb949045bb12 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,6 +790,22 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); +/** + * blk_queue_unplugged - register a callback for an unplug event + * @q: the request queue for the device + * @fn: the function to call + * + * Some stacked drivers may need to know when IO is dispatched on an + * unplug event. By registrering a callback here, they will be notified + * when someone flushes their on-stack queue plug. The function will be + * called with the queue lock held. + */ +void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn) +{ + q->unplugged_fn = fn; +} +EXPORT_SYMBOL(blk_queue_unplugged); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32176cc8e715..c07ffafac5d4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,6 +196,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); +typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -283,6 +284,7 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -841,6 +843,7 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); +extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3 From e710d7d5a9cab1041b7a3cf9e655b75d92786857 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 8 Apr 2011 00:43:01 +0200 Subject: mfd: Fetch cell pointer from platform_device->mfd_cell In order for MFD drivers to fetch their cell pointer but also their platform data one, an mfd cell pointer is added to the platform_device structure. That allows all MFD sub devices drivers to be MFD agnostic, unless they really need to access their MFD cell data. Most of them don't, especially the ones for IPs used by both MFD and non MFD SoCs. Cc: Grant Likely Acked-by: Greg KH Signed-off-by: Samuel Ortiz --- drivers/base/platform.c | 1 + drivers/mfd/mfd-core.c | 16 ++++++++++++++-- include/linux/mfd/core.h | 13 +++++++++++-- include/linux/platform_device.h | 5 +++++ 4 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index f051cfff18af..6c3a2bdc527a 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -149,6 +149,7 @@ static void platform_device_release(struct device *dev) of_device_node_put(&pa->pdev.dev); kfree(pa->pdev.dev.platform_data); + kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); kfree(pa); } diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index d01574d98870..f4c8c844b913 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -55,6 +55,19 @@ int mfd_cell_disable(struct platform_device *pdev) } EXPORT_SYMBOL(mfd_cell_disable); +static int mfd_platform_add_cell(struct platform_device *pdev, + const struct mfd_cell *cell) +{ + if (!cell) + return 0; + + pdev->mfd_cell = kmemdup(cell, sizeof(*cell), GFP_KERNEL); + if (!pdev->mfd_cell) + return -ENOMEM; + + return 0; +} + static int mfd_add_device(struct device *parent, int id, const struct mfd_cell *cell, struct resource *mem_base, @@ -75,7 +88,7 @@ static int mfd_add_device(struct device *parent, int id, pdev->dev.parent = parent; - ret = platform_device_add_data(pdev, cell, sizeof(*cell)); + ret = mfd_platform_add_cell(pdev, cell); if (ret) goto fail_res; @@ -123,7 +136,6 @@ static int mfd_add_device(struct device *parent, int id, return 0; -/* platform_device_del(pdev); */ fail_res: kfree(res); fail_device: diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index ad1b19aa6508..aef23309a742 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -86,16 +86,25 @@ extern int mfd_clone_cell(const char *cell, const char **clones, */ static inline const struct mfd_cell *mfd_get_cell(struct platform_device *pdev) { - return pdev->dev.platform_data; + return pdev->mfd_cell; } /* * Given a platform device that's been created by mfd_add_devices(), fetch * the .mfd_data entry from the mfd_cell that created it. + * Otherwise just return the platform_data pointer. + * This maintains compatibility with platform drivers whose devices aren't + * created by the mfd layer, and expect platform_data to contain what would've + * otherwise been in mfd_data. */ static inline void *mfd_get_data(struct platform_device *pdev) { - return mfd_get_cell(pdev)->mfd_data; + const struct mfd_cell *cell = mfd_get_cell(pdev); + + if (cell) + return cell->mfd_data; + else + return pdev->dev.platform_data; } extern int mfd_add_devices(struct device *parent, int id, diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index d96db9825708..744942c95fec 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -14,6 +14,8 @@ #include #include +struct mfd_cell; + struct platform_device { const char * name; int id; @@ -23,6 +25,9 @@ struct platform_device { const struct platform_device_id *id_entry; + /* MFD cell pointer */ + struct mfd_cell *mfd_cell; + /* arch specific additions */ struct pdev_archdata archdata; }; -- cgit v1.2.3 From be85bccaa5aa5a11dcaf85f9e945ffefd253f631 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 12 Apr 2011 13:35:56 -0700 Subject: Revert "vfs: Export file system uuid via /proc//mountinfo" This reverts commit 93f1c20bc8cdb757be50566eff88d65c3b26881f. It turns out that libmount misparses it because it adds a '-' character in the uuid string, which libmount then incorrectly confuses with the separator string (" - ") at the end of all the optional arguments. Upstream libmount (in the util-linux tree) has been fixed, but until that fix actually percolates up to users, we'd better not expose this change in the kernel. Let's revisit this later (possibly by exposing the UUID without any '-' characters in it, avoiding the user-space bug). Reported-by: Dave Jones Cc: Aneesh Kumar K.V Cc: Al Viro Cc: Karel Zak Cc: Ram Pai Cc: Miklos Szeredi Cc: Eric Sandeen Signed-off-by: Linus Torvalds --- fs/namespace.c | 16 ---------------- include/linux/fs.h | 1 - 2 files changed, 17 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index 7dba2ed03429..d99bcf59e4c2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1030,18 +1030,6 @@ const struct seq_operations mounts_op = { .show = show_vfsmnt }; -static int uuid_is_nil(u8 *uuid) -{ - int i; - u8 *cp = (u8 *)uuid; - - for (i = 0; i < 16; i++) { - if (*cp++) - return 0; - } - return 1; -} - static int show_mountinfo(struct seq_file *m, void *v) { struct proc_mounts *p = m->private; @@ -1085,10 +1073,6 @@ static int show_mountinfo(struct seq_file *m, void *v) if (IS_MNT_UNBINDABLE(mnt)) seq_puts(m, " unbindable"); - if (!uuid_is_nil(mnt->mnt_sb->s_uuid)) - /* print the uuid */ - seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid); - /* Filesystem specific data */ seq_puts(m, " - "); show_type(m, sb); diff --git a/include/linux/fs.h b/include/linux/fs.h index dbd860af0804..5b14843af9f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1403,7 +1403,6 @@ struct super_block { wait_queue_head_t s_wait_unfrozen; char s_id[32]; /* Informational name */ - u8 s_uuid[16]; /* UUID */ void *s_fs_info; /* Filesystem private info */ fmode_t s_mode; -- cgit v1.2.3 From 0bba01695b74fdd2f9286243bb39f88544d81401 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 12 Apr 2011 15:21:04 -0700 Subject: vfs: Re-introduce s_uuid in the superblock Gaah. When commit be85bccaa5aa reverted the export of file system uuid via /proc//mountinfo, it also unintentionally removed the s_uuid field in struct super_block. I didn't mean to do that, since filesystems have been taught to fill it in (and we want to keep it for future re-introduction in the mountinfo file). Stupid of me. This adds it back in. Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b14843af9f9..dbd860af0804 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1403,6 +1403,7 @@ struct super_block { wait_queue_head_t s_wait_unfrozen; char s_id[32]; /* Informational name */ + u8 s_uuid[16]; /* UUID */ void *s_fs_info; /* Filesystem private info */ fmode_t s_mode; -- cgit v1.2.3 From ba6a078b77e0dc1309d7e6e2ee034b92ab91f88c Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 12 Apr 2011 23:13:08 -0700 Subject: Input: add KEY_IMAGES specifically for AL Image Browser Many media center remotes have buttons intended for jumping straight to one type of media browser or another -- commonly, images/photos/pictures, audio/music, television, and movies. At present, remotes with an images or photos or pictures button use any number of different keycodes which sort of maybe fit. I've seen at least KEY_MEDIA, KEY_CAMERA, KEY_GRAPHICSEDITOR and KEY_PRESENTATION. None of those seem quite right. In my mind, KEY_MEDIA should be something more like a media center application launcher (and I'd like to standardize on that for things like the windows media center button on the mce remotes). KEY_CAMERA is used in a lot of webcams, and typically means "take a picture now". KEY_GRAPHICSEDITOR implies an editor, not a browser. KEY_PRESENTATION might be the closest fit here, if you think "photo slide show", but it may well be more intended for "run application in full-screen presentation mode" or to launch something like magicpoint, I dunno. And thus, I'd like to have a KEY_IMAGES, which matches the HID Usage AL Image Browser, the meaning of which I think is crystal-clear. I believe AL Audio Browser is already covered by KEY_AUDIO, and AL Movie Browser by KEY_VIDEO, so I'm also adding appropriate comments next to those keys. Signed-off-by: Jarod Wilson Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 056ae8a5bd9b..0cc25e4ce2ab 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -553,8 +553,8 @@ struct input_keymap_entry { #define KEY_DVD 0x185 /* Media Select DVD */ #define KEY_AUX 0x186 #define KEY_MP3 0x187 -#define KEY_AUDIO 0x188 -#define KEY_VIDEO 0x189 +#define KEY_AUDIO 0x188 /* AL Audio Browser */ +#define KEY_VIDEO 0x189 /* AL Movie Browser */ #define KEY_DIRECTORY 0x18a #define KEY_LIST 0x18b #define KEY_MEMO 0x18c /* Media Select Messages */ @@ -603,8 +603,9 @@ struct input_keymap_entry { #define KEY_FRAMEFORWARD 0x1b5 #define KEY_CONTEXT_MENU 0x1b6 /* GenDesc - system context menu */ #define KEY_MEDIA_REPEAT 0x1b7 /* Consumer - transport control */ -#define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ -#define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ +#define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ +#define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ +#define KEY_IMAGES 0x1ba /* AL Image Browser */ #define KEY_DEL_EOL 0x1c0 #define KEY_DEL_EOS 0x1c1 -- cgit v1.2.3 From 9fb0f14e31b6101a0cc69a333b43541044f9b0a6 Mon Sep 17 00:00:00 2001 From: Jeff Brown Date: Tue, 12 Apr 2011 23:29:38 -0700 Subject: Input: evdev - indicate buffer overrun with SYN_DROPPED Add a new EV_SYN code, SYN_DROPPED, to inform the client when input events have been dropped from the evdev input buffer due to a buffer overrun. The client should use this event as a hint to reset its state or ignore all following events until the next packet begins. Signed-off-by: Jeff Brown [dtor@mail.ru: Implement Henrik's suggestion and drop old events in case of overflow.] Signed-off-by: Dmitry Torokhov --- Documentation/input/event-codes.txt | 6 ++++++ drivers/input/evdev.c | 33 +++++++++++++++++++++------------ include/linux/input.h | 1 + 3 files changed, 28 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index f13aee550409..23fcb05175be 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -85,6 +85,12 @@ sent in the evdev event stream. - Used to synchronize and separate touch events. See the multi-touch-protocol.txt document for more information. +* SYN_DROPPED: + - Used to indicate buffer overrun in the evdev client's event queue. + Client should ignore all events up to and including next SYN_REPORT + event and query the device (using EVIOCG* ioctls) to obtain its + current state. + EV_KEY: ---------- EV_KEY events take the form KEY_ or BTN_. For example, KEY_A is used diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 7f42d3a454d2..88d8e4cb419a 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -39,13 +39,13 @@ struct evdev { }; struct evdev_client { - int head; - int tail; + unsigned int head; + unsigned int tail; spinlock_t buffer_lock; /* protects access to buffer, head and tail */ struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; - int bufsize; + unsigned int bufsize; struct input_event buffer[]; }; @@ -55,16 +55,25 @@ static DEFINE_MUTEX(evdev_table_mutex); static void evdev_pass_event(struct evdev_client *client, struct input_event *event) { - /* - * Interrupts are disabled, just acquire the lock. - * Make sure we don't leave with the client buffer - * "empty" by having client->head == client->tail. - */ + /* Interrupts are disabled, just acquire the lock. */ spin_lock(&client->buffer_lock); - do { - client->buffer[client->head++] = *event; - client->head &= client->bufsize - 1; - } while (client->head == client->tail); + + client->buffer[client->head++] = *event; + client->head &= client->bufsize - 1; + + if (unlikely(client->head == client->tail)) { + /* + * This effectively "drops" all unconsumed events, leaving + * EV_SYN/SYN_DROPPED plus the newest event in the queue. + */ + client->tail = (client->head - 2) & (client->bufsize - 1); + + client->buffer[client->tail].time = event->time; + client->buffer[client->tail].type = EV_SYN; + client->buffer[client->tail].code = SYN_DROPPED; + client->buffer[client->tail].value = 0; + } + spin_unlock(&client->buffer_lock); if (event->type == EV_SYN) diff --git a/include/linux/input.h b/include/linux/input.h index 0cc25e4ce2ab..73a8c6ee595b 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -167,6 +167,7 @@ struct input_keymap_entry { #define SYN_REPORT 0 #define SYN_CONFIG 1 #define SYN_MT_REPORT 2 +#define SYN_DROPPED 3 /* * Keys and buttons -- cgit v1.2.3 From 38a2f37258f9e2ae3f6e4241e01088be8dfaf4e9 Mon Sep 17 00:00:00 2001 From: huajun li Date: Wed, 13 Apr 2011 15:43:32 +0000 Subject: usbnet: Fix up 'FLAG_POINTTOPOINT' and 'FLAG_MULTI_PACKET' overlaps. USB tethering does not work anymore since 2.6.39-rc2, but it's okay in -rc1. The root cause is the new added mask code 'FLAG_POINTTOPOINT' overlaps 'FLAG_MULTI_PACKET' in include/linux/usb/usbnet.h, this causes logic issue in rx_process(). This patch cleans up the overlap. Reported-and-Tested-by: Gottfried Haider Signed-off-by: Huajun Li Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 3c7329b8ea0e..0e1855079fbb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -103,8 +103,8 @@ struct driver_info { * Indicates to usbnet, that USB driver accumulates multiple IP packets. * Affects statistic (counters) and short packet handling. */ -#define FLAG_MULTI_PACKET 0x1000 -#define FLAG_RX_ASSEMBLE 0x2000 /* rx packets may span >1 frames */ +#define FLAG_MULTI_PACKET 0x2000 +#define FLAG_RX_ASSEMBLE 0x4000 /* rx packets may span >1 frames */ /* init device ... can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *); -- cgit v1.2.3 From 67954fe95705a8ff80335964bd7e621d13fbc499 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Apr 2011 15:21:52 -0700 Subject: memcg: fix mem_cgroup_rotate_reclaimable_page() commit 3f58a8294333 ("move memcg reclaimable page into tail of inactive list") added inline keyword twice in its prototype. CC arch/x86/kernel/asm-offsets.s In file included from include/linux/swap.h:8, from include/linux/suspend.h:4, from arch/x86/kernel/asm-offsets.c:12: include/linux/memcontrol.h:220: error: duplicate `inline' Signed-off-by: Eric Dumazet Reviewed-by: Minchan Kim Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5a5ce7055839..5e9840f50980 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -216,7 +216,7 @@ static inline void mem_cgroup_del_lru_list(struct page *page, int lru) return ; } -static inline inline void mem_cgroup_rotate_reclaimable_page(struct page *page) +static inline void mem_cgroup_rotate_reclaimable_page(struct page *page) { return ; } -- cgit v1.2.3 From 81ab4201fb7d91d6b0cd9ad5b4b16776e4bed145 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Apr 2011 15:22:06 -0700 Subject: mm: add VM counters for transparent hugepages I found it difficult to make sense of transparent huge pages without having any counters for its actions. Add some counters to vmstat for allocation of transparent hugepages and fallback to smaller pages. Optional patch, but useful for development and understanding the system. Contains improvements from Andrea Arcangeli and Johannes Weiner [akpm@linux-foundation.org: coding-style fixes] [hannes@cmpxchg.org: fix vmstat_text[] entries] Signed-off-by: Andi Kleen Acked-by: Andrea Arcangeli Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 7 +++++++ mm/huge_memory.c | 25 +++++++++++++++++++++---- mm/vmstat.c | 9 +++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 461c0119664f..2b3831b58aa4 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -58,6 +58,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ UNEVICTABLE_MLOCKFREED, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + THP_FAULT_ALLOC, + THP_FAULT_FALLBACK, + THP_COLLAPSE_ALLOC, + THP_COLLAPSE_ALLOC_FAILED, + THP_SPLIT, +#endif NR_VM_EVENT_ITEMS }; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0a619e0e2e0b..1722683bde23 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -680,8 +680,11 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), vma, haddr, numa_node_id(), 0); - if (unlikely(!page)) + if (unlikely(!page)) { + count_vm_event(THP_FAULT_FALLBACK); goto out; + } + count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { put_page(page); goto out; @@ -909,11 +912,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, new_page = NULL; if (unlikely(!new_page)) { + count_vm_event(THP_FAULT_FALLBACK); ret = do_huge_pmd_wp_page_fallback(mm, vma, address, pmd, orig_pmd, page, haddr); put_page(page); goto out; } + count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { put_page(new_page); @@ -1390,6 +1395,7 @@ int split_huge_page(struct page *page) BUG_ON(!PageSwapBacked(page)); __split_huge_page(page, anon_vma); + count_vm_event(THP_SPLIT); BUG_ON(PageCompound(page)); out_unlock: @@ -1784,9 +1790,11 @@ static void collapse_huge_page(struct mm_struct *mm, node, __GFP_OTHER_NODE); if (unlikely(!new_page)) { up_read(&mm->mmap_sem); + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); *hpage = ERR_PTR(-ENOMEM); return; } + count_vm_event(THP_COLLAPSE_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { up_read(&mm->mmap_sem); put_page(new_page); @@ -2151,8 +2159,11 @@ static void khugepaged_do_scan(struct page **hpage) #ifndef CONFIG_NUMA if (!*hpage) { *hpage = alloc_hugepage(khugepaged_defrag()); - if (unlikely(!*hpage)) + if (unlikely(!*hpage)) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); break; + } + count_vm_event(THP_COLLAPSE_ALLOC); } #else if (IS_ERR(*hpage)) @@ -2192,8 +2203,11 @@ static struct page *khugepaged_alloc_hugepage(void) do { hpage = alloc_hugepage(khugepaged_defrag()); - if (!hpage) + if (!hpage) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); khugepaged_alloc_sleep(); + } else + count_vm_event(THP_COLLAPSE_ALLOC); } while (unlikely(!hpage) && likely(khugepaged_enabled())); return hpage; @@ -2210,8 +2224,11 @@ static void khugepaged_loop(void) while (likely(khugepaged_enabled())) { #ifndef CONFIG_NUMA hpage = khugepaged_alloc_hugepage(); - if (unlikely(!hpage)) + if (unlikely(!hpage)) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); break; + } + count_vm_event(THP_COLLAPSE_ALLOC); #else if (IS_ERR(hpage)) { khugepaged_alloc_sleep(); diff --git a/mm/vmstat.c b/mm/vmstat.c index 8cb0f0a703e5..897ea9e88238 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -948,7 +948,16 @@ static const char * const vmstat_text[] = { "unevictable_pgs_cleared", "unevictable_pgs_stranded", "unevictable_pgs_mlockfreed", + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + "thp_fault_alloc", + "thp_fault_fallback", + "thp_collapse_alloc", + "thp_collapse_alloc_failed", + "thp_split", #endif + +#endif /* CONFIG_VM_EVENTS_COUNTERS */ }; static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, -- cgit v1.2.3 From 4471a675dfc7ca676c165079e91c712b09dc9ce4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 14 Apr 2011 15:22:09 -0700 Subject: brk: COMPAT_BRK: fix detection of randomized brk 5520e89 ("brk: fix min_brk lower bound computation for COMPAT_BRK") tried to get the whole logic of brk randomization for legacy (libc5-based) applications finally right. It turns out that the way to detect whether brk has actually been randomized in the end or not introduced by that patch still doesn't work for those binaries, as reported by Geert: : /sbin/init from my old m68k ramdisk exists prematurely. : : Before the patch: : : | brk(0x80005c8e) = 0x80006000 : : After the patch: : : | brk(0x80005c8e) = 0x80005c8e : : Old libc5 considers brk() to have failed if the return value is not : identical to the requested value. I don't like it, but currently see no better option than a bit flag in task_struct to catch the CONFIG_COMPAT_BRK && randomize_va_space == 2 case. Signed-off-by: Jiri Kosina Tested-by: Geert Uytterhoeven Reported-by: Geert Uytterhoeven Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 6 +++++- include/linux/sched.h | 3 +++ mm/mmap.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f34078d702d3..303983fabfd6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; #ifdef arch_randomize_brk - if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) + if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); +#ifdef CONFIG_COMPAT_BRK + current->brk_randomized = 1; +#endif + } #endif if (current->personality & MMAP_PAGE_ZERO) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..18d63cea2848 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1254,6 +1254,9 @@ struct task_struct { #endif struct mm_struct *mm, *active_mm; +#ifdef CONFIG_COMPAT_BRK + unsigned brk_randomized:1; +#endif #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/mm/mmap.c b/mm/mmap.c index 8c05e5b43b69..e27e0cf0de03 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -259,7 +259,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) * randomize_va_space to 2, which will still cause mm->start_brk * to be arbitrarily shifted */ - if (mm->start_brk > PAGE_ALIGN(mm->end_data)) + if (current->brk_randomized) min_brk = mm->start_brk; else min_brk = mm->end_data; -- cgit v1.2.3 From 13209c2a52afa691ca19e7e6ebd22d4034bdfeed Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 14 Apr 2011 15:22:14 -0700 Subject: RapidIO: add IDT CPS-1432 switch definitions Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/idt_gen2.c | 1 + include/linux/rio_ids.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index 095016a9dec1..ac2701b22e71 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -418,3 +418,4 @@ DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1848, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1616, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTVPS1616, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTSPS1616, idtg2_switch_init); +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1432, idtg2_switch_init); diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index 7410d3365e2a..0cee0152aca9 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -35,6 +35,7 @@ #define RIO_DID_IDTCPS6Q 0x035f #define RIO_DID_IDTCPS10Q 0x035e #define RIO_DID_IDTCPS1848 0x0374 +#define RIO_DID_IDTCPS1432 0x0375 #define RIO_DID_IDTCPS1616 0x0379 #define RIO_DID_IDTVPS1616 0x0377 #define RIO_DID_IDTSPS1616 0x0378 -- cgit v1.2.3 From 59f9996555542f901f2d01ccab5c0612c8c5c480 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 14 Apr 2011 15:22:14 -0700 Subject: RapidIO/mpc85xx: fix possible mport registration problems Fix a possible problem with mport registration left non-cleared after fsl_rio_setup() exits on link error. Abort mport initialization if registration failed. This patch is applicable to 2.6.39-rc1 only. The problem does not exist for earlier versions. Signed-off-by: Alexandre Bounine Cc: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 4 +++- drivers/rapidio/rio.c | 5 +++-- include/linux/rio.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 14232d57369c..49798532b477 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -1457,7 +1457,6 @@ int fsl_rio_setup(struct platform_device *dev) port->ops = ops; port->priv = priv; port->phys_efptr = 0x100; - rio_register_mport(port); priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); rio_regs_win = priv->regs_win; @@ -1504,6 +1503,9 @@ int fsl_rio_setup(struct platform_device *dev) dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", port->sys_size ? 65536 : 256); + if (rio_register_mport(port)) + goto err; + if (port->host_deviceid >= 0) out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED); diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index c29719cacbca..86c9a091a2ff 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1171,16 +1171,17 @@ static int rio_hdid_setup(char *str) __setup("riohdid=", rio_hdid_setup); -void rio_register_mport(struct rio_mport *port) +int rio_register_mport(struct rio_mport *port) { if (next_portid >= RIO_MAX_MPORTS) { pr_err("RIO: reached specified max number of mports\n"); - return; + return 1; } port->id = next_portid++; port->host_deviceid = rio_get_hdid(port->id); list_add_tail(&port->node, &rio_mports); + return 0; } EXPORT_SYMBOL_GPL(rio_local_get_device_id); diff --git a/include/linux/rio.h b/include/linux/rio.h index 4e37a7cfa726..4d50611112ba 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -396,7 +396,7 @@ union rio_pw_msg { }; /* Architecture and hardware-specific functions */ -extern void rio_register_mport(struct rio_mport *); +extern int rio_register_mport(struct rio_mport *); extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int); extern void rio_close_inb_mbox(struct rio_mport *, int); extern int rio_open_outb_mbox(struct rio_mport *, void *, int, int); -- cgit v1.2.3 From 88b996cd0652280cc9b9fc70008fda15f14175e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 15 Apr 2011 15:20:10 +0200 Subject: block: cleanup the block plug helper functions It's a bit of a mess currently. task->plug is being cleared and reset in __blk_finish_plug(), and blk_finish_plug() is testing for a NULL plug which cannot happen even from schedule() anymore since it uses blk_needs_flush_plug() to determine whether to call into this function at all. So get rid of some of the cruft. Signed-off-by: Jens Axboe --- block/blk-core.c | 24 ++++++------------------ include/linux/blkdev.h | 6 +++--- 2 files changed, 9 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 36b1a7559f94..b598fa7720d4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2671,7 +2671,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth) q->unplugged_fn(q); } -static void flush_plug_list(struct blk_plug *plug) +void blk_flush_plug_list(struct blk_plug *plug) { struct request_queue *q; unsigned long flags; @@ -2733,28 +2733,16 @@ static void flush_plug_list(struct blk_plug *plug) local_irq_restore(flags); } - -static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug) -{ - flush_plug_list(plug); - - if (plug == tsk->plug) - tsk->plug = NULL; -} +EXPORT_SYMBOL(blk_flush_plug_list); void blk_finish_plug(struct blk_plug *plug) { - if (plug) - __blk_finish_plug(current, plug); -} -EXPORT_SYMBOL(blk_finish_plug); + blk_flush_plug_list(plug); -void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug) -{ - __blk_finish_plug(tsk, plug); - tsk->plug = plug; + if (plug == current->plug) + current->plug = NULL; } -EXPORT_SYMBOL(__blk_flush_plug); +EXPORT_SYMBOL(blk_finish_plug); int __init blk_dev_init(void) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c07ffafac5d4..ffe48ff318f9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void __blk_flush_plug(struct task_struct *, struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; - if (unlikely(plug)) - __blk_flush_plug(tsk, plug); + if (plug) + blk_flush_plug_list(plug); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) -- cgit v1.2.3 From f6603783f9f099bf7a83b3f6c689bbbf74f0e96e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Apr 2011 15:49:07 +0200 Subject: block: only force kblockd unplugging from the schedule() path For the explicit unplugging, we'd prefer to kick things off immediately and not pay the penalty of the latency to switch to kblockd. So let blk_finish_plug() do the run inline, while the implicit-on-schedule-out unplug will punt to kblockd. Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++------ include/linux/blkdev.h | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index b598fa7720d4..3c8121072507 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2662,16 +2662,17 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) return !(rqa->q <= rqb->q); } -static void queue_unplugged(struct request_queue *q, unsigned int depth) +static void queue_unplugged(struct request_queue *q, unsigned int depth, + bool force_kblockd) { trace_block_unplug_io(q, depth); - __blk_run_queue(q, true); + __blk_run_queue(q, force_kblockd); if (q->unplugged_fn) q->unplugged_fn(q); } -void blk_flush_plug_list(struct blk_plug *plug) +void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) { struct request_queue *q; unsigned long flags; @@ -2706,7 +2707,7 @@ void blk_flush_plug_list(struct blk_plug *plug) BUG_ON(!rq->q); if (rq->q != q) { if (q) { - queue_unplugged(q, depth); + queue_unplugged(q, depth, force_kblockd); spin_unlock(q->queue_lock); } q = rq->q; @@ -2727,7 +2728,7 @@ void blk_flush_plug_list(struct blk_plug *plug) } if (q) { - queue_unplugged(q, depth); + queue_unplugged(q, depth, force_kblockd); spin_unlock(q->queue_lock); } @@ -2737,7 +2738,7 @@ EXPORT_SYMBOL(blk_flush_plug_list); void blk_finish_plug(struct blk_plug *plug) { - blk_flush_plug_list(plug); + blk_flush_plug_list(plug, false); if (plug == current->plug) current->plug = NULL; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ffe48ff318f9..1c76506fcf11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void blk_flush_plug_list(struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *, bool); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; if (plug) - blk_flush_plug_list(plug); + blk_flush_plug_list(plug, true); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) -- cgit v1.2.3 From df5d8c80f1871d9e79af4b0f3656a9528a7d4bab Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Mar 2011 20:38:35 +0530 Subject: 9p: revert tsyncfs related changes Now that we use write_inode to flush server cache related to fid, we don't need tsyncfs either fort dotl or dotu protocols. For dotu this helps to do a more efficient server flush. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/fid.c | 15 ++------------- fs/9p/v9fs.h | 1 - fs/9p/vfs_super.c | 33 +++++++++------------------------ include/net/9p/9p.h | 2 -- include/net/9p/client.h | 1 - net/9p/client.c | 21 --------------------- 6 files changed, 11 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 0ee594569dcc..85b67ffa2a43 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -286,11 +286,9 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid) struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) { - int err, flags; + int err; struct p9_fid *fid; - struct v9fs_session_info *v9ses; - v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_clone_with_uid(dentry, 0); if (IS_ERR(fid)) goto error_out; @@ -299,17 +297,8 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) * dirty pages. We always request for the open fid in read-write * mode so that a partial page write which result in page * read can work. - * - * we don't have a tsyncfs operation for older version - * of protocol. So make sure the write back fid is - * opened in O_SYNC mode. */ - if (!v9fs_proto_dotl(v9ses)) - flags = O_RDWR | O_SYNC; - else - flags = O_RDWR; - - err = p9_client_open(fid, flags); + err = p9_client_open(fid, O_RDWR); if (err < 0) { p9_client_clunk(fid); fid = ERR_PTR(err); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 9665c2b840e6..e5ebedfc5ed8 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -116,7 +116,6 @@ struct v9fs_session_info { struct list_head slist; /* list of sessions registered with v9fs */ struct backing_dev_info bdi; struct rw_semaphore rename_sem; - struct p9_fid *root_fid; /* Used for file system sync */ }; /* cache_validity flags */ diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index acdc26593e64..feef6cdc1fd2 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -154,6 +154,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, retval = PTR_ERR(inode); goto release_sb; } + root = d_alloc_root(inode); if (!root) { iput(inode); @@ -185,21 +186,10 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, p9stat_free(st); kfree(st); } - v9fs_fid_add(root, fid); retval = v9fs_get_acl(inode, fid); if (retval) goto release_sb; - /* - * Add the root fid to session info. This is used - * for file system sync. We want a cloned fid here - * so that we can do a sync_filesystem after a - * shrink_dcache_for_umount - */ - v9ses->root_fid = v9fs_fid_clone(root); - if (IS_ERR(v9ses->root_fid)) { - retval = PTR_ERR(v9ses->root_fid); - goto release_sb; - } + v9fs_fid_add(root, fid); P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); return dget(sb->s_root); @@ -210,11 +200,15 @@ close_session: v9fs_session_close(v9ses); kfree(v9ses); return ERR_PTR(retval); + release_sb: /* - * we will do the session_close and root dentry - * release in the below call. + * we will do the session_close and root dentry release + * in the below call. But we need to clunk fid, because we haven't + * attached the fid to dentry so it won't get clunked + * automatically. */ + p9_client_clunk(fid); deactivate_locked_super(sb); return ERR_PTR(retval); } @@ -232,7 +226,7 @@ static void v9fs_kill_super(struct super_block *s) P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); kill_anon_super(s); - p9_client_clunk(v9ses->root_fid); + v9fs_session_cancel(v9ses); v9fs_session_close(v9ses); kfree(v9ses); @@ -285,14 +279,6 @@ done: return res; } -static int v9fs_sync_fs(struct super_block *sb, int wait) -{ - struct v9fs_session_info *v9ses = sb->s_fs_info; - - P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb); - return p9_client_sync_fs(v9ses->root_fid); -} - static int v9fs_drop_inode(struct inode *inode) { struct v9fs_session_info *v9ses; @@ -365,7 +351,6 @@ static const struct super_operations v9fs_super_ops = { static const struct super_operations v9fs_super_ops_dotl = { .alloc_inode = v9fs_alloc_inode, .destroy_inode = v9fs_destroy_inode, - .sync_fs = v9fs_sync_fs, .statfs = v9fs_statfs, .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index cdf2e8ac4309..d2df55b0c213 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -139,8 +139,6 @@ do { \ */ enum p9_msg_t { - P9_TSYNCFS = 0, - P9_RSYNCFS, P9_TLERROR = 6, P9_RLERROR, P9_TSTATFS = 8, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 85c1413f054d..59b5df599210 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -230,7 +230,6 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); -int p9_client_sync_fs(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count); diff --git a/net/9p/client.c b/net/9p/client.c index 48b8e084e710..d72aac7d25cc 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1220,27 +1220,6 @@ error: } EXPORT_SYMBOL(p9_client_fsync); -int p9_client_sync_fs(struct p9_fid *fid) -{ - int err = 0; - struct p9_req_t *req; - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid); - - clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto error; - } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid); - p9_free_req(clnt, req); -error: - return err; -} -EXPORT_SYMBOL(p9_client_sync_fs); - int p9_client_clunk(struct p9_fid *fid) { int err; -- cgit v1.2.3 From b76225e22ac98070325ee2ba89473c1e1360c4cb Mon Sep 17 00:00:00 2001 From: Harsh Prateek Bora Date: Thu, 31 Mar 2011 15:49:39 +0530 Subject: net/9p: nwname should be an unsigned int Signed-off-by: Harsh Prateek Bora Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric VAn Hensbergen --- include/net/9p/client.h | 4 ++-- net/9p/client.c | 8 ++++---- net/9p/protocol.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 59b5df599210..051a99f79769 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -218,8 +218,8 @@ void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname); -struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, - int clone); +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, + char **wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension); diff --git a/net/9p/client.c b/net/9p/client.c index d72aac7d25cc..77367745be9b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -929,15 +929,15 @@ error: } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, - int clone) +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, + char **wnames, int clone) { int err; struct p9_client *clnt; struct p9_fid *fid; struct p9_qid *wqids; struct p9_req_t *req; - int16_t nwqids, count; + uint16_t nwqids, count; err = 0; wqids = NULL; @@ -955,7 +955,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, fid = oldfid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", + P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, diff --git a/net/9p/protocol.c b/net/9p/protocol.c index a7e899740041..b58a501cf3d1 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -265,7 +265,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'T':{ - int16_t *nwname = va_arg(ap, int16_t *); + uint16_t *nwname = va_arg(ap, uint16_t *); char ***wnames = va_arg(ap, char ***); errcode = p9pdu_readf(pdu, proto_version, @@ -496,7 +496,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'T':{ - int16_t nwname = va_arg(ap, int); + uint16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); errcode = p9pdu_writef(pdu, proto_version, "w", -- cgit v1.2.3 From a237c1c5bc5dc5c76a21be922dca4826f3eca8ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Apr 2011 13:27:55 +0200 Subject: block: let io_schedule() flush the plug inline Linus correctly observes that the most important dispatch cases are now done from kblockd, this isn't ideal for latency reasons. The original reason for switching dispatches out-of-line was to avoid too deep a stack, so by _only_ letting the "accidental" flush directly in schedule() be guarded by offload to kblockd, we should be able to get the best of both worlds. So add a blk_schedule_flush_plug() that offloads to kblockd, and only use that from the schedule() path. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 +++++++++++++ kernel/sched.c | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1c76506fcf11..ec0357d8c4a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,6 +871,14 @@ static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; + if (plug) + blk_flush_plug_list(plug, false); +} + +static inline void blk_schedule_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + if (plug) blk_flush_plug_list(plug, true); } @@ -1317,6 +1325,11 @@ static inline void blk_flush_plug(struct task_struct *task) { } +static inline void blk_schedule_flush_plug(struct task_struct *task) +{ +} + + static inline bool blk_needs_flush_plug(struct task_struct *tsk) { return false; diff --git a/kernel/sched.c b/kernel/sched.c index a187c3fe027b..312f8b95c2d4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4118,7 +4118,7 @@ need_resched: */ if (blk_needs_flush_plug(prev)) { raw_spin_unlock(&rq->lock); - blk_flush_plug(prev); + blk_schedule_flush_plug(prev); raw_spin_lock(&rq->lock); } } -- cgit v1.2.3 From 49cac01e1fa74174d72adb0e872504a7fefd7c01 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Apr 2011 13:51:05 +0200 Subject: block: make unplug timer trace event correspond to the schedule() unplug It's a pretty close match to what we had before - the timer triggering would mean that nobody unplugged the plug in due time, in the new scheme this matches very closely what the schedule() unplug now is. It's essentially the difference between an explicit unplug (IO unplug) or an implicit unplug (timer unplug, we scheduled with pending IO queued). Signed-off-by: Jens Axboe --- block/blk-core.c | 18 ++++++++++++------ include/trace/events/block.h | 13 +++++++------ kernel/trace/blktrace.c | 18 ++++++++++++------ 3 files changed, 31 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 3c8121072507..78b7b0cb7216 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2662,17 +2662,23 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) return !(rqa->q <= rqb->q); } +/* + * If 'from_schedule' is true, then postpone the dispatch of requests + * until a safe kblockd context. We due this to avoid accidental big + * additional stack usage in driver dispatch, in places where the originally + * plugger did not intend it. + */ static void queue_unplugged(struct request_queue *q, unsigned int depth, - bool force_kblockd) + bool from_schedule) { - trace_block_unplug_io(q, depth); - __blk_run_queue(q, force_kblockd); + trace_block_unplug(q, depth, !from_schedule); + __blk_run_queue(q, from_schedule); if (q->unplugged_fn) q->unplugged_fn(q); } -void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) +void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; unsigned long flags; @@ -2707,7 +2713,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) BUG_ON(!rq->q); if (rq->q != q) { if (q) { - queue_unplugged(q, depth, force_kblockd); + queue_unplugged(q, depth, from_schedule); spin_unlock(q->queue_lock); } q = rq->q; @@ -2728,7 +2734,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) } if (q) { - queue_unplugged(q, depth, force_kblockd); + queue_unplugged(q, depth, from_schedule); spin_unlock(q->queue_lock); } diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 006e60b58306..bf366547da25 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -401,9 +401,9 @@ TRACE_EVENT(block_plug, DECLARE_EVENT_CLASS(block_unplug, - TP_PROTO(struct request_queue *q, unsigned int depth), + TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), - TP_ARGS(q, depth), + TP_ARGS(q, depth, explicit), TP_STRUCT__entry( __field( int, nr_rq ) @@ -419,18 +419,19 @@ DECLARE_EVENT_CLASS(block_unplug, ); /** - * block_unplug_io - release of operations requests in request queue + * block_unplug - release of operations requests in request queue * @q: request queue to unplug * @depth: number of requests just added to the queue + * @explicit: whether this was an explicit unplug, or one from schedule() * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ -DEFINE_EVENT(block_unplug, block_unplug_io, +DEFINE_EVENT(block_unplug, block_unplug, - TP_PROTO(struct request_queue *q, unsigned int depth), + TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), - TP_ARGS(q, depth) + TP_ARGS(q, depth, explicit) ); /** diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 3e3970d53d14..6957aa298dfa 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -850,16 +850,21 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); } -static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q, - unsigned int depth) +static void blk_add_trace_unplug(void *ignore, struct request_queue *q, + unsigned int depth, bool explicit) { struct blk_trace *bt = q->blk_trace; if (bt) { __be64 rpdu = cpu_to_be64(depth); + u32 what; - __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, - sizeof(rpdu), &rpdu); + if (explicit) + what = BLK_TA_UNPLUG_IO; + else + what = BLK_TA_UNPLUG_TIMER; + + __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); } } @@ -1002,7 +1007,7 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); - ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); + ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); WARN_ON(ret); ret = register_trace_block_split(blk_add_trace_split, NULL); WARN_ON(ret); @@ -1017,7 +1022,7 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); unregister_trace_block_split(blk_add_trace_split, NULL); - unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); + unregister_trace_block_unplug(blk_add_trace_unplug, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); @@ -1332,6 +1337,7 @@ static const struct { [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, + [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, -- cgit v1.2.3 From 048c9374a749a27f16493cea033fa4a8ff492356 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 09:52:22 +0200 Subject: block: Enhance new plugging support to support general callbacks md/raid requires an unplug callback, but as it does not uses requests the current code cannot provide one. So allow arbitrary callbacks to be attached to the blk_plug. Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-core.c | 20 ++++++++++++++++++++ include/linux/blkdev.h | 7 ++++++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 78b7b0cb7216..77edf0512338 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2638,6 +2638,7 @@ void blk_start_plug(struct blk_plug *plug) plug->magic = PLUG_MAGIC; INIT_LIST_HEAD(&plug->list); + INIT_LIST_HEAD(&plug->cb_list); plug->should_sort = 0; /* @@ -2678,6 +2679,24 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, q->unplugged_fn(q); } +static void flush_plug_callbacks(struct blk_plug *plug) +{ + LIST_HEAD(callbacks); + + if (list_empty(&plug->cb_list)) + return; + + list_splice_init(&plug->cb_list, &callbacks); + + while (!list_empty(&callbacks)) { + struct blk_plug_cb *cb = list_first_entry(&callbacks, + struct blk_plug_cb, + list); + list_del(&cb->list); + cb->callback(cb); + } +} + void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; @@ -2688,6 +2707,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(plug->magic != PLUG_MAGIC); + flush_plug_callbacks(plug); if (list_empty(&plug->list)) return; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ec0357d8c4a5..f3f7879391a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,8 +860,13 @@ extern void blk_put_queue(struct request_queue *); struct blk_plug { unsigned long magic; struct list_head list; + struct list_head cb_list; unsigned int should_sort; }; +struct blk_plug_cb { + struct list_head list; + void (*callback)(struct blk_plug_cb *); +}; extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); @@ -887,7 +892,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; - return plug && !list_empty(&plug->list); + return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); } /* -- cgit v1.2.3 From b4cb290e0a7d19235bd075c2ad4d60dbab0bac15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2011 09:54:05 +0200 Subject: Revert "block: add callback function for unplug notification" MD can't use this since it really requires us to be able to keep more than a single piece of state for the unplug. Commit 048c9374 added the required support for MD, so get rid of this now unused code. This reverts commit f75664570d8b75469cc468f23c2b27220984983b. Conflicts: block/blk-core.c Signed-off-by: Jens Axboe --- block/blk-core.c | 3 --- block/blk-settings.c | 16 ---------------- include/linux/blkdev.h | 3 --- 3 files changed, 22 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 77edf0512338..09b262811fff 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2674,9 +2674,6 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, { trace_block_unplug(q, depth, !from_schedule); __blk_run_queue(q, from_schedule); - - if (q->unplugged_fn) - q->unplugged_fn(q); } static void flush_plug_callbacks(struct blk_plug *plug) diff --git a/block/blk-settings.c b/block/blk-settings.c index eb949045bb12..1fa769293597 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,22 +790,6 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); -/** - * blk_queue_unplugged - register a callback for an unplug event - * @q: the request queue for the device - * @fn: the function to call - * - * Some stacked drivers may need to know when IO is dispatched on an - * unplug event. By registrering a callback here, they will be notified - * when someone flushes their on-stack queue plug. The function will be - * called with the queue lock held. - */ -void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn) -{ - q->unplugged_fn = fn; -} -EXPORT_SYMBOL(blk_queue_unplugged); - static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3f7879391a7..3448d89297e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); -typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -284,7 +283,6 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; - unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -843,7 +841,6 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); -extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3 From af1db72d8b340f97ad12b60175afdef43e6f0e60 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 18:25:41 +1000 Subject: md/dm - remove remains of plug_fn callback. Now that unplugging is done differently, the unplug_fn callback is never called, so it can be completely discarded. Signed-off-by: NeilBrown --- drivers/md/dm-raid.c | 8 -------- include/linux/device-mapper.h | 1 - 2 files changed, 9 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5ef136cdba91..e5d8904fc8f6 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -390,13 +390,6 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) return md_raid5_congested(&rs->md, bits); } -static void raid_unplug(struct dm_target_callbacks *cb) -{ - struct raid_set *rs = container_of(cb, struct raid_set, callbacks); - - md_raid5_kick_device(rs->md.private); -} - /* * Construct a RAID4/5/6 mapping: * Args: @@ -487,7 +480,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) } rs->callbacks.congested_fn = raid_is_congested; - rs->callbacks.unplug_fn = raid_unplug; dm_table_add_target_callbacks(ti->table, &rs->callbacks); return 0; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e2768834f397..32a4423710f5 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -197,7 +197,6 @@ struct dm_target { struct dm_target_callbacks { struct list_head list; int (*congested_fn) (struct dm_target_callbacks *, int); - void (*unplug_fn)(struct dm_target_callbacks *); }; int dm_register_target(struct target_type *t); -- cgit v1.2.3 From 1791f881435fab951939ad700e947b66c062e083 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 30 Mar 2011 15:24:21 +0200 Subject: posix clocks: Replace mutex with reader/writer semaphore A dynamic posix clock is protected from asynchronous removal by a mutex. However, using a mutex has the unwanted effect that a long running clock operation in one process will unnecessarily block other processes. For example, one process might call read() to get an external time stamp coming in at one pulse per second. A second process calling clock_gettime would have to wait for almost a whole second. This patch fixes the issue by using a reader/writer semaphore instead of a mutex. Signed-off-by: Richard Cochran Cc: John Stultz Link: http://lkml.kernel.org/r/%3C20110330132421.GA31771%40riccoc20.at.omicron.at%3E Signed-off-by: Thomas Gleixner --- include/linux/posix-clock.h | 5 +++-- kernel/time/posix-clock.c | 24 +++++++++--------------- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 369e19d3750b..7f1183dcd119 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -24,6 +24,7 @@ #include #include #include +#include struct posix_clock; @@ -104,7 +105,7 @@ struct posix_clock_operations { * @ops: Functional interface to the clock * @cdev: Character device instance for this clock * @kref: Reference count. - * @mutex: Protects the 'zombie' field from concurrent access. + * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. * @release: A function to free the structure when the reference count reaches * zero. May be NULL if structure is statically allocated. @@ -117,7 +118,7 @@ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; struct kref kref; - struct mutex mutex; + struct rw_semaphore rwsem; bool zombie; void (*release)(struct posix_clock *clk); }; diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 25028dd4fa18..c340ca658f37 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -19,7 +19,6 @@ */ #include #include -#include #include #include #include @@ -34,19 +33,19 @@ static struct posix_clock *get_posix_clock(struct file *fp) { struct posix_clock *clk = fp->private_data; - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (!clk->zombie) return clk; - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return NULL; } static void put_posix_clock(struct posix_clock *clk) { - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); } static ssize_t posix_clock_read(struct file *fp, char __user *buf, @@ -156,7 +155,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) struct posix_clock *clk = container_of(inode->i_cdev, struct posix_clock, cdev); - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (clk->zombie) { err = -ENODEV; @@ -172,7 +171,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) fp->private_data = clk; } out: - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return err; } @@ -211,25 +210,20 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid) int err; kref_init(&clk->kref); - mutex_init(&clk->mutex); + init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); clk->cdev.owner = clk->ops.owner; err = cdev_add(&clk->cdev, devid, 1); - if (err) - goto no_cdev; return err; -no_cdev: - mutex_destroy(&clk->mutex); - return err; } EXPORT_SYMBOL_GPL(posix_clock_register); static void delete_clock(struct kref *kref) { struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - mutex_destroy(&clk->mutex); + if (clk->release) clk->release(clk); } @@ -238,9 +232,9 @@ void posix_clock_unregister(struct posix_clock *clk) { cdev_del(&clk->cdev); - mutex_lock(&clk->mutex); + down_write(&clk->rwsem); clk->zombie = true; - mutex_unlock(&clk->mutex); + up_write(&clk->rwsem); kref_put(&clk->kref, delete_clock); } -- cgit v1.2.3 From 24ecfbe27f65563909b14492afda2f1c21f7c044 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Apr 2011 11:41:33 +0200 Subject: block: add blk_run_queue_async Instead of overloading __blk_run_queue to force an offload to kblockd add a new blk_run_queue_async helper to do it explicitly. I've kept the blk_queue_stopped check for now, but I suspect it's not needed as the check we do when the workqueue items runs should be enough. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 36 ++++++++++++++++++++++++------------ block/blk-exec.c | 2 +- block/blk-flush.c | 4 ++-- block/blk.h | 1 + block/cfq-iosched.c | 6 +++--- block/elevator.c | 4 ++-- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_transport_fc.c | 2 +- include/linux/blkdev.h | 2 +- 9 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index e2bacfa46cc3..5fa3dd2705c6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -204,7 +204,7 @@ static void blk_delay_work(struct work_struct *work) q = container_of(work, struct request_queue, delay_work.work); spin_lock_irq(q->queue_lock); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); } @@ -239,7 +239,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - __blk_run_queue(q, false); + __blk_run_queue(q); } EXPORT_SYMBOL(blk_start_queue); @@ -296,11 +296,9 @@ EXPORT_SYMBOL(blk_sync_queue); * * Description: * See @blk_run_queue. This variant must be called with the queue lock - * held and interrupts disabled. If force_kblockd is true, then it is - * safe to call this without holding the queue lock. - * + * held and interrupts disabled. */ -void __blk_run_queue(struct request_queue *q, bool force_kblockd) +void __blk_run_queue(struct request_queue *q) { if (unlikely(blk_queue_stopped(q))) return; @@ -309,7 +307,7 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd) * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { q->request_fn(q); queue_flag_clear(QUEUE_FLAG_REENTER, q); } else @@ -317,6 +315,20 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd) } EXPORT_SYMBOL(__blk_run_queue); +/** + * blk_run_queue_async - run a single device queue in workqueue context + * @q: The queue to run + * + * Description: + * Tells kblockd to perform the equivalent of @blk_run_queue on behalf + * of us. + */ +void blk_run_queue_async(struct request_queue *q) +{ + if (likely(!blk_queue_stopped(q))) + queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); +} + /** * blk_run_queue - run a single device queue * @q: The queue to run @@ -330,7 +342,7 @@ void blk_run_queue(struct request_queue *q) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -979,7 +991,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, blk_queue_end_tag(q, rq); add_acct_request(q, rq, where); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_insert_request); @@ -1323,7 +1335,7 @@ get_rq: } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); - __blk_run_queue(q, false); + __blk_run_queue(q); out_unlock: spin_unlock_irq(q->queue_lock); } @@ -2684,9 +2696,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, */ if (from_schedule) { spin_unlock(q->queue_lock); - __blk_run_queue(q, true); + blk_run_queue_async(q); } else { - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock(q->queue_lock); } diff --git a/block/blk-exec.c b/block/blk-exec.c index 7482b7fa863b..81e31819a597 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); __elv_add_request(q, rq, where); - __blk_run_queue(q, false); + __blk_run_queue(q); /* the queue is stopped so it won't be plugged+unplugged */ if (rq->cmd_type == REQ_TYPE_PM_RESUME) q->request_fn(q); diff --git a/block/blk-flush.c b/block/blk-flush.c index eba4a2790c6c..6c9b5e189e62 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -218,7 +218,7 @@ static void flush_end_io(struct request *flush_rq, int error) * request_fn may confuse the driver. Always use kblockd. */ if (queued) - __blk_run_queue(q, true); + blk_run_queue_async(q); } /** @@ -274,7 +274,7 @@ static void flush_data_end_io(struct request *rq, int error) * the comment in flush_end_io(). */ if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) - __blk_run_queue(q, true); + blk_run_queue_async(q); } /** diff --git a/block/blk.h b/block/blk.h index 61263463e38e..c9df8fc3c999 100644 --- a/block/blk.h +++ b/block/blk.h @@ -22,6 +22,7 @@ void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); void __generic_unplug_device(struct request_queue *); +void blk_run_queue_async(struct request_queue *q); /* * Internal atomic flags for request handling diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3be881ec95ad..46b0a1d1d925 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3368,7 +3368,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqd->busy_queues > 1) { cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); } else { cfq_blkiocg_update_idle_time_stats( &cfqq->cfqg->blkg); @@ -3383,7 +3383,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); } } @@ -3743,7 +3743,7 @@ static void cfq_kick_queue(struct work_struct *work) struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); spin_unlock_irq(q->queue_lock); } diff --git a/block/elevator.c b/block/elevator.c index 0cdb4e7ebab4..6f6abc08bb56 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -642,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q) */ elv_drain_elevator(q); while (q->rq.elvpriv) { - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); msleep(10); spin_lock_irq(q->queue_lock); @@ -695,7 +695,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) * with anything. There's no point in delaying queue * processing. */ - __blk_run_queue(q, false); + __blk_run_queue(q); break; case ELEVATOR_INSERT_SORT_MERGE: diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 6d5c7ff43f5b..ab55c2fa7ce2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q) &sdev->request_queue->queue_flags); if (flagset) queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); - __blk_run_queue(sdev->request_queue, false); + __blk_run_queue(sdev->request_queue); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); spin_unlock(sdev->request_queue->queue_lock); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index fdf3fa639056..28c33506e4ad 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport) !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); if (flagset) queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); - __blk_run_queue(rport->rqst_q, false); + __blk_run_queue(rport->rqst_q); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3448d89297e8..cbbfd98ad4a3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -697,7 +697,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *q, bool force_kblockd); +extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, -- cgit v1.2.3 From 80b4895aa4578e9372d76cd4063f82d0c3994d77 Mon Sep 17 00:00:00 2001 From: Jeff Brown Date: Mon, 18 Apr 2011 10:08:02 -0700 Subject: Input: estimate number of events per packet Calculate a default based on the number of ABS axes, REL axes, and MT slots for the device during input device registration. Signed-off-by: Jeff Brown Reviewed-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/input/mt.h | 6 ++++++ 2 files changed, 46 insertions(+) (limited to 'include') diff --git a/drivers/input/input.c b/drivers/input/input.c index d6e8bd8a851c..ebbceedc92f4 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1746,6 +1746,42 @@ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int } EXPORT_SYMBOL(input_set_capability); +static unsigned int input_estimate_events_per_packet(struct input_dev *dev) +{ + int mt_slots; + int i; + unsigned int events; + + if (dev->mtsize) { + mt_slots = dev->mtsize; + } else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) { + mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum - + dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1, + clamp(mt_slots, 2, 32); + } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { + mt_slots = 2; + } else { + mt_slots = 0; + } + + events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */ + + for (i = 0; i < ABS_CNT; i++) { + if (test_bit(i, dev->absbit)) { + if (input_is_mt_axis(i)) + events += mt_slots; + else + events++; + } + } + + for (i = 0; i < REL_CNT; i++) + if (test_bit(i, dev->relbit)) + events++; + + return events; +} + #define INPUT_CLEANSE_BITMASK(dev, type, bits) \ do { \ if (!test_bit(EV_##type, dev->evbit)) \ @@ -1793,6 +1829,10 @@ int input_register_device(struct input_dev *dev) /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); + if (!dev->hint_events_per_packet) + dev->hint_events_per_packet = + input_estimate_events_per_packet(dev); + /* * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index b3ac06a4435d..318bb82325a6 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -48,6 +48,12 @@ static inline void input_mt_slot(struct input_dev *dev, int slot) input_event(dev, EV_ABS, ABS_MT_SLOT, slot); } +static inline bool input_is_mt_axis(int axis) +{ + return axis == ABS_MT_SLOT || + (axis >= ABS_MT_FIRST && axis <= ABS_MT_LAST); +} + void input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); -- cgit v1.2.3 From c78193e9c7bcbf25b8237ad0dec82f805c4ea69b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 18 Apr 2011 10:35:30 -0700 Subject: next_pidmap: fix overflow condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit next_pidmap() just quietly accepted whatever 'last' pid that was passed in, which is not all that safe when one of the users is /proc. Admittedly the proc code should do some sanity checking on the range (and that will be the next commit), but that doesn't mean that the helper functions should just do that pidmap pointer arithmetic without checking the range of its arguments. So clamp 'last' to PID_MAX_LIMIT. The fact that we then do "last+1" doesn't really matter, the for-loop does check against the end of the pidmap array properly (it's only the actual pointer arithmetic overflow case we need to worry about, and going one bit beyond isn't going to overflow). [ Use PID_MAX_LIMIT rather than pid_max as per Eric Biederman ] Reported-by: Tavis Ormandy Analyzed-by: Robert Święcki Cc: Eric W. Biederman Cc: Pavel Emelyanov Signed-off-by: Linus Torvalds --- include/linux/pid.h | 2 +- kernel/pid.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index 31afb7ecbe1f..cdced84261d7 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -117,7 +117,7 @@ extern struct pid *find_vpid(int nr); */ extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); -int next_pidmap(struct pid_namespace *pid_ns, int last); +int next_pidmap(struct pid_namespace *pid_ns, unsigned int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); diff --git a/kernel/pid.c b/kernel/pid.c index 02f221274265..57a8346a270e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -217,11 +217,14 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) return -1; } -int next_pidmap(struct pid_namespace *pid_ns, int last) +int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) { int offset; struct pidmap *map, *end; + if (last >= PID_MAX_LIMIT) + return -1; + offset = (last + 1) & BITS_PER_PAGE_MASK; map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; end = &pid_ns->pidmap[PIDMAP_ENTRIES]; -- cgit v1.2.3 From c21e6beba8835d09bb80e34961430b13e60381c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Apr 2011 13:32:46 +0200 Subject: block: get rid of QUEUE_FLAG_REENTER We are currently using this flag to check whether it's safe to call into ->request_fn(). If it is set, we punt to kblockd. But we get a lot of false positives and excessive punts to kblockd, which hurts performance. The only real abuser of this infrastructure is SCSI. So export the async queue run and convert SCSI over to use that. There's room for improvement in that SCSI need not always use the async call, but this fixes our performance issue and they can fix that up in due time. Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++--------- block/blk.h | 1 - drivers/scsi/scsi_lib.c | 17 +---------------- drivers/scsi/scsi_transport_fc.c | 19 ++++--------------- include/linux/blkdev.h | 26 +++++++++++++------------- 5 files changed, 20 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 580eee5743e5..40725b9091f1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -303,15 +303,7 @@ void __blk_run_queue(struct request_queue *q) if (unlikely(blk_queue_stopped(q))) return; - /* - * Only recurse once to avoid overrunning the stack, let the unplug - * handling reinvoke the handler shortly if we already got there. - */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else - queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); + q->request_fn(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -328,6 +320,7 @@ void blk_run_queue_async(struct request_queue *q) if (likely(!blk_queue_stopped(q))) queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); } +EXPORT_SYMBOL(blk_run_queue_async); /** * blk_run_queue - run a single device queue diff --git a/block/blk.h b/block/blk.h index c9df8fc3c999..61263463e38e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -22,7 +22,6 @@ void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); void __generic_unplug_device(struct request_queue *); -void blk_run_queue_async(struct request_queue *q); /* * Internal atomic flags for request handling diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ab55c2fa7ce2..e9901b8f8443 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -411,8 +411,6 @@ static void scsi_run_queue(struct request_queue *q) list_splice_init(&shost->starved_list, &starved_list); while (!list_empty(&starved_list)) { - int flagset; - /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. scsi_request_fn @@ -435,20 +433,7 @@ static void scsi_run_queue(struct request_queue *q) continue; } - spin_unlock(shost->host_lock); - - spin_lock(sdev->request_queue->queue_lock); - flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && - !test_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags); - if (flagset) - queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); - __blk_run_queue(sdev->request_queue); - if (flagset) - queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); - spin_unlock(sdev->request_queue->queue_lock); - - spin_lock(shost->host_lock); + blk_run_queue_async(sdev->request_queue); } /* put any unprocessed entries back */ list_splice(&starved_list, &shost->starved_list); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 28c33506e4ad..815069d13f9b 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3816,28 +3816,17 @@ fail_host_msg: static void fc_bsg_goose_queue(struct fc_rport *rport) { - int flagset; - unsigned long flags; - if (!rport->rqst_q) return; + /* + * This get/put dance makes no sense + */ get_device(&rport->dev); - - spin_lock_irqsave(rport->rqst_q->queue_lock, flags); - flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) && - !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); - if (flagset) - queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); - __blk_run_queue(rport->rqst_q); - if (flagset) - queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); - spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); - + blk_run_queue_async(rport->rqst_q); put_device(&rport->dev); } - /** * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD * @q: rport request queue diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cbbfd98ad4a3..2ad95fa1d130 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -388,20 +388,19 @@ struct request_queue #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ -#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ -#define QUEUE_FLAG_ELVSWITCH 7 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_BIDI 8 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 9 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 10 /* force complete on same CPU */ -#define QUEUE_FLAG_FAIL_IO 11 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 12 /* supports request stacking */ -#define QUEUE_FLAG_NONROT 13 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ -#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ -#define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ -#define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ +#define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ +#define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ +#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -699,6 +698,7 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); +extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3