From e174961ca1a0b28f7abf0be47973ad57cb74e5f0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Oct 2008 15:59:26 -0700 Subject: net: convert print_mac to %pM This converts pretty much everything to print_mac. There were a few things that had conflicts which I have just dropped for now, no harm done. I've built an allyesconfig with this and looked at the files that weren't built very carefully, but it's a huge patch. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/core/netpoll.c | 5 ++--- net/core/pktgen.c | 9 ++++----- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6c7af390be0a..34f5d072f168 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -553,7 +553,6 @@ out: void netpoll_print_options(struct netpoll *np) { - DECLARE_MAC_BUF(mac); printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", @@ -564,8 +563,8 @@ void netpoll_print_options(struct netpoll *np) np->name, np->remote_port); printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", np->name, HIPQUAD(np->remote_ip)); - printk(KERN_INFO "%s: remote ethernet address %s\n", - np->name, print_mac(mac, np->remote_mac)); + printk(KERN_INFO "%s: remote ethernet address %pM\n", + np->name, np->remote_mac); } int netpoll_parse_options(struct netpoll *np, char *opt) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 99f656d35b4f..18dd83c2ead0 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -509,7 +509,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v) __u64 sa; __u64 stopped; __u64 now = getCurUs(); - DECLARE_MAC_BUF(mac); seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", @@ -554,12 +553,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " src_mac: "); - seq_printf(seq, "%s ", - print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ? 
- pkt_dev->odev->dev_addr : pkt_dev->src_mac)); + seq_printf(seq, "%pM ", + is_zero_ether_addr(pkt_dev->src_mac) ? + pkt_dev->odev->dev_addr : pkt_dev->src_mac); seq_printf(seq, "dst_mac: "); - seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac)); + seq_printf(seq, "%pM\n", pkt_dev->dst_mac); seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", -- cgit v1.2.3 From 3891845e1ef6e6807075d4241966b26f6ecb0a5c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 27 Oct 2008 17:51:47 -0700 Subject: netns: Coexist with the sysfs limitations v2 To make testing of the network namespace simpler allow the network namespace code and the sysfs code to be compiled and run at the same time. To do this only virtual devices are allowed in the additional network namespaces and those virtual devices are not placed in the kobject tree. Since virtual devices don't actually do anything interesting hardware wise that needs device management there should be no loss in keeping them out of the kobject tree and by implication sysfs. The gain in ease of testing and code coverage should be significant. Changelog: v2: As pointed out by Benjamin Thery it only makes sense to call device_rename in the initial network namespace for now. Signed-off-by: Eric W. Biederman Acked-by: Benjamin Thery Tested-by: Serge Hallyn Acked-by: Serge Hallyn Acked-by: Daniel Lezcano Signed-off-by: David S. 
Miller --- net/Kconfig | 2 +- net/core/dev.c | 25 ++++++++++++++++++++----- net/core/net-sysfs.c | 7 +++++++ 3 files changed, 28 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/net/Kconfig b/net/Kconfig index d789d79551ae..8c3d97ca0d96 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -27,7 +27,7 @@ menu "Networking options" config NET_NS bool "Network namespace support" default n - depends on EXPERIMENTAL && !SYSFS && NAMESPACES + depends on EXPERIMENTAL && NAMESPACES help Allow user space to create what appear to be multiple instances of the network stack. diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc1..3a2b8be9e67b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -924,10 +924,15 @@ int dev_change_name(struct net_device *dev, const char *newname) strlcpy(dev->name, newname, IFNAMSIZ); rollback: - ret = device_rename(&dev->dev, dev->name); - if (ret) { - memcpy(dev->name, oldname, IFNAMSIZ); - return ret; + /* For now only devices in the initial network namespace + * are in sysfs. + */ + if (net == &init_net) { + ret = device_rename(&dev->dev, dev->name); + if (ret) { + memcpy(dev->name, oldname, IFNAMSIZ); + return ret; + } } write_lock_bh(&dev_base_lock); @@ -4460,6 +4465,15 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char if (dev->features & NETIF_F_NETNS_LOCAL) goto out; +#ifdef CONFIG_SYSFS + /* Don't allow real devices to be moved when sysfs + * is enabled. 
+ */ + err = -EINVAL; + if (dev->dev.parent) + goto out; +#endif + /* Ensure the device has been registrered */ err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) @@ -4517,6 +4531,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ dev_addr_discard(dev); + netdev_unregister_kobject(dev); + /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -4533,7 +4549,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char } /* Fixup kobjects */ - netdev_unregister_kobject(dev); err = netdev_register_kobject(dev); WARN_ON(err); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 92d6b9467314..85cb8bdcfb8f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -476,6 +476,10 @@ void netdev_unregister_kobject(struct net_device * net) struct device *dev = &(net->dev); kobject_get(&dev->kobj); + + if (dev_net(net) != &init_net) + return; + device_del(dev); } @@ -501,6 +505,9 @@ int netdev_register_kobject(struct net_device *net) #endif #endif /* CONFIG_SYSFS */ + if (dev_net(net) != &init_net) + return 0; + return device_add(dev); } -- cgit v1.2.3 From def8b4faff5ca349beafbbfeb2c51f3602a6ef3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Oct 2008 13:24:06 -0700 Subject: net: reduce structures when XFRM=n ifdef out * struct sk_buff::sp (pointer) * struct dst_entry::xfrm (pointer) * struct sock::sk_policy (2 pointers) Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 15 ++++++++++++++- include/net/dst.h | 3 ++- include/net/sock.h | 2 ++ include/net/xfrm.h | 4 ++++ net/core/skbuff.c | 2 +- net/ipv4/icmp.c | 3 ++- net/ipv4/ip_forward.c | 2 +- net/ipv4/route.c | 2 ++ net/ipv6/icmp.c | 3 ++- net/ipv6/ip6_output.c | 2 +- security/selinux/hooks.c | 4 ++-- 11 files changed, 33 insertions(+), 9 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2725f4e5a9bf..487e34507b41 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -269,8 +269,9 @@ struct sk_buff { struct dst_entry *dst; struct rtable *rtable; }; +#ifdef CONFIG_XFRM struct sec_path *sp; - +#endif /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you @@ -1864,6 +1865,18 @@ static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_bu to->queue_mapping = from->queue_mapping; } +#ifdef CONFIG_XFRM +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return skb->sp; +} +#else +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return NULL; +} +#endif + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/include/net/dst.h b/include/net/dst.h index 8a8b71e5f3f1..f96c4ba4dd32 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -59,8 +59,9 @@ struct dst_entry struct neighbour *neighbour; struct hh_cache *hh; +#ifdef CONFIG_XFRM struct xfrm_state *xfrm; - +#endif int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); diff --git a/include/net/sock.h b/include/net/sock.h index ada50c04d09f..d6b750a25078 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -229,7 +229,9 @@ struct sock { } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; +#ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; +#endif rwlock_t sk_dst_lock; atomic_t sk_rmem_alloc; atomic_t sk_wmem_alloc; diff 
--git a/include/net/xfrm.h b/include/net/xfrm.h index 11c890ad8ebb..f2c5ba28a428 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -882,6 +882,7 @@ struct xfrm_dst u32 path_cookie; }; +#ifdef CONFIG_XFRM static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) { dst_release(xdst->route); @@ -894,6 +895,7 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) xdst->partner = NULL; #endif } +#endif extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); @@ -1536,9 +1538,11 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n) } #endif +#ifdef CONFIG_XFRM static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +#endif #endif /* _NET_XFRM_H */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e22e3a35359..cdfe473181af 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -489,7 +489,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET +#ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif memcpy(new->cb, old->cb, sizeof(old->cb)); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 72b2de76f1cd..e9d6ea0b49ca 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -976,9 +976,10 @@ int icmp_rcv(struct sk_buff *skb) struct net *net = dev_net(rt->u.dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 450016b89a18..df3fe50bbf0d 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -106,7 +106,7 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we 
calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp) + if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 21ce7e1b2284..ffb2c5705432 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1399,7 +1399,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.path = &rt->u.dst; rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; +#ifdef CONFIG_XFRM rt->u.dst.xfrm = NULL; +#endif rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9b7d19ae5ced..508a713ac045 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -646,9 +646,10 @@ static int icmpv6_rcv(struct sk_buff *skb) int type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop_no_count; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c77db0b95e26..7d92fd97cfb9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -490,7 +490,7 @@ int ip6_forward(struct sk_buff *skb) We don't send redirects to frames decapsulated from IPsec. */ if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb->sp) { + !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3e3fde7c1d2b..aedf02b1345a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4626,7 +4626,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, * as fast and as clean as possible. 
*/ if (selinux_compat_net || !selinux_policycap_netpeer) return selinux_ip_postroute_compat(skb, ifindex, family); - +#ifdef CONFIG_XFRM /* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec * packet transformation so allow the packet to pass without any checks * since we'll have another chance to perform access control checks @@ -4635,7 +4635,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, * is NULL, in this case go ahead and apply access control. */ if (skb->dst != NULL && skb->dst->xfrm != NULL) return NF_ACCEPT; - +#endif secmark_active = selinux_secmark_enabled(); peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); if (!secmark_active && !peerlbl_active) -- cgit v1.2.3 From 93adcc80f3288f1827baf6f821af818f6eeef7f9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Oct 2008 13:25:09 -0700 Subject: net: don't use INIT_RCU_HEAD call_rcu() will unconditionally rewrite RCU head anyway. Applies to struct neigh_parms struct neigh_table struct net struct cipso_v4_doi struct in_ifaddr struct in_device rt->u.dst Signed-off-by: Alexey Dobriyan Acked-by: Paul E. McKenney Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 2 -- net/core/net_namespace.c | 2 -- net/ipv4/cipso_ipv4.c | 1 - net/ipv4/devinet.c | 9 +-------- net/ipv4/route.c | 1 - 5 files changed, 1 insertion(+), 14 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1dc728b38589..b337a937ea52 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1340,7 +1340,6 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); - INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); @@ -1412,7 +1411,6 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->parms.net = &init_net; #endif atomic_set(&tbl->parms.refcnt, 1); - INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f1d07b5c1e17..861b4cbf40db 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -47,7 +47,6 @@ static __net_init int setup_net(struct net *net) goto out; ng->len = INITIAL_NET_GEN_PTRS; - INIT_RCU_HEAD(&ng->rcu); rcu_assign_pointer(net->gen, ng); error = 0; @@ -446,7 +445,6 @@ int net_assign_generic(struct net *net, int id, void *data) */ ng->len = id; - INIT_RCU_HEAD(&ng->rcu); memcpy(&ng->ptr, &old_ng->ptr, old_ng->len); rcu_assign_pointer(net->gen, ng); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 490e035c6d90..4bcec7f77251 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -490,7 +490,6 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) } atomic_set(&doi_def->refcount, 1); - INIT_RCU_HEAD(&doi_def->rcu); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi) != NULL) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 56fce3ab6c55..0bff576d2918 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -112,13 +112,7 @@ static inline void 
devinet_sysctl_unregister(struct in_device *idev) static struct in_ifaddr *inet_alloc_ifa(void) { - struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL); - - if (ifa) { - INIT_RCU_HEAD(&ifa->rcu_head); - } - - return ifa; + return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); } static void inet_rcu_free_ifa(struct rcu_head *head) @@ -161,7 +155,6 @@ static struct in_device *inetdev_init(struct net_device *dev) in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); if (!in_dev) goto out; - INIT_RCU_HEAD(&in_dev->rcu_head); memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ffb2c5705432..e59b4dcf6778 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1386,7 +1386,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, /* Copy all the information. */ *rt = *rth; - INIT_RCU_HEAD(&rt->u.dst.rcu_head); rt->u.dst.__use = 1; atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.child = NULL; -- cgit v1.2.3 From 271b72c7fa82c2c7a795bc16896149933110672d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 02:11:14 -0700 Subject: udp: RCU handling for Unicast packets. Goals are : 1) Optimizing handling of incoming Unicast UDP frames, so that no memory writes should happen in the fast path. Note: Multicasts and broadcasts still will need to take a lock, because doing a full lockless lookup in this case is difficult. 2) No expensive operations in the socket bind/unhash phases : - No expensive synchronize_rcu() calls. - No added rcu_head in socket structure, increasing memory needs, but more important, forcing us to use call_rcu() calls, that have the bad property of making sockets structure cold. (rcu grace period between socket freeing and its potential reuse make this socket being cold in CPU cache). David did a previous patch using call_rcu() and noticed a 20% impact on TCP connection rates. Quoting Cristopher Lameter : "Right. 
That results in cacheline cooldown. You'd want to recycle the object as they are cache hot on a per cpu basis. That is screwed up by the delayed regular rcu processing. We have seen multiple regressions due to cacheline cooldown. The only choice in cacheline hot sensitive areas is to deal with the complexity that comes with SLAB_DESTROY_BY_RCU or give up on RCU." - Because udp sockets are allocated from dedicated kmem_cache, use of SLAB_DESTROY_BY_RCU can help here. Theory of operation : --------------------- As the lookup is lockfree (using rcu_read_lock()/rcu_read_unlock()), special attention must be taken by readers and writers. Use of SLAB_DESTROY_BY_RCU is tricky too, because a socket can be freed, reused, inserted in a different chain or in worst case in the same chain while readers could do lookups at the same time. In order to avoid loops, a reader must check each socket found in a chain really belongs to the chain the reader was traversing. If it finds a mismatch, lookup must start again at the beginning. This *restart* loop is the reason we had to use rdlock for the multicast case, because we don't want to send the same message several times to the same socket. We use RCU only for fast path. Thus, /proc/net/udp still takes spinlocks. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 37 ++++++++++++++++++++++++++++++++++++- net/core/sock.c | 3 ++- net/ipv4/udp.c | 35 ++++++++++++++++++++++++++--------- net/ipv4/udplite.c | 1 + net/ipv6/udp.c | 31 ++++++++++++++++++++++++------- net/ipv6/udplite.c | 1 + 6 files changed, 90 insertions(+), 18 deletions(-) (limited to 'net/core') diff --git a/include/net/sock.h b/include/net/sock.h index d200dfbe1ef6..0bea25db5471 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -363,6 +363,27 @@ static __inline__ int sk_del_node_init(struct sock *sk) return rc; } +static __inline__ int __sk_del_node_init_rcu(struct sock *sk) +{ + if (sk_hashed(sk)) { + hlist_del_init_rcu(&sk->sk_node); + return 1; + } + return 0; +} + +static __inline__ int sk_del_node_init_rcu(struct sock *sk) +{ + int rc = __sk_del_node_init_rcu(sk); + + if (rc) { + /* paranoid for a while -acme */ + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); + } + return rc; +} + static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); @@ -374,6 +395,17 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } +static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head_rcu(&sk->sk_node, list); +} + +static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + __sk_add_node_rcu(sk, list); +} + static __inline__ void __sk_del_bind_node(struct sock *sk) { __hlist_del(&sk->sk_bind_node); @@ -387,6 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) +#define sk_for_each_rcu(__sk, node, list) \ + hlist_for_each_entry_rcu(__sk, node, list, sk_node) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) @@ -589,8 +623,9 @@ struct proto { 
int *sysctl_rmem; int max_header; - struct kmem_cache *slab; + struct kmem_cache *slab; unsigned int obj_size; + int slab_flags; atomic_t *orphan_count; diff --git a/net/core/sock.c b/net/core/sock.c index 5e2a3132a8c9..ded1eb5d2fd4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2042,7 +2042,8 @@ int proto_register(struct proto *prot, int alloc_slab) if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | prot->slab_flags, + NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2a6c491f97d7..0ea974bf7962 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -187,7 +187,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - sk_add_node(sk, &hslot->head); + sk_add_node_rcu(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -253,15 +253,24 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; + struct sock *sk, *result; struct hlist_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; - int score, badness = -1; + int score, badness; - spin_lock(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + sk_for_each_rcu(sk, node, &hslot->head) { + /* + * lockless reader, and SLAB_DESTROY_BY_RCU items: + * We must check this item was not moved to another chain + */ + if (udp_hashfn(net, sk->sk_hash) != hash) + goto begin; score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { @@ -269,9 +278,16 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, badness = score; } 
} - if (result) - sock_hold(result); - spin_unlock(&hslot->lock); + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, saddr, hnum, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } @@ -953,7 +969,7 @@ void udp_lib_unhash(struct sock *sk) struct udp_hslot *hslot = &udptable->hash[hash]; spin_lock(&hslot->lock); - if (sk_del_node_init(sk)) { + if (sk_del_node_init_rcu(sk)) { inet_sk(sk)->num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } @@ -1517,6 +1533,7 @@ struct proto udp_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index d8ea8e5f5ea3..c784891cb7e5 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -51,6 +51,7 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ccee7244ca0f..1d9790e43dfc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -97,24 +97,40 @@ static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *daddr, __be16 dport, int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; + struct sock *sk, *result; struct hlist_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; - int score, badness = -1; + int score, badness; - spin_lock(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + 
sk_for_each_rcu(sk, node, &hslot->head) { + /* + * lockless reader, and SLAB_DESTROY_BY_RCU items: + * We must check this item was not moved to another chain + */ + if (udp_hashfn(net, sk->sk_hash) != hash) + goto begin; score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } - if (result) - sock_hold(result); - spin_unlock(&hslot->lock); + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, saddr, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } @@ -1062,6 +1078,7 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f1e892a99e05..ba162a824585 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -49,6 +49,7 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, -- cgit v1.2.3 From 8b30b1fe368ab03049435884c11c5c50e4c4ef0b Mon Sep 17 00:00:00 2001 From: Sujith Date: Fri, 24 Oct 2008 09:55:27 +0530 Subject: mac80211: Re-enable aggregation Wireless HW without any dedicated queues for aggregation do not need the ampdu_queues mechanism present right now in mac80211. Since mac80211 is still incomplete wrt TX MQ changes, do not allow aggregation sessions for drivers that set ampdu_queues. This is only an interim hack until Intel fixes the requeue issue. Signed-off-by: Sujith Signed-off-by: Luis Rodriguez Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath9k/main.c | 6 ++-- drivers/net/wireless/iwlwifi/iwl-core.c | 3 +- include/linux/skbuff.h | 4 +++ include/net/mac80211.h | 8 ++--- net/core/skbuff.c | 1 + net/mac80211/ht.c | 60 +++++++++++++++++++-------------- net/mac80211/main.c | 7 ++-- net/mac80211/rx.c | 7 ++-- net/mac80211/tx.c | 19 ++++++++--- net/mac80211/wme.c | 24 ++++++------- 10 files changed, 76 insertions(+), 63 deletions(-) (limited to 'net/core') diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 795fed5cadfa..f6dc4c826044 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -953,10 +953,7 @@ static int ath_attach(u16 devid, &sc->sbands[IEEE80211_BAND_5GHZ]; } - /* FIXME: Have to figure out proper hw init values later */ - hw->queues = 4; - hw->ampdu_queues = 1; /* Register rate control */ hw->rate_control_algorithm = "ath9k_rate_control"; @@ -1745,7 +1742,8 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_NOISE_DBM; + IEEE80211_HW_NOISE_DBM | + IEEE80211_HW_AMPDU_AGGREGATION; hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 20c7ff382914..ba05f5ddc6d0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -871,7 +871,8 @@ int iwl_setup_mac(struct iwl_priv *priv) /* Tell mac80211 our characteristics */ hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_NOISE_DBM; + IEEE80211_HW_NOISE_DBM | + IEEE80211_HW_AMPDU_AGGREGATION; hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_STATION) | diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 487e34507b41..a01b6f84e3bc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -250,6 +250,9 @@ 
typedef unsigned char *sk_buff_data_t; * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) * @do_not_encrypt: set to prevent encryption of this frame + * @requeue: set to indicate that the wireless core should attempt + * a software retry on this frame if we failed to + * receive an ACK for it * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -326,6 +329,7 @@ struct sk_buff { #endif #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) __u8 do_not_encrypt:1; + __u8 requeue:1; #endif /* 0/13/14 bit hole */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 16c895969e6e..bba96a203885 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -242,7 +242,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be * set by rate control algorithms to indicate probe rate, will * be cleared for fragmented frames (except on the last fragment) - * @IEEE80211_TX_CTL_REQUEUE: REMOVE THIS */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -258,9 +257,6 @@ enum mac80211_tx_control_flags { IEEE80211_TX_STAT_AMPDU = BIT(10), IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11), IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), - - /* XXX: remove this */ - IEEE80211_TX_CTL_REQUEUE = BIT(13), }; enum mac80211_rate_control_flags { @@ -847,6 +843,9 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_SPECTRUM_MGMT: * Hardware supports spectrum management defined in 802.11h * Measurement, Channel Switch, Quieting, TPC + * + * @IEEE80211_HW_AMPDU_AGGREGATION: + * Hardware supports 11n A-MPDU aggregation. 
*/ enum ieee80211_hw_flags { IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, @@ -858,6 +857,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SIGNAL_DBM = 1<<7, IEEE80211_HW_NOISE_DBM = 1<<8, IEEE80211_HW_SPECTRUM_MGMT = 1<<9, + IEEE80211_HW_AMPDU_AGGREGATION = 1<<10, }; /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cdfe473181af..c4c8a33f3418 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -544,6 +544,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(truesize); #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) C(do_not_encrypt); + C(requeue); #endif atomic_set(&n->users, 1); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 42c3e590df98..08009d4b7d6e 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -458,7 +458,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) u8 *state; int ret; - if (tid >= STA_TID_NUM) + if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; #ifdef CONFIG_MAC80211_HT_DEBUG @@ -515,17 +515,19 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) (unsigned long)&sta->timer_to_tid[tid]; init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - /* create a new queue for this aggregation */ - ret = ieee80211_ht_agg_queue_add(local, sta, tid); + if (hw->ampdu_queues) { + /* create a new queue for this aggregation */ + ret = ieee80211_ht_agg_queue_add(local, sta, tid); - /* case no queue is available to aggregation - * don't switch to aggregation */ - if (ret) { + /* case no queue is available to aggregation + * don't switch to aggregation */ + if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - queue unavailable for" - " tid %d\n", tid); + printk(KERN_DEBUG "BA request denied - " + "queue unavailable for tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - goto err_unlock_queue; + goto err_unlock_queue; + } } sdata = sta->sdata; @@ -544,7 +546,8 @@ int 
ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) /* No need to requeue the packets in the agg queue, since we * held the tx lock: no packet could be enqueued to the newly * allocated queue */ - ieee80211_ht_agg_queue_remove(local, sta, tid, 0); + if (hw->ampdu_queues) + ieee80211_ht_agg_queue_remove(local, sta, tid, 0); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" " tid %d\n", tid); @@ -554,7 +557,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) } /* Will put all the packets in the new SW queue */ - ieee80211_requeue(local, ieee802_1d_to_ac[tid]); + if (hw->ampdu_queues) + ieee80211_requeue(local, ieee802_1d_to_ac[tid]); spin_unlock_bh(&sta->lock); /* send an addBA request */ @@ -622,7 +626,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); *state = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); @@ -635,7 +640,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, if (ret) { WARN_ON(ret != -EBUSY); *state = HT_AGG_STATE_OPERATIONAL; - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); goto stop_BA_exit; } @@ -691,7 +697,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); #endif - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); } spin_unlock_bh(&sta->lock); rcu_read_unlock(); @@ -745,16 +752,18 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - agg_queue = 
sta->tid_to_tx_q[tid]; - - ieee80211_ht_agg_queue_remove(local, sta, tid, 1); - - /* We just requeued the all the frames that were in the - * removed queue, and since we might miss a softirq we do - * netif_schedule_queue. ieee80211_wake_queue is not used - * here as this queue is not necessarily stopped - */ - netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue)); + if (hw->ampdu_queues) { + agg_queue = sta->tid_to_tx_q[tid]; + ieee80211_ht_agg_queue_remove(local, sta, tid, 1); + + /* We just requeued the all the frames that were in the + * removed queue, and since we might miss a softirq we do + * netif_schedule_queue. ieee80211_wake_queue is not used + * here as this queue is not necessarily stopped + */ + netif_schedule_queue(netdev_get_tx_queue(local->mdev, + agg_queue)); + } spin_lock_bh(&sta->lock); *state = HT_AGG_STATE_IDLE; sta->ampdu_mlme.addba_req_num[tid] = 0; @@ -1011,7 +1020,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, *state |= HT_ADDBA_RECEIVED_MSK; sta->ampdu_mlme.addba_req_num[tid] = 0; - if (*state == HT_AGG_STATE_OPERATIONAL) + if (*state == HT_AGG_STATE_OPERATIONAL && + local->hw.ampdu_queues) ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); spin_unlock_bh(&sta->lock); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 88c1975a97a5..fa0cc7a1e6b4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -386,8 +386,6 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - sta->tx_filtered_count++; /* @@ -434,10 +432,9 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, return; } - if (!test_sta_flags(sta, WLAN_STA_PS) && - !(info->flags & IEEE80211_TX_CTL_REQUEUE)) { + if (!test_sta_flags(sta, WLAN_STA_PS) && !skb->requeue) { /* Software retry the packet once */ - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; 
ieee80211_remove_tx_extra(local, sta->key, skb); dev_queue_xmit(skb); return; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c4c95f1db605..648a1d0e6c82 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -669,7 +669,6 @@ static int ap_sta_ps_end(struct sta_info *sta) struct ieee80211_local *local = sdata->local; struct sk_buff *skb; int sent = 0; - struct ieee80211_tx_info *info; atomic_dec(&sdata->bss->num_sta_ps); @@ -685,13 +684,11 @@ static int ap_sta_ps_end(struct sta_info *sta) /* Send all buffered frames to the station */ while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - info = IEEE80211_SKB_CB(skb); sent++; - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; dev_queue_xmit(skb); } while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - info = IEEE80211_SKB_CB(skb); local->total_ps_buffered--; sent++; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG @@ -699,7 +696,7 @@ static int ap_sta_ps_end(struct sta_info *sta) "since STA not sleeping anymore\n", sdata->dev->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; dev_queue_xmit(skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 541e3e64493d..d6392af9cd20 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -661,6 +661,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; size_t hdrlen, per_fragm, num_fragm, payload_len, left; struct sk_buff **frags, *first, *frag; @@ -677,9 +678,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) * This scenario is handled in __ieee80211_tx_prepare but extra * caution taken here as fragmented ampdu may cause Tx stop. 
*/ - if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU || - skb_get_queue_mapping(tx->skb) >= - ieee80211_num_regular_queues(&tx->local->hw))) + if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) return TX_DROP; first = tx->skb; @@ -951,7 +950,8 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int hdrlen; + int hdrlen, tid; + u8 *qc, *state; memset(tx, 0, sizeof(*tx)); tx->skb = skb; @@ -982,6 +982,15 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->sta = sta_info_get(local, hdr->addr1); + if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { + qc = ieee80211_get_qos_ctl(hdr); + tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + + state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; + if (*state == HT_AGG_STATE_OPERATIONAL) + info->flags |= IEEE80211_TX_CTL_AMPDU; + } + if (is_multicast_ether_addr(hdr->addr1)) { tx->flags &= ~IEEE80211_TX_UNICAST; info->flags |= IEEE80211_TX_CTL_NO_ACK; @@ -1172,7 +1181,7 @@ retry: * queues, there's no reason for a driver to reject * a frame there, warn and drop it. 
*/ - if (WARN_ON(queue >= ieee80211_num_regular_queues(&local->hw))) + if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) goto drop; store = &local->pending_packet[queue]; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index d27ef7f2d4a7..ac71b38f7cb5 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -114,8 +114,8 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_master_priv *mpriv = netdev_priv(dev); struct ieee80211_local *local = mpriv->local; + struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct sta_info *sta; u16 queue; u8 tid; @@ -124,21 +124,19 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) if (unlikely(queue >= local->hw.queues)) queue = local->hw.queues - 1; - if (info->flags & IEEE80211_TX_CTL_REQUEUE) { + if (skb->requeue) { + if (!hw->ampdu_queues) + return queue; + rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; if (sta) { - struct ieee80211_hw *hw = &local->hw; int ampdu_queue = sta->tid_to_tx_q[tid]; if ((ampdu_queue < ieee80211_num_queues(hw)) && - test_bit(ampdu_queue, local->queue_pool)) { + test_bit(ampdu_queue, local->queue_pool)) queue = ampdu_queue; - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else { - info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } } rcu_read_unlock(); @@ -159,20 +157,18 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) *p++ = ack_policy | tid; *p = 0; + if (!hw->ampdu_queues) + return queue; + rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); if (sta) { int ampdu_queue = sta->tid_to_tx_q[tid]; - struct ieee80211_hw *hw = &local->hw; if ((ampdu_queue < ieee80211_num_queues(hw)) && - test_bit(ampdu_queue, local->queue_pool)) { + test_bit(ampdu_queue, local->queue_pool)) queue = ampdu_queue; - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else { 
- info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } } rcu_read_unlock(); -- cgit v1.2.3 From 24f8b2385e03a4f4c8dac513d03b5eaa475822b9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 3 Nov 2008 17:14:38 -0800 Subject: net: increase receive packet quantum This patch gets about 1.25% back on tbench regression. My change to NAPI for multiqueue support changed the time limit on network receive processing. Under sustained loads like tbench, this can cause the receiver to reschedule prematurely. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 3a2b8be9e67b..8f9d3b38a44b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2373,7 +2373,7 @@ EXPORT_SYMBOL(__napi_schedule); static void net_rx_action(struct softirq_action *h) { struct list_head *list = &__get_cpu_var(softnet_data).poll_list; - unsigned long start_time = jiffies; + unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; @@ -2384,13 +2384,10 @@ static void net_rx_action(struct softirq_action *h) int work, weight; /* If softirq window is exhuasted then punt. - * - * Note that this is a slight policy change from the - * previous NAPI code, which would allow up to 2 - * jiffies to pass before breaking out. The test - * used to be "jiffies - start_time > 1". + * Allow this to run for 2 jiffies since which will allow + * an average latency of 1.5/HZ. 
*/ - if (unlikely(budget <= 0 || jiffies != start_time)) + if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) goto softnet_break; local_irq_enable(); -- cgit v1.2.3 From 6d9f239a1edb31d6133230f478fd1dc2da338ec5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 3 Nov 2008 18:21:05 -0800 Subject: net: '&' redux I want to compile out proc_* and sysctl_* handlers totally and stub them to NULL depending on config options, however usage of & will prevent this, since taking address of NULL pointer will break compilation. So, drop & in front of every ->proc_handler and every ->strategy handler, it was never needed in fact. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/802/tr.c | 2 +- net/appletalk/sysctl_net_atalk.c | 14 +- net/ax25/sysctl_net_ax25.c | 56 ++++---- net/bridge/br_netfilter.c | 10 +- net/core/neighbour.c | 48 +++---- net/core/sysctl_net_core.c | 34 ++--- net/decnet/sysctl_net_decnet.c | 44 +++--- net/ipv4/ip_fragment.c | 14 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 12 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 4 +- net/ipv4/route.c | 48 +++---- net/ipv4/sysctl_net_ipv4.c | 178 ++++++++++++------------- net/ipv6/addrconf.c | 62 ++++----- net/ipv6/icmp.c | 4 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +- net/ipv6/reassembly.c | 12 +- net/ipv6/route.c | 34 ++--- net/ipv6/sysctl_net_ipv6.c | 4 +- net/ipx/sysctl_net_ipx.c | 2 +- net/irda/irsysctl.c | 50 +++---- net/llc/sysctl_net_llc.c | 20 +-- net/netfilter/ipvs/ip_vs_ctl.c | 46 +++---- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- net/netfilter/nf_conntrack_acct.c | 2 +- net/netfilter/nf_conntrack_proto_generic.c | 4 +- net/netfilter/nf_conntrack_proto_sctp.c | 28 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 50 +++---- net/netfilter/nf_conntrack_proto_udp.c | 8 +- net/netfilter/nf_conntrack_proto_udplite.c | 4 +- net/netfilter/nf_conntrack_standalone.c | 16 +--
net/netrom/sysctl_net_netrom.c | 48 +++---- net/phonet/sysctl.c | 2 +- net/rose/sysctl_net_rose.c | 40 +++--- net/sctp/sysctl.c | 82 ++++++------ net/unix/sysctl_net_unix.c | 2 +- net/x25/sysctl_net_x25.c | 22 +-- 38 files changed, 509 insertions(+), 509 deletions(-) (limited to 'net/core') diff --git a/net/802/tr.c b/net/802/tr.c index 38f1f290c635..158150fee462 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -635,7 +635,7 @@ static struct ctl_table tr_table[] = { .data = &sysctl_tr_rif_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { 0 }, }; diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index 621805dfa2f4..8d237b15183b 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -17,8 +17,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_expiry_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_ATALK_AARP_TICK_TIME, @@ -26,8 +26,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_tick_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_ATALK_AARP_RETRANSMIT_LIMIT, @@ -35,7 +35,7 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_retransmit_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_ATALK_AARP_RESOLVE_TIME, @@ -43,8 +43,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_resolve_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; diff 
--git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index f288fc4aef9b..c1d877bb5dff 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -43,8 +43,8 @@ static const ctl_table ax25_param_table[] = { .procname = "ip_default_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ipdefmode, .extra2 = &max_ipdefmode }, @@ -53,8 +53,8 @@ static const ctl_table ax25_param_table[] = { .procname = "ax25_default_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_axdefmode, .extra2 = &max_axdefmode }, @@ -63,8 +63,8 @@ static const ctl_table ax25_param_table[] = { .procname = "backoff_type", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_backoff, .extra2 = &max_backoff }, @@ -73,8 +73,8 @@ static const ctl_table ax25_param_table[] = { .procname = "connect_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_conmode, .extra2 = &max_conmode }, @@ -83,8 +83,8 @@ static const ctl_table ax25_param_table[] = { .procname = "standard_window_size", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, @@ -93,8 +93,8 @@ static const ctl_table ax25_param_table[] = { .procname = "extended_window_size", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + 
.proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ewindow, .extra2 = &max_ewindow }, @@ -103,8 +103,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t1_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, @@ -113,8 +113,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t2_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, @@ -123,8 +123,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t3_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t3, .extra2 = &max_t3 }, @@ -133,8 +133,8 @@ static const ctl_table ax25_param_table[] = { .procname = "idle_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -143,8 +143,8 @@ static const ctl_table ax25_param_table[] = { .procname = "maximum_retry_count", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, @@ -153,8 +153,8 @@ static const ctl_table ax25_param_table[] = { .procname = "maximum_packet_length", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_paclen, .extra2 = 
&max_paclen }, @@ -163,8 +163,8 @@ static const ctl_table ax25_param_table[] = { .procname = "protocol", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_proto, .extra2 = &max_proto }, @@ -174,8 +174,8 @@ static const ctl_table ax25_param_table[] = { .procname = "dama_slave_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ds_timeout, .extra2 = &max_ds_timeout }, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fa5cda4e552a..db6176d96e71 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -938,35 +938,35 @@ static ctl_table brnf_table[] = { .data = &brnf_call_arptables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-iptables", .data = &brnf_call_iptables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-ip6tables", .data = &brnf_call_ip6tables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-vlan-tagged", .data = &brnf_filter_vlan_tagged, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-pppoe-tagged", .data = &brnf_filter_pppoe_tagged, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .ctl_name = 0 } }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b337a937ea52..d9bbe010e0ee 100644 --- 
a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2566,128 +2566,128 @@ static struct neigh_sysctl_table { .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_UCAST_SOLICIT, .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_APP_SOLICIT, .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "retrans_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .ctl_name = NET_NEIGH_REACHABLE_TIME, .procname = "base_reachable_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, .procname = "delay_first_probe_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_GC_STALE_TIME, .procname = "gc_stale_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_UNRES_QLEN, .procname = "unres_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_PROXY_QLEN, .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "anycast_delay", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + 
.proc_handler = proc_dointvec_userhz_jiffies, }, { .procname = "proxy_delay", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .procname = "locktime", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .ctl_name = NET_NEIGH_RETRANS_TIME_MS, .procname = "retrans_time_ms", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, .procname = "base_reachable_time_ms", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_NEIGH_GC_INTERVAL, .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_GC_THRESH1, .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_GC_THRESH2, .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_GC_THRESH3, .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {}, }, diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f686467ff12b..2bc0384b0448 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -22,7 +22,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_max, .maxlen = sizeof(int), .mode = 0644, - 
.proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_RMEM_MAX, @@ -30,7 +30,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_rmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_WMEM_DEFAULT, @@ -38,7 +38,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_RMEM_DEFAULT, @@ -46,7 +46,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_rmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_DEV_WEIGHT, @@ -54,7 +54,7 @@ static struct ctl_table net_core_table[] = { .data = &weight_p, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_MAX_BACKLOG, @@ -62,7 +62,7 @@ static struct ctl_table net_core_table[] = { .data = &netdev_max_backlog, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_MSG_COST, @@ -70,8 +70,8 @@ static struct ctl_table net_core_table[] = { .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_CORE_MSG_BURST, @@ -79,7 +79,7 @@ static struct ctl_table net_core_table[] = { .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CORE_OPTMEM_MAX, @@ -87,7 +87,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_optmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = 
proc_dointvec }, #ifdef CONFIG_XFRM { @@ -96,7 +96,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_xfrm_aevent_etime, .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_AEVENT_RSEQTH, @@ -104,7 +104,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_xfrm_aevent_rseqth, .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = CTL_UNNUMBERED, @@ -112,7 +112,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_xfrm_larval_drop, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = CTL_UNNUMBERED, @@ -120,7 +120,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_xfrm_acq_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ @@ -130,7 +130,7 @@ static struct ctl_table net_core_table[] = { .data = &netdev_budget, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_WARNINGS, @@ -138,7 +138,7 @@ static struct ctl_table net_core_table[] = { .data = &net_msg_warn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; @@ -150,7 +150,7 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 36400b266896..2f360a1e5e49 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -354,8 +354,8 @@ static ctl_table dn_table[] = { .data = node_name, .maxlen = 7, .mode = 0644, - .proc_handler = &proc_dostring, 
- .strategy = &sysctl_string, + .proc_handler = proc_dostring, + .strategy = sysctl_string, }, { .ctl_name = NET_DECNET_DEFAULT_DEVICE, @@ -371,8 +371,8 @@ static ctl_table dn_table[] = { .data = &decnet_time_wait, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_decnet_time_wait, .extra2 = &max_decnet_time_wait }, @@ -382,8 +382,8 @@ static ctl_table dn_table[] = { .data = &decnet_dn_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -393,8 +393,8 @@ static ctl_table dn_table[] = { .data = &decnet_di_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -404,8 +404,8 @@ static ctl_table dn_table[] = { .data = &decnet_dr_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -415,8 +415,8 @@ static ctl_table dn_table[] = { .data = &decnet_dst_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_decnet_dst_gc_interval, .extra2 = &max_decnet_dst_gc_interval }, @@ -426,8 +426,8 @@ static ctl_table dn_table[] = { .data = &decnet_no_fc_max_cwnd, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = 
sysctl_intvec, .extra1 = &min_decnet_no_fc_max_cwnd, .extra2 = &max_decnet_no_fc_max_cwnd }, @@ -437,8 +437,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_mem, .maxlen = sizeof(sysctl_decnet_mem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_RMEM, @@ -446,8 +446,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_rmem, .maxlen = sizeof(sysctl_decnet_rmem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_WMEM, @@ -455,8 +455,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_wmem, .maxlen = sizeof(sysctl_decnet_wmem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_DEBUG_LEVEL, @@ -464,8 +464,8 @@ static ctl_table dn_table[] = { .data = &decnet_debug_level, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, {0} }; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1a3c37b5e936..6659ac000eeb 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -607,7 +607,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, @@ -615,7 +615,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_IPFRAG_TIME, @@ -623,8 +623,8 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = 
&init_net.ipv4.frags.timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { } }; @@ -636,15 +636,15 @@ static struct ctl_table ip4_frags_ctl_table[] = { .data = &ip4_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .procname = "ipfrag_max_dist", .data = &sysctl_ipfrag_max_dist, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero }, { } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index bddadead6195..b2141e11575e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -197,7 +197,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, @@ -205,7 +205,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, @@ -213,7 +213,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, @@ -221,7 +221,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, @@ -229,8 +229,8 @@ static 
ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4e8879220222..1fd3ef7718b6 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -272,7 +272,7 @@ static struct ctl_table icmp_sysctl_table[] = { .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -285,7 +285,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f47b9db0db7f..0dc0c3826763 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3118,7 +3118,7 @@ static ctl_table ipv4_route_table[] = { .data = &ipv4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, @@ -3126,7 +3126,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { /* Deprecated. 
Use gc_min_interval_ms */ @@ -3136,8 +3136,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, @@ -3145,8 +3145,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, @@ -3154,8 +3154,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, @@ -3163,8 +3163,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, @@ -3172,7 +3172,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_load, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, @@ -3180,7 +3180,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_number, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, @@ -3188,7 +3188,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_silence, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = 
proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_ERROR_COST, @@ -3196,7 +3196,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_error_cost, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, @@ -3204,7 +3204,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_error_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, @@ -3212,7 +3212,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, @@ -3220,8 +3220,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, @@ -3229,7 +3229,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_min_pmtu, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, @@ -3237,7 +3237,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, @@ -3245,8 +3245,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &ipv4_sysctl_rt_secret_interval, - .strategy = &ipv4_sysctl_rt_secret_interval_strategy, + .proc_handler = ipv4_sysctl_rt_secret_interval, + .strategy = ipv4_sysctl_rt_secret_interval_strategy, }, { .ctl_name = 0 } }; @@ -3274,8 +3274,8 @@ static struct ctl_table 
ipv4_route_flush_table[] = { .procname = "flush", .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv4_sysctl_rtcache_flush, - .strategy = &ipv4_sysctl_rtcache_flush_strategy, + .proc_handler = ipv4_sysctl_rtcache_flush, + .strategy = ipv4_sysctl_rtcache_flush_strategy, }, { .ctl_name = 0 }, }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0cc8d31f9ac0..4710d219f06a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -195,7 +195,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_timestamps, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, @@ -203,7 +203,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_window_scaling, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_SACK, @@ -211,7 +211,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_sack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_RETRANS_COLLAPSE, @@ -219,7 +219,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retrans_collapse, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_DEFAULT_TTL, @@ -227,8 +227,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_default_ttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, + .proc_handler = ipv4_doint_and_flush, + .strategy = ipv4_doint_and_flush_strategy, .extra2 = &init_net, }, { @@ -237,7 +237,7 @@ static struct ctl_table ipv4_table[] = { .data = &ipv4_config.no_pmtu_disc, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_NONLOCAL_BIND, 
@@ -245,7 +245,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_nonlocal_bind, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_SYN_RETRIES, @@ -253,7 +253,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_SYNACK_RETRIES, @@ -261,7 +261,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_synack_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_ORPHANS, @@ -269,7 +269,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_max_orphans, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_TW_BUCKETS, @@ -277,7 +277,7 @@ static struct ctl_table ipv4_table[] = { .data = &tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_DYNADDR, @@ -285,7 +285,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_dynaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, @@ -293,8 +293,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_TCP_KEEPALIVE_PROBES, @@ -302,7 +302,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_probes, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 
NET_IPV4_TCP_KEEPALIVE_INTVL, @@ -310,8 +310,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_intvl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_TCP_RETRIES1, @@ -319,8 +319,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retries1, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra2 = &tcp_retr1_max }, { @@ -329,7 +329,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retries2, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_FIN_TIMEOUT, @@ -337,8 +337,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_fin_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, #ifdef CONFIG_SYN_COOKIES { @@ -347,7 +347,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syncookies, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -356,7 +356,7 @@ static struct ctl_table ipv4_table[] = { .data = &tcp_death_row.sysctl_tw_recycle, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ABORT_ON_OVERFLOW, @@ -364,7 +364,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_abort_on_overflow, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_STDURG, @@ -372,7 +372,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_stdurg, .maxlen = sizeof(int), .mode = 
0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_RFC1337, @@ -380,7 +380,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_rfc1337, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_SYN_BACKLOG, @@ -388,7 +388,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_max_syn_backlog, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, @@ -396,8 +396,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_local_ports.range, .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, - .proc_handler = &ipv4_local_port_range, - .strategy = &ipv4_sysctl_local_port_range, + .proc_handler = ipv4_local_port_range, + .strategy = ipv4_sysctl_local_port_range, }, #ifdef CONFIG_IP_MULTICAST { @@ -406,7 +406,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_igmp_max_memberships, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif @@ -416,7 +416,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_igmp_max_msf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, @@ -424,7 +424,7 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_threshold, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_INET_PEER_MINTTL, @@ -432,8 +432,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_minttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_MAXTTL, @@ -441,8 +441,8 @@ static struct ctl_table ipv4_table[] = { 
.data = &inet_peer_maxttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, @@ -450,8 +450,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_gc_mintime, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, @@ -459,8 +459,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_gc_maxtime, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_TCP_ORPHAN_RETRIES, @@ -468,7 +468,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_orphan_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FACK, @@ -476,7 +476,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_fack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_REORDERING, @@ -484,7 +484,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_reordering, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ECN, @@ -492,7 +492,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_ecn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_DSACK, @@ -500,7 +500,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_dsack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 
NET_TCP_MEM, @@ -508,7 +508,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_mem, .maxlen = sizeof(sysctl_tcp_mem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_WMEM, @@ -516,7 +516,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_RMEM, @@ -524,7 +524,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_APP_WIN, @@ -532,7 +532,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_app_win, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ADV_WIN_SCALE, @@ -540,7 +540,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_TW_REUSE, @@ -548,7 +548,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_tw_reuse, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FRTO, @@ -556,7 +556,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_frto, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FRTO_RESPONSE, @@ -564,7 +564,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_frto_response, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_LOW_LATENCY, @@ -572,7 +572,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), .mode = 0644, - 
.proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_NO_METRICS_SAVE, @@ -580,7 +580,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_nometrics_save, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_MODERATE_RCVBUF, @@ -588,7 +588,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_moderate_rcvbuf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_TSO_WIN_DIVISOR, @@ -596,15 +596,15 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_tso_win_divisor, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_CONG_CONTROL, .procname = "tcp_congestion_control", .mode = 0644, .maxlen = TCP_CA_NAME_MAX, - .proc_handler = &proc_tcp_congestion_control, - .strategy = &sysctl_tcp_congestion_control, + .proc_handler = proc_tcp_congestion_control, + .strategy = sysctl_tcp_congestion_control, }, { .ctl_name = NET_TCP_ABC, @@ -612,7 +612,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_abc, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_MTU_PROBING, @@ -620,7 +620,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_BASE_MSS, @@ -628,7 +628,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_base_mss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, @@ -636,7 +636,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_workaround_signed_windows, .maxlen = sizeof(int), .mode = 0644, - 
.proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #ifdef CONFIG_NET_DMA { @@ -645,7 +645,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_dma_copybreak, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -654,7 +654,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_slow_start_after_idle, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #ifdef CONFIG_NETLABEL { @@ -663,7 +663,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_cache_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, @@ -671,7 +671,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_cache_bucketsize, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_RBM_OPTFMT, @@ -679,7 +679,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_rbm_optfmt, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, @@ -687,22 +687,22 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_rbm_strictvalid, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif /* CONFIG_NETLABEL */ { .procname = "tcp_available_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0444, - .proc_handler = &proc_tcp_available_congestion_control, + .proc_handler = proc_tcp_available_congestion_control, }, { .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, .procname = "tcp_allowed_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0644, - .proc_handler = &proc_allowed_congestion_control, - .strategy = &strategy_allowed_congestion_control, + .proc_handler = proc_allowed_congestion_control, + 
.strategy = strategy_allowed_congestion_control, }, { .ctl_name = NET_TCP_MAX_SSTHRESH, @@ -710,7 +710,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_max_ssthresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -718,8 +718,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { @@ -728,8 +728,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_rmem_min, .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { @@ -738,8 +738,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_wmem_min, .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { .ctl_name = 0 } @@ -752,7 +752,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, @@ -760,7 +760,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, @@ -768,7 +768,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = 
&proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, @@ -776,7 +776,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_RATELIMIT, @@ -784,8 +784,8 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies }, { .ctl_name = NET_IPV4_ICMP_RATEMASK, @@ -793,7 +793,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratemask, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = CTL_UNNUMBERED, @@ -801,7 +801,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { } }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ff7ae05f72e1..07ee758de9e1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4030,8 +4030,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.forwarding, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &addrconf_sysctl_forward, - .strategy = &addrconf_sysctl_forward_strategy, + .proc_handler = addrconf_sysctl_forward, + .strategy = addrconf_sysctl_forward_strategy, }, { .ctl_name = NET_IPV6_HOP_LIMIT, @@ -4047,7 +4047,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA, @@ -4055,7 +4055,7 @@ static struct addrconf_sysctl_table .data = 
&ipv6_devconf.accept_ra, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, @@ -4063,7 +4063,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_redirects, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_AUTOCONF, @@ -4071,7 +4071,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.autoconf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_DAD_TRANSMITS, @@ -4079,7 +4079,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.dad_transmits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_SOLICITS, @@ -4087,7 +4087,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, @@ -4095,8 +4095,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicit_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, @@ -4104,8 +4104,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_FORCE_MLD_VERSION, @@ -4113,7 +4113,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.force_mld_version, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef 
CONFIG_IPV6_PRIVACY { @@ -4122,7 +4122,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.use_tempaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_TEMP_VALID_LFT, @@ -4130,7 +4130,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.temp_valid_lft, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, @@ -4138,7 +4138,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.temp_prefered_lft, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_REGEN_MAX_RETRY, @@ -4146,7 +4146,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.regen_max_retry, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, @@ -4154,7 +4154,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.max_desync_factor, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -4163,7 +4163,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.max_addresses, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, @@ -4171,7 +4171,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_defrtr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, @@ -4179,7 +4179,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_pinfo, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_ROUTER_PREF { @@ -4188,7 +4188,7 @@ static struct addrconf_sysctl_table 
.data = &ipv6_devconf.accept_ra_rtr_pref, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, @@ -4196,8 +4196,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_probe_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { @@ -4206,7 +4206,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif #endif @@ -4216,7 +4216,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, @@ -4224,7 +4224,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_source_route, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { @@ -4233,7 +4233,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.optimistic_dad, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif @@ -4244,7 +4244,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -4253,7 +4253,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.disable_ipv6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -4261,7 +4261,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_dad, .maxlen = sizeof(int), .mode = 
0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0, /* sentinel */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 3c2821f9b529..be351009fd03 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -956,8 +956,8 @@ ctl_table ipv6_icmp_table_template[] = { .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies }, { .ctl_name = 0 }, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 05726177903f..bd52151d31e9 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -253,7 +253,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9967ac7a01a8..ed4d79a9e4a6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -80,7 +80,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, @@ -88,7 +88,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, @@ -96,7 +96,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = 
&proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index af12de071f4c..3c575118fca5 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -642,7 +642,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, @@ -650,7 +650,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV6_IP6FRAG_TIME, @@ -658,8 +658,8 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { } }; @@ -671,8 +671,8 @@ static struct ctl_table ip6_frags_ctl_table[] = { .data = &ip6_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { } }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d69fa462d3f0..4d40dc214b2d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2499,7 +2499,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv6_sysctl_rtcache_flush + .proc_handler = ipv6_sysctl_rtcache_flush }, { .ctl_name = NET_IPV6_ROUTE_GC_THRESH, @@ -2507,7 +2507,7 @@ ctl_table ipv6_route_table_template[] = { .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 
NET_IPV6_ROUTE_MAX_SIZE, @@ -2515,7 +2515,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, @@ -2523,8 +2523,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, @@ -2532,8 +2532,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, @@ -2541,8 +2541,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, @@ -2550,8 +2550,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, @@ -2559,8 +2559,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, @@ 
-2568,8 +2568,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, @@ -2577,8 +2577,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 587f8f60c489..9048fe7e7ea7 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -35,7 +35,7 @@ static ctl_table ipv6_table_template[] = { .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; @@ -47,7 +47,7 @@ static ctl_table ipv6_table[] = { .data = &sysctl_mld_max_msf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c index 92fef864e852..633fcab35580 100644 --- a/net/ipx/sysctl_net_ipx.c +++ b/net/ipx/sysctl_net_ipx.c @@ -23,7 +23,7 @@ static struct ctl_table ipx_table[] = { .data = &sysctl_ipx_pprop_broadcasting, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { 0 }, }; diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 9ab3df15425d..57f8817c3979 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -118,8 +118,8 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &do_discovery, - .strategy = &sysctl_intvec + 
.proc_handler = do_discovery, + .strategy = sysctl_intvec }, { .ctl_name = NET_IRDA_DEVNAME, @@ -127,8 +127,8 @@ static ctl_table irda_table[] = { .data = sysctl_devname, .maxlen = 65, .mode = 0644, - .proc_handler = &do_devname, - .strategy = &sysctl_string + .proc_handler = do_devname, + .strategy = sysctl_string }, #ifdef CONFIG_IRDA_DEBUG { @@ -137,7 +137,7 @@ static ctl_table irda_table[] = { .data = &irda_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif #ifdef CONFIG_IRDA_FAST_RR @@ -147,7 +147,7 @@ static ctl_table irda_table[] = { .data = &sysctl_fast_poll_increase, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -156,8 +156,8 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery_slots, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_discovery_slots, .extra2 = &max_discovery_slots }, @@ -167,7 +167,7 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IRDA_SLOT_TIMEOUT, @@ -175,8 +175,8 @@ static ctl_table irda_table[] = { .data = &sysctl_slot_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_slot_timeout, .extra2 = &max_slot_timeout }, @@ -186,8 +186,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_baud_rate, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_baud_rate, .extra2 = &max_max_baud_rate }, @@ -197,8 +197,8 @@ static 
ctl_table irda_table[] = { .data = &sysctl_min_tx_turn_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_min_tx_turn_time, .extra2 = &max_min_tx_turn_time }, @@ -208,8 +208,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_tx_data_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_tx_data_size, .extra2 = &max_max_tx_data_size }, @@ -219,8 +219,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_tx_window, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_tx_window, .extra2 = &max_max_tx_window }, @@ -230,8 +230,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_noreply_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_noreply_time, .extra2 = &max_max_noreply_time }, @@ -241,8 +241,8 @@ static ctl_table irda_table[] = { .data = &sysctl_warn_noreply_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_warn_noreply_time, .extra2 = &max_warn_noreply_time }, @@ -252,8 +252,8 @@ static ctl_table irda_table[] = { .data = &sysctl_lap_keepalive_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_lap_keepalive_time, .extra2 = &max_lap_keepalive_time }, diff --git 
a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 5bef1dcf18e3..57b9304d444c 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -20,8 +20,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_ack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_BUSY_TIMEOUT, @@ -29,8 +29,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_busy_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_P_TIMEOUT, @@ -38,8 +38,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_p_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_REJ_TIMEOUT, @@ -47,8 +47,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_rej_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; @@ -60,8 +60,8 @@ static struct ctl_table llc_station_table[] = { .data = &sysctl_llc_station_ack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d0ccdaff4276..e01061f49cdc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1545,7 +1545,7 @@ static struct ctl_table vs_vars[] = { .data = 
&sysctl_ip_vs_amemthresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IP_VS_DEBUG { @@ -1553,7 +1553,7 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -1561,28 +1561,28 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "drop_entry", .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, { .procname = "drop_packet", .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, { .procname = "secure_tcp", .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, #if 0 { @@ -1590,84 +1590,84 @@ static struct ctl_table vs_vars[] = { .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synsent", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synrecv", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_finwait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = 
proc_dointvec_jiffies, }, { .procname = "timeout_timewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_close", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_closewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_lastack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_listen", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_udp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_icmp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, #endif { @@ -1675,35 +1675,35 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_cache_bypass, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "expire_nodest_conn", .data = 
&sysctl_ip_vs_expire_nodest_conn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "expire_quiescent_template", .data = &sysctl_ip_vs_expire_quiescent_template, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "sync_threshold", .data = &sysctl_ip_vs_sync_threshold, .maxlen = sizeof(sysctl_ip_vs_sync_threshold), .mode = 0644, - .proc_handler = &proc_do_sync_threshold, + .proc_handler = proc_do_sync_threshold, }, { .procname = "nat_icmp_send", .data = &sysctl_ip_vs_nat_icmp_send, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 4256cfad8d31..9394f539966a 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -116,7 +116,7 @@ static ctl_table vs_vars_table[] = { .data = &sysctl_ip_vs_lblc_expiration, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 } }; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 321b49fa41d8..92dc76a6842c 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -295,7 +295,7 @@ static ctl_table vs_vars_table[] = { .data = &sysctl_ip_vs_lblcr_expiration, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 } }; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index b92df5c1dfcf..9fe8982bd7c9 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -35,7 +35,7 @@ static struct ctl_table acct_sysctl_table[] = { .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = 
&proc_dointvec, + .proc_handler = proc_dointvec, }, {} }; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index dbe680af85d2..4be80d7b8795 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -67,7 +67,7 @@ static struct ctl_table generic_sysctl_table[] = { .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -80,7 +80,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index ae8c2609e230..c2bd457bc2f2 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -548,49 +548,49 @@ static struct ctl_table sctp_sysctl_table[] = { .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_cookie_wait", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_cookie_echoed", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_established", .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, 
{ .procname = "nf_conntrack_sctp_timeout_shutdown_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_shutdown_recd", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -604,49 +604,49 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_cookie_wait", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_cookie_echoed", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_established", .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_shutdown_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = 
"ip_conntrack_sctp_timeout_shutdown_recd", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index f947ec41e391..a1edb9c1adee 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1192,70 +1192,70 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_established", .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_fin_wait", .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close_wait", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_last_ack", .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], 
.maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_time_wait", .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_unacknowledged", .data = &nf_ct_tcp_timeout_unacknowledged, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, @@ -1263,7 +1263,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, @@ -1271,7 +1271,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, @@ -1279,7 +1279,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 @@ -1293,63 +1293,63 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), 
.mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_established", .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_fin_wait", .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close_wait", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_last_ack", .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_time_wait", .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, @@ -1357,7 
+1357,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, @@ -1365,7 +1365,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, @@ -1373,7 +1373,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7c2ca48698be..2b8b1f579f93 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -143,14 +143,14 @@ static struct ctl_table udp_sysctl_table[] = { .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -163,14 +163,14 @@ static struct ctl_table udp_compat_sysctl_table[] = { .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_udplite.c 
b/net/netfilter/nf_conntrack_proto_udplite.c index d22d839e4f94..4579d8de13b1 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -151,7 +151,7 @@ static struct ctl_table udplite_sysctl_table[] = { .data = &nf_ct_udplite_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = CTL_UNNUMBERED, @@ -159,7 +159,7 @@ static struct ctl_table udplite_sysctl_table[] = { .data = &nf_ct_udplite_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 98106d4e89f0..f37b9b74c6a8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -336,7 +336,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_COUNT, @@ -344,7 +344,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_BUCKETS, @@ -352,7 +352,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_CHECKSUM, @@ -360,7 +360,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, @@ -368,8 +368,8 @@ static ctl_table nf_ct_sysctl_table[] = { .data = 
&init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, @@ -379,7 +379,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; @@ -393,7 +393,7 @@ static ctl_table nf_ct_netfilter_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 34c96c9674df..7b49591fe87c 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -41,8 +41,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_default_path_quality, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_quality, .extra2 = &max_quality }, @@ -52,8 +52,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_obsolescence_count_initialiser, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_obs, .extra2 = &max_obs }, @@ -63,8 +63,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_network_ttl_initialiser, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ttl, .extra2 = &max_ttl }, @@ -74,8 +74,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_timeout, .maxlen = sizeof(int), 
.mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, @@ -85,8 +85,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_maximum_tries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, @@ -96,8 +96,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_acknowledge_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, @@ -107,8 +107,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_busy_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t4, .extra2 = &max_t4 }, @@ -118,8 +118,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_requested_window_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, @@ -129,8 +129,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -140,8 +140,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_routing_control, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = 
&sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, @@ -151,8 +151,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_link_fails_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_fails, .extra2 = &max_fails }, @@ -162,8 +162,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_reset_circuit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_reset, .extra2 = &max_reset }, diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 600a4309b8c8..0e3347bbdb12 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -89,7 +89,7 @@ static struct ctl_table phonet_table[] = { .data = &local_port_range, .maxlen = sizeof(local_port_range), .mode = 0644, - .proc_handler = &proc_local_port_range, + .proc_handler = proc_local_port_range, .strategy = NULL, }, { .ctl_name = 0 } diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 20be3485a97f..3bfe504faf86 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -31,8 +31,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -42,8 +42,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -53,8 +53,8 @@ 
static ctl_table rose_table[] = { .data = &sysctl_rose_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -64,8 +64,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -75,8 +75,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -86,8 +86,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_ack_hold_back_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -97,8 +97,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_routing_control, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, @@ -108,8 +108,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_link_fail_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ftimer, .extra2 = &max_ftimer }, @@ -119,8 +119,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_maximum_vcs, .maxlen = sizeof(int), 
.mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_maxvcs, .extra2 = &max_maxvcs }, @@ -130,8 +130,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_window_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 52910697e104..f58e994e6852 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -63,8 +63,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -74,8 +74,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -85,8 +85,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -96,8 +96,8 @@ static ctl_table sctp_table[] = { .data = &sctp_valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -107,8 +107,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, - 
.proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero, .extra2 = &int_max }, @@ -118,8 +118,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -129,8 +129,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RCVBUF_POLICY, @@ -138,8 +138,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_PATH_MAX_RETRANS, @@ -147,8 +147,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -158,8 +158,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -169,8 +169,8 @@ static ctl_table sctp_table[] = { .data = &sctp_hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -180,8 +180,8 @@ static 
ctl_table sctp_table[] = { .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_ALPHA, @@ -189,8 +189,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_alpha, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_BETA, @@ -198,8 +198,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_beta, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_ADDIP_ENABLE, @@ -207,8 +207,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_PRSCTP_ENABLE, @@ -216,8 +216,8 @@ static ctl_table sctp_table[] = { .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_SACK_TIMEOUT, @@ -225,8 +225,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, @@ -236,7 +236,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -244,7 +244,7 @@ static ctl_table sctp_table[] = { .data = 
&sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -252,7 +252,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -260,8 +260,8 @@ static ctl_table sctp_table[] = { .data = &sctp_auth_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = CTL_UNNUMBERED, @@ -269,8 +269,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_noauth, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = 0 } }; diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 1f4040cdadad..83c093077ebc 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -21,7 +21,7 @@ static ctl_table unix_table[] = { .data = &init_net.unx.sysctl_max_dgram_qlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index 6ebda25c24e9..a5d3416522de 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -24,8 +24,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -35,8 +35,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = 
&proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -46,8 +46,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -57,8 +57,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -68,8 +68,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_ack_holdback_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -79,7 +79,7 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_forward, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { 0, }, }; -- cgit v1.2.3 From 270acefafeb74ce2fe93d35b75733870bf1e11e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Nov 2008 01:38:06 -0800 Subject: net: sk_free_datagram() should use sk_mem_reclaim_partial() I noticed a contention on udp_memory_allocated on regular UDP applications. While tcp_memory_allocated is seldom used, it appears each incoming UDP frame is currently touching udp_memory_allocated when queued, and when received by application. One possible solution is to use sk_mem_reclaim_partial() instead of sk_mem_reclaim(), so that we keep a small reserve (less than one page) of memory for each UDP socket. 
We did something very similar on TCP side in commit 9993e7d313e80bdc005d09c7def91903e0068f07 ([TCP]: Do not purge sk_forward_alloc entirely in tcp_delack_timer()) A more complex solution would need to convert prot->memory_allocated to use a percpu_counter with batches of 64 or 128 pages. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/datagram.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/datagram.c b/net/core/datagram.c index ee631843c2f5..5e2ac0c4b07c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -209,7 +209,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); - sk_mem_reclaim(sk); + sk_mem_reclaim_partial(sk); } /** @@ -248,8 +248,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) spin_unlock_bh(&sk->sk_receive_queue.lock); } - kfree_skb(skb); - sk_mem_reclaim(sk); + skb_free_datagram(sk, skb); return err; } -- cgit v1.2.3 From d0c082cea6dfb9b674b4f6e1e84025662dbd24e8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 15:59:38 -0800 Subject: netns: Delete virtual interfaces during namespace cleanup When physical devices are inside of network namespace and that network namespace terminates we can not make them go away. We have to keep them and moving them to the initial network namespace is the best we can do. For virtual devices left in a network namespace that is exiting we have no need to preserve them and we now have the infrastructure that allows us to delete them. So delete virtual devices when we exit a network namespace. Keeping the necessary user space clean up after a network namespace exits much more tractable. Acked-by: Daniel Lezcano Acked-by: Pavel Emelyanov Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 8f9d3b38a44b..9475f3e624a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4852,6 +4852,12 @@ static void __net_exit default_device_exit(struct net *net) if (dev->features & NETIF_F_NETNS_LOCAL) continue; + /* Delete virtual devices */ + if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { + dev->rtnl_link_ops->dellink(dev); + continue; + } + /* Push remaing network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); err = dev_change_net_namespace(dev, &init_net, fb_name); -- cgit v1.2.3 From ae33bc40c0d96d02f51a996482ea7e41c5152695 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 16:00:02 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invoke the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantees that the loopback device is the first device registered and the last network device to go away. Signed-off-by: Eric W. Biederman Signed-off-by: David S.
Miller --- drivers/net/loopback.c | 13 ++----------- include/linux/netdevice.h | 1 + net/core/dev.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net/core') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 91d08585a6d8..c4516b580ba5 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,17 +204,8 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -static struct pernet_operations __net_initdata loopback_net_ops = { +/* Registered in net/core/dev.c */ +struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; - -static int __init loopback_init(void) -{ - return register_pernet_device(&loopback_net_ops); -} - -/* Loopback is special. It should be initialized before any other network - * device and network subsystem. - */ -fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe58464..12d7f4469dc9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 9475f3e624a8..811507c39805 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4904,6 +4904,18 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; + /* The loopback device is special if any other network devices + * is present in a network namespace the loopback device must + * be present. Since we now dynamically allocate and free the + * loopback device ensure this invariant is maintained by + * keeping the loopback device as the first device on the + * list of network devices. 
Ensuring the loopback devices + * is the first device that appears and the last network device + * that disappears. + */ + if (register_pernet_device(&loopback_net_ops)) + goto out; + if (register_pernet_device(&default_device_ops)) goto out; -- cgit v1.2.3 From 0a36b345ab99d6b3c96999e7e3b79bd243cf9bf7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 16:00:24 -0800 Subject: net: Don't leak packets when a netns is going down I have been tracking for a while a case where when the network namespace exits the cleanup gets stuck in an endless process of: unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 It turns out that if you listen on a multicast address an unsubscribe packet is sent when the network device goes down. If you shutdown the network namespace without carefully cleaning up this can trigger the unsubscribe packet to be sent over the loopback interface while the network namespace is going down. All of which is fine except when we drop the packet and forget to free it leaking the skb and the dst entry attached to it. As it turns out the dst entry holds a reference to the idev which holds the dev and keeps everything from being cleaned up. Yuck! By fixing my earlier thinko and adding the needed kfree_skb, everything cleans up beautifully. Signed-off-by: Eric W. Biederman Signed-off-by: David S.
Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 811507c39805..a0c60607f1a7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2253,8 +2253,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) + if (!net_alive(dev_net(skb->dev))) { + kfree_skb(skb); goto out; + } #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { -- cgit v1.2.3 From 76acfdb9b78acf73023307974f6d38a269e9967a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 6 Nov 2008 23:06:44 -0800 Subject: net: mark flow_cache_cpu_prepare() as __init It's called from __init code only. And __devinit in generic networking code is pretty strange :^) Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/flow.c b/net/core/flow.c index 5cf81052d044..d323388dd1ba 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -307,7 +307,7 @@ void flow_cache_flush(void) put_online_cpus(); } -static void __devinit flow_cache_cpu_prepare(int cpu) +static void __init flow_cache_cpu_prepare(int cpu) { struct tasklet_struct *tasklet; unsigned long order; -- cgit v1.2.3 From 3d8160b1493bcadca74fbb635d79b3928b8999cf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 7 Nov 2008 22:52:14 -0800 Subject: Revert "net: Guaranetee the proper ordering of the loopback device." This reverts commit ae33bc40c0d96d02f51a996482ea7e41c5152695.
--- drivers/net/loopback.c | 13 +++++++++++-- include/linux/netdevice.h | 1 - net/core/dev.c | 12 ------------ 3 files changed, 11 insertions(+), 15 deletions(-) (limited to 'net/core') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index c4516b580ba5..91d08585a6d8 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,8 +204,17 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -/* Registered in net/core/dev.c */ -struct pernet_operations __net_initdata loopback_net_ops = { +static struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; + +static int __init loopback_init(void) +{ + return register_pernet_device(&loopback_net_ops); +} + +/* Loopback is special. It should be initialized before any other network + * device and network subsystem. + */ +fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f4469dc9..f1b0dbe58464 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,7 +1766,6 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } -extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index e0dc67a789b7..2306d56fbb5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4909,18 +4909,6 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; - /* The loopback device is special if any other network devices - * is present in a network namespace the loopback device must - * be present. Since we now dynamically allocate and free the - * loopback device ensure this invariant is maintained by - * keeping the loopback device as the first device on the - * list of network devices. Ensuring the loopback devices - * is the first device that appears and the last network device - * that disappears. 
- */ - if (register_pernet_device(&loopback_net_ops)) - goto out; - if (register_pernet_device(&default_device_ops)) goto out; -- cgit v1.2.3 From 5d6d480908300a0c0b3be8b58567dfcef62c83a5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Nov 2008 22:52:34 -0800 Subject: net: fib_rules ordering fixes. We need to set up the network namespace state before we register the notifier. Otherwise if a network device is already registered we get a nasty NULL pointer dereference. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/fib_rules.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 79de3b14a8d1..32b3a0152d7a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -664,17 +664,18 @@ static int __init fib_rules_init(void) rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); - err = register_netdevice_notifier(&fib_rules_notifier); + err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) goto fail; - err = register_pernet_subsys(&fib_rules_net_ops); + err = register_netdevice_notifier(&fib_rules_notifier); if (err < 0) goto fail_unregister; + return 0; fail_unregister: - unregister_netdevice_notifier(&fib_rules_notifier); + unregister_pernet_subsys(&fib_rules_net_ops); fail: rtnl_unregister(PF_UNSPEC, RTM_NEWRULE); rtnl_unregister(PF_UNSPEC, RTM_DELRULE); -- cgit v1.2.3 From 505d4f73dda9e20d59da05008f1f5eb432613e71 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Nov 2008 22:54:20 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. v2 I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present.
Things like sending igmp unsubscribe messages when we bring down network devices invoke the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantees that the loopback device is the first device registered and the last network device to go away. But do it carefully so we register the loopback device after we clear dev_boot_phase. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/loopback.c | 13 ++----------- include/linux/netdevice.h | 1 + net/core/dev.c | 22 +++++++++++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'net/core') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 91d08585a6d8..c4516b580ba5 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,17 +204,8 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -static struct pernet_operations __net_initdata loopback_net_ops = { +/* Registered in net/core/dev.c */ +struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; - -static int __init loopback_init(void) -{ - return register_pernet_device(&loopback_net_ops); -} - -/* Loopback is special. It should be initialized before any other network - * device and network subsystem.
- */ -fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe58464..12d7f4469dc9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 2306d56fbb5e..31568b2068ac 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4909,9 +4909,6 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; - if (register_pernet_device(&default_device_ops)) - goto out; - /* * Initialise the packet receive queues. */ @@ -4928,10 +4925,25 @@ static int __init net_dev_init(void) queue->backlog.weight = weight_p; } - netdev_dma_register(); - dev_boot_phase = 0; + /* The loopback device is special if any other network devices + * is present in a network namespace the loopback device must + * be present. Since we now dynamically allocate and free the + * loopback device ensure this invariant is maintained by + * keeping the loopback device as the first device on the + * list of network devices. Ensuring the loopback devices + * is the first device that appears and the last network device + * that disappears. + */ + if (register_pernet_device(&loopback_net_ops)) + goto out; + + if (register_pernet_device(&default_device_ops)) + goto out; + + netdev_dma_register(); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); -- cgit v1.2.3 From fb28ad35906af2f042c94e2f9c0f898ef9acfa37 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 10 Nov 2008 13:55:14 -0800 Subject: net: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Marcel Holtmann Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: David S. 
Miller --- drivers/net/3c59x.c | 4 ++-- drivers/net/defxx.c | 2 +- drivers/net/enc28j60.c | 2 +- drivers/net/fec_mpc52xx.c | 2 +- drivers/net/gianfar.c | 2 +- drivers/net/mlx4/mlx4_en.h | 8 ++++---- drivers/net/pasemi_mac.c | 3 ++- drivers/net/phy/mdio_bus.c | 4 ++-- drivers/net/phy/phy.c | 2 +- drivers/net/phy/phy_device.c | 4 ++-- drivers/net/sh_eth.c | 2 +- drivers/net/tg3.c | 4 ++-- drivers/net/tulip/de4x5.c | 4 ++-- drivers/net/ucc_geth.c | 4 ++-- drivers/net/wireless/libertas/defs.h | 2 +- drivers/net/wireless/orinoco/orinoco.c | 2 +- drivers/net/wireless/orinoco/orinoco_cs.c | 2 +- drivers/net/wireless/orinoco/spectrum_cs.c | 2 +- include/net/wireless.h | 4 ++-- net/atm/atm_sysfs.c | 2 +- net/bluetooth/hci_sysfs.c | 7 +++---- net/core/net-sysfs.c | 2 +- net/dsa/slave.c | 2 +- net/rfkill/rfkill.c | 5 ++--- net/wireless/core.c | 3 +-- 25 files changed, 39 insertions(+), 41 deletions(-) (limited to 'net/core') diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index b4168dfd6893..3893f505fb5f 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -1025,7 +1025,7 @@ static int __devinit vortex_probe1(struct device *gendev, } if ((edev = DEVICE_EISA(gendev))) { - print_name = edev->dev.bus_id; + print_name = dev_name(&edev->dev); } } @@ -2883,7 +2883,7 @@ static void vortex_get_drvinfo(struct net_device *dev, strcpy(info->bus_info, pci_name(VORTEX_PCI(vp))); } else { if (VORTEX_EISA(vp)) - sprintf(info->bus_info, vp->gendev->bus_id); + sprintf(info->bus_info, dev_name(vp->gendev)); else sprintf(info->bus_info, "EISA 0x%lx %d", dev->base_addr, dev->irq); diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c index f07887435247..6e6583b609f7 100644 --- a/drivers/net/defxx.c +++ b/drivers/net/defxx.c @@ -511,7 +511,7 @@ static int __devinit dfx_register(struct device *bdev) int dfx_bus_pci = DFX_BUS_PCI(bdev); int dfx_bus_tc = DFX_BUS_TC(bdev); int dfx_use_mmio = DFX_MMIO || dfx_bus_tc; - char *print_name = bdev->bus_id; + const char *print_name = 
dev_name(bdev); struct net_device *dev; DFX_board_t *bp; /* board pointer */ resource_size_t bar_start = 0; /* pointer to port */ diff --git a/drivers/net/enc28j60.c b/drivers/net/enc28j60.c index d186a52cdb62..32c19790d013 100644 --- a/drivers/net/enc28j60.c +++ b/drivers/net/enc28j60.c @@ -1448,7 +1448,7 @@ enc28j60_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); strlcpy(info->version, DRV_VERSION, sizeof(info->version)); strlcpy(info->bus_info, - dev->dev.parent->bus_id, sizeof(info->bus_info)); + dev_name(dev->dev.parent), sizeof(info->bus_info)); } static int diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c index 94054b4981f3..cd8e98b45ec5 100644 --- a/drivers/net/fec_mpc52xx.c +++ b/drivers/net/fec_mpc52xx.c @@ -216,7 +216,7 @@ static int mpc52xx_fec_init_phy(struct net_device *dev) struct phy_device *phydev; char phy_id[BUS_ID_SIZE]; - snprintf(phy_id, BUS_ID_SIZE, "%x:%02x", + snprintf(phy_id, sizeof(phy_id), "%x:%02x", (unsigned int)dev->base_addr, priv->phy_addr); priv->link = PHY_DOWN; diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index a091db393615..451f6b8b6163 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -546,7 +546,7 @@ static int init_phy(struct net_device *dev) priv->oldspeed = 0; priv->oldduplex = -1; - snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, priv->einfo->bus_id, priv->einfo->phy_id); + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->einfo->bus_id, priv->einfo->phy_id); interface = gfar_get_interface(dev); diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 11fb17c6e97b..cc022197e2a5 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -58,17 +58,17 @@ #define mlx4_dbg(mlevel, priv, format, arg...) 
\ if (NETIF_MSG_##mlevel & priv->msg_enable) \ printk(KERN_DEBUG "%s %s: " format , DRV_NAME ,\ - (&priv->mdev->pdev->dev)->bus_id , ## arg) + (dev_name(&priv->mdev->pdev->dev)) , ## arg) #define mlx4_err(mdev, format, arg...) \ printk(KERN_ERR "%s %s: " format , DRV_NAME ,\ - (&mdev->pdev->dev)->bus_id , ## arg) + (dev_name(&mdev->pdev->dev)) , ## arg) #define mlx4_info(mdev, format, arg...) \ printk(KERN_INFO "%s %s: " format , DRV_NAME ,\ - (&mdev->pdev->dev)->bus_id , ## arg) + (dev_name(&mdev->pdev->dev)) , ## arg) #define mlx4_warn(mdev, format, arg...) \ printk(KERN_WARNING "%s %s: " format , DRV_NAME ,\ - (&mdev->pdev->dev)->bus_id , ## arg) + (dev_name(&mdev->pdev->dev)) , ## arg) /* * Device constants diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index b0270052c3a9..fcbf6ccd0a85 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -1105,7 +1105,8 @@ static int pasemi_mac_phy_init(struct net_device *dev) goto err; phy_id = *prop; - snprintf(mac->phy_id, BUS_ID_SIZE, "%x:%02x", (int)r.start, phy_id); + snprintf(mac->phy_id, sizeof(mac->phy_id), "%x:%02x", + (int)r.start, phy_id); of_node_put(phy_dn); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index d0ed1ef284a8..6afd35f62d77 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -97,7 +97,7 @@ int mdiobus_register(struct mii_bus *bus) bus->dev.parent = bus->parent; bus->dev.class = &mdio_bus_class; bus->dev.groups = NULL; - memcpy(bus->dev.bus_id, bus->id, MII_BUS_ID_SIZE); + dev_set_name(&bus->dev, bus->id); err = device_register(&bus->dev); if (err) { @@ -191,7 +191,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) phydev->dev.parent = bus->parent; phydev->dev.bus = &mdio_bus_type; - snprintf(phydev->dev.bus_id, BUS_ID_SIZE, PHY_ID_FMT, bus->id, addr); + dev_set_name(&phydev->dev, PHY_ID_FMT, bus->id, addr); phydev->bus = bus; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 
df4e6257d4a7..e4ede6080c9d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -45,7 +45,7 @@ */ void phy_print_status(struct phy_device *phydev) { - pr_info("PHY: %s - Link is %s", phydev->dev.bus_id, + pr_info("PHY: %s - Link is %s", dev_name(&phydev->dev), phydev->link ? "Up" : "Down"); if (phydev->link) printk(" - %d/%s", phydev->speed, diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e11b03b2b25a..e976c1c60095 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -74,7 +74,7 @@ int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, if (!fixup) return -ENOMEM; - strncpy(fixup->bus_id, bus_id, BUS_ID_SIZE); + strlcpy(fixup->bus_id, bus_id, sizeof(fixup->bus_id)); fixup->phy_uid = phy_uid; fixup->phy_uid_mask = phy_uid_mask; fixup->run = run; @@ -109,7 +109,7 @@ EXPORT_SYMBOL(phy_register_fixup_for_id); */ static int phy_needs_fixup(struct phy_device *phydev, struct phy_fixup *fixup) { - if (strcmp(fixup->bus_id, phydev->dev.bus_id) != 0) + if (strcmp(fixup->bus_id, dev_name(&phydev->dev)) != 0) if (strcmp(fixup->bus_id, PHY_ANY_ID) != 0) return 0; diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c index c51bfc57d405..077d796ccb70 100644 --- a/drivers/net/sh_eth.c +++ b/drivers/net/sh_eth.c @@ -799,7 +799,7 @@ static int sh_eth_phy_init(struct net_device *ndev) char phy_id[BUS_ID_SIZE]; struct phy_device *phydev = NULL; - snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, mdp->mii_bus->id , mdp->phy_id); mdp->link = PHY_DOWN; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 546b9eeaa171..e05849ee9000 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1396,7 +1396,7 @@ static int tg3_phy_init(struct tg3 *tp) phydev = tp->mdio_bus->phy_map[PHY_ADDR]; /* Attach the MAC to the PHY. 
*/ - phydev = phy_connect(tp->dev, phydev->dev.bus_id, tg3_adjust_link, + phydev = phy_connect(tp->dev, dev_name(&phydev->dev), tg3_adjust_link, phydev->dev_flags, phydev->interface); if (IS_ERR(phydev)) { printk(KERN_ERR "%s: Could not attach to PHY\n", tp->dev->name); @@ -13645,7 +13645,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, "%s: attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", tp->dev->name, tp->mdio_bus->phy_map[PHY_ADDR]->drv->name, - tp->mdio_bus->phy_map[PHY_ADDR]->dev.bus_id); + dev_name(&tp->mdio_bus->phy_map[PHY_ADDR]->dev)); else printk(KERN_INFO "%s: attached PHY is %s (%s Ethernet) (WireSpeed[%d])\n", diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index f8a45253eaf8..67bfd6f43366 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -1118,7 +1118,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) } dev->base_addr = iobase; - printk ("%s: %s at 0x%04lx", gendev->bus_id, name, iobase); + printk ("%s: %s at 0x%04lx", dev_name(gendev), name, iobase); status = get_hw_addr(dev); printk(", h/w address %pM\n", dev->dev_addr); @@ -1153,7 +1153,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) } } lp->fdx = lp->params.fdx; - sprintf(lp->adapter_name,"%s (%s)", name, gendev->bus_id); + sprintf(lp->adapter_name,"%s (%s)", name, dev_name(gendev)); lp->dma_size = (NUM_RX_DESC + NUM_TX_DESC) * sizeof(struct de4x5_desc); #if defined(__alpha__) || defined(__powerpc__) || defined(CONFIG_SPARC) || defined(DE4X5_DO_MEMCPY) diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index 4931af736630..0a5b817fd7ac 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -1615,8 +1615,8 @@ static int init_phy(struct net_device *dev) priv->oldspeed = 0; priv->oldduplex = -1; - snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, priv->ug_info->mdio_bus, - priv->ug_info->phy_address); + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, 
priv->ug_info->mdio_bus, + priv->ug_info->phy_address); phydev = phy_connect(dev, phy_id, &adjust_link, 0, priv->phy_interface); diff --git a/drivers/net/wireless/libertas/defs.h b/drivers/net/wireless/libertas/defs.h index 076a636e8f62..2d4666f26808 100644 --- a/drivers/net/wireless/libertas/defs.h +++ b/drivers/net/wireless/libertas/defs.h @@ -79,7 +79,7 @@ do { if ((lbs_debug & (grp)) == (grp)) \ #define lbs_deb_tx(fmt, args...) LBS_DEB_LL(LBS_DEB_TX, " tx", fmt, ##args) #define lbs_deb_fw(fmt, args...) LBS_DEB_LL(LBS_DEB_FW, " fw", fmt, ##args) #define lbs_deb_usb(fmt, args...) LBS_DEB_LL(LBS_DEB_USB, " usb", fmt, ##args) -#define lbs_deb_usbd(dev, fmt, args...) LBS_DEB_LL(LBS_DEB_USB, " usbd", "%s:" fmt, (dev)->bus_id, ##args) +#define lbs_deb_usbd(dev, fmt, args...) LBS_DEB_LL(LBS_DEB_USB, " usbd", "%s:" fmt, dev_name(dev), ##args) #define lbs_deb_cs(fmt, args...) LBS_DEB_LL(LBS_DEB_CS, " cs", fmt, ##args) #define lbs_deb_thread(fmt, args...) LBS_DEB_LL(LBS_DEB_THREAD, " thread", fmt, ##args) #define lbs_deb_sdio(fmt, args...) 
LBS_DEB_LL(LBS_DEB_SDIO, " sdio", fmt, ##args) diff --git a/drivers/net/wireless/orinoco/orinoco.c b/drivers/net/wireless/orinoco/orinoco.c index f4ea08f96970..072be44b37de 100644 --- a/drivers/net/wireless/orinoco/orinoco.c +++ b/drivers/net/wireless/orinoco/orinoco.c @@ -5987,7 +5987,7 @@ static void orinoco_get_drvinfo(struct net_device *dev, strncpy(info->version, DRIVER_VERSION, sizeof(info->version) - 1); strncpy(info->fw_version, priv->fw_name, sizeof(info->fw_version) - 1); if (dev->dev.parent) - strncpy(info->bus_info, dev->dev.parent->bus_id, + strncpy(info->bus_info, dev_name(dev->dev.parent), sizeof(info->bus_info) - 1); else snprintf(info->bus_info, sizeof(info->bus_info) - 1, diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index 6fcf2bda7cdf..bf6a51da3b29 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -308,7 +308,7 @@ orinoco_cs_config(struct pcmcia_device *link) /* Finally, report what we've done */ printk(KERN_DEBUG "%s: " DRIVER_NAME " at %s, irq %d, io " - "0x%04x-0x%04x\n", dev->name, dev->dev.parent->bus_id, + "0x%04x-0x%04x\n", dev->name, dev_name(dev->dev.parent), link->irq.AssignedIRQ, link->io.BasePort1, link->io.BasePort1 + link->io.NumPorts1 - 1); return 0; diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c index 852789ad34b3..0bae3dcf9d50 100644 --- a/drivers/net/wireless/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/orinoco/spectrum_cs.c @@ -383,7 +383,7 @@ spectrum_cs_config(struct pcmcia_device *link) /* Finally, report what we've done */ printk(KERN_DEBUG "%s: " DRIVER_NAME " at %s, irq %d, io " - "0x%04x-0x%04x\n", dev->name, dev->dev.parent->bus_id, + "0x%04x-0x%04x\n", dev->name, dev_name(dev->dev.parent), link->irq.AssignedIRQ, link->io.BasePort1, link->io.BasePort1 + link->io.NumPorts1 - 1); diff --git a/include/net/wireless.h b/include/net/wireless.h index 
17d4b582cf34..412351560b76 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -263,9 +263,9 @@ static inline struct device *wiphy_dev(struct wiphy *wiphy) /** * wiphy_name - get wiphy name */ -static inline char *wiphy_name(struct wiphy *wiphy) +static inline const char *wiphy_name(struct wiphy *wiphy) { - return wiphy->dev.bus_id; + return dev_name(&wiphy->dev); } /** diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 1b88311f2130..b5674dc2083d 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -149,7 +149,7 @@ int atm_register_sysfs(struct atm_dev *adev) cdev->class = &atm_class; dev_set_drvdata(cdev, adev); - snprintf(cdev->bus_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number); + dev_set_name(cdev, "%s%d", adev->type, adev->number); err = device_register(cdev); if (err < 0) return err; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index f4f6615cad9f..f2bbb2f65434 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -113,8 +113,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) conn->dev.class = bt_class; conn->dev.parent = &hdev->dev; - snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d", - hdev->name, conn->handle); + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); dev_set_drvdata(&conn->dev, conn); @@ -132,7 +131,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) */ static int __match_tty(struct device *dev, void *data) { - return !strncmp(dev->bus_id, "rfcomm", 6); + return !strncmp(dev_name(dev), "rfcomm", 6); } static void del_conn(struct work_struct *work) @@ -421,7 +420,7 @@ int hci_register_sysfs(struct hci_dev *hdev) dev->class = bt_class; dev->parent = hdev->parent; - strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE); + dev_set_name(dev, hdev->name); dev_set_drvdata(dev, hdev); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 85cb8bdcfb8f..146dcfeb060e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -494,7 +494,7 @@ int 
netdev_register_kobject(struct net_device *net) dev->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); + dev_set_name(dev, net->name); #ifdef CONFIG_SYSFS *groups++ = &netstat_group; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 37616884b8a9..7384bad81652 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -284,7 +284,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); if (p->phy != NULL) { - phy_attach(slave_dev, p->phy->dev.bus_id, + phy_attach(slave_dev, dev_name(&p->phy->dev), 0, PHY_INTERFACE_MODE_GMII); p->phy->autoneg = AUTONEG_ENABLE; diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 69f3a3b4dd61..ec26eae8004d 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -774,7 +774,7 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill) int error; if (!rfkill->led_trigger.name) - rfkill->led_trigger.name = rfkill->dev.bus_id; + rfkill->led_trigger.name = dev_name(&rfkill->dev); if (!rfkill->led_trigger.activate) rfkill->led_trigger.activate = rfkill_led_trigger_activate; error = led_trigger_register(&rfkill->led_trigger); @@ -815,8 +815,7 @@ int __must_check rfkill_register(struct rfkill *rfkill) "badly initialized rfkill struct\n")) return -EINVAL; - snprintf(dev->bus_id, sizeof(dev->bus_id), - "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); + dev_set_name(dev, "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); rfkill_led_trigger_register(rfkill); diff --git a/net/wireless/core.c b/net/wireless/core.c index 72825afe2bf6..39e3d10fccde 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -236,8 +236,7 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_unlock(&cfg80211_drv_mutex); /* give it a proper name */ - snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE, - PHY_NAME "%d", drv->idx); + dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx); mutex_init(&drv->mtx); 
mutex_init(&drv->devlist_mtx); -- cgit v1.2.3 From eb37b41cc2274cdecfc19d371717c321fe2ab426 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 10 Nov 2008 16:48:03 -0800 Subject: pktgen: add full reset functionality While testing pktgen, I found that sometimes my configurations from previous runs would be left over, particularly when going from a test with 8 threads down to a test with 4 threads. This adds new functionality to pktgen where you can call pgset "reset" and it will be just like you just insmod'ed pktgen again. Signed-off-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fa4973bf73e9..a4f5ad1ab352 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -422,6 +422,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); static void pktgen_run_all_threads(void); +static void pktgen_reset_all_threads(void); static void pktgen_stop_all_threads_ifs(void); static int pktgen_stop_device(struct pktgen_dev *pkt_dev); static void pktgen_stop(struct pktgen_thread *t); @@ -480,6 +481,9 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, else if (!strcmp(data, "start")) pktgen_run_all_threads(); + else if (!strcmp(data, "reset")) + pktgen_reset_all_threads(); + else printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); @@ -3173,6 +3177,24 @@ static void pktgen_run_all_threads(void) pktgen_wait_all_threads_run(); } +static void pktgen_reset_all_threads(void) +{ + struct pktgen_thread *t; + + pr_debug("pktgen: entering pktgen_reset_all_threads.\n"); + + mutex_lock(&pktgen_thread_lock); + + list_for_each_entry(t, &pktgen_threads, th_list) + t->control |= (T_REMDEVALL); + + 
mutex_unlock(&pktgen_thread_lock); + + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + + pktgen_wait_all_threads_run(); +} + static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) { __u64 total_us, bps, mbps, pps, idle; -- cgit v1.2.3 From 9b739ba5e66c96938fbc07a4dbd9da5b81eac56f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Nov 2008 16:47:44 -0800 Subject: net: remove struct neigh_table::pde ->pde isn't actually needed, since name is stashed in ->id. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/neighbour.h | 3 --- net/core/neighbour.c | 5 ++--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index aa4b708654a4..365b5e260239 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -180,9 +180,6 @@ struct neigh_table __u32 hash_rnd; unsigned int hash_chain_gc; struct pneigh_entry **phash_buckets; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; -#endif }; /* flags for neigh_update() */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d9bbe010e0ee..500c2430007c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1424,9 +1424,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = proc_create_data(tbl->id, 0, init_net.proc_net_stat, - &neigh_stat_seq_fops, tbl); - if (!tbl->pde) + if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat, + &neigh_stat_seq_fops, tbl)) panic("cannot create neighbour proc dir entry"); #endif -- cgit v1.2.3 From e42ea986e4a4cab4209d982feffcaf50f21e80e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:54:54 -0800 Subject: net: Cleanup of neighbour code Using read_pnet() and write_pnet() in neighbour code ease the reading of code. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/neighbour.h | 12 ++---------- net/core/neighbour.c | 12 +++--------- 2 files changed, 5 insertions(+), 19 deletions(-) (limited to 'net/core') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 365b5e260239..d8d790e56d3d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -220,11 +220,7 @@ extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *p static inline struct net *neigh_parms_net(const struct neigh_parms *parms) { -#ifdef CONFIG_NET_NS - return parms->net; -#else - return &init_net; -#endif + return read_pnet(&parms->net); } extern unsigned long neigh_rand_reach_time(unsigned long base); @@ -241,11 +237,7 @@ extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void static inline struct net *pneigh_net(const struct pneigh_entry *pneigh) { -#ifdef CONFIG_NET_NS - return pneigh->net; -#else - return &init_net; -#endif + return read_pnet(&pneigh->net); } extern void neigh_app_ns(struct neighbour *n); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 500c2430007c..cca6a55909eb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -531,9 +531,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; -#ifdef CONFIG_NET_NS - n->net = hold_net(net); -#endif + write_pnet(&n->net, hold_net(net)); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -1350,9 +1348,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, dev_hold(dev); p->dev = dev; -#ifdef CONFIG_NET_NS - p->net = hold_net(net); -#endif + write_pnet(&p->net, hold_net(net)); p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; @@ -1407,9 +1403,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) unsigned long now = jiffies; unsigned long phsize; -#ifdef CONFIG_NET_NS - tbl->parms.net = &init_net; -#endif + write_pnet(&tbl->parms.net, &init_net); atomic_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = 
neigh_rand_reach_time(tbl->parms.base_reachable_time); -- cgit v1.2.3 From 8192b0c482d7078fcdcb4854341b977426f6f09b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:10 +1100 Subject: CRED: Wrap task credential accesses in the networking subsystem Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn Cc: netdev@vger.kernel.org Signed-off-by: James Morris --- include/net/scm.h | 4 ++-- net/core/dev.c | 8 ++++++-- net/core/scm.c | 8 ++++---- net/socket.c | 4 ++-- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..f160116db54a 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -54,8 +54,8 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { struct task_struct *p = current; - scm->creds.uid = p->uid; - scm->creds.gid = p->gid; + scm->creds.uid = current_uid(); + scm->creds.gid = current_gid(); scm->creds.pid = task_tgid_vnr(p); scm->fp = NULL; scm->seq = 0; diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc1..262df226b3c9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2958,6 +2958,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + uid_t uid; + gid_t gid; ASSERT_RTNL(); @@ -2982,15 +2984,17 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? 
"entered" : "left"); - if (audit_enabled) + if (audit_enabled) { + current_uid_gid(&uid, &gid); audit_log(current->audit_context, GFP_ATOMIC, AUDIT_ANOM_PROMISCUOUS, "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current), - current->uid, current->gid, + uid, gid, audit_get_sessionid(current)); + } dev_change_rx_flags(dev, IFF_PROMISC); } diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..4681d8f9b45b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -45,10 +45,10 @@ static __inline__ int scm_check_creds(struct ucred *creds) { if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == current->uid || creds->uid == current->euid || - creds->uid == current->suid) || capable(CAP_SETUID)) && - ((creds->gid == current->gid || creds->gid == current->egid || - creds->gid == current->sgid) || capable(CAP_SETGID))) { + ((creds->uid == current_uid() || creds->uid == current_euid() || + creds->uid == current_suid()) || capable(CAP_SETUID)) && + ((creds->gid == current_gid() || creds->gid == current_egid() || + creds->gid == current_sgid()) || capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/socket.c b/net/socket.c index 57550c3bcabe..62c7729527ff 100644 --- a/net/socket.c +++ b/net/socket.c @@ -491,8 +491,8 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); inode->i_mode = S_IFSOCK | S_IRWXUGO; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); get_cpu_var(sockets_in_use)++; put_cpu_var(sockets_in_use); -- cgit v1.2.3 From b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:16 +1100 Subject: CRED: Separate task security context from task_struct Separate the task security context from task_struct. 
At this point, the security data is temporarily embedded in the task_struct with two pointers pointing to it. Note that the Alpha arch is altered as it refers to (E)UID and (E)GID in entry.S via asm-offsets. With comment fixes Signed-off-by: Marc Dionne Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/alpha/kernel/asm-offsets.c | 11 +- arch/alpha/kernel/entry.S | 10 +- arch/ia64/ia32/sys_ia32.c | 8 +- arch/mips/kernel/kspd.c | 4 +- arch/s390/kernel/compat_linux.c | 28 ++--- drivers/connector/cn_proc.c | 8 +- fs/binfmt_elf.c | 12 +- fs/binfmt_elf_fdpic.c | 12 +- fs/exec.c | 4 +- fs/fcntl.c | 4 +- fs/file_table.c | 4 +- fs/fuse/dir.c | 12 +- fs/hugetlbfs/inode.c | 4 +- fs/ioprio.c | 12 +- fs/nfsd/auth.c | 22 ++-- fs/nfsd/nfs4recover.c | 12 +- fs/nfsd/nfsfh.c | 6 +- fs/open.c | 17 +-- fs/proc/array.c | 18 +-- fs/proc/base.c | 16 +-- fs/xfs/linux-2.6/xfs_cred.h | 6 +- fs/xfs/linux-2.6/xfs_globals.h | 2 +- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_vnodeops.h | 10 +- include/linux/cred.h | 155 +++++++++++++++++++---- include/linux/init_task.h | 24 ++-- include/linux/sched.h | 52 +------- include/linux/securebits.h | 2 +- ipc/mqueue.c | 2 +- ipc/shm.c | 4 +- kernel/auditsc.c | 52 ++++---- kernel/capability.c | 4 +- kernel/cgroup.c | 4 +- kernel/exit.c | 10 +- kernel/fork.c | 24 ++-- kernel/futex.c | 6 +- kernel/futex_compat.c | 5 +- kernel/ptrace.c | 19 +-- kernel/sched.c | 10 +- kernel/signal.c | 16 +-- kernel/sys.c | 266 ++++++++++++++++++++++----------------- kernel/trace/trace.c | 2 +- kernel/tsacct.c | 4 +- kernel/uid16.c | 28 ++--- kernel/user.c | 4 +- mm/mempolicy.c | 10 +- mm/migrate.c | 10 +- mm/oom_kill.c | 2 +- net/core/scm.c | 10 +- net/sunrpc/auth.c | 2 +- security/commoncap.c | 161 +++++++++++++----------- security/keys/keyctl.c | 25 ++-- security/keys/permission.c | 11 +- security/keys/process_keys.c | 98 ++++++++------- security/keys/request_key.c | 18 +-- security/keys/request_key_auth.c | 12 +- 
security/selinux/exports.c | 2 +- security/selinux/hooks.c | 116 ++++++++--------- security/selinux/selinuxfs.c | 2 +- security/selinux/xfrm.c | 6 +- security/smack/smack_access.c | 4 +- security/smack/smack_lsm.c | 77 ++++++------ security/smack/smackfs.c | 6 +- 63 files changed, 832 insertions(+), 677 deletions(-) (limited to 'net/core') diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c index 4b18cd94d59d..6ff8886e7e22 100644 --- a/arch/alpha/kernel/asm-offsets.c +++ b/arch/alpha/kernel/asm-offsets.c @@ -19,15 +19,18 @@ void foo(void) BLANK(); DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); - DEFINE(TASK_UID, offsetof(struct task_struct, uid)); - DEFINE(TASK_EUID, offsetof(struct task_struct, euid)); - DEFINE(TASK_GID, offsetof(struct task_struct, gid)); - DEFINE(TASK_EGID, offsetof(struct task_struct, egid)); + DEFINE(TASK_CRED, offsetof(struct task_struct, cred)); DEFINE(TASK_REAL_PARENT, offsetof(struct task_struct, real_parent)); DEFINE(TASK_GROUP_LEADER, offsetof(struct task_struct, group_leader)); DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); BLANK(); + DEFINE(CRED_UID, offsetof(struct cred, uid)); + DEFINE(CRED_EUID, offsetof(struct cred, euid)); + DEFINE(CRED_GID, offsetof(struct cred, gid)); + DEFINE(CRED_EGID, offsetof(struct cred, egid)); + BLANK(); + DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); DEFINE(PT_PTRACED, PT_PTRACED); DEFINE(CLONE_VM, CLONE_VM); diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 5fc61e281ac7..f77345bc66a9 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -850,8 +850,9 @@ osf_getpriority: sys_getxuid: .prologue 0 ldq $2, TI_TASK($8) - ldl $0, TASK_UID($2) - ldl $1, TASK_EUID($2) + ldq $3, TASK_CRED($2) + ldl $0, CRED_UID($3) + ldl $1, CRED_EUID($3) stq $1, 80($sp) ret .end sys_getxuid @@ -862,8 +863,9 @@ sys_getxuid: sys_getxgid: .prologue 0 ldq $2, TI_TASK($8) - ldl $0, TASK_GID($2) - ldl $1, TASK_EGID($2) + ldq $3, 
TASK_CRED($2) + ldl $0, CRED_GID($3) + ldl $1, CRED_EGID($3) stq $1, 80($sp) ret .end sys_getxgid diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 5e92ae00bdbb..2445a9d3488e 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1772,20 +1772,20 @@ sys32_getgroups16 (int gidsetsize, short __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index b0591ae0ce56..fd6e51224034 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -174,8 +174,8 @@ static unsigned int translate_open_flags(int flags) static void sp_setfsuidgid( uid_t uid, gid_t gid) { - current->fsuid = uid; - current->fsgid = gid; + current->cred->fsuid = uid; + current->cred->fsgid = gid; key_fsuid_changed(current); key_fsgid_changed(current); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 4646382af34f..6cc87d8c8682 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -148,9 +148,9 @@ asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); 
return retval; } @@ -165,9 +165,9 @@ asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -217,20 +217,20 @@ asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -261,22 +261,22 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) asmlinkage long sys32_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys32_geteuid16(void) { - return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys32_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys32_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } /* diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 5c9f67f98d10..354c1ff17159 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -116,11 +116,11 @@ void proc_id_connector(struct task_struct *task, int which_id) 
ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; if (which_id == PROC_EVENT_UID) { - ev->event_data.id.r.ruid = task->uid; - ev->event_data.id.e.euid = task->euid; + ev->event_data.id.r.ruid = task->cred->uid; + ev->event_data.id.e.euid = task->cred->euid; } else if (which_id == PROC_EVENT_GID) { - ev->event_data.id.r.rgid = task->gid; - ev->event_data.id.e.egid = task->egid; + ev->event_data.id.r.rgid = task->cred->gid; + ev->event_data.id.e.egid = task->cred->egid; } else return; get_seq(&msg->seq, &ev->cpu); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8fcfa398d350..7a52477ce493 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, tsk->uid); - NEW_AUX_ENT(AT_EUID, tsk->euid); - NEW_AUX_ENT(AT_GID, tsk->gid); - NEW_AUX_ENT(AT_EGID, tsk->egid); + NEW_AUX_ENT(AT_UID, tsk->cred->uid); + NEW_AUX_ENT(AT_EUID, tsk->cred->euid); + NEW_AUX_ENT(AT_GID, tsk->cred->gid); + NEW_AUX_ENT(AT_EGID, tsk->cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { @@ -1388,8 +1388,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->uid); - SET_GID(psinfo->pr_gid, p->gid); + SET_UID(psinfo->pr_uid, p->cred->uid); + SET_GID(psinfo->pr_gid, p->cred->gid); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 488584c87512..9f67054c2c4e 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -623,10 +623,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); 
NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) current_uid()); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) current_euid()); - NEW_AUX_ENT(AT_GID, (elf_addr_t) current_gid()); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) current_egid()); + NEW_AUX_ENT(AT_UID, (elf_addr_t) current->cred->uid); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->cred->euid); + NEW_AUX_ENT(AT_GID, (elf_addr_t) current->cred->gid); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); @@ -1440,8 +1440,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->uid); - SET_GID(psinfo->pr_gid, p->gid); + SET_UID(psinfo->pr_uid, p->cred->uid); + SET_GID(psinfo->pr_gid, p->cred->gid); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/exec.c b/fs/exec.c index 604834f3b208..31149e430a89 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1738,7 +1738,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) */ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ - current->fsuid = 0; /* Dump root private */ + current->cred->fsuid = 0; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); @@ -1834,7 +1834,7 @@ fail_unlock: if (helper_argv) argv_free(helper_argv); - current->fsuid = fsuid; + current->cred->fsuid = fsuid; coredump_finish(mm); fail: return retval; diff --git a/fs/fcntl.c b/fs/fcntl.c index bf049a805e59..63964d863ad6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -401,8 +401,8 @@ static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { return (((fown->euid == 0) || - (fown->euid == p->suid) || (fown->euid == p->uid) || - (fown->uid == p->suid) || (fown->uid == 
p->uid)) && + (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) || + (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) && !security_file_send_sigiotask(p, fown, sig)); } diff --git a/fs/file_table.c b/fs/file_table.c index 5ad0eca6eea2..3152b53cfab0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -122,8 +122,8 @@ struct file *get_empty_filp(void) INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = tsk->fsuid; - f->f_gid = tsk->fsgid; + f->f_uid = tsk->cred->fsuid; + f->f_gid = tsk->cred->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index fd03330cadeb..e97a98981862 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -872,12 +872,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) if (fc->flags & FUSE_ALLOW_OTHER) return 1; - if (task->euid == fc->user_id && - task->suid == fc->user_id && - task->uid == fc->user_id && - task->egid == fc->group_id && - task->sgid == fc->group_id && - task->gid == fc->group_id) + if (task->cred->euid == fc->user_id && + task->cred->suid == fc->user_id && + task->cred->uid == fc->user_id && + task->cred->egid == fc->group_id && + task->cred->sgid == fc->group_id && + task->cred->gid == fc->group_id) return 1; return 0; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 08ad76c79b49..870a721b8bd2 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -958,7 +958,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); - if (!user_shm_lock(size, current->user)) + if (!user_shm_lock(size, current->cred->user)) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; @@ -998,7 +998,7 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - user_shm_unlock(size, current->user); + user_shm_unlock(size, current->cred->user); return ERR_PTR(error); } diff --git a/fs/ioprio.c b/fs/ioprio.c 
index 68d2cd807118..bb5210af77c2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -32,8 +32,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) int err; struct io_context *ioc; - if (task->uid != current_euid() && - task->uid != current_uid() && !capable(CAP_SYS_NICE)) + if (task->cred->uid != current_euid() && + task->cred->uid != current_uid() && !capable(CAP_SYS_NICE)) return -EPERM; err = security_task_setioprio(task, ioprio); @@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; case IOPRIO_WHO_USER: if (!who) - user = current->user; + user = current->cred->user; else user = find_user(who); @@ -131,7 +131,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; do_each_thread(g, p) { - if (p->uid != who) + if (p->cred->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; case IOPRIO_WHO_USER: if (!who) - user = current->user; + user = current->cred->user; else user = find_user(who); @@ -224,7 +224,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; do_each_thread(g, p) { - if (p->uid != user->uid) + if (p->cred->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 294992e9bf69..808fc03a6fbd 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,6 +27,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) { + struct cred *act_as = current->cred ; struct svc_cred cred = rqstp->rq_cred; int i; int flags = nfsexp_flags(rqstp, exp); @@ -55,25 +56,26 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) get_group_info(cred.cr_group_info); if (cred.cr_uid != (uid_t) -1) - current->fsuid = cred.cr_uid; + act_as->fsuid = cred.cr_uid; else - current->fsuid = exp->ex_anon_uid; + act_as->fsuid = exp->ex_anon_uid; if (cred.cr_gid != (gid_t) -1) 
- current->fsgid = cred.cr_gid; + act_as->fsgid = cred.cr_gid; else - current->fsgid = exp->ex_anon_gid; + act_as->fsgid = exp->ex_anon_gid; if (!cred.cr_group_info) return -ENOMEM; - ret = set_current_groups(cred.cr_group_info); + ret = set_groups(act_as, cred.cr_group_info); put_group_info(cred.cr_group_info); if ((cred.cr_uid)) { - current->cap_effective = - cap_drop_nfsd_set(current->cap_effective); + act_as->cap_effective = + cap_drop_nfsd_set(act_as->cap_effective); } else { - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + act_as->cap_effective = + cap_raise_nfsd_set(act_as->cap_effective, + act_as->cap_permitted); } return ret; } + diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index bb93946ace22..a5e14e8695ea 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -57,17 +57,17 @@ static int rec_dir_init = 0; static void nfs4_save_user(uid_t *saveuid, gid_t *savegid) { - *saveuid = current->fsuid; - *savegid = current->fsgid; - current->fsuid = 0; - current->fsgid = 0; + *saveuid = current->cred->fsuid; + *savegid = current->cred->fsgid; + current->cred->fsuid = 0; + current->cred->fsgid = 0; } static void nfs4_reset_user(uid_t saveuid, gid_t savegid) { - current->fsuid = saveuid; - current->fsgid = savegid; + current->cred->fsuid = saveuid; + current->cred->fsgid = savegid; } static void diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cd25d91895a1..e67cfaea0865 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -186,9 +186,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) * access control settings being in effect, we cannot * fix that case easily. 
*/ - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + current->cred->cap_effective = + cap_raise_nfsd_set(current->cred->cap_effective, + current->cred->cap_permitted); } else { error = nfsd_setuser_and_check_port(rqstp, exp); if (error) diff --git a/fs/open.c b/fs/open.c index 500cc0c54762..b1238e195e7e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -425,6 +425,7 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { + struct cred *cred = current->cred; struct path path; struct inode *inode; int old_fsuid, old_fsgid; @@ -434,18 +435,18 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; - old_fsuid = current->fsuid; - old_fsgid = current->fsgid; + old_fsuid = cred->fsuid; + old_fsgid = cred->fsgid; - current->fsuid = current->uid; - current->fsgid = current->gid; + cred->fsuid = cred->uid; + cred->fsgid = cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ - if (current->uid) + if (current->cred->uid) old_cap = cap_set_effective(__cap_empty_set); else - old_cap = cap_set_effective(current->cap_permitted); + old_cap = cap_set_effective(cred->cap_permitted); } res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); @@ -484,8 +485,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) out_path_release: path_put(&path); out: - current->fsuid = old_fsuid; - current->fsgid = old_fsgid; + cred->fsuid = old_fsuid; + cred->fsgid = old_fsgid; if (!issecure(SECURE_NO_SETUID_FIXUP)) cap_set_effective(old_cap); diff --git a/fs/proc/array.c b/fs/proc/array.c index 6af7fba7abb1..62fe9b2009b6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -182,8 +182,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - p->uid, p->euid, 
p->suid, p->fsuid, - p->gid, p->egid, p->sgid, p->fsgid); + p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid, + p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid); task_lock(p); if (p->files) @@ -194,7 +194,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, fdt ? fdt->max_fds : 0); rcu_read_unlock(); - group_info = p->group_info; + group_info = p->cred->group_info; get_group_info(group_info); task_unlock(p); @@ -262,7 +262,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); - qsize = atomic_read(&p->user->sigpending); + qsize = atomic_read(&p->cred->user->sigpending); qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } @@ -293,10 +293,12 @@ static void render_cap_t(struct seq_file *m, const char *header, static inline void task_cap(struct seq_file *m, struct task_struct *p) { - render_cap_t(m, "CapInh:\t", &p->cap_inheritable); - render_cap_t(m, "CapPrm:\t", &p->cap_permitted); - render_cap_t(m, "CapEff:\t", &p->cap_effective); - render_cap_t(m, "CapBnd:\t", &p->cap_bset); + struct cred *cred = p->cred; + + render_cap_t(m, "CapInh:\t", &cred->cap_inheritable); + render_cap_t(m, "CapPrm:\t", &cred->cap_permitted); + render_cap_t(m, "CapEff:\t", &cred->cap_effective); + render_cap_t(m, "CapBnd:\t", &cred->cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe7139..6862b360c36c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1428,8 +1428,8 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = 0; inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } security_task_to_inode(task, inode); @@ -1454,8 
+1454,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->euid; - stat->gid = task->egid; + stat->uid = task->cred->euid; + stat->gid = task->cred->egid; } } rcu_read_unlock(); @@ -1486,8 +1486,8 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1658,8 +1658,8 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } else { inode->i_uid = 0; inode->i_gid = 0; diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 293043a5573a..8c022cd0ad67 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -23,11 +23,9 @@ /* * Credentials */ -typedef struct cred { - /* EMPTY */ -} cred_t; +typedef const struct cred cred_t; -extern struct cred *sys_cred; +extern cred_t *sys_cred; /* this is a hack.. 
(assumes sys_cred is the only cred_t in the system) */ static inline int capable_cred(cred_t *cr, int cid) diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h index 2770b0085ee8..6eda8a3eb6f1 100644 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -19,6 +19,6 @@ #define __XFS_GLOBALS_H__ extern uint64_t xfs_panic_mask; /* set to cause more panics */ -extern struct cred *sys_cred; +extern cred_t *sys_cred; #endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1420c49674d7..6be310d41daf 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -497,7 +497,7 @@ int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_inode_t **, xfs_daddr_t, uint); int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, - xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, + xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); void xfs_dinode_from_disk(struct xfs_icdinode *, struct xfs_dinode_core *); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index e932a96bec54..7b0c2ab88333 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -16,7 +16,7 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, - struct cred *credp); + cred_t *credp); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ @@ -28,24 +28,24 @@ int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, - xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); + xfs_dev_t rdev, struct xfs_inode 
**ipp, cred_t *credp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, struct xfs_name *target_name); int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, - mode_t mode, struct xfs_inode **ipp, struct cred *credp); + mode_t mode, struct xfs_inode **ipp, cred_t *credp); int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, xfs_off_t *offset, filldir_t filldir); int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, mode_t mode, struct xfs_inode **ipp, - struct cred *credp); + cred_t *credp); int xfs_inode_flush(struct xfs_inode *ip, int flags); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_reclaim(struct xfs_inode *ip); int xfs_change_file_space(struct xfs_inode *ip, int cmd, xfs_flock64_t *bf, xfs_off_t offset, - struct cred *credp, int attr_flags); + cred_t *credp, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); diff --git a/include/linux/cred.h b/include/linux/cred.h index b69222cc1fd2..3e65587a72e5 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,39 +12,150 @@ #ifndef _LINUX_CRED_H #define _LINUX_CRED_H -#define get_current_user() (get_uid(current->user)) - -#define task_uid(task) ((task)->uid) -#define task_gid(task) ((task)->gid) -#define task_euid(task) ((task)->euid) -#define task_egid(task) ((task)->egid) - -#define current_uid() (current->uid) -#define current_gid() (current->gid) -#define current_euid() (current->euid) -#define current_egid() (current->egid) -#define current_suid() (current->suid) -#define current_sgid() (current->sgid) -#define current_fsuid() (current->fsuid) -#define current_fsgid() (current->fsgid) -#define current_cap() (current->cap_effective) +#include <linux/capability.h> +#include <linux/key.h> +#include <asm/atomic.h> + 
+struct user_struct; +struct cred; + +/* + * COW Supplementary groups list + */ +#define NGROUPS_SMALL 32 +#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) + +struct group_info { + atomic_t usage; + int ngroups; + int nblocks; + gid_t small_block[NGROUPS_SMALL]; + gid_t *blocks[0]; +}; + +/** + * get_group_info - Get a reference to a group info structure + * @group_info: The group info to reference + * + * This must be called with the owning task locked (via task_lock()) when task + * != current. The reason being that the vast majority of callers are looking + * at current->group_info, which can not be changed except by the current task. + * Changing current->group_info requires the task lock, too. + */ +#define get_group_info(group_info) \ +do { \ + atomic_inc(&(group_info)->usage); \ +} while (0) + +/** + * put_group_info - Release a reference to a group info structure + * @group_info: The group info to release + */ +#define put_group_info(group_info) \ +do { \ + if (atomic_dec_and_test(&(group_info)->usage)) \ + groups_free(group_info); \ +} while (0) + +extern struct group_info *groups_alloc(int); +extern void groups_free(struct group_info *); +extern int set_current_groups(struct group_info *); +extern int set_groups(struct cred *, struct group_info *); +extern int groups_search(struct group_info *, gid_t); + +/* access the groups "array" with this macro */ +#define GROUP_AT(gi, i) \ + ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) + +extern int in_group_p(gid_t); +extern int in_egroup_p(gid_t); + +/* + * The security context of a task + * + * The parts of the context break down into two categories: + * + * (1) The objective context of a task. These parts are used when some other + * task is attempting to affect this one. + * + * (2) The subjective context. These details are used when the task is acting + * upon another object, be that a file, a task, a key or whatever. 
+ * + * Note that some members of this structure belong to both categories - the + * LSM security pointer for instance. + * + * A task has two security pointers. task->real_cred points to the objective + * context that defines that task's actual details. The objective part of this + * context is used whenever that task is acted upon. + * + * task->cred points to the subjective context that defines the details of how + * that task is going to act upon another object. This may be overridden + * temporarily to point to another security context, but normally points to the + * same context as task->real_cred. + */ +struct cred { + atomic_t usage; + uid_t uid; /* real UID of the task */ + gid_t gid; /* real GID of the task */ + uid_t suid; /* saved UID of the task */ + gid_t sgid; /* saved GID of the task */ + uid_t euid; /* effective UID of the task */ + gid_t egid; /* effective GID of the task */ + uid_t fsuid; /* UID for VFS ops */ + gid_t fsgid; /* GID for VFS ops */ + unsigned securebits; /* SUID-less security management */ + kernel_cap_t cap_inheritable; /* caps our children can inherit */ + kernel_cap_t cap_permitted; /* caps we're permitted */ + kernel_cap_t cap_effective; /* caps we can actually use */ + kernel_cap_t cap_bset; /* capability bounding set */ +#ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested + * keys to */ + struct key *thread_keyring; /* keyring private to this thread */ + struct key *request_key_auth; /* assumed request_key authority */ +#endif +#ifdef CONFIG_SECURITY + void *security; /* subjective LSM security */ +#endif + struct user_struct *user; /* real user ID subscription */ + struct group_info *group_info; /* supplementary groups for euid/fsgid */ + struct rcu_head rcu; /* RCU deletion hook */ + spinlock_t lock; /* lock for pointer changes */ +}; + +#define get_current_user() (get_uid(current->cred->user)) + +#define task_uid(task) ((task)->cred->uid) +#define task_gid(task) ((task)->cred->gid) 
+#define task_euid(task) ((task)->cred->euid) +#define task_egid(task) ((task)->cred->egid) + +#define current_uid() (current->cred->uid) +#define current_gid() (current->cred->gid) +#define current_euid() (current->cred->euid) +#define current_egid() (current->cred->egid) +#define current_suid() (current->cred->suid) +#define current_sgid() (current->cred->sgid) +#define current_fsuid() (current->cred->fsuid) +#define current_fsgid() (current->cred->fsgid) +#define current_cap() (current->cred->cap_effective) #define current_uid_gid(_uid, _gid) \ do { \ - *(_uid) = current->uid; \ - *(_gid) = current->gid; \ + *(_uid) = current->cred->uid; \ + *(_gid) = current->cred->gid; \ } while(0) #define current_euid_egid(_uid, _gid) \ do { \ - *(_uid) = current->euid; \ - *(_gid) = current->egid; \ + *(_uid) = current->cred->euid; \ + *(_gid) = current->cred->egid; \ } while(0) #define current_fsuid_fsgid(_uid, _gid) \ do { \ - *(_uid) = current->fsuid; \ - *(_gid) = current->fsgid; \ + *(_uid) = current->cred->fsuid; \ + *(_gid) = current->cred->fsgid; \ } while(0) #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd8909b9e5..9de41ccd67b5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,6 +113,21 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +extern struct cred init_cred; + +#define INIT_CRED(p) \ +{ \ + .usage = ATOMIC_INIT(3), \ + .securebits = SECUREBITS_DEFAULT, \ + .cap_inheritable = CAP_INIT_INH_SET, \ + .cap_permitted = CAP_FULL_SET, \ + .cap_effective = CAP_INIT_EFF_SET, \ + .cap_bset = CAP_INIT_BSET, \ + .user = INIT_USER, \ + .group_info = &init_groups, \ + .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ +} + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. 
Base=0, limit=0x1fffff (=2MB) @@ -147,13 +162,8 @@ extern struct group_info init_groups; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .group_info = &init_groups, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .securebits = SECUREBITS_DEFAULT, \ - .user = INIT_USER, \ + .__temp_cred = INIT_CRED(tsk.__temp_cred), \ + .cred = &tsk.__temp_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..c8b92502354d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -660,6 +660,7 @@ extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) + struct backing_dev_info; struct reclaim_state; @@ -883,38 +884,7 @@ partition_sched_domains(int ndoms_new, cpumask_t *doms_new, #endif /* !CONFIG_SMP */ struct io_context; /* See blkdev.h */ -#define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -struct group_info { - int ngroups; - atomic_t usage; - gid_t small_block[NGROUPS_SMALL]; - int nblocks; - gid_t *blocks[0]; -}; - -/* - * get_group_info() must be called with the owning task locked (via task_lock()) - * when task != current. The reason being that the vast majority of callers are - * looking at current->group_info, which can not be changed except by the - * current task. Changing current->group_info requires the task lock, too. 
- */ -#define get_group_info(group_info) do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) -#define put_group_info(group_info) do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ - groups_free(group_info); \ -} while (0) - -extern struct group_info *groups_alloc(int gidsetsize); -extern void groups_free(struct group_info *group_info); -extern int set_current_groups(struct group_info *group_info); -extern int groups_search(struct group_info *group_info, gid_t grp); -/* access the groups "array" with this macro */ -#define GROUP_AT(gi, i) \ - ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct *t); @@ -1181,17 +1151,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - struct user_struct *user; - unsigned securebits; -#ifdef CONFIG_KEYS - unsigned char jit_keyring; /* default keyring to attach requested keys to */ - struct key *request_key_auth; /* assumed request_key authority */ - struct key *thread_keyring; /* keyring private to this thread */ -#endif + struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ + struct cred *cred; /* actual/objective task credentials */ + char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) @@ -1228,9 +1190,6 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; -#ifdef CONFIG_SECURITY - void *security; -#endif struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL uid_t loginuid; @@ -1787,9 +1746,6 @@ extern void wake_up_new_task(struct task_struct *tsk, extern void sched_fork(struct task_struct *p, int clone_flags); extern void sched_dead(struct task_struct *p); -extern int 
in_group_p(gid_t); -extern int in_egroup_p(gid_t); - extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 92f09bdf1175..6d389491bfa2 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->securebits) +#define issecure(X) (issecure_mask(X) & current->cred->securebits) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index abda5991d7e3..e1885b494bac 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -126,7 +126,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, if (S_ISREG(mode)) { struct mqueue_inode_info *info; struct task_struct *p = current; - struct user_struct *u = p->user; + struct user_struct *u = p->cred->user; unsigned long mq_bytes, mq_msg_tblsz; inode->i_fop = &mqueue_file_operations; diff --git a/ipc/shm.c b/ipc/shm.c index 0c3debbe32d5..264a9d33c5dd 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_HUGETLB) { /* hugetlb_file_setup takes care of mlock user accounting */ file = hugetlb_file_setup(name, size); - shp->mlock_user = current->user; + shp->mlock_user = current->cred->user; } else { int acctflag = VM_ACCOUNT; /* @@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock; if(cmd==SHM_LOCK) { - struct user_struct * user = current->user; + struct user_struct *user = current->cred->user; if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ diff --git 
a/kernel/auditsc.c b/kernel/auditsc.c index 9c7e47ae4576..2febf5165fad 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -447,6 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_names *name, enum audit_state *state) { + struct cred *cred = tsk->cred; int i, j, need_sid = 1; u32 sid; @@ -466,28 +467,28 @@ static int audit_filter_rules(struct task_struct *tsk, } break; case AUDIT_UID: - result = audit_comparator(tsk->uid, f->op, f->val); + result = audit_comparator(cred->uid, f->op, f->val); break; case AUDIT_EUID: - result = audit_comparator(tsk->euid, f->op, f->val); + result = audit_comparator(cred->euid, f->op, f->val); break; case AUDIT_SUID: - result = audit_comparator(tsk->suid, f->op, f->val); + result = audit_comparator(cred->suid, f->op, f->val); break; case AUDIT_FSUID: - result = audit_comparator(tsk->fsuid, f->op, f->val); + result = audit_comparator(cred->fsuid, f->op, f->val); break; case AUDIT_GID: - result = audit_comparator(tsk->gid, f->op, f->val); + result = audit_comparator(cred->gid, f->op, f->val); break; case AUDIT_EGID: - result = audit_comparator(tsk->egid, f->op, f->val); + result = audit_comparator(cred->egid, f->op, f->val); break; case AUDIT_SGID: - result = audit_comparator(tsk->sgid, f->op, f->val); + result = audit_comparator(cred->sgid, f->op, f->val); break; case AUDIT_FSGID: - result = audit_comparator(tsk->fsgid, f->op, f->val); + result = audit_comparator(cred->fsgid, f->op, f->val); break; case AUDIT_PERS: result = audit_comparator(tsk->personality, f->op, f->val); @@ -1228,6 +1229,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { + struct cred *cred = tsk->cred; int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; @@ -1237,14 +1239,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->pid = tsk->pid; if 
(!context->ppid) context->ppid = sys_getppid(); - context->uid = tsk->uid; - context->gid = tsk->gid; - context->euid = tsk->euid; - context->suid = tsk->suid; - context->fsuid = tsk->fsuid; - context->egid = tsk->egid; - context->sgid = tsk->sgid; - context->fsgid = tsk->fsgid; + context->uid = cred->uid; + context->gid = cred->gid; + context->euid = cred->euid; + context->suid = cred->suid; + context->fsuid = cred->fsuid; + context->egid = cred->egid; + context->sgid = cred->sgid; + context->fsgid = cred->fsgid; context->personality = tsk->personality; ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); @@ -2086,7 +2088,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u" " old ses=%u new ses=%u", - task->pid, task->uid, + task->pid, task->cred->uid, task->loginuid, loginuid, task->sessionid, sessionid); audit_log_end(ab); @@ -2469,7 +2471,7 @@ void __audit_ptrace(struct task_struct *t) context->target_pid = t->pid; context->target_auid = audit_get_loginuid(t); - context->target_uid = t->uid; + context->target_uid = t->cred->uid; context->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); @@ -2495,7 +2497,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (tsk->loginuid != -1) audit_sig_uid = tsk->loginuid; else - audit_sig_uid = tsk->uid; + audit_sig_uid = tsk->cred->uid; security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) @@ -2507,7 +2509,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (!ctx->target_pid) { ctx->target_pid = t->tgid; ctx->target_auid = audit_get_loginuid(t); - ctx->target_uid = t->uid; + ctx->target_uid = t->cred->uid; ctx->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); @@ -2528,7 +2530,7 @@ int 
__audit_signal_info(int sig, struct task_struct *t) axp->target_pid[axp->pid_count] = t->tgid; axp->target_auid[axp->pid_count] = audit_get_loginuid(t); - axp->target_uid[axp->pid_count] = t->uid; + axp->target_uid[axp->pid_count] = t->cred->uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); @@ -2575,12 +2577,12 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; ax->old_pcap.permitted = *pP; - ax->old_pcap.inheritable = current->cap_inheritable; + ax->old_pcap.inheritable = current->cred->cap_inheritable; ax->old_pcap.effective = *pE; - ax->new_pcap.permitted = current->cap_permitted; - ax->new_pcap.inheritable = current->cap_inheritable; - ax->new_pcap.effective = current->cap_effective; + ax->new_pcap.permitted = current->cred->cap_permitted; + ax->new_pcap.inheritable = current->cred->cap_inheritable; + ax->new_pcap.effective = current->cred->cap_effective; } /** diff --git a/kernel/capability.c b/kernel/capability.c index 58b00519624a..a404b980b1bd 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -171,8 +171,8 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) spin_lock(&task_capability_lock); - pE_old = current->cap_effective; - current->cap_effective = pE_new; + pE_old = current->cred->cap_effective; + current->cred->cap_effective = pE_new; spin_unlock(&task_capability_lock); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 78f9b310c4f3..e210526e6401 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1293,7 +1293,9 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) rcu_read_unlock(); euid = current_euid(); - if (euid && euid != tsk->uid && euid != tsk->suid) { + if (euid && + euid != tsk->cred->uid && + euid != tsk->cred->suid) { put_task_struct(tsk); return 
-EACCES; } diff --git a/kernel/exit.c b/kernel/exit.c index 80137a5d9467..e0f6e1892fb9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -160,7 +160,7 @@ void release_task(struct task_struct * p) int zap_leader; repeat: tracehook_prepare_release_task(p); - atomic_dec(&p->user->processes); + atomic_dec(&p->cred->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); @@ -1272,7 +1272,7 @@ static int wait_task_zombie(struct task_struct *p, int options, return 0; if (unlikely(options & WNOWAIT)) { - uid_t uid = p->uid; + uid_t uid = p->cred->uid; int exit_code = p->exit_code; int why, status; @@ -1393,7 +1393,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (!retval && infop) retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->uid, &infop->si_uid); + retval = put_user(p->cred->uid, &infop->si_uid); if (!retval) retval = pid; @@ -1458,7 +1458,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, if (!unlikely(options & WNOWAIT)) p->exit_code = 0; - uid = p->uid; + uid = p->cred->uid; unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1535,7 +1535,7 @@ static int wait_task_continued(struct task_struct *p, int options, spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); - uid = p->uid; + uid = p->cred->uid; get_task_struct(p); read_unlock(&tasklist_lock); diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe0..81fdc7733908 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -147,8 +147,8 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(tsk == current); security_task_free(tsk); - free_uid(tsk->user); - put_group_info(tsk->group_info); + free_uid(tsk->__temp_cred.user); + put_group_info(tsk->__temp_cred.group_info); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -969,17 +969,18 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + p->cred = &p->__temp_cred; retval = -EAGAIN; - if (atomic_read(&p->user->processes) >= + if (atomic_read(&p->cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->user != current->nsproxy->user_ns->root_user) + p->cred->user != current->nsproxy->user_ns->root_user) goto bad_fork_free; } - atomic_inc(&p->user->__count); - atomic_inc(&p->user->processes); - get_group_info(p->group_info); + atomic_inc(&p->cred->user->__count); + atomic_inc(&p->cred->user->processes); + get_group_info(p->cred->group_info); /* * If multiple threads are within copy_process(), then this check @@ -1035,9 +1036,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); #ifdef CONFIG_SECURITY - p->security = NULL; + p->cred->security = NULL; #endif - p->cap_bset = current->cap_bset; p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1298,9 +1298,9 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->group_info); - atomic_dec(&p->user->processes); - free_uid(p->user); + put_group_info(p->cred->group_info); + atomic_dec(&p->cred->user->processes); + free_uid(p->cred->user); bad_fork_free: free_task(p); fork_out: diff --git a/kernel/futex.c b/kernel/futex.c index e06962132aaf..28421d8210b8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -443,7 +443,8 @@ static struct task_struct * futex_find_get_task(pid_t pid) rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || (euid != p->euid && euid != p->uid)) + if (!p || (euid != p->cred->euid && + euid != p->cred->uid)) p = ERR_PTR(-ESRCH); else get_task_struct(p); @@ -1846,7 +1847,8 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if 
(euid != p->euid && euid != p->uid && + if (euid != p->cred->euid && + euid != p->cred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 3254d4e41e88..2c3fd5ed34f5 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->euid && euid != p->uid && - !capable(CAP_SYS_PTRACE)) + if (euid != p->cred->euid && + euid != p->cred->uid && + !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; read_unlock(&tasklist_lock); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 937f6b5b2008..49849d12dd12 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -115,6 +115,8 @@ int ptrace_check_attach(struct task_struct *child, int kill) int __ptrace_may_access(struct task_struct *task, unsigned int mode) { + struct cred *cred = current->cred, *tcred = task->cred; + /* May we inspect the given task? * This check is used both for attaching with ptrace * and for allowing access to sensitive information in /proc. @@ -123,19 +125,18 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. 
*/ - uid_t uid; - gid_t gid; + uid_t uid = cred->uid; + gid_t gid = cred->gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - current_uid_gid(&uid, &gid); - if ((uid != task->euid || - uid != task->suid || - uid != task->uid || - gid != task->egid || - gid != task->sgid || - gid != task->gid) && !capable(CAP_SYS_PTRACE)) + if ((uid != tcred->euid || + uid != tcred->suid || + uid != tcred->uid || + gid != tcred->egid || + gid != tcred->sgid || + gid != tcred->gid) && !capable(CAP_SYS_PTRACE)) return -EPERM; smp_rmb(); if (task->mm) diff --git a/kernel/sched.c b/kernel/sched.c index c3b8b1fcde0d..733c59e645aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -345,7 +345,7 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; #ifdef CONFIG_USER_SCHED - tg = p->user->tg; + tg = p->cred->user->tg; #elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); @@ -5182,8 +5182,8 @@ recheck: /* can't change other user's priorities */ euid = current_euid(); - if (euid != p->euid && - euid != p->uid) + if (euid != p->cred->euid && + euid != p->cred->uid) return -EPERM; } @@ -5417,7 +5417,9 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) euid = current_euid(); retval = -EPERM; - if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_NICE)) + if (euid != p->cred->euid && + euid != p->cred->uid && + !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 167b535fe1a9..80e8a6489f97 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -187,7 +187,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, * In order to avoid problems with "switch_user()", we want to make * sure that the compiler doesn't re-load "t->user" */ - user = t->user; + user = t->cred->user; barrier(); 
atomic_inc(&user->sigpending); if (override_rlimit || @@ -582,8 +582,8 @@ static int check_kill_permission(int sig, struct siginfo *info, uid = current_uid(); euid = current_euid(); - if ((euid ^ t->suid) && (euid ^ t->uid) && - (uid ^ t->suid) && (uid ^ t->uid) && + if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) && + (uid ^ t->cred->suid) && (uid ^ t->cred->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -1100,8 +1100,8 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, goto out_unlock; } if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) - && (euid != p->suid) && (euid != p->uid) - && (uid != p->suid) && (uid != p->uid)) { + && (euid != p->cred->suid) && (euid != p->cred->uid) + && (uid != p->cred->suid) && (uid != p->cred->uid)) { ret = -EPERM; goto out_unlock; } @@ -1374,7 +1374,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); rcu_read_unlock(); - info.si_uid = tsk->uid; + info.si_uid = tsk->cred->uid; thread_group_cputime(tsk, &cputime); info.si_utime = cputime_to_jiffies(cputime.utime); @@ -1445,7 +1445,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); rcu_read_unlock(); - info.si_uid = tsk->uid; + info.si_uid = tsk->cred->uid; info.si_utime = cputime_to_clock_t(tsk->utime); info.si_stime = cputime_to_clock_t(tsk->stime); @@ -1713,7 +1713,7 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_errno = 0; info->si_code = SI_USER; info->si_pid = task_pid_vnr(current->parent); - info->si_uid = current->parent->uid; + info->si_uid = current->parent->cred->uid; } /* If the (new) signal is now blocked, requeue it. 
*/ diff --git a/kernel/sys.c b/kernel/sys.c index ed5c29c748ac..5d81f07c0150 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -117,7 +117,9 @@ static int set_one_prio(struct task_struct *p, int niceval, int error) uid_t euid = current_euid(); int no_nice; - if (p->uid != euid && p->euid != euid && !capable(CAP_SYS_NICE)) { + if (p->cred->uid != euid && + p->cred->euid != euid && + !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } @@ -174,7 +176,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->user; + user = current->cred->user; if (!who) who = current_uid(); else @@ -182,7 +184,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->uid == who) + if (p->cred->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); if (who != current_uid()) @@ -236,7 +238,7 @@ asmlinkage long sys_getpriority(int which, int who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->user; + user = current->cred->user; if (!who) who = current_uid(); else @@ -244,7 +246,7 @@ asmlinkage long sys_getpriority(int which, int who) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->uid == who) { + if (p->cred->uid == who) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; @@ -472,8 +474,9 @@ void ctrl_alt_del(void) */ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) { - int old_rgid = current->gid; - int old_egid = current->egid; + struct cred *cred = current->cred; + int old_rgid = cred->gid; + int old_egid = cred->egid; int new_rgid = old_rgid; int new_egid = old_egid; int retval; @@ -484,7 +487,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) if (rgid != (gid_t) -1) { if ((old_rgid == rgid) || - (current->egid==rgid) || + (cred->egid == rgid) || capable(CAP_SETGID)) 
new_rgid = rgid; else @@ -492,8 +495,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (egid != (gid_t) -1) { if ((old_rgid == egid) || - (current->egid == egid) || - (current->sgid == egid) || + (cred->egid == egid) || + (cred->sgid == egid) || capable(CAP_SETGID)) new_egid = egid; else @@ -505,10 +508,10 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && egid != old_rgid)) - current->sgid = new_egid; - current->fsgid = new_egid; - current->egid = new_egid; - current->gid = new_rgid; + cred->sgid = new_egid; + cred->fsgid = new_egid; + cred->egid = new_egid; + cred->gid = new_rgid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); return 0; @@ -521,7 +524,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) */ asmlinkage long sys_setgid(gid_t gid) { - int old_egid = current->egid; + struct cred *cred = current->cred; + int old_egid = cred->egid; int retval; retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); @@ -533,13 +537,13 @@ asmlinkage long sys_setgid(gid_t gid) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->gid = current->egid = current->sgid = current->fsgid = gid; - } else if ((gid == current->gid) || (gid == current->sgid)) { + cred->gid = cred->egid = cred->sgid = cred->fsgid = gid; + } else if ((gid == cred->gid) || (gid == cred->sgid)) { if (old_egid != gid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->egid = current->fsgid = gid; + cred->egid = cred->fsgid = gid; } else return -EPERM; @@ -570,7 +574,7 @@ static int set_user(uid_t new_ruid, int dumpclear) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->uid = new_ruid; + current->cred->uid = new_ruid; return 0; } @@ -591,6 +595,7 @@ static int set_user(uid_t new_ruid, int dumpclear) */ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) { + struct cred *cred = current->cred; int old_ruid, old_euid, old_suid, new_ruid, new_euid; 
int retval; @@ -598,14 +603,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (retval) return retval; - new_ruid = old_ruid = current->uid; - new_euid = old_euid = current->euid; - old_suid = current->suid; + new_ruid = old_ruid = cred->uid; + new_euid = old_euid = cred->euid; + old_suid = cred->suid; if (ruid != (uid_t) -1) { new_ruid = ruid; if ((old_ruid != ruid) && - (current->euid != ruid) && + (cred->euid != ruid) && !capable(CAP_SETUID)) return -EPERM; } @@ -613,8 +618,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (euid != (uid_t) -1) { new_euid = euid; if ((old_ruid != euid) && - (current->euid != euid) && - (current->suid != euid) && + (cred->euid != euid) && + (cred->suid != euid) && !capable(CAP_SETUID)) return -EPERM; } @@ -626,11 +631,11 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = current->euid = new_euid; + cred->fsuid = cred->euid = new_euid; if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && euid != old_ruid)) - current->suid = current->euid; - current->fsuid = current->euid; + cred->suid = cred->euid; + cred->fsuid = cred->euid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -653,7 +658,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) */ asmlinkage long sys_setuid(uid_t uid) { - int old_euid = current->euid; + struct cred *cred = current->cred; + int old_euid = cred->euid; int old_ruid, old_suid, new_suid; int retval; @@ -661,23 +667,23 @@ asmlinkage long sys_setuid(uid_t uid) if (retval) return retval; - old_ruid = current->uid; - old_suid = current->suid; + old_ruid = cred->uid; + old_suid = cred->suid; new_suid = old_suid; if (capable(CAP_SETUID)) { if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) return -EAGAIN; new_suid = uid; - } else if ((uid != current->uid) && (uid != new_suid)) + } else if ((uid != cred->uid) && (uid != new_suid)) return -EPERM; if (old_euid != uid) { 
set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = current->euid = uid; - current->suid = new_suid; + cred->fsuid = cred->euid = uid; + cred->suid = new_suid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -692,9 +698,10 @@ asmlinkage long sys_setuid(uid_t uid) */ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { - int old_ruid = current->uid; - int old_euid = current->euid; - int old_suid = current->suid; + struct cred *cred = current->cred; + int old_ruid = cred->uid; + int old_euid = cred->euid; + int old_suid = cred->suid; int retval; retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); @@ -702,30 +709,31 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) return retval; if (!capable(CAP_SETUID)) { - if ((ruid != (uid_t) -1) && (ruid != current->uid) && - (ruid != current->euid) && (ruid != current->suid)) + if ((ruid != (uid_t) -1) && (ruid != cred->uid) && + (ruid != cred->euid) && (ruid != cred->suid)) return -EPERM; - if ((euid != (uid_t) -1) && (euid != current->uid) && - (euid != current->euid) && (euid != current->suid)) + if ((euid != (uid_t) -1) && (euid != cred->uid) && + (euid != cred->euid) && (euid != cred->suid)) return -EPERM; - if ((suid != (uid_t) -1) && (suid != current->uid) && - (suid != current->euid) && (suid != current->suid)) + if ((suid != (uid_t) -1) && (suid != cred->uid) && + (suid != cred->euid) && (suid != cred->suid)) return -EPERM; } if (ruid != (uid_t) -1) { - if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) + if (ruid != cred->uid && + set_user(ruid, euid != cred->euid) < 0) return -EAGAIN; } if (euid != (uid_t) -1) { - if (euid != current->euid) { + if (euid != cred->euid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->euid = euid; + cred->euid = euid; } - current->fsuid = current->euid; + cred->fsuid = cred->euid; if (suid != (uid_t) -1) - current->suid = suid; + cred->suid = suid; 
key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -735,11 +743,12 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) { + struct cred *cred = current->cred; int retval; - if (!(retval = put_user(current->uid, ruid)) && - !(retval = put_user(current->euid, euid))) - retval = put_user(current->suid, suid); + if (!(retval = put_user(cred->uid, ruid)) && + !(retval = put_user(cred->euid, euid))) + retval = put_user(cred->suid, suid); return retval; } @@ -749,6 +758,7 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us */ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { + struct cred *cred = current->cred; int retval; retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); @@ -756,28 +766,28 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) return retval; if (!capable(CAP_SETGID)) { - if ((rgid != (gid_t) -1) && (rgid != current->gid) && - (rgid != current->egid) && (rgid != current->sgid)) + if ((rgid != (gid_t) -1) && (rgid != cred->gid) && + (rgid != cred->egid) && (rgid != cred->sgid)) return -EPERM; - if ((egid != (gid_t) -1) && (egid != current->gid) && - (egid != current->egid) && (egid != current->sgid)) + if ((egid != (gid_t) -1) && (egid != cred->gid) && + (egid != cred->egid) && (egid != cred->sgid)) return -EPERM; - if ((sgid != (gid_t) -1) && (sgid != current->gid) && - (sgid != current->egid) && (sgid != current->sgid)) + if ((sgid != (gid_t) -1) && (sgid != cred->gid) && + (sgid != cred->egid) && (sgid != cred->sgid)) return -EPERM; } if (egid != (gid_t) -1) { - if (egid != current->egid) { + if (egid != cred->egid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->egid = egid; + cred->egid = egid; } - current->fsgid = current->egid; + cred->fsgid = cred->egid; if (rgid != (gid_t) -1) - current->gid = rgid; + cred->gid = rgid; if 
(sgid != (gid_t) -1) - current->sgid = sgid; + cred->sgid = sgid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); @@ -786,11 +796,12 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) { + struct cred *cred = current->cred; int retval; - if (!(retval = put_user(current->gid, rgid)) && - !(retval = put_user(current->egid, egid))) - retval = put_user(current->sgid, sgid); + if (!(retval = put_user(cred->gid, rgid)) && + !(retval = put_user(cred->egid, egid))) + retval = put_user(cred->sgid, sgid); return retval; } @@ -804,20 +815,21 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us */ asmlinkage long sys_setfsuid(uid_t uid) { + struct cred *cred = current->cred; int old_fsuid; - old_fsuid = current->fsuid; + old_fsuid = cred->fsuid; if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) return old_fsuid; - if (uid == current->uid || uid == current->euid || - uid == current->suid || uid == current->fsuid || + if (uid == cred->uid || uid == cred->euid || + uid == cred->suid || uid == cred->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = uid; + cred->fsuid = uid; } key_fsuid_changed(current); @@ -833,20 +845,21 @@ asmlinkage long sys_setfsuid(uid_t uid) */ asmlinkage long sys_setfsgid(gid_t gid) { + struct cred *cred = current->cred; int old_fsgid; - old_fsgid = current->fsgid; + old_fsgid = cred->fsgid; if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) return old_fsgid; - if (gid == current->gid || gid == current->egid || - gid == current->sgid || gid == current->fsgid || + if (gid == cred->gid || gid == cred->egid || + gid == cred->sgid || gid == cred->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsgid = gid; + 
cred->fsgid = gid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); } @@ -1208,8 +1221,15 @@ int groups_search(struct group_info *group_info, gid_t grp) return 0; } -/* validate and set current->group_info */ -int set_current_groups(struct group_info *group_info) +/** + * set_groups - Change a group subscription in a security record + * @sec: The security record to alter + * @group_info: The group list to impose + * + * Validate a group subscription and, if valid, impose it upon a task security + * record. + */ +int set_groups(struct cred *cred, struct group_info *group_info) { int retval; struct group_info *old_info; @@ -1221,20 +1241,34 @@ int set_current_groups(struct group_info *group_info) groups_sort(group_info); get_group_info(group_info); - task_lock(current); - old_info = current->group_info; - current->group_info = group_info; - task_unlock(current); + spin_lock(&cred->lock); + old_info = cred->group_info; + cred->group_info = group_info; + spin_unlock(&cred->lock); put_group_info(old_info); - return 0; } +EXPORT_SYMBOL(set_groups); + +/** + * set_current_groups - Change current's group subscription + * @group_info: The group list to impose + * + * Validate a group subscription and, if valid, impose it upon current's task + * security record. 
+ */ +int set_current_groups(struct group_info *group_info) +{ + return set_groups(current->cred, group_info); +} + EXPORT_SYMBOL(set_current_groups); asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) { + struct cred *cred = current->cred; int i = 0; /* @@ -1246,13 +1280,13 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) return -EINVAL; /* no need to grab task_lock here; it cannot change */ - i = current->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups_to_user(grouplist, current->group_info)) { + if (groups_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } @@ -1296,9 +1330,10 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) */ int in_group_p(gid_t grp) { + struct cred *cred = current->cred; int retval = 1; - if (grp != current->fsgid) - retval = groups_search(current->group_info, grp); + if (grp != cred->fsgid) + retval = groups_search(cred->group_info, grp); return retval; } @@ -1306,9 +1341,10 @@ EXPORT_SYMBOL(in_group_p); int in_egroup_p(gid_t grp) { + struct cred *cred = current->cred; int retval = 1; - if (grp != current->egid) - retval = groups_search(current->group_info, grp); + if (grp != cred->egid) + retval = groups_search(cred->group_info, grp); return retval; } @@ -1624,7 +1660,9 @@ asmlinkage long sys_umask(int mask) asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { - long error = 0; + struct task_struct *me = current; + unsigned char comm[sizeof(me->comm)]; + long error; if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) return error; @@ -1635,39 +1673,41 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = -EINVAL; break; } - current->pdeath_signal = arg2; + me->pdeath_signal = arg2; + error = 0; break; case PR_GET_PDEATHSIG: - error = 
put_user(current->pdeath_signal, (int __user *)arg2); + error = put_user(me->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: - error = get_dumpable(current->mm); + error = get_dumpable(me->mm); break; case PR_SET_DUMPABLE: if (arg2 < 0 || arg2 > 1) { error = -EINVAL; break; } - set_dumpable(current->mm, arg2); + set_dumpable(me->mm, arg2); + error = 0; break; case PR_SET_UNALIGN: - error = SET_UNALIGN_CTL(current, arg2); + error = SET_UNALIGN_CTL(me, arg2); break; case PR_GET_UNALIGN: - error = GET_UNALIGN_CTL(current, arg2); + error = GET_UNALIGN_CTL(me, arg2); break; case PR_SET_FPEMU: - error = SET_FPEMU_CTL(current, arg2); + error = SET_FPEMU_CTL(me, arg2); break; case PR_GET_FPEMU: - error = GET_FPEMU_CTL(current, arg2); + error = GET_FPEMU_CTL(me, arg2); break; case PR_SET_FPEXC: - error = SET_FPEXC_CTL(current, arg2); + error = SET_FPEXC_CTL(me, arg2); break; case PR_GET_FPEXC: - error = GET_FPEXC_CTL(current, arg2); + error = GET_FPEXC_CTL(me, arg2); break; case PR_GET_TIMING: error = PR_TIMING_STATISTICAL; @@ -1675,33 +1715,28 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TIMING: if (arg2 != PR_TIMING_STATISTICAL) error = -EINVAL; + else + error = 0; break; - case PR_SET_NAME: { - struct task_struct *me = current; - unsigned char ncomm[sizeof(me->comm)]; - - ncomm[sizeof(me->comm)-1] = 0; - if (strncpy_from_user(ncomm, (char __user *)arg2, - sizeof(me->comm)-1) < 0) + case PR_SET_NAME: + comm[sizeof(me->comm)-1] = 0; + if (strncpy_from_user(comm, (char __user *)arg2, + sizeof(me->comm) - 1) < 0) return -EFAULT; - set_task_comm(me, ncomm); + set_task_comm(me, comm); return 0; - } - case PR_GET_NAME: { - struct task_struct *me = current; - unsigned char tcomm[sizeof(me->comm)]; - - get_task_comm(tcomm, me); - if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm))) + case PR_GET_NAME: + get_task_comm(comm, me); + if (copy_to_user((char __user *)arg2, comm, + sizeof(comm))) return -EFAULT; 
return 0; - } case PR_GET_ENDIAN: - error = GET_ENDIAN(current, arg2); + error = GET_ENDIAN(me, arg2); break; case PR_SET_ENDIAN: - error = SET_ENDIAN(current, arg2); + error = SET_ENDIAN(me, arg2); break; case PR_GET_SECCOMP: @@ -1725,6 +1760,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, current->default_timer_slack_ns; else current->timer_slack_ns = arg2; + error = 0; break; default: error = -EINVAL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3b478f9171..5c97c5b4ea8f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -246,7 +246,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(data->comm, tsk->comm, TASK_COMM_LEN); data->pid = tsk->pid; - data->uid = tsk->uid; + data->uid = task_uid(tsk); data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; data->policy = tsk->policy; data->rt_priority = tsk->rt_priority; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 8ebcd8532dfb..6d1ed07bf312 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -53,8 +53,8 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; - stats->ac_uid = tsk->uid; - stats->ac_gid = tsk->gid; + stats->ac_uid = tsk->cred->uid; + stats->ac_gid = tsk->cred->gid; stats->ac_pid = tsk->pid; rcu_read_lock(); stats->ac_ppid = pid_alive(tsk) ? 
diff --git a/kernel/uid16.c b/kernel/uid16.c index 3e41c1673e2f..71f07fc39fea 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -86,9 +86,9 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); return retval; } @@ -106,9 +106,9 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -166,20 +166,20 @@ asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -210,20 +210,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) asmlinkage long sys_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys_geteuid16(void) { - 
return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } diff --git a/kernel/user.c b/kernel/user.c index 39d6159fae43..104d22ac84d5 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -457,11 +457,11 @@ void switch_uid(struct user_struct *new_user) * cheaply with the new uid cache, so if it matters * we should be checking for it. -DaveM */ - old_user = current->user; + old_user = current->cred->user; atomic_inc(&new_user->processes); atomic_dec(&old_user->processes); switch_uid_keyring(new_user); - current->user = new_user; + current->cred->user = new_user; sched_switch_user(current); /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 07a96474077d..b23492ee3e50 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1110,12 +1110,12 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { + struct cred *cred, *tcred; struct mm_struct *mm; struct task_struct *task; nodemask_t old; nodemask_t new; nodemask_t task_nodes; - uid_t uid, euid; int err; err = get_nodes(&old, old_nodes, maxnode); @@ -1145,10 +1145,10 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. 
*/ - uid = current_uid(); - euid = current_euid(); - if (euid != task->suid && euid != task->uid && - uid != task->suid && uid != task->uid && + cred = current->cred; + tcred = task->cred; + if (cred->euid != tcred->suid && cred->euid != tcred->uid && + cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/migrate.c b/mm/migrate.c index 6263c24c4afe..794443da1b4f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1045,10 +1045,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags) { + struct cred *cred, *tcred; struct task_struct *task; struct mm_struct *mm; int err; - uid_t uid, euid; /* Check flags */ if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) @@ -1076,10 +1076,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - uid = current_uid(); - euid = current_euid(); - if (euid != task->suid && euid != task->uid && - uid != task->suid && uid != task->uid && + cred = current->cred; + tcred = task->cred; + if (cred->euid != tcred->suid && cred->euid != tcred->uid && + cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 34a458aa7997..3af787ba2077 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -298,7 +298,7 @@ static void dump_tasks(const struct mem_cgroup *mem) task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, p->uid, p->tgid, p->mm->total_vm, + p->pid, p->cred->uid, p->tgid, p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, p->comm); task_unlock(p); diff --git a/net/core/scm.c b/net/core/scm.c index 4681d8f9b45b..c28ca32a7d93 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -44,11 +44,13 @@ static __inline__ int scm_check_creds(struct ucred *creds) { + struct cred *cred = 
current->cred; + if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == current_uid() || creds->uid == current_euid() || - creds->uid == current_suid()) || capable(CAP_SETUID)) && - ((creds->gid == current_gid() || creds->gid == current_egid() || - creds->gid == current_sgid()) || capable(CAP_SETGID))) { + ((creds->uid == cred->uid || creds->uid == cred->euid || + creds->uid == cred->suid) || capable(CAP_SETUID)) && + ((creds->gid == cred->gid || creds->gid == cred->egid || + creds->gid == cred->sgid) || capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 8fc380578807..c79543212602 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -353,7 +353,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) struct auth_cred acred = { .uid = current_fsuid(), .gid = current_fsgid(), - .group_info = current->group_info, + .group_info = current->cred->group_info, }; struct rpc_cred *ret; diff --git a/security/commoncap.c b/security/commoncap.c index fb4e240720d8..fa61679f8c73 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -30,7 +30,7 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { - NETLINK_CB(skb).eff_cap = current->cap_effective; + NETLINK_CB(skb).eff_cap = current_cap(); return 0; } @@ -52,7 +52,7 @@ EXPORT_SYMBOL(cap_netlink_recv); int cap_capable(struct task_struct *tsk, int cap, int audit) { /* Derived from include/linux/sched.h:capable. */ - if (cap_raised(tsk->cap_effective, cap)) + if (cap_raised(tsk->cred->cap_effective, cap)) return 0; return -EPERM; } @@ -67,7 +67,8 @@ int cap_settime(struct timespec *ts, struct timezone *tz) int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) { /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. 
*/ - if (cap_issubset(child->cap_permitted, current->cap_permitted)) + if (cap_issubset(child->cred->cap_permitted, + current->cred->cap_permitted)) return 0; if (capable(CAP_SYS_PTRACE)) return 0; @@ -76,8 +77,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) int cap_ptrace_traceme(struct task_struct *parent) { - /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(current->cap_permitted, parent->cap_permitted)) + if (cap_issubset(current->cred->cap_permitted, + parent->cred->cap_permitted)) return 0; if (has_capability(parent, CAP_SYS_PTRACE)) return 0; @@ -87,10 +88,12 @@ int cap_ptrace_traceme(struct task_struct *parent) int cap_capget (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { + struct cred *cred = target->cred; + /* Derived from kernel/capability.c:sys_capget. */ - *effective = target->cap_effective; - *inheritable = target->cap_inheritable; - *permitted = target->cap_permitted; + *effective = cred->cap_effective; + *inheritable = cred->cap_inheritable; + *permitted = cred->cap_permitted; return 0; } @@ -122,24 +125,26 @@ int cap_capset_check(const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { + const struct cred *cred = current->cred; + if (cap_inh_is_capped() && !cap_issubset(*inheritable, - cap_combine(current->cap_inheritable, - current->cap_permitted))) { + cap_combine(cred->cap_inheritable, + cred->cap_permitted))) { /* incapable of using this inheritable set */ return -EPERM; } if (!cap_issubset(*inheritable, - cap_combine(current->cap_inheritable, - current->cap_bset))) { + cap_combine(cred->cap_inheritable, + cred->cap_bset))) { /* no new pI capabilities outside bounding set */ return -EPERM; } /* verify restrictions on target's new Permitted set */ if (!cap_issubset (*permitted, - cap_combine (current->cap_permitted, - current->cap_permitted))) { + cap_combine (cred->cap_permitted, + 
cred->cap_permitted))) { return -EPERM; } @@ -155,9 +160,11 @@ void cap_capset_set(const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { - current->cap_effective = *effective; - current->cap_inheritable = *inheritable; - current->cap_permitted = *permitted; + struct cred *cred = current->cred; + + cred->cap_effective = *effective; + cred->cap_inheritable = *inheritable; + cred->cap_permitted = *permitted; } static inline void bprm_clear_caps(struct linux_binprm *bprm) @@ -211,8 +218,8 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * pP' = (X & fP) | (pI & fI) */ bprm->cap_post_exec_permitted.cap[i] = - (current->cap_bset.cap[i] & permitted) | - (current->cap_inheritable.cap[i] & inheritable); + (current->cred->cap_bset.cap[i] & permitted) | + (current->cred->cap_inheritable.cap[i] & inheritable); if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) { /* @@ -354,8 +361,8 @@ int cap_bprm_set_security (struct linux_binprm *bprm) if (bprm->e_uid == 0 || current_uid() == 0) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ bprm->cap_post_exec_permitted = cap_combine( - current->cap_bset, current->cap_inheritable - ); + current->cred->cap_bset, + current->cred->cap_inheritable); bprm->cap_effective = (bprm->e_uid == 0); ret = 0; } @@ -366,44 +373,39 @@ int cap_bprm_set_security (struct linux_binprm *bprm) void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { - kernel_cap_t pP = current->cap_permitted; - kernel_cap_t pE = current->cap_effective; - uid_t uid; - gid_t gid; + struct cred *cred = current->cred; - current_uid_gid(&uid, &gid); - - if (bprm->e_uid != uid || bprm->e_gid != gid || + if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid || !cap_issubset(bprm->cap_post_exec_permitted, - current->cap_permitted)) { + cred->cap_permitted)) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { - 
bprm->e_uid = uid; - bprm->e_gid = gid; + bprm->e_uid = cred->uid; + bprm->e_gid = cred->gid; } if (cap_limit_ptraced_target()) { bprm->cap_post_exec_permitted = cap_intersect( bprm->cap_post_exec_permitted, - current->cap_permitted); + cred->cap_permitted); } } } - current->suid = current->euid = current->fsuid = bprm->e_uid; - current->sgid = current->egid = current->fsgid = bprm->e_gid; + cred->suid = cred->euid = cred->fsuid = bprm->e_uid; + cred->sgid = cred->egid = cred->fsgid = bprm->e_gid; /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ if (!is_global_init(current)) { - current->cap_permitted = bprm->cap_post_exec_permitted; + cred->cap_permitted = bprm->cap_post_exec_permitted; if (bprm->cap_effective) - current->cap_effective = bprm->cap_post_exec_permitted; + cred->cap_effective = bprm->cap_post_exec_permitted; else - cap_clear(current->cap_effective); + cap_clear(cred->cap_effective); } /* @@ -418,27 +420,30 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. 
*/ - if (!cap_isclear(current->cap_effective)) { - if (!cap_issubset(CAP_FULL_SET, current->cap_effective) || - (bprm->e_uid != 0) || (current->uid != 0) || + if (!cap_isclear(cred->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) || + (bprm->e_uid != 0) || (cred->uid != 0) || issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, &pP, &pE); + audit_log_bprm_fcaps(bprm, &cred->cap_permitted, + &cred->cap_effective); } - current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); } int cap_bprm_secureexec (struct linux_binprm *bprm) { - if (current_uid() != 0) { + const struct cred *cred = current->cred; + + if (cred->uid != 0) { if (bprm->cap_effective) return 1; if (!cap_isclear(bprm->cap_post_exec_permitted)) return 1; } - return (current_euid() != current_uid() || - current_egid() != current_gid()); + return (cred->euid != cred->uid || + cred->egid != cred->gid); } int cap_inode_setxattr(struct dentry *dentry, const char *name, @@ -501,25 +506,27 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) static inline void cap_emulate_setxuid (int old_ruid, int old_euid, int old_suid) { - uid_t euid = current_euid(); + struct cred *cred = current->cred; if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && - (current_uid() != 0 && euid != 0 && current_suid() != 0) && + (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear (current->cap_permitted); - cap_clear (current->cap_effective); + cap_clear (cred->cap_permitted); + cap_clear (cred->cap_effective); } - if (old_euid == 0 && euid != 0) { - cap_clear (current->cap_effective); + if (old_euid == 0 && cred->euid != 0) { + cap_clear (cred->cap_effective); } - if (old_euid != 0 && euid == 0) { - current->cap_effective = current->cap_permitted; + if (old_euid != 0 && cred->euid == 0) { + cred->cap_effective = cred->cap_permitted; } } int cap_task_post_setuid (uid_t old_ruid, uid_t 
old_euid, uid_t old_suid, int flags) { + struct cred *cred = current->cred; + switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: @@ -541,16 +548,16 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ if (!issecure (SECURE_NO_SETUID_FIXUP)) { - if (old_fsuid == 0 && current_fsuid() != 0) { - current->cap_effective = + if (old_fsuid == 0 && cred->fsuid != 0) { + cred->cap_effective = cap_drop_fs_set( - current->cap_effective); + cred->cap_effective); } - if (old_fsuid != 0 && current_fsuid() == 0) { - current->cap_effective = + if (old_fsuid != 0 && cred->fsuid == 0) { + cred->cap_effective = cap_raise_fs_set( - current->cap_effective, - current->cap_permitted); + cred->cap_effective, + cred->cap_permitted); } } break; @@ -575,7 +582,8 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ static int cap_safe_nice(struct task_struct *p) { - if (!cap_issubset(p->cap_permitted, current->cap_permitted) && + if (!cap_issubset(p->cred->cap_permitted, + current->cred->cap_permitted) && !capable(CAP_SYS_NICE)) return -EPERM; return 0; @@ -610,7 +618,7 @@ static long cap_prctl_drop(unsigned long cap) return -EPERM; if (!cap_valid(cap)) return -EINVAL; - cap_lower(current->cap_bset, cap); + cap_lower(current->cred->cap_bset, cap); return 0; } @@ -633,6 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice) int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, long *rc_p) { + struct cred *cred = current->cred; long error = 0; switch (option) { @@ -640,7 +649,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!cap_valid(arg2)) error = -EINVAL; else - error = !!cap_raised(current->cap_bset, arg2); + error = !!cap_raised(cred->cap_bset, arg2); break; #ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: @@ -667,9 +676,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * capability-based-privilege 
environment. */ case PR_SET_SECUREBITS: - if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) - & (current->securebits ^ arg2)) /*[1]*/ - || ((current->securebits & SECURE_ALL_LOCKS + if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1) + & (cred->securebits ^ arg2)) /*[1]*/ + || ((cred->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/ @@ -682,11 +691,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, */ error = -EPERM; /* cannot change a locked bit */ } else { - current->securebits = arg2; + cred->securebits = arg2; } break; case PR_GET_SECUREBITS: - error = current->securebits; + error = cred->securebits; break; #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ @@ -701,10 +710,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, else if (issecure(SECURE_KEEP_CAPS_LOCKED)) error = -EPERM; else if (arg2) - current->securebits |= issecure_mask(SECURE_KEEP_CAPS); + cred->securebits |= issecure_mask(SECURE_KEEP_CAPS); else - current->securebits &= - ~issecure_mask(SECURE_KEEP_CAPS); + cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); break; default: @@ -719,11 +727,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, void cap_task_reparent_to_init (struct task_struct *p) { - cap_set_init_eff(p->cap_effective); - cap_clear(p->cap_inheritable); - cap_set_full(p->cap_permitted); - p->securebits = SECUREBITS_DEFAULT; - return; + struct cred *cred = p->cred; + + cap_set_init_eff(cred->cap_effective); + cap_clear(cred->cap_inheritable); + cap_set_full(cred->cap_permitted); + p->cred->securebits = SECUREBITS_DEFAULT; } int cap_syslog (int type) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index fcce331eca72..8833b447adef 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -889,7 +889,7 @@ long keyctl_instantiate_key(key_serial_t id, /* the 
appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->request_key_auth; + instkey = current->cred->request_key_auth; if (!instkey) goto error; @@ -932,8 +932,8 @@ long keyctl_instantiate_key(key_serial_t id, /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; } error2: @@ -960,7 +960,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->request_key_auth; + instkey = current->cred->request_key_auth; if (!instkey) goto error; @@ -983,8 +983,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; } error: @@ -999,6 +999,7 @@ error: */ long keyctl_set_reqkey_keyring(int reqkey_defl) { + struct cred *cred = current->cred; int ret; switch (reqkey_defl) { @@ -1018,10 +1019,10 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: set: - current->jit_keyring = reqkey_defl; + cred->jit_keyring = reqkey_defl; case KEY_REQKEY_DEFL_NO_CHANGE: - return current->jit_keyring; + return cred->jit_keyring; case KEY_REQKEY_DEFL_GROUP_KEYRING: default: @@ -1086,8 +1087,8 @@ long keyctl_assume_authority(key_serial_t id) /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + 
key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; ret = 0; goto error; } @@ -1103,8 +1104,8 @@ long keyctl_assume_authority(key_serial_t id) goto error; } - key_put(current->request_key_auth); - current->request_key_auth = authkey; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = authkey; ret = authkey->serial; error: diff --git a/security/keys/permission.c b/security/keys/permission.c index 3b41f9b52537..baf3d5f31e71 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -22,6 +22,7 @@ int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { + struct cred *cred = context->cred; struct key *key; key_perm_t kperm; int ret; @@ -29,7 +30,7 @@ int key_task_permission(const key_ref_t key_ref, key = key_ref_to_ptr(key_ref); /* use the second 8-bits of permissions for keys the caller owns */ - if (key->uid == context->fsuid) { + if (key->uid == cred->fsuid) { kperm = key->perm >> 16; goto use_these_perms; } @@ -37,14 +38,14 @@ int key_task_permission(const key_ref_t key_ref, /* use the third 8-bits of permissions for keys the caller has a group * membership in common with */ if (key->gid != -1 && key->perm & KEY_GRP_ALL) { - if (key->gid == context->fsgid) { + if (key->gid == cred->fsgid) { kperm = key->perm >> 8; goto use_these_perms; } - task_lock(context); - ret = groups_search(context->group_info, key->gid); - task_unlock(context); + spin_lock(&cred->lock); + ret = groups_search(cred->group_info, key->gid); + spin_unlock(&cred->lock); if (ret) { kperm = key->perm >> 8; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 1c793b7090a7..b0904cdda2e7 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -42,7 +42,7 @@ struct key_user root_key_user = { */ int install_user_keyrings(void) { - struct user_struct *user = current->user; + struct user_struct *user = current->cred->user; struct key 
*uid_keyring, *session_keyring; char buf[20]; int ret; @@ -156,7 +156,7 @@ int install_thread_keyring(void) sprintf(buf, "_tid.%u", tsk->pid); - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -164,8 +164,8 @@ int install_thread_keyring(void) } task_lock(tsk); - old = tsk->thread_keyring; - tsk->thread_keyring = keyring; + old = tsk->cred->thread_keyring; + tsk->cred->thread_keyring = keyring; task_unlock(tsk); ret = 0; @@ -192,7 +192,7 @@ int install_process_keyring(void) if (!tsk->signal->process_keyring) { sprintf(buf, "_pid.%u", tsk->tgid); - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -238,7 +238,7 @@ static int install_session_keyring(struct key *keyring) if (tsk->signal->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -292,14 +292,14 @@ int copy_thread_group_keys(struct task_struct *tsk) */ int copy_keys(unsigned long clone_flags, struct task_struct *tsk) { - key_check(tsk->thread_keyring); - key_check(tsk->request_key_auth); + key_check(tsk->cred->thread_keyring); + key_check(tsk->cred->request_key_auth); /* no thread keyring yet */ - tsk->thread_keyring = NULL; + tsk->cred->thread_keyring = NULL; /* copy the request_key() authorisation for this thread */ - key_get(tsk->request_key_auth); + key_get(tsk->cred->request_key_auth); return 0; @@ -322,8 +322,8 @@ void exit_thread_group_keys(struct signal_struct *tg) */ void exit_keys(struct task_struct *tsk) { - key_put(tsk->thread_keyring); - key_put(tsk->request_key_auth); + key_put(tsk->cred->thread_keyring); + 
key_put(tsk->cred->request_key_auth); } /* end exit_keys() */ @@ -337,8 +337,8 @@ int exec_keys(struct task_struct *tsk) /* newly exec'd tasks don't get a thread keyring */ task_lock(tsk); - old = tsk->thread_keyring; - tsk->thread_keyring = NULL; + old = tsk->cred->thread_keyring; + tsk->cred->thread_keyring = NULL; task_unlock(tsk); key_put(old); @@ -373,10 +373,11 @@ int suid_keys(struct task_struct *tsk) void key_fsuid_changed(struct task_struct *tsk) { /* update the ownership of the thread keyring */ - if (tsk->thread_keyring) { - down_write(&tsk->thread_keyring->sem); - tsk->thread_keyring->uid = tsk->fsuid; - up_write(&tsk->thread_keyring->sem); + BUG_ON(!tsk->cred); + if (tsk->cred->thread_keyring) { + down_write(&tsk->cred->thread_keyring->sem); + tsk->cred->thread_keyring->uid = tsk->cred->fsuid; + up_write(&tsk->cred->thread_keyring->sem); } } /* end key_fsuid_changed() */ @@ -388,10 +389,11 @@ void key_fsuid_changed(struct task_struct *tsk) void key_fsgid_changed(struct task_struct *tsk) { /* update the ownership of the thread keyring */ - if (tsk->thread_keyring) { - down_write(&tsk->thread_keyring->sem); - tsk->thread_keyring->gid = tsk->fsgid; - up_write(&tsk->thread_keyring->sem); + BUG_ON(!tsk->cred); + if (tsk->cred->thread_keyring) { + down_write(&tsk->cred->thread_keyring->sem); + tsk->cred->thread_keyring->gid = tsk->cred->fsgid; + up_write(&tsk->cred->thread_keyring->sem); } } /* end key_fsgid_changed() */ @@ -426,9 +428,9 @@ key_ref_t search_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (context->thread_keyring) { + if (context->cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->thread_keyring, 1), + make_key_ref(context->cred->thread_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -493,9 +495,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if 
(context->user->session_keyring) { + else if (context->cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->user->session_keyring, 1), + make_key_ref(context->cred->user->session_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -517,20 +519,20 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (context->request_key_auth && + if (context->cred->request_key_auth && context == current && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ - down_read(&context->request_key_auth->sem); + down_read(&context->cred->request_key_auth->sem); - if (key_validate(context->request_key_auth) == 0) { - rka = context->request_key_auth->payload.data; + if (key_validate(context->cred->request_key_auth) == 0) { + rka = context->cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, match, rka->context); - up_read(&context->request_key_auth->sem); + up_read(&context->cred->request_key_auth->sem); if (!IS_ERR(key_ref)) goto found; @@ -547,7 +549,7 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } else { - up_read(&context->request_key_auth->sem); + up_read(&context->cred->request_key_auth->sem); } } @@ -580,15 +582,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - key_ref_t key_ref, skey_ref; + struct cred *cred = t->cred; struct key *key; + key_ref_t key_ref, skey_ref; int ret; key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: - if (!t->thread_keyring) { + if (!cred->thread_keyring) { if (!create) goto error; @@ -599,7 +602,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, } } - key = t->thread_keyring; + key = cred->thread_keyring; atomic_inc(&key->usage); 
key_ref = make_key_ref(key, 1); break; @@ -628,7 +631,8 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, ret = install_user_keyrings(); if (ret < 0) goto error; - ret = install_session_keyring(t->user->session_keyring); + ret = install_session_keyring( + cred->user->session_keyring); if (ret < 0) goto error; } @@ -641,25 +645,25 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_USER_KEYRING: - if (!t->user->uid_keyring) { + if (!cred->user->uid_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = t->user->uid_keyring; + key = cred->user->uid_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!t->user->session_keyring) { + if (!cred->user->session_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = t->user->session_keyring; + key = cred->user->session_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -670,7 +674,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, goto error; case KEY_SPEC_REQKEY_AUTH_KEY: - key = t->request_key_auth; + key = cred->request_key_auth; if (!key) goto error; @@ -679,19 +683,19 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_REQUESTOR_KEYRING: - if (!t->request_key_auth) + if (!cred->request_key_auth) goto error; - down_read(&t->request_key_auth->sem); - if (t->request_key_auth->flags & KEY_FLAG_REVOKED) { + down_read(&cred->request_key_auth->sem); + if (cred->request_key_auth->flags & KEY_FLAG_REVOKED) { key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { - rka = t->request_key_auth->payload.data; + rka = cred->request_key_auth->payload.data; key = rka->dest_keyring; atomic_inc(&key->usage); } - up_read(&t->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); if (!key) goto error; key_ref = make_key_ref(key, 1); @@ -791,7 +795,7 @@ long 
join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 8e9d93b4a402..3e9b9eb1dd28 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -104,7 +104,8 @@ static int call_sbin_request_key(struct key_construction *cons, /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - tsk->thread_keyring ? tsk->thread_keyring->serial : 0); + tsk->cred->thread_keyring ? + tsk->cred->thread_keyring->serial : 0); prkey = 0; if (tsk->signal->process_keyring) @@ -117,7 +118,7 @@ static int call_sbin_request_key(struct key_construction *cons, sskey = rcu_dereference(tsk->signal->session_keyring)->serial; rcu_read_unlock(); } else { - sskey = tsk->user->session_keyring->serial; + sskey = tsk->cred->user->session_keyring->serial; } sprintf(keyring_str[2], "%d", sskey); @@ -232,11 +233,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } else { /* use a default keyring; falling through the cases until we * find one that we actually have */ - switch (tsk->jit_keyring) { + switch (tsk->cred->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: - if (tsk->request_key_auth) { - authkey = tsk->request_key_auth; + if (tsk->cred->request_key_auth) { + authkey = tsk->cred->request_key_auth; down_read(&authkey->sem); rka = authkey->payload.data; if (!test_bit(KEY_FLAG_REVOKED, @@ -249,7 +250,7 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = key_get(tsk->thread_keyring); + dest_keyring = key_get(tsk->cred->thread_keyring); if (dest_keyring) 
break; @@ -268,11 +269,12 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) break; case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - dest_keyring = key_get(tsk->user->session_keyring); + dest_keyring = + key_get(tsk->cred->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = key_get(tsk->user->uid_keyring); + dest_keyring = key_get(tsk->cred->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 1762d44711d5..2125579d5d73 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -164,22 +164,22 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, /* see if the calling process is already servicing the key request of * another process */ - if (current->request_key_auth) { + if (current->cred->request_key_auth) { /* it is - use that instantiation context here too */ - down_read(&current->request_key_auth->sem); + down_read(&current->cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ if (test_bit(KEY_FLAG_REVOKED, - &current->request_key_auth->flags)) + &current->cred->request_key_auth->flags)) goto auth_key_revoked; - irka = current->request_key_auth->payload.data; + irka = current->cred->request_key_auth->payload.data; rka->context = irka->context; rka->pid = irka->pid; get_task_struct(rka->context); - up_read(&current->request_key_auth->sem); + up_read(&current->cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ @@ -214,7 +214,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, return authkey; auth_key_revoked: - up_read(&current->request_key_auth->sem); + up_read(&current->cred->request_key_auth->sem); kfree(rka->callout_info); kfree(rka); kleave("= -EKEYREVOKED"); diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 64af2d3409ef..cf02490cd1eb 100644 ---
a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -39,7 +39,7 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid); int selinux_secmark_relabel_packet_permission(u32 sid) { if (selinux_enabled) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9f6da154cc82..328308f2882a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -167,21 +167,21 @@ static int task_alloc_security(struct task_struct *task) return -ENOMEM; tsec->osid = tsec->sid = SECINITSID_UNLABELED; - task->security = tsec; + task->cred->security = tsec; return 0; } static void task_free_security(struct task_struct *task) { - struct task_security_struct *tsec = task->security; - task->security = NULL; + struct task_security_struct *tsec = task->cred->security; + task->cred->security = NULL; kfree(tsec); } static int inode_alloc_security(struct inode *inode) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct inode_security_struct *isec; isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); @@ -215,7 +215,7 @@ static void inode_free_security(struct inode *inode) static int file_alloc_security(struct file *file) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct file_security_struct *fsec; fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL); @@ -554,7 +554,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts) { int rc = 0, i; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct superblock_security_struct *sbsec = sb->s_security; const char *name = sb->s_type->name; struct inode *inode = 
sbsec->sb->s_root->d_inode; @@ -1353,8 +1353,8 @@ static int task_has_perm(struct task_struct *tsk1, { struct task_security_struct *tsec1, *tsec2; - tsec1 = tsk1->security; - tsec2 = tsk2->security; + tsec1 = tsk1->cred->security; + tsec2 = tsk2->cred->security; return avc_has_perm(tsec1->sid, tsec2->sid, SECCLASS_PROCESS, perms, NULL); } @@ -1374,7 +1374,7 @@ static int task_has_capability(struct task_struct *tsk, u32 av = CAP_TO_MASK(cap); int rc; - tsec = tsk->security; + tsec = tsk->cred->security; AVC_AUDIT_DATA_INIT(&ad, CAP); ad.tsk = tsk; @@ -1405,7 +1405,7 @@ static int task_has_system(struct task_struct *tsk, { struct task_security_struct *tsec; - tsec = tsk->security; + tsec = tsk->cred->security; return avc_has_perm(tsec->sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, perms, NULL); @@ -1426,7 +1426,7 @@ static int inode_has_perm(struct task_struct *tsk, if (unlikely(IS_PRIVATE(inode))) return 0; - tsec = tsk->security; + tsec = tsk->cred->security; isec = inode->i_security; if (!adp) { @@ -1466,7 +1466,7 @@ static int file_has_perm(struct task_struct *tsk, struct file *file, u32 av) { - struct task_security_struct *tsec = tsk->security; + struct task_security_struct *tsec = tsk->cred->security; struct file_security_struct *fsec = file->f_security; struct inode *inode = file->f_path.dentry->d_inode; struct avc_audit_data ad; @@ -1503,7 +1503,7 @@ static int may_create(struct inode *dir, struct avc_audit_data ad; int rc; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; sbsec = dir->i_sb->s_security; @@ -1540,7 +1540,7 @@ static int may_create_key(u32 ksid, { struct task_security_struct *tsec; - tsec = ctx->security; + tsec = ctx->cred->security; return avc_has_perm(tsec->sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL); } @@ -1561,7 +1561,7 @@ static int may_link(struct inode *dir, u32 av; int rc; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; isec = dentry->d_inode->i_security; @@ -1606,7 
+1606,7 @@ static inline int may_rename(struct inode *old_dir, int old_is_dir, new_is_dir; int rc; - tsec = current->security; + tsec = current->cred->security; old_dsec = old_dir->i_security; old_isec = old_dentry->d_inode->i_security; old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); @@ -1659,7 +1659,7 @@ static int superblock_has_perm(struct task_struct *tsk, struct task_security_struct *tsec; struct superblock_security_struct *sbsec; - tsec = tsk->security; + tsec = tsk->cred->security; sbsec = sb->s_security; return avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); @@ -1758,8 +1758,8 @@ static int selinux_ptrace_may_access(struct task_struct *child, return rc; if (mode == PTRACE_MODE_READ) { - struct task_security_struct *tsec = current->security; - struct task_security_struct *csec = child->security; + struct task_security_struct *tsec = current->cred->security; + struct task_security_struct *csec = child->cred->security; return avc_has_perm(tsec->sid, csec->sid, SECCLASS_FILE, FILE__READ, NULL); } @@ -1874,7 +1874,7 @@ static int selinux_sysctl(ctl_table *table, int op) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; rc = selinux_sysctl_get_sid(table, (op == 0001) ? SECCLASS_DIR : SECCLASS_FILE, &tsid); @@ -2025,7 +2025,7 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm) if (bsec->set) return 0; - tsec = current->security; + tsec = current->cred->security; isec = inode->i_security; /* Default to the current task SID. 
*/ @@ -2090,7 +2090,7 @@ static int selinux_bprm_check_security(struct linux_binprm *bprm) static int selinux_bprm_secureexec(struct linux_binprm *bprm) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; int atsecure = 0; if (tsec->osid != tsec->sid) { @@ -2214,7 +2214,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) secondary_ops->bprm_apply_creds(bprm, unsafe); - tsec = current->security; + tsec = current->cred->security; bsec = bprm->security; sid = bsec->sid; @@ -2243,7 +2243,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) rcu_read_lock(); tracer = tracehook_tracer_task(current); if (likely(tracer != NULL)) { - sec = tracer->security; + sec = tracer->cred->security; ptsid = sec->sid; } rcu_read_unlock(); @@ -2274,7 +2274,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) int rc, i; unsigned long flags; - tsec = current->security; + tsec = current->cred->security; bsec = bprm->security; if (bsec->unsafe) { @@ -2521,7 +2521,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, int rc; char *namep = NULL, *context; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; sbsec = dir->i_sb->s_security; @@ -2706,7 +2706,7 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) static int selinux_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct inode *inode = dentry->d_inode; struct inode_security_struct *isec = inode->i_security; struct superblock_security_struct *sbsec; @@ -2918,7 +2918,7 @@ static int selinux_revalidate_file_permission(struct file *file, int mask) static int selinux_file_permission(struct file *file, int mask) { struct inode *inode 
= file->f_path.dentry->d_inode; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct file_security_struct *fsec = file->f_security; struct inode_security_struct *isec = inode->i_security; @@ -2995,7 +2995,8 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { int rc = 0; - u32 sid = ((struct task_security_struct *)(current->security))->sid; + u32 sid = ((struct task_security_struct *) + (current->cred->security))->sid; if (addr < mmap_min_addr) rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, @@ -3107,7 +3108,7 @@ static int selinux_file_set_fowner(struct file *file) struct task_security_struct *tsec; struct file_security_struct *fsec; - tsec = current->security; + tsec = current->cred->security; fsec = file->f_security; fsec->fown_sid = tsec->sid; @@ -3125,7 +3126,7 @@ static int selinux_file_send_sigiotask(struct task_struct *tsk, /* struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - tsec = tsk->security; + tsec = tsk->cred->security; fsec = file->f_security; if (!signum) @@ -3188,12 +3189,12 @@ static int selinux_task_alloc_security(struct task_struct *tsk) struct task_security_struct *tsec1, *tsec2; int rc; - tsec1 = current->security; + tsec1 = current->cred->security; rc = task_alloc_security(tsk); if (rc) return rc; - tsec2 = tsk->security; + tsec2 = tsk->cred->security; tsec2->osid = tsec1->osid; tsec2->sid = tsec1->sid; @@ -3251,7 +3252,7 @@ static int selinux_task_getsid(struct task_struct *p) static void selinux_task_getsecid(struct task_struct *p, u32 *secid) { - struct task_security_struct *tsec = p->security; + struct task_security_struct *tsec = p->cred->security; *secid = tsec->sid; } @@ -3343,7 +3344,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, perm = PROCESS__SIGNULL; /* null signal; existence test */ else 
perm = signal_to_av(sig); - tsec = p->security; + tsec = p->cred->security; if (secid) rc = avc_has_perm(secid, tsec->sid, SECCLASS_PROCESS, perm, NULL); else @@ -3375,7 +3376,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p) secondary_ops->task_reparent_to_init(p); - tsec = p->security; + tsec = p->cred->security; tsec->osid = tsec->sid; tsec->sid = SECINITSID_KERNEL; return; @@ -3384,7 +3385,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p) static void selinux_task_to_inode(struct task_struct *p, struct inode *inode) { - struct task_security_struct *tsec = p->security; + struct task_security_struct *tsec = p->cred->security; struct inode_security_struct *isec = inode->i_security; isec->sid = tsec->sid; @@ -3632,7 +3633,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock, struct avc_audit_data ad; int err = 0; - tsec = task->security; + tsec = task->cred->security; isec = SOCK_INODE(sock)->i_security; if (isec->sid == SECINITSID_KERNEL) @@ -3656,7 +3657,7 @@ static int selinux_socket_create(int family, int type, if (kern) goto out; - tsec = current->security; + tsec = current->cred->security; newsid = tsec->sockcreate_sid ? : tsec->sid; err = avc_has_perm(tsec->sid, newsid, socket_type_to_security_class(family, type, @@ -3677,7 +3678,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, isec = SOCK_INODE(sock)->i_security; - tsec = current->security; + tsec = current->cred->security; newsid = tsec->sockcreate_sid ? : tsec->sid; isec->sclass = socket_type_to_security_class(family, type, protocol); isec->sid = kern ? 
SECINITSID_KERNEL : newsid; @@ -3723,7 +3724,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in struct sock *sk = sock->sk; u32 sid, node_perm; - tsec = current->security; + tsec = current->cred->security; isec = SOCK_INODE(sock)->i_security; if (family == PF_INET) { @@ -4764,7 +4765,7 @@ static int ipc_alloc_security(struct task_struct *task, struct kern_ipc_perm *perm, u16 sclass) { - struct task_security_struct *tsec = task->security; + struct task_security_struct *tsec = task->cred->security; struct ipc_security_struct *isec; isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); @@ -4814,7 +4815,7 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = ipc_perms->security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4845,7 +4846,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4871,7 +4872,7 @@ static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4917,7 +4918,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, struct avc_audit_data ad; int rc; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; msec = msg->security; @@ -4965,7 +4966,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, struct avc_audit_data ad; int rc; - tsec = target->security; + tsec = target->cred->security; isec = msq->q_perm.security; msec = msg->security; @@ -4992,7 +4993,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp) if (rc) return 
rc; - tsec = current->security; + tsec = current->cred->security; isec = shp->shm_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5018,7 +5019,7 @@ static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = shp->shm_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5091,7 +5092,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = sma->sem_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5117,7 +5118,7 @@ static int selinux_sem_associate(struct sem_array *sma, int semflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = sma->sem_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5224,7 +5225,7 @@ static int selinux_getprocattr(struct task_struct *p, return error; } - tsec = p->security; + tsec = p->cred->security; if (!strcmp(name, "current")) sid = tsec->sid; @@ -5308,7 +5309,7 @@ static int selinux_setprocattr(struct task_struct *p, operation. See selinux_bprm_set_security for the execve checks and may_create for the file creation checks. The operation will then fail if the context is not permitted. 
*/ - tsec = p->security; + tsec = p->cred->security; if (!strcmp(name, "exec")) tsec->exec_sid = sid; else if (!strcmp(name, "fscreate")) @@ -5361,7 +5362,8 @@ boundary_ok: rcu_read_lock(); tracer = tracehook_tracer_task(p); if (tracer != NULL) { - struct task_security_struct *ptsec = tracer->security; + struct task_security_struct *ptsec = + tracer->cred->security; u32 ptsid = ptsec->sid; rcu_read_unlock(); error = avc_has_perm_noaudit(ptsid, sid, @@ -5405,7 +5407,7 @@ static void selinux_release_secctx(char *secdata, u32 seclen) static int selinux_key_alloc(struct key *k, struct task_struct *tsk, unsigned long flags) { - struct task_security_struct *tsec = tsk->security; + struct task_security_struct *tsec = tsk->cred->security; struct key_security_struct *ksec; ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); @@ -5439,7 +5441,7 @@ static int selinux_key_permission(key_ref_t key_ref, key = key_ref_to_ptr(key_ref); - tsec = ctx->security; + tsec = ctx->cred->security; ksec = key->security; /* if no specific permissions are requested, we skip the @@ -5683,7 +5685,7 @@ static __init int selinux_init(void) /* Set the security state for the initial task. 
*/ if (task_alloc_security(current)) panic("SELinux: Failed to initialize initial task.\n"); - tsec = current->security; + tsec = current->cred->security; tsec->osid = tsec->sid = SECINITSID_KERNEL; sel_inode_cache = kmem_cache_create("selinux_inode_security", diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 69c9dccc8cf0..10715d1330b9 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -97,7 +97,7 @@ static int task_has_security(struct task_struct *tsk, { struct task_security_struct *tsec; - tsec = tsk->security; + tsec = tsk->cred->security; if (!tsec) return -EACCES; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 8f17f542a116..d7db76617b0e 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx, u32 sid) { int rc = 0; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct xfrm_sec_ctx *ctx = NULL; char *ctx_str = NULL; u32 str_len; @@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; int rc = 0; if (ctx) { @@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x) */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct xfrm_sec_ctx *ctx = x->security; int rc = 0; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 79ff21ed4c3b..b6dd4fc0fb0b 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode) { int rc; - rc = 
smk_access(current->security, obj_label, mode); + rc = smk_access(current->cred->security, obj_label, mode); if (rc == 0) return 0; @@ -173,7 +173,7 @@ int smk_curacc(char *obj_label, u32 mode) * only one that gets privilege and current does not * have that label. */ - if (smack_onlycap != NULL && smack_onlycap != current->security) + if (smack_onlycap != NULL && smack_onlycap != current->cred->security) return rc; if (capable(CAP_MAC_OVERRIDE)) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 6e2dc0bab70d..791da238d049 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -102,7 +102,8 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) if (rc != 0) return rc; - rc = smk_access(current->security, ctp->security, MAY_READWRITE); + rc = smk_access(current->cred->security, ctp->cred->security, + MAY_READWRITE); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) return 0; return rc; @@ -124,7 +125,8 @@ static int smack_ptrace_traceme(struct task_struct *ptp) if (rc != 0) return rc; - rc = smk_access(ptp->security, current->security, MAY_READWRITE); + rc = smk_access(ptp->cred->security, current->cred->security, + MAY_READWRITE); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -141,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) static int smack_syslog(int type) { int rc; - char *sp = current->security; + char *sp = current->cred->security; rc = cap_syslog(type); if (rc != 0) @@ -373,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) */ static int smack_inode_alloc_security(struct inode *inode) { - inode->i_security = new_inode_smack(current->security); + inode->i_security = new_inode_smack(current->cred->security); if (inode->i_security == NULL) return -ENOMEM; return 0; @@ -818,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask) */ static int smack_file_alloc_security(struct file *file) { - file->f_security 
= current->security; + file->f_security = current->cred->security; return 0; } @@ -916,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, */ static int smack_file_set_fowner(struct file *file) { - file->f_security = current->security; + file->f_security = current->cred->security; return 0; } @@ -941,7 +943,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, * struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - rc = smk_access(file->f_security, tsk->security, MAY_WRITE); + rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE); if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -984,7 +986,7 @@ static int smack_file_receive(struct file *file) */ static int smack_task_alloc_security(struct task_struct *tsk) { - tsk->security = current->security; + tsk->cred->security = current->cred->security; return 0; } @@ -999,7 +1001,7 @@ static int smack_task_alloc_security(struct task_struct *tsk) */ static void smack_task_free_security(struct task_struct *task) { - task->security = NULL; + task->cred->security = NULL; } /** @@ -1011,7 +1013,7 @@ static void smack_task_free_security(struct task_struct *task) */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); } /** @@ -1022,7 +1024,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1033,7 +1035,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1045,7 +1047,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void 
smack_task_getsecid(struct task_struct *p, u32 *secid) { - *secid = smack_to_secid(p->security); + *secid = smack_to_secid(p->cred->security); } /** @@ -1061,7 +1063,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1078,7 +1080,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1090,7 +1092,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1108,7 +1110,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1120,7 +1122,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1131,7 +1133,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); } /** @@ -1154,13 +1156,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, * can write the receiver. */ if (secid == 0) - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. 
For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), p->security, MAY_WRITE); + return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE); } /** @@ -1173,7 +1175,7 @@ static int smack_task_wait(struct task_struct *p) { int rc; - rc = smk_access(current->security, p->security, MAY_WRITE); + rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE); if (rc == 0) return 0; @@ -1204,7 +1206,7 @@ static int smack_task_wait(struct task_struct *p) static void smack_task_to_inode(struct task_struct *p, struct inode *inode) { struct inode_smack *isp = inode->i_security; - isp->smk_inode = p->security; + isp->smk_inode = p->cred->security; } /* @@ -1223,7 +1225,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) */ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) { - char *csp = current->security; + char *csp = current->cred->security; struct socket_smack *ssp; ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); @@ -1448,7 +1450,7 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - msg->security = current->security; + msg->security = current->cred->security; return 0; } @@ -1484,7 +1486,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp) { struct kern_ipc_perm *isp = &shp->shm_perm; - isp->security = current->security; + isp->security = current->cred->security; return 0; } @@ -1593,7 +1595,7 @@ static int smack_sem_alloc_security(struct sem_array *sma) { struct kern_ipc_perm *isp = &sma->sem_perm; - isp->security = current->security; + isp->security = current->cred->security; return 0; } @@ -1697,7 +1699,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq) { struct kern_ipc_perm *kisp = &msq->q_perm; - kisp->security = current->security; + kisp->security = current->cred->security; return 0; } @@ -1852,7 +1854,7 @@ static void 
smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) struct super_block *sbp; struct superblock_smack *sbsp; struct inode_smack *isp; - char *csp = current->security; + char *csp = current->cred->security; char *fetched; char *final; struct dentry *dp; @@ -2009,7 +2011,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) if (strcmp(name, "current") != 0) return -EINVAL; - cp = kstrdup(p->security, GFP_KERNEL); + cp = kstrdup(p->cred->security, GFP_KERNEL); if (cp == NULL) return -ENOMEM; @@ -2055,7 +2057,7 @@ static int smack_setprocattr(struct task_struct *p, char *name, if (newsmack == NULL) return -EINVAL; - p->security = newsmack; + p->cred->security = newsmack; return size; } @@ -2288,8 +2290,8 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) return; ssp = sk->sk_security; - ssp->smk_in = current->security; - ssp->smk_out = current->security; + ssp->smk_in = current->cred->security; + ssp->smk_out = current->cred->security; ssp->smk_packet[0] = '\0'; rc = smack_netlabel(sk); @@ -2362,7 +2364,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, static int smack_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags) { - key->security = tsk->security; + key->security = tsk->cred->security; return 0; } @@ -2403,10 +2405,11 @@ static int smack_key_permission(key_ref_t key_ref, /* * This should not occur */ - if (context->security == NULL) + if (context->cred->security == NULL) return -EACCES; - return smk_access(context->security, keyp->security, MAY_READWRITE); + return smk_access(context->cred->security, keyp->security, + MAY_READWRITE); } #endif /* CONFIG_KEYS */ @@ -2726,7 +2729,7 @@ static __init int smack_init(void) /* * Set the security state for the initial task. 
*/ - current->security = &smack_known_floor.smk_known; + current->cred->security = &smack_known_floor.smk_known; /* * Initialize locks diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index c21d8c8bf0c7..c5ca279e0506 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -336,7 +336,7 @@ static void smk_cipso_doi(void) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->security); + audit_info.secid = smack_to_secid(current->cred->security); rc = netlbl_cfg_map_del(NULL, &audit_info); if (rc != 0) @@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->security); + audit_info.secid = smack_to_secid(current->cred->security); if (oldambient != NULL) { rc = netlbl_cfg_map_del(oldambient, &audit_info); @@ -843,7 +843,7 @@ static ssize_t smk_write_onlycap(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char in[SMK_LABELLEN]; - char *sp = current->security; + char *sp = current->cred->security; if (!capable(CAP_MAC_ADMIN)) return -EPERM; -- cgit v1.2.3 From 86a264abe542cfececb4df129bc45a0338d8cdb9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:18 +1100 Subject: CRED: Wrap current->cred and a few other accessors Wrap current->cred and a few other accessors to hide their actual implementation. 
Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/ia64/ia32/sys_ia32.c | 7 +- drivers/net/tun.c | 8 +- drivers/usb/core/devio.c | 10 ++- fs/binfmt_elf.c | 10 +-- fs/binfmt_elf_fdpic.c | 9 +- fs/exec.c | 5 +- fs/fcntl.c | 3 +- fs/file_table.c | 7 +- fs/hugetlbfs/inode.c | 5 +- fs/ioprio.c | 4 +- fs/smbfs/dir.c | 3 +- include/linux/cred.h | 187 ++++++++++++++++++++++++++++++++---------- include/linux/securebits.h | 2 +- ipc/mqueue.c | 2 +- ipc/shm.c | 4 +- kernel/sys.c | 59 +++++++------ kernel/uid16.c | 31 +++---- net/core/scm.c | 2 +- net/sunrpc/auth.c | 14 ++-- security/commoncap.c | 2 +- security/keys/process_keys.c | 2 +- security/keys/request_key.c | 11 +-- security/selinux/exports.c | 8 +- security/selinux/xfrm.c | 6 +- security/smack/smack_access.c | 2 +- security/smack/smack_lsm.c | 26 +++--- security/smack/smackfs.c | 4 +- 27 files changed, 271 insertions(+), 162 deletions(-) (limited to 'net/core') diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 2445a9d3488e..16ef61a91d95 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1767,25 +1767,24 @@ groups16_from_user(struct group_info *group_info, short __user *grouplist) asmlinkage long sys32_getgroups16 (int gidsetsize, short __user *grouplist) { + const struct cred *cred = current_cred(); int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->cred->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->cred->group_info); return i; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b14e2025e221..55dc70c6b4db 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -702,6 +702,7 @@ static int tun_set_iff(struct 
net *net, struct file *file, struct ifreq *ifr) struct tun_net *tn; struct tun_struct *tun; struct net_device *dev; + const struct cred *cred = current_cred(); int err; tn = net_generic(net, tun_net_id); @@ -712,11 +713,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) /* Check permissions */ if (((tun->owner != -1 && - current_euid() != tun->owner) || + cred->euid != tun->owner) || (tun->group != -1 && - current_egid() != tun->group)) && - !capable(CAP_NET_ADMIN)) + cred->egid != tun->group)) && + !capable(CAP_NET_ADMIN)) { return -EPERM; + } } else if (__dev_get_by_name(net, ifr->ifr_name)) return -EINVAL; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 1aadb9387027..aa79280df15d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -574,6 +574,7 @@ static int usbdev_open(struct inode *inode, struct file *file) { struct usb_device *dev = NULL; struct dev_state *ps; + const struct cred *cred = current_cred(); int ret; lock_kernel(); @@ -617,8 +618,8 @@ static int usbdev_open(struct inode *inode, struct file *file) init_waitqueue_head(&ps->wait); ps->discsignr = 0; ps->disc_pid = get_pid(task_pid(current)); - ps->disc_uid = current_uid(); - ps->disc_euid = current_euid(); + ps->disc_uid = cred->uid; + ps->disc_euid = cred->euid; ps->disccontext = NULL; ps->ifclaimed = 0; security_task_getsecid(current, &ps->secid); @@ -967,6 +968,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, struct usb_host_endpoint *ep; struct async *as; struct usb_ctrlrequest *dr = NULL; + const struct cred *cred = current_cred(); unsigned int u, totlen, isofrmlen; int ret, ifnum = -1; int is_in; @@ -1174,8 +1176,8 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, as->signr = uurb->signr; as->ifnum = ifnum; as->pid = get_pid(task_pid(current)); - as->uid = current_uid(); - as->euid = current_euid(); + as->uid = cred->uid; + as->euid = cred->euid; 
security_task_getsecid(current, &as->secid); if (!is_in) { if (copy_from_user(as->urb->transfer_buffer, uurb->buffer, diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7a52477ce493..0e6655613169 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -157,7 +157,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, int items; elf_addr_t *elf_info; int ei_index = 0; - struct task_struct *tsk = current; + const struct cred *cred = current_cred(); struct vm_area_struct *vma; /* @@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, tsk->cred->uid); - NEW_AUX_ENT(AT_EUID, tsk->cred->euid); - NEW_AUX_ENT(AT_GID, tsk->cred->gid); - NEW_AUX_ENT(AT_EGID, tsk->cred->egid); + NEW_AUX_ENT(AT_UID, cred->uid); + NEW_AUX_ENT(AT_EUID, cred->euid); + NEW_AUX_ENT(AT_GID, cred->gid); + NEW_AUX_ENT(AT_EGID, cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9f67054c2c4e..1f6e8c023b4c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -475,6 +475,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, struct elf_fdpic_params *exec_params, struct elf_fdpic_params *interp_params) { + const struct cred *cred = current_cred(); unsigned long sp, csp, nitems; elf_caddr_t __user *argv, *envp; size_t platform_len = 0, len; @@ -623,10 +624,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) current->cred->uid); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->cred->euid); - NEW_AUX_ENT(AT_GID, (elf_addr_t) current->cred->gid); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->cred->egid); + 
NEW_AUX_ENT(AT_UID, (elf_addr_t) cred->uid); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) cred->euid); + NEW_AUX_ENT(AT_GID, (elf_addr_t) cred->gid); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); diff --git a/fs/exec.c b/fs/exec.c index 31149e430a89..a5330e1a2216 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1388,6 +1388,7 @@ EXPORT_SYMBOL(set_binfmt); */ static int format_corename(char *corename, long signr) { + const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; int ispipe = (*pat_ptr == '|'); char *out_ptr = corename; @@ -1424,7 +1425,7 @@ static int format_corename(char *corename, long signr) /* uid */ case 'u': rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current_uid()); + "%d", cred->uid); if (rc > out_end - out_ptr) goto out; out_ptr += rc; @@ -1432,7 +1433,7 @@ static int format_corename(char *corename, long signr) /* gid */ case 'g': rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current_gid()); + "%d", cred->gid); if (rc > out_end - out_ptr) goto out; out_ptr += rc; diff --git a/fs/fcntl.c b/fs/fcntl.c index 63964d863ad6..c594cc0e40fb 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -205,13 +205,14 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { + const struct cred *cred = current_cred(); int err; err = security_file_set_fowner(filp); if (err) return err; - f_modown(filp, pid, type, current_uid(), current_euid(), force); + f_modown(filp, pid, type, cred->uid, cred->euid, force); return 0; } EXPORT_SYMBOL(__f_setown); diff --git a/fs/file_table.c b/fs/file_table.c index 3152b53cfab0..bc4563fe791d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -94,7 +94,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp, */ struct file *get_empty_filp(void) { - struct task_struct *tsk; + const struct cred *cred = 
current_cred(); static int old_max; struct file * f; @@ -118,12 +118,11 @@ struct file *get_empty_filp(void) if (security_file_alloc(f)) goto fail_sec; - tsk = current; INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = tsk->cred->fsuid; - f->f_gid = tsk->cred->fsgid; + f->f_uid = cred->fsuid; + f->f_gid = cred->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 870a721b8bd2..7d479ce3aceb 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -951,6 +951,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; + struct user_struct *user = current_user(); if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); @@ -958,7 +959,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); - if (!user_shm_lock(size, current->cred->user)) + if (!user_shm_lock(size, user)) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; @@ -998,7 +999,7 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - user_shm_unlock(size, current->cred->user); + user_shm_unlock(size, user); return ERR_PTR(error); } diff --git a/fs/ioprio.c b/fs/ioprio.c index bb5210af77c2..5112554fd210 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; case IOPRIO_WHO_USER: if (!who) - user = current->cred->user; + user = current_user(); else user = find_user(who); @@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; case IOPRIO_WHO_USER: if (!who) - user = current->cred->user; + user = current_user(); else user = find_user(who); diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 9e9bb0db4f6d..e7ddd0328ddc 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -667,8 +667,7 @@ smb_make_node(struct inode *dir, struct 
dentry *dentry, int mode, dev_t dev) attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID; attr.ia_mode = mode; - attr.ia_uid = current_euid(); - attr.ia_gid = current_egid(); + current_euid_egid(&attr.ia_uid, &attr.ia_gid); if (!new_valid_dev(dev)) return -EINVAL; diff --git a/include/linux/cred.h b/include/linux/cred.h index a7a686074cb0..4221ec6000c1 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -37,15 +37,16 @@ struct group_info { * get_group_info - Get a reference to a group info structure * @group_info: The group info to reference * - * This must be called with the owning task locked (via task_lock()) when task - * != current. The reason being that the vast majority of callers are looking - * at current->group_info, which can not be changed except by the current task. - * Changing current->group_info requires the task lock, too. + * This gets a reference to a set of supplementary groups. + * + * If the caller is accessing a task's credentials, they must hold the RCU read + * lock when reading. 
*/ -#define get_group_info(group_info) \ -do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) +static inline struct group_info *get_group_info(struct group_info *gi) +{ + atomic_inc(&gi->usage); + return gi; +} /** * put_group_info - Release a reference to a group info structure @@ -61,7 +62,7 @@ extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); -extern int groups_search(struct group_info *, gid_t); +extern int groups_search(const struct group_info *, gid_t); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ @@ -123,41 +124,6 @@ struct cred { spinlock_t lock; /* lock for pointer changes */ }; -#define get_current_user() (get_uid(current->cred->user)) - -#define task_uid(task) ((task)->cred->uid) -#define task_gid(task) ((task)->cred->gid) -#define task_euid(task) ((task)->cred->euid) -#define task_egid(task) ((task)->cred->egid) - -#define current_uid() (current->cred->uid) -#define current_gid() (current->cred->gid) -#define current_euid() (current->cred->euid) -#define current_egid() (current->cred->egid) -#define current_suid() (current->cred->suid) -#define current_sgid() (current->cred->sgid) -#define current_fsuid() (current->cred->fsuid) -#define current_fsgid() (current->cred->fsgid) -#define current_cap() (current->cred->cap_effective) - -#define current_uid_gid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->uid; \ - *(_gid) = current->cred->gid; \ -} while(0) - -#define current_euid_egid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->euid; \ - *(_gid) = current->cred->egid; \ -} while(0) - -#define current_fsuid_fsgid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->fsuid; \ - *(_gid) = current->cred->fsgid; \ -} while(0) - extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); @@ -187,4 +153,137 @@ static inline void 
put_cred(struct cred *cred) __put_cred(cred); } +/** + * current_cred - Access the current task's credentials + * + * Access the credentials of the current task. + */ +#define current_cred() \ + (current->cred) + +/** + * __task_cred - Access another task's credentials + * @task: The task to query + * + * Access the credentials of another task. The caller must hold the + * RCU readlock. + * + * The caller must make sure task doesn't go away, either by holding a ref on + * task or by holding tasklist_lock to prevent it from being unlinked. + */ +#define __task_cred(task) \ + ((const struct cred *)(rcu_dereference((task)->cred))) + +/** + * get_task_cred - Get another task's credentials + * @task: The task to query + * + * Get the credentials of a task, pinning them so that they can't go away. + * Accessing a task's credentials directly is not permitted. + * + * The caller must make sure task doesn't go away, either by holding a ref on + * task or by holding tasklist_lock to prevent it from being unlinked. + */ +#define get_task_cred(task) \ +({ \ + struct cred *__cred; \ + rcu_read_lock(); \ + __cred = (struct cred *) __task_cred((task)); \ + get_cred(__cred); \ + rcu_read_unlock(); \ + __cred; \ +}) + +/** + * get_current_cred - Get the current task's credentials + * + * Get the credentials of the current task, pinning them so that they can't go + * away. Accessing the current task's credentials directly is not permitted. + */ +#define get_current_cred() \ + (get_cred(current_cred())) + +/** + * get_current_user - Get the current task's user_struct + * + * Get the user record of the current task, pinning it so that it can't go + * away. 
+ */ +#define get_current_user() \ +({ \ + struct user_struct *__u; \ + struct cred *__cred; \ + __cred = (struct cred *) current_cred(); \ + __u = get_uid(__cred->user); \ + __u; \ +}) + +/** + * get_current_groups - Get the current task's supplementary group list + * + * Get the supplementary group list of the current task, pinning it so that it + * can't go away. + */ +#define get_current_groups() \ +({ \ + struct group_info *__groups; \ + struct cred *__cred; \ + __cred = (struct cred *) current_cred(); \ + __groups = get_group_info(__cred->group_info); \ + __groups; \ +}) + +#define task_cred_xxx(task, xxx) \ +({ \ + __typeof__(task->cred->xxx) ___val; \ + rcu_read_lock(); \ + ___val = __task_cred((task))->xxx; \ + rcu_read_unlock(); \ + ___val; \ +}) + +#define task_uid(task) (task_cred_xxx((task), uid)) +#define task_euid(task) (task_cred_xxx((task), euid)) + +#define current_cred_xxx(xxx) \ +({ \ + current->cred->xxx; \ +}) + +#define current_uid() (current_cred_xxx(uid)) +#define current_gid() (current_cred_xxx(gid)) +#define current_euid() (current_cred_xxx(euid)) +#define current_egid() (current_cred_xxx(egid)) +#define current_suid() (current_cred_xxx(suid)) +#define current_sgid() (current_cred_xxx(sgid)) +#define current_fsuid() (current_cred_xxx(fsuid)) +#define current_fsgid() (current_cred_xxx(fsgid)) +#define current_cap() (current_cred_xxx(cap_effective)) +#define current_user() (current_cred_xxx(user)) +#define current_security() (current_cred_xxx(security)) + +#define current_uid_gid(_uid, _gid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_uid) = __cred->uid; \ + *(_gid) = __cred->gid; \ +} while(0) + +#define current_euid_egid(_euid, _egid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_euid) = __cred->euid; \ + *(_egid) = __cred->egid; \ +} while(0) + +#define current_fsuid_fsgid(_fsuid, _fsgid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_fsuid) = 
__cred->fsuid; \ + *(_fsgid) = __cred->fsgid; \ +} while(0) + #endif /* _LINUX_CRED_H */ diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 6d389491bfa2..d2c5ed845bcc 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->cred->securebits) +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e1885b494bac..1151881ccb9a 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -112,6 +112,7 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) static struct inode *mqueue_get_inode(struct super_block *sb, int mode, struct mq_attr *attr) { + struct user_struct *u = current_user(); struct inode *inode; inode = new_inode(sb); @@ -126,7 +127,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, if (S_ISREG(mode)) { struct mqueue_inode_info *info; struct task_struct *p = current; - struct user_struct *u = p->cred->user; unsigned long mq_bytes, mq_msg_tblsz; inode->i_fop = &mqueue_file_operations; diff --git a/ipc/shm.c b/ipc/shm.c index 264a9d33c5dd..38a055758a9b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_HUGETLB) { /* hugetlb_file_setup takes care of mlock user accounting */ file = hugetlb_file_setup(name, size); - shp->mlock_user = current->cred->user; + shp->mlock_user = current_user(); } else { int acctflag = VM_ACCOUNT; /* @@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock; if(cmd==SHM_LOCK) { - struct user_struct *user = current->cred->user; + struct user_struct *user = 
current_user(); if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ diff --git a/kernel/sys.c b/kernel/sys.c index 5d81f07c0150..c4d6b59553e9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -143,6 +143,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) { struct task_struct *g, *p; struct user_struct *user; + const struct cred *cred = current_cred(); int error = -EINVAL; struct pid *pgrp; @@ -176,18 +177,18 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->cred->user; + user = cred->user; if (!who) - who = current_uid(); - else - if (who != current_uid() && !(user = find_user(who))) - goto out_unlock; /* No processes for this user */ + who = cred->uid; + else if ((who != cred->uid) && + !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->cred->uid == who) + if (__task_cred(p)->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); - if (who != current_uid()) + if (who != cred->uid) free_uid(user); /* For find_user() */ break; } @@ -207,6 +208,7 @@ asmlinkage long sys_getpriority(int which, int who) { struct task_struct *g, *p; struct user_struct *user; + const struct cred *cred = current_cred(); long niceval, retval = -ESRCH; struct pid *pgrp; @@ -238,21 +240,21 @@ asmlinkage long sys_getpriority(int which, int who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->cred->user; + user = (struct user_struct *) cred->user; if (!who) - who = current_uid(); - else - if (who != current_uid() && !(user = find_user(who))) - goto out_unlock; /* No processes for this user */ + who = cred->uid; + else if ((who != cred->uid) && + !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->cred->uid == who) { + if 
(__task_cred(p)->uid == who) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; } while_each_thread(g, p); - if (who != current_uid()) + if (who != cred->uid) free_uid(user); /* for find_user() */ break; } @@ -743,11 +745,11 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(cred->uid, ruid)) && - !(retval = put_user(cred->euid, euid))) + if (!(retval = put_user(cred->uid, ruid)) && + !(retval = put_user(cred->euid, euid))) retval = put_user(cred->suid, suid); return retval; @@ -796,11 +798,11 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(cred->gid, rgid)) && - !(retval = put_user(cred->egid, egid))) + if (!(retval = put_user(cred->gid, rgid)) && + !(retval = put_user(cred->egid, egid))) retval = put_user(cred->sgid, sgid); return retval; @@ -1199,7 +1201,7 @@ static void groups_sort(struct group_info *group_info) } /* a simple bsearch */ -int groups_search(struct group_info *group_info, gid_t grp) +int groups_search(const struct group_info *group_info, gid_t grp) { unsigned int left, right; @@ -1268,13 +1270,8 @@ EXPORT_SYMBOL(set_current_groups); asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) { - struct cred *cred = current->cred; - int i = 0; - - /* - * SMP: Nobody else can change our grouplist. Thus we are - * safe. 
- */ + const struct cred *cred = current_cred(); + int i; if (gidsetsize < 0) return -EINVAL; @@ -1330,8 +1327,9 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) */ int in_group_p(gid_t grp) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval = 1; + if (grp != cred->fsgid) retval = groups_search(cred->group_info, grp); return retval; @@ -1341,8 +1339,9 @@ EXPORT_SYMBOL(in_group_p); int in_egroup_p(gid_t grp) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval = 1; + if (grp != cred->egid) retval = groups_search(cred->group_info, grp); return retval; diff --git a/kernel/uid16.c b/kernel/uid16.c index 71f07fc39fea..2460c3199b5a 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -84,11 +84,12 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) { + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && - !(retval = put_user(high2lowuid(current->cred->euid), euid))) - retval = put_user(high2lowuid(current->cred->suid), suid); + if (!(retval = put_user(high2lowuid(cred->uid), ruid)) && + !(retval = put_user(high2lowuid(cred->euid), euid))) + retval = put_user(high2lowuid(cred->suid), suid); return retval; } @@ -104,11 +105,12 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) { + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && - !(retval = put_user(high2lowgid(current->cred->egid), egid))) - retval = put_user(high2lowgid(current->cred->sgid), sgid); + if (!(retval = put_user(high2lowgid(cred->gid), rgid)) && + !(retval = put_user(high2lowgid(cred->egid), 
egid))) + retval = put_user(high2lowgid(cred->sgid), sgid); return retval; } @@ -161,25 +163,24 @@ static int groups16_from_user(struct group_info *group_info, asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) { - int i = 0; + const struct cred *cred = current_cred(); + int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->cred->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->cred->group_info); return i; } @@ -210,20 +211,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) asmlinkage long sys_getuid16(void) { - return high2lowuid(current->cred->uid); + return high2lowuid(current_uid()); } asmlinkage long sys_geteuid16(void) { - return high2lowuid(current->cred->euid); + return high2lowuid(current_euid()); } asmlinkage long sys_getgid16(void) { - return high2lowgid(current->cred->gid); + return high2lowgid(current_gid()); } asmlinkage long sys_getegid16(void) { - return high2lowgid(current->cred->egid); + return high2lowgid(current_egid()); } diff --git a/net/core/scm.c b/net/core/scm.c index c28ca32a7d93..f73c44b17dda 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -44,7 +44,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && ((creds->uid == cred->uid || creds->uid == cred->euid || diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index c79543212602..0443f8349458 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -350,16 +350,18 @@ EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *auth, int 
flags) { - struct auth_cred acred = { - .uid = current_fsuid(), - .gid = current_fsgid(), - .group_info = current->cred->group_info, - }; + struct auth_cred acred; struct rpc_cred *ret; + const struct cred *cred = current_cred(); dprintk("RPC: looking up %s cred\n", auth->au_ops->au_name); - get_group_info(acred.group_info); + + memset(&acred, 0, sizeof(acred)); + acred.uid = cred->fsuid; + acred.gid = cred->fsgid; + acred.group_info = get_group_info(((struct cred *)cred)->group_info); + ret = auth->au_ops->lookup_cred(auth, &acred, flags); put_group_info(acred.group_info); return ret; diff --git a/security/commoncap.c b/security/commoncap.c index fa61679f8c73..61307f590003 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -641,7 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice) int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, long *rc_p) { - struct cred *cred = current->cred; + struct cred *cred = current_cred(); long error = 0; switch (option) { diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b0904cdda2e7..ce8ac6073d57 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -582,7 +582,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - struct cred *cred = t->cred; + struct cred *cred = current_cred(); struct key *key; key_ref_t key_ref, skey_ref; int ret; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 3e9b9eb1dd28..0488b0af5bd6 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -67,6 +67,7 @@ static int call_sbin_request_key(struct key_construction *cons, void *aux) { struct task_struct *tsk = current; + const struct cred *cred = current_cred(); key_serial_t prkey, sskey; struct key *key = cons->key, *authkey = cons->authkey, *keyring; char *argv[9], *envp[3], uid_str[12], 
gid_str[12]; @@ -96,16 +97,16 @@ static int call_sbin_request_key(struct key_construction *cons, goto error_link; /* record the UID and GID */ - sprintf(uid_str, "%d", current_fsuid()); - sprintf(gid_str, "%d", current_fsgid()); + sprintf(uid_str, "%d", cred->fsuid); + sprintf(gid_str, "%d", cred->fsgid); /* we say which key is under construction */ sprintf(key_str, "%d", key->serial); /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - tsk->cred->thread_keyring ? - tsk->cred->thread_keyring->serial : 0); + cred->thread_keyring ? + cred->thread_keyring->serial : 0); prkey = 0; if (tsk->signal->process_keyring) @@ -118,7 +119,7 @@ static int call_sbin_request_key(struct key_construction *cons, sskey = rcu_dereference(tsk->signal->session_keyring)->serial; rcu_read_unlock(); } else { - sskey = tsk->cred->user->session_keyring->serial; + sskey = cred->user->session_keyring->serial; } sprintf(keyring_str[2], "%d", sskey); diff --git a/security/selinux/exports.c b/security/selinux/exports.c index cf02490cd1eb..c73aeaa008e8 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -39,9 +39,13 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid); int selinux_secmark_relabel_packet_permission(u32 sid) { if (selinux_enabled) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *__tsec; + u32 tsid; - return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET, + __tsec = current_security(); + tsid = __tsec->sid; + + return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); } return 0; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d7db76617b0e..c0eb72013d67 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx, u32 sid) { int rc = 0; - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct 
*tsec = current_security(); struct xfrm_sec_ctx *ctx = NULL; char *ctx_str = NULL; u32 str_len; @@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *tsec = current_security(); int rc = 0; if (ctx) { @@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x) */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *tsec = current_security(); struct xfrm_sec_ctx *ctx = x->security; int rc = 0; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index b6dd4fc0fb0b..247cec3b5a43 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode) { int rc; - rc = smk_access(current->cred->security, obj_label, mode); + rc = smk_access(current_security(), obj_label, mode); if (rc == 0) return 0; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index cc837314fb0e..e8a4fcb1ad04 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -143,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) static int smack_syslog(int type) { int rc; - char *sp = current->cred->security; + char *sp = current_security(); rc = cap_syslog(type); if (rc != 0) @@ -375,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) */ static int smack_inode_alloc_security(struct inode *inode) { - inode->i_security = new_inode_smack(current->cred->security); + inode->i_security = new_inode_smack(current_security()); if (inode->i_security == NULL) return -ENOMEM; return 0; @@ -820,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask) */ static int smack_file_alloc_security(struct file *file) { - file->f_security = 
current->cred->security; + file->f_security = current_security(); return 0; } @@ -918,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, */ static int smack_file_set_fowner(struct file *file) { - file->f_security = current->cred->security; + file->f_security = current_security(); return 0; } @@ -986,8 +986,7 @@ static int smack_file_receive(struct file *file) */ static int smack_cred_alloc_security(struct cred *cred) { - cred->security = current->cred->security; - + cred->security = current_security(); return 0; } @@ -1225,7 +1224,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) */ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) { - char *csp = current->cred->security; + char *csp = current_security(); struct socket_smack *ssp; ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); @@ -1450,7 +1449,7 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - msg->security = current->cred->security; + msg->security = current_security(); return 0; } @@ -1486,7 +1485,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp) { struct kern_ipc_perm *isp = &shp->shm_perm; - isp->security = current->cred->security; + isp->security = current_security(); return 0; } @@ -1595,7 +1594,7 @@ static int smack_sem_alloc_security(struct sem_array *sma) { struct kern_ipc_perm *isp = &sma->sem_perm; - isp->security = current->cred->security; + isp->security = current_security(); return 0; } @@ -1699,7 +1698,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq) { struct kern_ipc_perm *kisp = &msq->q_perm; - kisp->security = current->cred->security; + kisp->security = current_security(); return 0; } @@ -1854,7 +1853,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) struct super_block *sbp; struct superblock_smack *sbsp; struct inode_smack *isp; - char *csp = current->cred->security; + 
char *csp = current_security(); char *fetched; char *final; struct dentry *dp; @@ -2290,8 +2289,7 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) return; ssp = sk->sk_security; - ssp->smk_in = current->cred->security; - ssp->smk_out = current->cred->security; + ssp->smk_in = ssp->smk_out = current_security(); ssp->smk_packet[0] = '\0'; rc = smack_netlabel(sk); diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index c5ca279e0506..ca257dfdc75d 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -336,7 +336,7 @@ static void smk_cipso_doi(void) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->cred->security); + audit_info.secid = smack_to_secid(current_security()); rc = netlbl_cfg_map_del(NULL, &audit_info); if (rc != 0) @@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->cred->security); + audit_info.secid = smack_to_secid(current_security()); if (oldambient != NULL) { rc = netlbl_cfg_map_del(oldambient, &audit_info); -- cgit v1.2.3 From ef711cf1d156428d4c2911b8c86c6ce90519dc45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Nov 2008 00:53:54 -0800 Subject: net: speedup dst_release() During tbench/oprofile sessions, I found that dst_release() was in third position. 
CPU: Core 2, speed 2999.68 MHz (estimated) Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (Unhalted core cycles) count 100000 samples % symbol name 483726 9.0185 __copy_user_zeroing_intel 191466 3.5697 __copy_user_intel 185475 3.4580 dst_release 175114 3.2648 ip_queue_xmit 153447 2.8608 tcp_sendmsg 108775 2.0280 tcp_recvmsg 102659 1.9140 sysenter_past_esp 101450 1.8914 tcp_current_mss 95067 1.7724 __copy_from_user_ll 86531 1.6133 tcp_transmit_skb Of course, all CPUs fight on the dst_entry associated with 127.0.0.1 Instead of first checking the refcount value, then decrement it, we use atomic_dec_return() to help CPU to make the right memory transaction (i.e. getting the cache line in exclusive mode) dst_release() is now at the fifth position, and tbench is a little bit faster ;) CPU: Core 2, speed 3000.1 MHz (estimated) Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (Unhalted core cycles) count 100000 samples % symbol name 647107 8.8072 __copy_user_zeroing_intel 258840 3.5229 ip_queue_xmit 258302 3.5155 __copy_user_intel 209629 2.8531 tcp_sendmsg 165632 2.2543 dst_release 149232 2.0311 tcp_current_mss 147821 2.0119 tcp_recvmsg 137893 1.8767 sysenter_past_esp 127473 1.7349 __copy_from_user_ll 121308 1.6510 ip_finish_output 118510 1.6129 tcp_transmit_skb 109295 1.4875 tcp_v4_rcv Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dst.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/dst.c b/net/core/dst.c index 09c1530f4681..57bc4d5b8d08 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -263,9 +263,11 @@ again: void dst_release(struct dst_entry *dst) { if (dst) { - WARN_ON(atomic_read(&dst->__refcnt) < 1); + int newrefcnt; + smp_mb__before_atomic_dec(); - atomic_dec(&dst->__refcnt); + newrefcnt = atomic_dec_return(&dst->__refcnt); + WARN_ON(newrefcnt < 0); } } EXPORT_SYMBOL(dst_release); -- cgit v1.2.3 From 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:40:17 -0800 Subject: net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls RCU was added to UDP lookups, using a fast infrastructure : - sockets kmem_cache use SLAB_DESTROY_BY_RCU and dont pay the price of call_rcu() at freeing time. - hlist_nulls permits to use few memory barriers. This patch uses same infrastructure for TCP/DCCP established and timewait sockets. Thanks to SLAB_DESTROY_BY_RCU, no slowdown for applications using short lived TCP connections. A followup patch, converting rwlocks to spinlocks will even speedup this case. __inet_lookup_established() is pretty fast now we dont have to dirty a contended cache line (read_lock/read_unlock) Only established and timewait hashtable are converted to RCU (bind table and listen table are still using traditional locking) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_hashtables.h | 4 +-- include/net/inet_timewait_sock.h | 10 +++--- net/core/sock.c | 4 ++- net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/dccp/proto.c | 4 +-- net/ipv4/inet_diag.c | 7 ++-- net/ipv4/inet_hashtables.c | 78 ++++++++++++++++++++++++++++------------ net/ipv4/inet_timewait_sock.c | 26 ++++++++------ net/ipv4/tcp.c | 4 +-- net/ipv4/tcp_ipv4.c | 25 ++++++------- net/ipv6/inet6_hashtables.c | 70 ++++++++++++++++++++++++------------ net/ipv6/tcp_ipv6.c | 1 + 13 files changed, 151 insertions(+), 84 deletions(-) (limited to 'net/core') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index cb31fbf8ae2a..481896045111 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -41,8 +41,8 @@ * I'll experiment with dynamic table growth later. */ struct inet_ehash_bucket { - struct hlist_head chain; - struct hlist_head twchain; + struct hlist_nulls_head chain; + struct hlist_nulls_head twchain; }; /* There are a few simple rules, which allow for local port reuse by diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 80e4977631b8..4b8ece22b8e9 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -110,7 +110,7 @@ struct inet_timewait_sock { #define tw_state __tw_common.skc_state #define tw_reuse __tw_common.skc_reuse #define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node +#define tw_node __tw_common.skc_nulls_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash @@ -137,10 +137,10 @@ struct inet_timewait_sock { struct hlist_node tw_death_node; }; -static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, - struct hlist_head *list) +static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) { - hlist_add_head(&tw->tw_node, list); + 
hlist_nulls_add_head_rcu(&tw->tw_node, list); } static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -175,7 +175,7 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) } #define inet_twsk_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) + hlist_nulls_for_each_entry(tw, node, head, tw_node) #define inet_twsk_for_each_inmate(tw, node, jail) \ hlist_for_each_entry(tw, node, jail, tw_death_node) diff --git a/net/core/sock.c b/net/core/sock.c index ded1eb5d2fd4..38de9c3f563b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2082,7 +2082,9 @@ int proto_register(struct proto *prot, int alloc_slab) prot->twsk_prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, prot->twsk_prot->twsk_obj_size, - 0, SLAB_HWCACHE_ALIGN, + 0, + SLAB_HWCACHE_ALIGN | + prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 528baa2e5be4..d1dd95289b89 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -938,6 +938,7 @@ static struct proto dccp_v4_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4aa1148cdb20..f033e845bb07 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1140,6 +1140,7 @@ static struct proto dccp_v6_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 46cb3490d48e..1117d4d8c8f1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1090,8 +1090,8 @@ static int __init 
dccp_init(void) } for (i = 0; i < dccp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&dccp_hashinfo)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 564230dabcb8..41b36720e977 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -778,18 +778,19 @@ skip_listen_ht: struct inet_ehash_bucket *head = &hashinfo->ehash[i]; rwlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; num = 0; - if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) + if (hlist_nulls_empty(&head->chain) && + hlist_nulls_empty(&head->twchain)) continue; if (i > s_i) s_num = 0; read_lock_bh(lock); - sk_for_each(sk, node, &head->chain) { + sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index be41ebbec4eb..fd269cfef0ec 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -223,35 +223,65 @@ struct sock * __inet_lookup_established(struct net *net, INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. 
*/ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { if (INET_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (unlikely(!INET_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begin; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begin; +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each_rcu(sk, node, &head->twchain) { if (INET_TW_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begintw; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. 
+ */ + if (get_nulls_value(node) != slot) + goto begintw; sk = NULL; out: - read_unlock(lock); + rcu_read_unlock(); return sk; -hit: - sock_hold(sk); - goto out; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -272,14 +302,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); write_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, net, hash, acookie, @@ -293,7 +323,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... */ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET_MATCH(sk2, net, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; @@ -306,7 +336,7 @@ unique: inet->sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); @@ -338,7 +368,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; + struct hlist_nulls_head *list; rwlock_t *lock; struct inet_ehash_bucket *head; @@ -350,7 +380,7 @@ void __inet_hash_nolisten(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock(lock); - __sk_add_node(sk, list); + __sk_nulls_add_node_rcu(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } @@ -400,13 +430,15 @@ void inet_unhash(struct sock *sk) local_bh_disable(); 
inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; + if (__sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock_bh(lock); + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(lock); out: if (sk->sk_state == TCP_LISTEN) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1c5fd38f8824..60689951ecdb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -23,12 +23,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); write_lock(lock); - if (hlist_unhashed(&tw->tw_node)) { + if (hlist_nulls_unhashed(&tw->tw_node)) { write_unlock(lock); return; } - __hlist_del(&tw->tw_node); - sk_node_init(&tw->tw_node); + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); write_unlock(lock); /* Disassociate with bind bucket. */ @@ -92,13 +92,17 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, write_lock(lock); - /* Step 2: Remove SK from established hash. */ - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - - /* Step 3: Hash TW into TIMEWAIT chain. */ - inet_twsk_add_node(tw, &ehead->twchain); + /* + * Step 2: Hash TW into TIMEWAIT chain. + * Should be done before removing sk from established chain + * because readers are lockless and search established first. + */ atomic_inc(&tw->tw_refcnt); + inet_twsk_add_node_rcu(tw, &ehead->twchain); + + /* Step 3: Remove SK from established hash. 
*/ + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock(lock); } @@ -416,7 +420,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, { struct inet_timewait_sock *tw; struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; int h; local_bh_disable(); @@ -426,7 +430,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, rwlock_t *lock = inet_ehash_lockp(hashinfo, h); restart: write_lock(lock); - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each(sk, node, &head->twchain) { tw = inet_twsk(sk); if (!net_eq(twsk_net(tw), net) || diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f60a5917e54d..044224a341eb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2707,8 +2707,8 @@ void __init tcp_init(void) thash_entries ? 0 : 512 * 1024); tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; for (i = 0; i < tcp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&tcp_hashinfo)) panic("TCP: failed to alloc ehash_locks"); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d49233f409b5..b2e3ab2287ba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1857,16 +1857,16 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) { - return hlist_empty(head) ? NULL : + return hlist_nulls_empty(head) ? NULL : list_entry(head->first, struct inet_timewait_sock, tw_node); } static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) { - return tw->tw_node.next ? 
- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; + return !is_a_nulls(tw->tw_node.next) ? + hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } static void *listening_get_next(struct seq_file *seq, void *cur) @@ -1954,8 +1954,8 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) static inline int empty_bucket(struct tcp_iter_state *st) { - return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && - hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); + return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && + hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } static void *established_get_first(struct seq_file *seq) @@ -1966,7 +1966,7 @@ static void *established_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct inet_timewait_sock *tw; rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); @@ -1975,7 +1975,7 @@ static void *established_get_first(struct seq_file *seq) continue; read_lock_bh(lock); - sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || !net_eq(sock_net(sk), net)) { continue; @@ -2004,7 +2004,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; struct inet_timewait_sock *tw; - struct hlist_node *node; + struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); @@ -2032,11 +2032,11 @@ get_tw: return NULL; read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); + sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); } else - sk = sk_next(sk); + sk = sk_nulls_next(sk); - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && 
net_eq(sock_net(sk), net)) goto found; } @@ -2375,6 +2375,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1646a5658255..c1b4d401fd95 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,24 +25,28 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { + struct hlist_head *list; + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; lock = &hashinfo->lhash_lock; inet_listen_wlock(hashinfo); + __sk_add_node(sk, list); } else { unsigned int hash; + struct hlist_nulls_head *list; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); + __sk_nulls_add_node_rcu(sk, list); } - __sk_add_node(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } @@ -63,33 +67,53 @@ struct sock *__inet6_lookup_established(struct net *net, const int dif) { struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. 
*/ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begin; + } + goto out; + } } + if (get_nulls_value(node) != slot) + goto begin; + +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ - sk_for_each(sk, node, &head->twchain) { - if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; + sk_nulls_for_each_rcu(sk, node, &head->twchain) { + if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begintw; + } + goto out; + } } - read_unlock(lock); - return NULL; - -hit: - sock_hold(sk); - read_unlock(lock); + if (get_nulls_value(node) != slot) + goto begintw; + sk = NULL; +out: + rcu_read_unlock(); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); @@ -172,14 +196,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); write_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { @@ -192,7 +216,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... 
*/ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) goto not_unique; } @@ -203,7 +227,7 @@ unique: inet->num = lport; inet->sport = htons(lport); WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 984276463a8d..b35787056313 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2043,6 +2043,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, -- cgit v1.2.3 From 908cd2dabbfbbefb02f6b908a1188a62e685136a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 16 Nov 2008 19:50:35 -0800 Subject: net: use %pF for /proc/net/ptype Technically, patch changes format for modules, but I think nobody cares. -86dd :ipv6:ipv6_rcv+0x0 +86dd ipv6_rcv+0x0/0x400 [ipv6] Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/core/dev.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 31568b2068ac..e08c0fcd603b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -108,7 +108,6 @@ #include #include #include -#include #include #include #include @@ -2801,31 +2800,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } -static void ptype_seq_decode(struct seq_file *seq, void *sym) -{ -#ifdef CONFIG_KALLSYMS - unsigned long offset = 0, symsize; - const char *symname; - char *modname; - char namebuf[128]; - - symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset, - &modname, namebuf); - - if (symname) { - char *delim = ":"; - - if (!modname) - modname = delim = ""; - seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim, - symname, offset); - return; - } -#endif - - seq_printf(seq, "[%p]", sym); -} - static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; @@ -2838,10 +2812,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-8s ", - pt->dev ? pt->dev->name : ""); - ptype_seq_decode(seq, pt->func); - seq_putc(seq, '\n'); + seq_printf(seq, " %-8s %pF\n", + pt->dev ? pt->dev->name : "", pt->func); } return 0; -- cgit v1.2.3 From 14e943db133489c98d426a0dcfce4a99c6e8ad97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Nov 2008 15:14:01 -0800 Subject: net: make /proc/net/protocols namespace aware Converting /proc/net/protocols to be namespace aware is quite easy and permits us to use sock_prot_inuse_get(). This provides seperate counters for each protocol. For example we can really count TCPv6 sockets and TCPv4 sockets, while previously, we had the same value, and this value was not namespace aware. Signed-off-by: Eric Dumazet Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/core/sock.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 5a6fe4dfad46..a4e840e5a053 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2174,7 +2174,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", proto->name, proto->obj_size, - proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1, + sock_prot_inuse_get(seq_file_net(seq), proto), proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", proto->max_header, @@ -2228,7 +2228,8 @@ static const struct seq_operations proto_seq_ops = { static int proto_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &proto_seq_ops); + return seq_open_net(inode, file, &proto_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations proto_seq_fops = { @@ -2236,13 +2237,31 @@ static const struct file_operations proto_seq_fops = { .open = proto_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, +}; + +static __net_init int proto_init_net(struct net *net) +{ + if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) + return -ENOMEM; + + return 0; +} + +static __net_exit void proto_exit_net(struct net *net) +{ + proc_net_remove(net, "protocols"); +} + + +static __net_initdata struct pernet_operations proto_net_ops = { + .init = proto_init_net, + .exit = proto_exit_net, }; static int __init proto_init(void) { - /* register /proc/net/protocols */ - return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? 
-ENOBUFS : 0; + return register_pernet_subsys(&proto_net_ops); } subsys_initcall(proto_init); -- cgit v1.2.3 From 07f0757a6808f2f36a0e58c3a54867ccffdb8dc9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 19 Nov 2008 15:44:53 -0800 Subject: include/net net/ - csum_partial - remove unnecessary casts The first argument to csum_partial is const void * casts to char/u8 * are not necessary Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/checksum.h | 2 +- include/net/ip_vs.h | 6 +++--- net/core/netpoll.c | 2 +- net/ipv4/inet_lro.c | 4 ++-- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/icmp.c | 4 ++-- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 4 ++-- net/ipv6/tcp_ipv6.c | 6 +++--- net/unix/af_unix.c | 4 ++-- 10 files changed, 19 insertions(+), 19 deletions(-) (limited to 'net/core') diff --git a/include/net/checksum.h b/include/net/checksum.h index 07602b7fa218..ba55d8b8c87c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -98,7 +98,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __be32 diff[] = { ~from, to }; - *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum))); + *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); } static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8f6abf4883e3..ab9b003ab671 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -913,7 +913,7 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum) { __be32 diff[2] = { ~old, new }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #ifdef CONFIG_IP_VS_IPV6 @@ -923,7 +923,7 @@ static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new, __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0], new[3], new[2], new[1], new[0] }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return 
csum_partial(diff, sizeof(diff), oldsum); } #endif @@ -931,7 +931,7 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) { __be16 diff[2] = { ~old, new }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #endif /* __KERNEL__ */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 34f5d072f168..fc4e28e23b89 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -343,7 +343,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->check = csum_tcpudp_magic(htonl(np->local_ip), htonl(np->remote_ip), udp_len, IPPROTO_UDP, - csum_partial((unsigned char *)udph, udp_len, 0)); + csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cfd034a2b96e..6a667dae315e 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -120,7 +120,7 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); tcph->check = 0; - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, lro_desc->ip_tot_len - @@ -135,7 +135,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) __wsum tcp_ps_hdr_csum; tcp_csum = ~csum_unfold(tcph->check); - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, len + TCP_HDR_LEN(tcph), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b2e3ab2287ba..5559fea61e87 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -492,7 +492,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 
skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = tcp_v4_check(len, inet->saddr, inet->daddr, - csum_partial((char *)th, + csum_partial(th, th->doff << 2, skb->csum)); } @@ -726,7 +726,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, th->check = tcp_v4_check(skb->len, ireq->loc_addr, ireq->rmt_addr, - csum_partial((char *)th, skb->len, + csum_partial(th, skb->len, skb->csum)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index be351009fd03..a77b8d103804 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -233,7 +233,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { - skb->csum = csum_partial((char *)icmp6h, + skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, @@ -246,7 +246,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_add(tmp_csum, skb->csum); } - tmp_csum = csum_partial((char *)icmp6h, + tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a76199ecad23..870a1d64605a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1817,7 +1817,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, len, 0)); + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fbf451c0d77a..af6705f03b5c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -491,7 +491,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, - 
csum_partial((__u8 *) hdr, + csum_partial(hdr, len, 0)); return skb; @@ -1612,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, len, IPPROTO_ICMPV6, - csum_partial((u8 *) icmph, len, 0)); + csum_partial(icmph, len, 0)); buff->dst = dst; idev = in6_dev_get(dst->dev); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b35787056313..a5d750acd793 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -501,7 +501,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) th->check = tcp_v6_check(th, skb->len, &treq->loc_addr, &treq->rmt_addr, - csum_partial((char *)th, skb->len, skb->csum)); + csum_partial(th, skb->len, skb->csum)); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); @@ -915,7 +915,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, - csum_partial((char *)th, th->doff<<2, + csum_partial(th, th->doff<<2, skb->csum)); } } @@ -997,7 +997,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial((char *)t1, tot_len, 0); + buff->csum = csum_partial(t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f2cf3f583f62..ebc4a9a4b1f7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -216,7 +216,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) return len; } - *hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0)); + *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); return len; } @@ -686,7 +686,7 @@ static int unix_autobind(struct socket *sock) retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + 
sizeof(short); - addr->hash = unix_hash_fold(csum_partial((void *)addr->name, addr->len, 0)); + addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; -- cgit v1.2.3 From d314774cf2cd5dfeb39a00d37deee65d4c627927 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:32:24 -0800 Subject: netdev: network device operations infrastructure This patch changes the network device internal API to move administrative operations out of the network device structure and into a separate structure. This patch involves some hackery to maintain compatibility between the new and old model, so all 300+ drivers don't have to be changed at once. For drivers that aren't converted yet, the netdevice_ops virt function list still resides in the net_device structure. For old protocols, the new net_device_ops are copied out to the old net_device pointers. After the transition is completed the nag message can be changed to a WARN_ON, and the compatibility code can be made configurable. Some function pointers aren't moved: * destructor can't be in net_device_ops because it may need to be referenced after the module is unloaded. * neighbor setup is manipulated in a couple of places that need special consideration * hard_start_xmit is in the fast path for transmit. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 232 +++++++++++++++++++++++++++++++++------------- net/Kconfig | 3 + net/core/dev.c | 109 +++++++++++++++------- net/core/netpoll.c | 7 +- net/core/rtnetlink.c | 9 +- net/sched/sch_generic.c | 4 +- 6 files changed, 259 insertions(+), 105 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f4469dc9..9060f5f3517a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -451,6 +451,131 @@ struct netdev_queue { struct Qdisc *qdisc_sleeping; } ____cacheline_aligned_in_smp; + +/* + * This structure defines the management hooks for network devices. + * The following hooks can bed defined and are optonal (can be null) + * unless otherwise noted. + * + * int (*ndo_init)(struct net_device *dev); + * This function is called once when network device is registered. + * The network device can use this to any late stage initializaton + * or semantic validattion. It can fail with an error code which will + * be propogated back to register_netdev + * + * void (*ndo_uninit)(struct net_device *dev); + * This function is called when device is unregistered or when registration + * fails. It is not called if init fails. + * + * int (*ndo_open)(struct net_device *dev); + * This function is called when network device transistions to the up + * state. + * + * int (*ndo_stop)(struct net_device *dev); + * This function is called when network device transistions to the down + * state. + * + * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); + * This function is called to allow device receiver to make + * changes to configuration when multicast or promiscious is enabled. + * + * void (*ndo_set_rx_mode)(struct net_device *dev); + * This function is called device changes address list filtering. + * + * void (*ndo_set_multicast_list)(struct net_device *dev); + * This function is called when the multicast address list changes. 
+ * + * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); + * This function is called when the Media Access Control address + * needs to be changed. If this interface is not defined, the + * mac address can not be changed. + * + * int (*ndo_validate_addr)(struct net_device *dev); + * Test if Media Access Control address is valid for the device. + * + * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Called when a user requests an ioctl which can't be handled by + * the generic interface code. If not defined, ioctls return + * a not supported error code. + * + * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); + * Used to set a network device's bus interface parameters. This interface + * is retained for legacy reasons; new devices should use the bus + * interface (PCI) for low level management. + * + * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + * Called when a user wants to change the Maximum Transfer Unit + * of a device. If not defined, any request to change MTU + * will return an error. + * + * void (*ndo_tx_timeout) (struct net_device *dev); + * Callback used when the transmitter has not made any progress + * for dev->watchdog_timeo ticks. + * + * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); + * Called when a user wants to get the network device usage + * statistics. If not defined, the counters in dev->stats will + * be used. + * + * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp); + * If device supports VLAN receive acceleration + * (i.e. dev->features & NETIF_F_HW_VLAN_RX), then this function is called + * when vlan groups for the device change. Note: grp is NULL + * if no vlan groups are being used. + * + * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); + * If device supports VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is registered. 
+ * + * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is unregistered. + * + * void (*ndo_poll_controller)(struct net_device *dev); + */ +struct net_device_ops { + int (*ndo_init)(struct net_device *dev); + void (*ndo_uninit)(struct net_device *dev); + int (*ndo_open)(struct net_device *dev); + int (*ndo_stop)(struct net_device *dev); +#define HAVE_CHANGE_RX_FLAGS + void (*ndo_change_rx_flags)(struct net_device *dev, + int flags); +#define HAVE_SET_RX_MODE + void (*ndo_set_rx_mode)(struct net_device *dev); +#define HAVE_MULTICAST + void (*ndo_set_multicast_list)(struct net_device *dev); +#define HAVE_SET_MAC_ADDR + int (*ndo_set_mac_address)(struct net_device *dev, + void *addr); +#define HAVE_VALIDATE_ADDR + int (*ndo_validate_addr)(struct net_device *dev); +#define HAVE_PRIVATE_IOCTL + int (*ndo_do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); +#define HAVE_SET_CONFIG + int (*ndo_set_config)(struct net_device *dev, + struct ifmap *map); +#define HAVE_CHANGE_MTU + int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + +#define HAVE_TX_TIMEOUT + void (*ndo_tx_timeout) (struct net_device *dev); + + struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); + + void (*ndo_vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*ndo_vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER +#define HAVE_NETDEV_POLL + void (*ndo_poll_controller)(struct net_device *dev); +#endif +}; + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O @@ -498,11 +623,6 @@ struct net_device #ifdef CONFIG_NETPOLL struct list_head napi_list; #endif - - /* The device initialization function. Called only once. 
*/ - int (*init)(struct net_device *dev); - - /* ------- Fields preinitialized in Space.c finish here ------- */ /* Net device features */ unsigned long features; @@ -546,15 +666,13 @@ struct net_device * for all in netdev_increment_features. */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ - NETIF_F_SG | NETIF_F_HIGHDMA | \ + NETIF_F_SG | NETIF_F_HIGHDMA | \ NETIF_F_FRAGLIST) /* Interface index. Unique device identifier */ int ifindex; int iflink; - - struct net_device_stats* (*get_stats)(struct net_device *dev); struct net_device_stats stats; #ifdef CONFIG_WIRELESS_EXT @@ -564,18 +682,13 @@ struct net_device /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; #endif + /* Management operations */ + const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; /* Hardware header description */ const struct header_ops *header_ops; - /* - * This marks the end of the "visible" part of the structure. All - * fields hereafter are internal to the system, and may change at - * will (read: may be cleaned up at will). - */ - - unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; unsigned short priv_flags; /* Like 'flags' but invisible to userspace. 
*/ @@ -634,7 +747,7 @@ struct net_device unsigned long last_rx; /* Time of last Rx */ /* Interface address info used in eth_type_trans() */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast - because most packets are unicast) */ + because most packets are unicast) */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ @@ -648,6 +761,10 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; + /* Map buffer to appropriate transmit queue */ + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); + unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* @@ -662,9 +779,6 @@ struct net_device int watchdog_timeo; /* used by dev_watchdog() */ struct timer_list watchdog_timer; -/* - * refcnt is a very hot point, so align it on SMP - */ /* Number of references to this device */ atomic_t refcnt ____cacheline_aligned_in_smp; @@ -683,56 +797,14 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Called after device is detached from network. */ - void (*uninit)(struct net_device *dev); - /* Called after last user reference disappears. */ - void (*destructor)(struct net_device *dev); + /* Called from unregister, can be used to call free_netdev */ + void (*destructor)(struct net_device *dev); - /* Pointers to interface service routines. 
*/ - int (*open)(struct net_device *dev); - int (*stop)(struct net_device *dev); -#define HAVE_NETDEV_POLL -#define HAVE_CHANGE_RX_FLAGS - void (*change_rx_flags)(struct net_device *dev, - int flags); -#define HAVE_SET_RX_MODE - void (*set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST - void (*set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR - int (*set_mac_address)(struct net_device *dev, - void *addr); -#define HAVE_VALIDATE_ADDR - int (*validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL - int (*do_ioctl)(struct net_device *dev, - struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG - int (*set_config)(struct net_device *dev, - struct ifmap *map); -#define HAVE_CHANGE_MTU - int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); -#define HAVE_TX_TIMEOUT - void (*tx_timeout) (struct net_device *dev); - - void (*vlan_rx_register)(struct net_device *dev, - struct vlan_group *grp); - void (*vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); - void (*vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); - - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif -#ifdef CONFIG_NET_POLL_CONTROLLER - void (*poll_controller)(struct net_device *dev); -#endif - - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); #ifdef CONFIG_NET_NS /* Network namespace this network device is inside */ @@ -763,6 +835,38 @@ struct net_device /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; + +#ifdef CONFIG_COMPAT_NET_DEV_OPS + struct { + int (*init)(struct net_device *dev); + void (*uninit)(struct net_device *dev); + int (*open)(struct net_device *dev); + int (*stop)(struct net_device *dev); + void (*change_rx_flags)(struct net_device *dev, + int flags); + void (*set_rx_mode)(struct net_device 
*dev); + void (*set_multicast_list)(struct net_device *dev); + int (*set_mac_address)(struct net_device *dev, + void *addr); + int (*validate_addr)(struct net_device *dev); + int (*do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*set_config)(struct net_device *dev, + struct ifmap *map); + int (*change_mtu)(struct net_device *dev, int new_mtu); + void (*tx_timeout) (struct net_device *dev); + struct net_device_stats* (*get_stats)(struct net_device *dev); + void (*vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER + void (*poll_controller)(struct net_device *dev); +#endif +#endif + }; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/Kconfig b/net/Kconfig index 8c3d97ca0d96..4e2e40ba8ba6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -32,6 +32,9 @@ config NET_NS Allow user space to create what appear to be multiple instances of the network stack. 
+config COMPAT_NET_DEV_OPS + def_bool y + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index e08c0fcd603b..ca14ab407b33 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1059,6 +1059,7 @@ void dev_load(struct net *net, const char *name) */ int dev_open(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; int ret = 0; ASSERT_RTNL(); @@ -1081,11 +1082,11 @@ int dev_open(struct net_device *dev) */ set_bit(__LINK_STATE_START, &dev->state); - if (dev->validate_addr) - ret = dev->validate_addr(dev); + if (ops->ndo_validate_addr) + ret = ops->ndo_validate_addr(dev); - if (!ret && dev->open) - ret = dev->open(dev); + if (!ret && ops->ndo_open) + ret = ops->ndo_open(dev); /* * If it went open OK then: @@ -1129,6 +1130,7 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; ASSERT_RTNL(); might_sleep(); @@ -1161,8 +1163,8 @@ int dev_close(struct net_device *dev) * We allow it to be called even after a DETACH hot-plug * event. */ - if (dev->stop) - dev->stop(dev); + if (ops->ndo_stop) + ops->ndo_stop(dev); /* * Device is now down. @@ -2930,8 +2932,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) static void dev_change_rx_flags(struct net_device *dev, int flags) { - if (dev->flags & IFF_UP && dev->change_rx_flags) - dev->change_rx_flags(dev, flags); + const struct net_device_ops *ops = dev->netdev_ops; + + if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + ops->ndo_change_rx_flags(dev, flags); } static int __dev_set_promiscuity(struct net_device *dev, int inc) @@ -3051,6 +3055,8 @@ int dev_set_allmulti(struct net_device *dev, int inc) */ void __dev_set_rx_mode(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; + /* dev_open will call this function so the list will stay sane. 
*/ if (!(dev->flags&IFF_UP)) return; @@ -3058,8 +3064,8 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (dev->set_rx_mode) - dev->set_rx_mode(dev); + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); else { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. @@ -3072,8 +3078,8 @@ void __dev_set_rx_mode(struct net_device *dev) dev->uc_promisc = 0; } - if (dev->set_multicast_list) - dev->set_multicast_list(dev); + if (ops->ndo_set_multicast_list) + ops->ndo_set_multicast_list(dev); } } @@ -3432,6 +3438,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) */ int dev_set_mtu(struct net_device *dev, int new_mtu) { + const struct net_device_ops *ops = dev->netdev_ops; int err; if (new_mtu == dev->mtu) @@ -3445,10 +3452,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return -ENODEV; err = 0; - if (dev->change_mtu) - err = dev->change_mtu(dev, new_mtu); + if (ops->ndo_change_mtu) + err = ops->ndo_change_mtu(dev, new_mtu); else dev->mtu = new_mtu; + if (!err && dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; @@ -3463,15 +3471,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { + const struct net_device_ops *ops = dev->netdev_ops; int err; - if (!dev->set_mac_address) + if (!ops->ndo_set_mac_address) return -EOPNOTSUPP; if (sa->sa_family != dev->type) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3551,6 +3560,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + const struct net_device_ops *ops = dev->netdev_ops; if (!dev) return -ENODEV; @@ 
-3578,15 +3588,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return 0; case SIOCSIFMAP: - if (dev->set_config) { + if (ops->ndo_set_config) { if (!netif_device_present(dev)) return -ENODEV; - return dev->set_config(dev, &ifr->ifr_map); + return ops->ndo_set_config(dev, &ifr->ifr_map); } return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3595,7 +3605,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) dev->addr_len, 1); case SIOCDELMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3633,10 +3643,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCBRDELIF || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; - if (dev->do_ioctl) { + if (ops->ndo_do_ioctl) { if (netif_device_present(dev)) - err = dev->do_ioctl(dev, ifr, - cmd); + err = ops->ndo_do_ioctl(dev, ifr, cmd); else err = -ENODEV; } @@ -3897,8 +3906,8 @@ static void rollback_registered(struct net_device *dev) */ dev_addr_discard(dev); - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); /* Notifier chain MUST detach us from master device. */ WARN_ON(dev->master); @@ -3988,7 +3997,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; - struct net *net; + struct net *net = dev_net(dev); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3997,8 +4006,7 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. 
*/ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!dev_net(dev)); - net = dev_net(dev); + BUG_ON(!net); spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -4006,9 +4014,46 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; +#ifdef CONFIG_COMPAT_NET_DEV_OPS + /* Netdevice_ops API compatiability support. + * This is temporary until all network devices are converted. + */ + if (dev->netdev_ops) { + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif + } else { + char drivername[64]; + pr_info("%s (%s): not using net_device_ops yet\n", + dev->name, netdev_drivername(dev, drivername, 64)); + + /* This works only because net_device_ops and the + compatiablity structure are the same. 
*/ + dev->netdev_ops = (void *) &(dev->init); + } +#endif + /* Init, if this function is available */ - if (dev->init) { - ret = dev->init(dev); + if (dev->netdev_ops->ndo_init) { + ret = dev->netdev_ops->ndo_init(dev); if (ret) { if (ret > 0) ret = -EIO; @@ -4086,8 +4131,8 @@ out: return ret; err_uninit: - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); goto out; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fc4e28e23b89..630df6034444 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -172,12 +172,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; - if (!dev || !netif_running(dev) || !dev->poll_controller) + if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) return; /* Process pending work on NIC */ - dev->poll_controller(dev); + ops->ndo_poll_controller(dev); poll_napi(dev); @@ -694,7 +695,7 @@ int netpoll_setup(struct netpoll *np) atomic_inc(&npinfo->refcnt); } - if (!ndev->poll_controller) { + if (!ndev->netdev_ops->ndo_poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); err = -ENOTSUPP; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4dfb6b4d4559..6f8e0778e565 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -762,6 +762,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { + const struct net_device_ops *ops = dev->netdev_ops; int send_addr_notify = 0; int err; @@ -783,7 +784,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct rtnl_link_ifmap *u_map; struct ifmap k_map; - if (!dev->set_config) { + if (!ops->ndo_set_config) { err = -EOPNOTSUPP; goto errout; } @@ -801,7 +802,7 @@ 
static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, k_map.dma = (unsigned char) u_map->dma; k_map.port = (unsigned char) u_map->port; - err = dev->set_config(dev, &k_map); + err = ops->ndo_set_config(dev, &k_map); if (err < 0) goto errout; @@ -812,7 +813,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct sockaddr *sa; int len; - if (!dev->set_mac_address) { + if (!ops->ndo_set_mac_address) { err = -EOPNOTSUPP; goto errout; } @@ -831,7 +832,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); kfree(sa); if (err) goto errout; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 80c8f3dbbea1..95ab55c064f1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg) char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); - dev->tx_timeout(dev); + dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + @@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg) void __netdev_watchdog_up(struct net_device *dev) { - if (dev->tx_timeout) { + if (dev->netdev_ops->ndo_tx_timeout) { if (dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, -- cgit v1.2.3 From eeda3fd64f75bcbfaa70ce946513abaf3f23b8e0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:40:23 -0800 Subject: netdev: introduce dev_get_stats() In order for the network device ops get_stats call to be immutable, the handling of the default internal network device stats block has to be changed. Add a new helper function which replaces the old use of internal_get_stats. 
Note: change return code to make it clear that the caller should not go changing the returned statistics. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- arch/s390/appldata/appldata_net_sum.c | 4 ++-- drivers/net/bonding/bond_main.c | 5 +++-- drivers/net/sfc/ethtool.c | 2 +- drivers/parisc/led.c | 4 ++-- include/linux/netdevice.h | 4 +++- net/core/dev.c | 23 ++++++++++++++++++----- net/core/net-sysfs.c | 3 +-- net/core/rtnetlink.c | 6 +++--- 8 files changed, 33 insertions(+), 18 deletions(-) (limited to 'net/core') diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 3b746556e1a3..fa741f84c5b9 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -67,7 +67,6 @@ static void appldata_get_net_sum_data(void *data) int i; struct appldata_net_sum_data *net_data; struct net_device *dev; - struct net_device_stats *stats; unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, tx_errors, rx_dropped, tx_dropped, collisions; @@ -86,7 +85,8 @@ static void appldata_get_net_sum_data(void *data) collisions = 0; read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) { - stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); + rx_packets += stats->rx_packets; tx_packets += stats->tx_packets; rx_bytes += stats->rx_bytes; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a08ea4808056..db5f5c24a250 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3899,7 +3899,7 @@ static int bond_close(struct net_device *bond_dev) static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - struct net_device_stats *stats = &(bond->stats), *sstats; + struct net_device_stats *stats = &bond->stats; struct net_device_stats local_stats; struct slave *slave; int i; @@ -3909,7 +3909,8 @@ static struct 
net_device_stats *bond_get_stats(struct net_device *bond_dev) read_lock_bh(&bond->lock); bond_for_each_slave(bond, slave, i) { - sstats = slave->dev->get_stats(slave->dev); + const struct net_device_stats *sstats = dev_get_stats(slave->dev); + local_stats.rx_packets += sstats->rx_packets; local_stats.rx_bytes += sstats->rx_bytes; local_stats.rx_errors += sstats->rx_errors; diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index abd8fcd6e62d..cd92c4d8dbc5 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -426,7 +426,7 @@ static void efx_ethtool_get_stats(struct net_device *net_dev, EFX_BUG_ON_PARANOID(stats->n_stats != EFX_ETHTOOL_NUM_STATS); /* Update MAC and NIC statistics */ - net_dev->get_stats(net_dev); + dev_get_stats(net_dev); /* Fill detailed statistics buffer */ for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) { diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index f9b12664f9fb..454b6532e409 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -360,13 +360,13 @@ static __inline__ int led_get_net_activity(void) read_lock(&dev_base_lock); rcu_read_lock(); for_each_netdev(&init_net, dev) { - struct net_device_stats *stats; + const struct net_device_stats *stats; struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) continue; if (ipv4_is_loopback(in_dev->ifa_list->ifa_local)) continue; - stats = dev->get_stats(dev); + stats = dev_get_stats(dev); rx_total += stats->rx_packets; tx_total += stats->tx_packets; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9060f5f3517a..981a089d5149 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -864,9 +864,9 @@ struct net_device unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev); -#endif #endif }; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1780,6 +1780,8 @@ extern void netdev_features_change(struct 
net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); +extern const struct net_device_stats *dev_get_stats(struct net_device *dev); + extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); diff --git a/net/core/dev.c b/net/core/dev.c index ca14ab407b33..8843f4e3f5e1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2620,7 +2620,7 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - struct net_device_stats *stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", @@ -4288,10 +4288,24 @@ void netdev_run_todo(void) } } -static struct net_device_stats *internal_stats(struct net_device *dev) -{ - return &dev->stats; +/** + * dev_get_stats - get network device statistics + * @dev: device to get statistics from + * + * Get network statistics from device. The device driver may provide + * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise + * the internal statistics structure is used.
+ */ +const struct net_device_stats *dev_get_stats(struct net_device *dev) + { + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_get_stats) + return ops->ndo_get_stats(dev); + else + return &dev->stats; } +EXPORT_SYMBOL(dev_get_stats); static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, @@ -4370,7 +4384,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); - dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); strcpy(dev->name, name); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 146dcfeb060e..afd42d717320 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -270,7 +270,6 @@ static ssize_t netstat_show(const struct device *d, unsigned long offset) { struct net_device *dev = to_net_dev(d); - struct net_device_stats *stats; ssize_t ret = -EINVAL; WARN_ON(offset > sizeof(struct net_device_stats) || @@ -278,7 +277,7 @@ static ssize_t netstat_show(const struct device *d, read_lock(&dev_base_lock); if (dev_isalive(dev)) { - stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6f8e0778e565..790dd205bb5d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -551,7 +551,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - struct net_device_stats *b) + const struct net_device_stats *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -609,7 +609,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct net_device_stats *stats; + const struct net_device_stats *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), 
flags); @@ -666,7 +666,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (attr == NULL) goto nla_put_failure; - stats = dev->get_stats(dev); + stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { -- cgit v1.2.3 From d214c7537bbf2f247991fb65b3420b0b3d712c67 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Nov 2008 00:49:27 -0800 Subject: filter: add SKF_AD_NLATTR_NEST to look for nested attributes SKF_AD_NLATTR allows us to find the first matching attribute in a stream of netlink attributes from one offset to the end of the netlink message. This is not suitable to look for a specific matching inside a set of nested attributes. For example, in ctnetlink messages, if we look for the CTA_V6_SRC attribute in a message that talks about an IPv4 connection, SKF_AD_NLATTR returns the offset of CTA_STATUS which has the same value as CTA_V6_SRC but outside the nest. To differentiate CTA_STATUS and CTA_V6_SRC, we would have to make assumptions on the size of the attribute and the usual offset, resulting in horrible BPF code. This patch adds SKF_AD_NLATTR_NEST, which is a variant of SKF_AD_NLATTR, that looks for an attribute inside the limits of a nested attribute, but not further. This patch validates that we have enough room to look for the nested attributes - based on a suggestion from Patrick McHardy. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- net/core/filter.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/include/linux/filter.h b/include/linux/filter.h index b6ea9aa9e853..1354aaf6abbe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -122,7 +122,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER.
*/ #define SKF_AD_PKTTYPE 4 #define SKF_AD_IFINDEX 8 #define SKF_AD_NLATTR 12 -#define SKF_AD_MAX 16 +#define SKF_AD_NLATTR_NEST 16 +#define SKF_AD_MAX 20 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index df3744355839..d1d779ca096d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -319,6 +319,25 @@ load_b: A = 0; continue; } + case SKF_AD_NLATTR_NEST: { + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = (struct nlattr *)&skb->data[A]; + if (nla->nla_len > A - skb->len) + return 0; + + nla = nla_find_nested(nla, X); + if (nla) + A = (void *)nla - (void *)skb->data; + else + A = 0; + continue; + } default: return 0; } -- cgit v1.2.3 From 008298231abbeb91bc7be9e8b078607b816d1a4a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:14:53 -0800 Subject: netdev: add more functions to netdevice ops This patch moves neigh_setup and hard_start_xmit into the network device ops structure. For bisection, fix all the previously converted drivers as well. Bonding driver took the biggest hit on this. Added a prefetch of the hard_start_xmit in the fast path to try and reduce any impact this would have. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/8139cp.c | 2 +- drivers/net/8139too.c | 2 +- drivers/net/acenic.c | 4 ++- drivers/net/atl1e/atl1e_main.c | 3 ++- drivers/net/atlx/atl1.c | 4 +-- drivers/net/atlx/atl2.c | 2 +- drivers/net/bonding/bond_main.c | 56 +++++++++++++++++++++++++++++++++-------- drivers/net/chelsio/cxgb2.c | 6 ++--- drivers/net/cxgb3/cxgb3_main.c | 1 - drivers/net/e100.c | 2 +- drivers/net/e1000/e1000_main.c | 2 +- drivers/net/e1000e/netdev.c | 2 +- drivers/net/enic/enic_main.c | 2 +- drivers/net/forcedeth.c | 22 +++++++++++++--- drivers/net/ifb.c | 4 +-- drivers/net/igb/igb_main.c | 2 +- drivers/net/ixgb/ixgb_main.c | 2 +- drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/loopback.c | 2 +- drivers/net/macvlan.c | 4 +-- drivers/net/niu.c | 2 +- drivers/net/ppp_generic.c | 5 ++-- drivers/net/r8169.c | 2 +- drivers/net/skge.c | 2 +- drivers/net/sky2.c | 3 ++- drivers/net/tg3.c | 45 ++++++++++++++++++++++----------- drivers/net/tun.c | 4 +-- drivers/net/veth.c | 2 +- drivers/net/via-velocity.c | 2 +- include/linux/netdevice.h | 39 ++++++++++++++++++---------- net/bridge/br_device.c | 10 ++++---- net/bridge/br_if.c | 2 +- net/core/dev.c | 12 ++++++--- net/core/neighbour.c | 6 ++--- net/core/netpoll.c | 6 +++-- net/core/pktgen.c | 8 +++--- 36 files changed, 183 insertions(+), 93 deletions(-) (limited to 'net/core') diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 13f75b67872d..f6d9d1353dd5 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -1824,6 +1824,7 @@ static const struct net_device_ops cp_netdev_ops = { .ndo_set_multicast_list = cp_set_rx_mode, .ndo_get_stats = cp_get_stats, .ndo_do_ioctl = cp_ioctl, + .ndo_start_xmit = cp_start_xmit, .ndo_tx_timeout = cp_tx_timeout, #if CP_VLAN_TAG_USED .ndo_vlan_rx_register = cp_vlan_rx_register, @@ -1949,7 +1950,6 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); dev->netdev_ops = &cp_netdev_ops; - 
dev->hard_start_xmit = cp_start_xmit; netif_napi_add(dev, &cp->napi, cp_rx_poll, 16); dev->ethtool_ops = &cp_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index f8866552386a..445a479db79d 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -921,6 +921,7 @@ static const struct net_device_ops rtl8139_netdev_ops = { .ndo_stop = rtl8139_close, .ndo_get_stats = rtl8139_get_stats, .ndo_validate_addr = eth_validate_addr, + .ndo_start_xmit = rtl8139_start_xmit, .ndo_set_multicast_list = rtl8139_set_rx_mode, .ndo_do_ioctl = netdev_ioctl, .ndo_tx_timeout = rtl8139_tx_timeout, @@ -992,7 +993,6 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev, dev->netdev_ops = &rtl8139_netdev_ops; dev->ethtool_ops = &rtl8139_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; - dev->hard_start_xmit = rtl8139_start_xmit; netif_napi_add(dev, &tp->napi, rtl8139_poll, 64); /* note: the hardware is not capable of sg/csum/highdma, however diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 309a90ea9211..21d24320210a 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -455,10 +455,13 @@ static const struct net_device_ops ace_netdev_ops = { .ndo_stop = ace_close, .ndo_tx_timeout = ace_watchdog, .ndo_get_stats = ace_get_stats, + .ndo_start_xmit = ace_start_xmit, .ndo_set_multicast_list = ace_set_multicast_list, .ndo_set_mac_address = ace_set_mac_addr, .ndo_change_mtu = ace_change_mtu, +#if ACENIC_DO_VLAN .ndo_vlan_rx_register = ace_vlan_rx_register, +#endif }; static int __devinit acenic_probe_one(struct pci_dev *pdev, @@ -489,7 +492,6 @@ static int __devinit acenic_probe_one(struct pci_dev *pdev, dev->watchdog_timeo = 5*HZ; dev->netdev_ops = &ace_netdev_ops; - dev->hard_start_xmit = &ace_start_xmit; SET_ETHTOOL_OPS(dev, &ace_ethtool_ops); /* we only display this string ONCE */ diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index a815fffc2a5b..98b2a7a466b8 100644 --- 
a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -2256,6 +2256,7 @@ static void atl1e_shutdown(struct pci_dev *pdev) static const struct net_device_ops atl1e_netdev_ops = { .ndo_open = atl1e_open, .ndo_stop = atl1e_close, + .ndo_start_xmit = atl1e_xmit_frame, .ndo_get_stats = atl1e_get_stats, .ndo_set_multicast_list = atl1e_set_multi, .ndo_validate_addr = eth_validate_addr, @@ -2277,7 +2278,7 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev) netdev->irq = pdev->irq; netdev->netdev_ops = &atl1e_netdev_ops; - netdev->hard_start_xmit = atl1e_xmit_frame, + netdev->watchdog_timeo = AT_TX_WATCHDOG; atl1e_set_ethtool_ops(netdev); diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 7a0fb04e3480..aef7e47fdd24 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -2883,12 +2883,13 @@ static void atl1_poll_controller(struct net_device *netdev) static const struct net_device_ops atl1_netdev_ops = { .ndo_open = atl1_open, .ndo_stop = atl1_close, + .ndo_start_xmit = atl1_xmit_frame, .ndo_set_multicast_list = atlx_set_multi, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = atl1_set_mac, .ndo_change_mtu = atl1_change_mtu, .ndo_do_ioctl = atlx_ioctl, - .ndo_tx_timeout = atlx_tx_timeout, + .ndo_tx_timeout = atlx_tx_timeout, .ndo_vlan_rx_register = atlx_vlan_rx_register, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl1_poll_controller, @@ -2983,7 +2984,6 @@ static int __devinit atl1_probe(struct pci_dev *pdev, adapter->mii.reg_num_mask = 0x1f; netdev->netdev_ops = &atl1_netdev_ops; - netdev->hard_start_xmit = &atl1_xmit_frame; netdev->watchdog_timeo = 5 * HZ; netdev->ethtool_ops = &atl1_ethtool_ops; diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c index b8d585722e1a..0326a84503e3 100644 --- a/drivers/net/atlx/atl2.c +++ b/drivers/net/atlx/atl2.c @@ -1315,6 +1315,7 @@ static void atl2_poll_controller(struct net_device *netdev) static const struct 
net_device_ops atl2_netdev_ops = { .ndo_open = atl2_open, .ndo_stop = atl2_close, + .ndo_start_xmit = atl2_xmit_frame, .ndo_set_multicast_list = atl2_set_multi, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = atl2_set_mac, @@ -1400,7 +1401,6 @@ static int __devinit atl2_probe(struct pci_dev *pdev, atl2_setup_pcicmd(pdev); - netdev->hard_start_xmit = &atl2_xmit_frame; netdev->netdev_ops = &atl2_netdev_ops; atl2_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 614656c8187b..a339a8052737 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1377,14 +1377,12 @@ done: return 0; } - static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { struct bonding *bond = netdev_priv(bond_dev); - bond_dev->neigh_setup = slave_dev->neigh_setup; - bond_dev->header_ops = slave_dev->header_ops; + bond_dev->header_ops = slave_dev->header_ops; bond_dev->type = slave_dev->type; bond_dev->hard_header_len = slave_dev->hard_header_len; @@ -4124,6 +4122,20 @@ static void bond_set_multicast_list(struct net_device *bond_dev) read_unlock(&bond->lock); } +static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) +{ + struct bonding *bond = netdev_priv(dev); + struct slave *slave = bond->first_slave; + + if (slave) { + const struct net_device_ops *slave_ops + = slave->dev->netdev_ops; + if (slave_ops->ndo_neigh_setup) + return slave_ops->ndo_neigh_setup(dev, parms); + } + return 0; +} + /* * Change the MTU of all of a master's slaves to match the master */ @@ -4490,6 +4502,35 @@ static void bond_set_xmit_hash_policy(struct bonding *bond) } } +static int bond_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct bonding *bond = netdev_priv(dev); + + switch (bond->params.mode) { + case BOND_MODE_ROUNDROBIN: + return bond_xmit_roundrobin(skb, dev); + case BOND_MODE_ACTIVEBACKUP: + 
return bond_xmit_activebackup(skb, dev); + case BOND_MODE_XOR: + return bond_xmit_xor(skb, dev); + case BOND_MODE_BROADCAST: + return bond_xmit_broadcast(skb, dev); + case BOND_MODE_8023AD: + return bond_3ad_xmit_xor(skb, dev); + case BOND_MODE_ALB: + case BOND_MODE_TLB: + return bond_alb_xmit(skb, dev); + default: + /* Should never happen, mode already checked */ + printk(KERN_ERR DRV_NAME ": %s: Error: Unknown bonding mode %d\n", + dev->name, bond->params.mode); + WARN_ON_ONCE(1); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } +} + + /* * set bond mode specific net device operations */ @@ -4499,28 +4540,22 @@ void bond_set_mode_ops(struct bonding *bond, int mode) switch (mode) { case BOND_MODE_ROUNDROBIN: - bond_dev->hard_start_xmit = bond_xmit_roundrobin; break; case BOND_MODE_ACTIVEBACKUP: - bond_dev->hard_start_xmit = bond_xmit_activebackup; break; case BOND_MODE_XOR: - bond_dev->hard_start_xmit = bond_xmit_xor; bond_set_xmit_hash_policy(bond); break; case BOND_MODE_BROADCAST: - bond_dev->hard_start_xmit = bond_xmit_broadcast; break; case BOND_MODE_8023AD: bond_set_master_3ad_flags(bond); - bond_dev->hard_start_xmit = bond_3ad_xmit_xor; bond_set_xmit_hash_policy(bond); break; case BOND_MODE_ALB: bond_set_master_alb_flags(bond); /* FALLTHRU */ case BOND_MODE_TLB: - bond_dev->hard_start_xmit = bond_alb_xmit; break; default: /* Should never happen, mode already checked */ @@ -4553,12 +4588,13 @@ static const struct ethtool_ops bond_ethtool_ops = { static const struct net_device_ops bond_netdev_ops = { .ndo_open = bond_open, .ndo_stop = bond_close, + .ndo_start_xmit = bond_start_xmit, .ndo_get_stats = bond_get_stats, .ndo_do_ioctl = bond_do_ioctl, .ndo_set_multicast_list = bond_set_multicast_list, .ndo_change_mtu = bond_change_mtu, - .ndo_validate_addr = NULL, .ndo_set_mac_address = bond_set_mac_address, + .ndo_neigh_setup = bond_neigh_setup, .ndo_vlan_rx_register = bond_vlan_rx_register, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = 
bond_vlan_rx_kill_vid, diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c index 482741797ebf..9b6011e7678e 100644 --- a/drivers/net/chelsio/cxgb2.c +++ b/drivers/net/chelsio/cxgb2.c @@ -915,7 +915,7 @@ static int t1_set_mac_addr(struct net_device *dev, void *p) } #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) -static void vlan_rx_register(struct net_device *dev, +static void t1_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) { struct adapter *adapter = dev->ml_priv; @@ -1013,6 +1013,7 @@ void t1_fatal_err(struct adapter *adapter) static const struct net_device_ops cxgb_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, + .ndo_start_xmit = t1_start_xmit, .ndo_get_stats = t1_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = t1_set_rxmode, @@ -1020,7 +1021,7 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_change_mtu = t1_change_mtu, .ndo_set_mac_address = t1_set_mac_addr, #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - .ndo_vlan_rx_register = vlan_rx_register, + .ndo_vlan_rx_register = t1_vlan_rx_register, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = t1_netpoll, @@ -1157,7 +1158,6 @@ static int __devinit init_one(struct pci_dev *pdev, } netdev->netdev_ops = &cxgb_netdev_ops; - netdev->hard_start_xmit = t1_start_xmit; netdev->hard_header_len += (adapter->flags & TSO_CAPABLE) ? 
sizeof(struct cpl_tx_pkt_lso) : sizeof(struct cpl_tx_pkt); diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index a9479be53ec3..cd9fcaca70f3 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -2955,7 +2955,6 @@ static int __devinit init_one(struct pci_dev *pdev, netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; netdev->netdev_ops = &cxgb_netdev_ops; - netdev->hard_start_xmit = t3_eth_xmit; SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); } diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 5894716de19f..2001a63794f5 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -2615,6 +2615,7 @@ static int e100_close(struct net_device *netdev) static const struct net_device_ops e100_netdev_ops = { .ndo_open = e100_open, .ndo_stop = e100_close, + .ndo_start_xmit = e100_xmit_frame, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = e100_set_multicast_list, .ndo_set_mac_address = e100_set_mac_address, @@ -2640,7 +2641,6 @@ static int __devinit e100_probe(struct pci_dev *pdev, } netdev->netdev_ops = &e100_netdev_ops; - netdev->hard_start_xmit = e100_xmit_frame; SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops); netdev->watchdog_timeo = E100_WATCHDOG_PERIOD; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index debbba390d40..5c098c9d584e 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -891,6 +891,7 @@ static int e1000_is_need_ioport(struct pci_dev *pdev) static const struct net_device_ops e1000_netdev_ops = { .ndo_open = e1000_open, .ndo_stop = e1000_close, + .ndo_start_xmit = e1000_xmit_frame, .ndo_get_stats = e1000_get_stats, .ndo_set_rx_mode = e1000_set_rx_mode, .ndo_set_mac_address = e1000_set_mac, @@ -1001,7 +1002,6 @@ static int __devinit e1000_probe(struct pci_dev *pdev, } netdev->netdev_ops = &e1000_netdev_ops; - netdev->hard_start_xmit = 
&e1000_xmit_frame; e1000_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index ced839e4cae8..cc0502bbb9ff 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4707,6 +4707,7 @@ static void e1000_eeprom_checks(struct e1000_adapter *adapter) static const struct net_device_ops e1000e_netdev_ops = { .ndo_open = e1000_open, .ndo_stop = e1000_close, + .ndo_start_xmit = e1000_xmit_frame, .ndo_get_stats = e1000_get_stats, .ndo_set_multicast_list = e1000_set_multi, .ndo_set_mac_address = e1000_set_mac, @@ -4822,7 +4823,6 @@ static int __devinit e1000_probe(struct pci_dev *pdev, /* construct the net_device struct */ netdev->netdev_ops = &e1000e_netdev_ops; - netdev->hard_start_xmit = &e1000_xmit_frame; e1000e_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index 40f8c88b166d..1c409df735d4 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -1593,6 +1593,7 @@ static void enic_iounmap(struct enic *enic) static const struct net_device_ops enic_netdev_ops = { .ndo_open = enic_open, .ndo_stop = enic_stop, + .ndo_start_xmit = enic_hard_start_xmit, .ndo_get_stats = enic_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = enic_set_multicast_list, @@ -1830,7 +1831,6 @@ static int __devinit enic_probe(struct pci_dev *pdev, } netdev->netdev_ops = &enic_netdev_ops; - netdev->hard_start_xmit = enic_hard_start_xmit; netdev->watchdog_timeo = 2 * HZ; netdev->ethtool_ops = &enic_ethtool_ops; diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index dd2e1f670b0d..0d7e5750245a 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5412,6 +5412,23 @@ static const struct net_device_ops nv_netdev_ops = { .ndo_open = 
nv_open, .ndo_stop = nv_close, .ndo_get_stats = nv_get_stats, + .ndo_start_xmit = nv_start_xmit, + .ndo_tx_timeout = nv_tx_timeout, + .ndo_change_mtu = nv_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = nv_set_mac_address, + .ndo_set_multicast_list = nv_set_multicast, + .ndo_vlan_rx_register = nv_vlan_rx_register, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = nv_poll_controller, +#endif +}; + +static const struct net_device_ops nv_netdev_ops_optimized = { + .ndo_open = nv_open, + .ndo_stop = nv_close, + .ndo_get_stats = nv_get_stats, + .ndo_start_xmit = nv_start_xmit_optimized, .ndo_tx_timeout = nv_tx_timeout, .ndo_change_mtu = nv_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -5592,11 +5609,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_freering; if (!nv_optimized(np)) - dev->hard_start_xmit = nv_start_xmit; + dev->netdev_ops = &nv_netdev_ops; else - dev->hard_start_xmit = nv_start_xmit_optimized; + dev->netdev_ops = &nv_netdev_ops_optimized; - dev->netdev_ops = &nv_netdev_ops; #ifdef CONFIG_FORCEDETH_NAPI netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP); #endif diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 363a166df8fb..60a263001933 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -138,15 +138,15 @@ resched: } static const struct net_device_ops ifb_netdev_ops = { - .ndo_validate_addr = eth_validate_addr, .ndo_open = ifb_open, .ndo_stop = ifb_close, + .ndo_start_xmit = ifb_xmit, + .ndo_validate_addr = eth_validate_addr, }; static void ifb_setup(struct net_device *dev) { /* Initialize the device structure. 
*/ - dev->hard_start_xmit = ifb_xmit; dev->destructor = free_netdev; dev->netdev_ops = &ifb_netdev_ops; diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index ceb0a0458796..eca5684d5655 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -953,6 +953,7 @@ static int igb_is_need_ioport(struct pci_dev *pdev) static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, + .ndo_start_xmit = igb_xmit_frame_adv, .ndo_get_stats = igb_get_stats, .ndo_set_multicast_list = igb_set_multi, .ndo_set_mac_address = igb_set_mac, @@ -1080,7 +1081,6 @@ static int __devinit igb_probe(struct pci_dev *pdev, netdev->netdev_ops = &igb_netdev_ops; igb_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; - netdev->hard_start_xmit = &igb_xmit_frame_adv; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 3ca9daa70b38..a04e3892ddf4 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -324,6 +324,7 @@ ixgb_reset(struct ixgb_adapter *adapter) static const struct net_device_ops ixgb_netdev_ops = { .ndo_open = ixgb_open, .ndo_stop = ixgb_close, + .ndo_start_xmit = ixgb_xmit_frame, .ndo_get_stats = ixgb_get_stats, .ndo_set_multicast_list = ixgb_set_multi, .ndo_validate_addr = eth_validate_addr, @@ -414,7 +415,6 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } netdev->netdev_ops = &ixgb_netdev_ops; - netdev->hard_start_xmit = &ixgb_xmit_frame; ixgb_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, ixgb_clean, 64); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 7ad07a00680a..40108523377f 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -3728,6 +3728,7 @@ static int ixgbe_link_config(struct ixgbe_hw *hw) static const struct net_device_ops ixgbe_netdev_ops = { 
.ndo_open = ixgbe_open, .ndo_stop = ixgbe_close, + .ndo_start_xmit = ixgbe_xmit_frame, .ndo_get_stats = ixgbe_get_stats, .ndo_set_multicast_list = ixgbe_set_rx_mode, .ndo_validate_addr = eth_validate_addr, @@ -3824,7 +3825,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, } netdev->netdev_ops = &ixgbe_netdev_ops; - netdev->hard_start_xmit = &ixgbe_xmit_frame; ixgbe_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; strcpy(netdev->name, pci_name(pdev)); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 958450124dec..b7d438a367f3 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -145,6 +145,7 @@ static void loopback_dev_free(struct net_device *dev) static const struct net_device_ops loopback_ops = { .ndo_init = loopback_dev_init, + .ndo_start_xmit= loopback_xmit, .ndo_get_stats = loopback_get_stats, }; @@ -155,7 +156,6 @@ static const struct net_device_ops loopback_ops = { static void loopback_setup(struct net_device *dev) { dev->mtu = (16 * 1024) + 20 + 20 + 12; - dev->hard_start_xmit = loopback_xmit; dev->hard_header_len = ETH_HLEN; /* 14 */ dev->addr_len = ETH_ALEN; /* 6 */ dev->tx_queue_len = 0; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d00ea444e0a3..e8879217a1d2 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -140,7 +140,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) return NULL; } -static int macvlan_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static int macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { const struct macvlan_dev *vlan = netdev_priv(dev); unsigned int len = skb->len; @@ -365,6 +365,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_init = macvlan_init, .ndo_open = macvlan_open, .ndo_stop = macvlan_stop, + .ndo_start_xmit = macvlan_start_xmit, .ndo_change_mtu = macvlan_change_mtu, .ndo_change_rx_flags = macvlan_change_rx_flags, .ndo_set_mac_address = macvlan_set_mac_address, @@ 
-377,7 +378,6 @@ static void macvlan_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &macvlan_netdev_ops; - dev->hard_start_xmit = macvlan_hard_start_xmit; dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, dev->ethtool_ops = &macvlan_ethtool_ops; diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 318537efd583..a8d10630f804 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -8892,6 +8892,7 @@ static struct net_device * __devinit niu_alloc_and_init( static const struct net_device_ops niu_netdev_ops = { .ndo_open = niu_open, .ndo_stop = niu_close, + .ndo_start_xmit = niu_start_xmit, .ndo_get_stats = niu_get_stats, .ndo_set_multicast_list = niu_set_rx_mode, .ndo_validate_addr = eth_validate_addr, @@ -8904,7 +8905,6 @@ static const struct net_device_ops niu_netdev_ops = { static void __devinit niu_assign_netdev_ops(struct net_device *dev) { dev->netdev_ops = &niu_netdev_ops; - dev->hard_start_xmit = niu_start_xmit; dev->ethtool_ops = &niu_ethtool_ops; dev->watchdog_timeo = NIU_TX_TIMEOUT; } diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index bad99e8cac33..1b15a088a3ba 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -972,7 +972,8 @@ ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } static const struct net_device_ops ppp_netdev_ops = { - .ndo_do_ioctl = ppp_net_ioctl, + .ndo_start_xmit = ppp_start_xmit, + .ndo_do_ioctl = ppp_net_ioctl, }; static void ppp_setup(struct net_device *dev) @@ -2437,8 +2438,6 @@ ppp_create_interface(int unit, int *retp) skb_queue_head_init(&ppp->mrq); #endif /* CONFIG_PPP_MULTILINK */ - dev->hard_start_xmit = ppp_start_xmit; - ret = -EEXIST; mutex_lock(&all_ppp_mutex); if (unit < 0) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index bac58ca628dd..dddf6aeff498 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1927,6 +1927,7 @@ static const struct net_device_ops rtl8169_netdev_ops = { .ndo_open = 
rtl8169_open, .ndo_stop = rtl8169_close, .ndo_get_stats = rtl8169_get_stats, + .ndo_start_xmit = rtl8169_start_xmit, .ndo_tx_timeout = rtl8169_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = rtl8169_change_mtu, @@ -2125,7 +2126,6 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->dev_addr[i] = RTL_R8(MAC0 + i); memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); - dev->hard_start_xmit = rtl8169_start_xmit; SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops); dev->watchdog_timeo = RTL8169_TX_TIMEOUT; dev->irq = pdev->irq; diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 93c1b1d92962..f73ee7974003 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -3805,6 +3805,7 @@ static __exit void skge_debug_cleanup(void) static const struct net_device_ops skge_netdev_ops = { .ndo_open = skge_up, .ndo_stop = skge_down, + .ndo_start_xmit = skge_xmit_frame, .ndo_do_ioctl = skge_ioctl, .ndo_get_stats = skge_get_stats, .ndo_tx_timeout = skge_tx_timeout, @@ -3831,7 +3832,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, } SET_NETDEV_DEV(dev, &hw->pdev->dev); - dev->hard_start_xmit = skge_xmit_frame; dev->netdev_ops = &skge_netdev_ops; dev->ethtool_ops = &skge_ethtool_ops; dev->watchdog_timeo = TX_WATCHDOG; diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 251505125cb8..3668e81e474d 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -4047,6 +4047,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { { .ndo_open = sky2_up, .ndo_stop = sky2_down, + .ndo_start_xmit = sky2_xmit_frame, .ndo_do_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, @@ -4063,6 +4064,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { { .ndo_open = sky2_up, .ndo_stop = sky2_down, + .ndo_start_xmit = sky2_xmit_frame, .ndo_do_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, @@ -4090,7 
+4092,6 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, SET_NETDEV_DEV(dev, &hw->pdev->dev); dev->irq = hw->pdev->irq; - dev->hard_start_xmit = sky2_xmit_frame; SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops); dev->watchdog_timeo = TX_WATCHDOG; dev->netdev_ops = &sky2_netdev_ops[port]; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 4b97cb601361..9ba18e1bc341 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -12614,19 +12614,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) else tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES; - /* All chips before 5787 can get confused if TX buffers - * straddle the 4GB address boundary in some cases. - */ - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) - tp->dev->hard_start_xmit = tg3_start_xmit; - else - tp->dev->hard_start_xmit = tg3_start_xmit_dma_bug; - tp->rx_offset = 2; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 && (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0) @@ -13346,6 +13333,26 @@ static void __devinit tg3_init_coal(struct tg3 *tp) static const struct net_device_ops tg3_netdev_ops = { .ndo_open = tg3_open, .ndo_stop = tg3_close, + .ndo_start_xmit = tg3_start_xmit, + .ndo_get_stats = tg3_get_stats, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_multicast_list = tg3_set_rx_mode, + .ndo_set_mac_address = tg3_set_mac_addr, + .ndo_do_ioctl = tg3_ioctl, + .ndo_tx_timeout = tg3_tx_timeout, + .ndo_change_mtu = tg3_change_mtu, +#if TG3_VLAN_TAG_USED + .ndo_vlan_rx_register = tg3_vlan_rx_register, +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = tg3_poll_controller, +#endif +}; + +static const struct net_device_ops tg3_netdev_ops_dma_bug = { + .ndo_open = tg3_open, + 
.ndo_stop = tg3_close, + .ndo_start_xmit = tg3_start_xmit_dma_bug, .ndo_get_stats = tg3_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = tg3_set_rx_mode, @@ -13475,7 +13482,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, tp->rx_jumbo_pending = TG3_DEF_RX_JUMBO_RING_PENDING; tp->tx_pending = TG3_DEF_TX_RING_PENDING; - dev->netdev_ops = &tg3_netdev_ops; netif_napi_add(dev, &tp->napi, tg3_poll, 64); dev->ethtool_ops = &tg3_ethtool_ops; dev->watchdog_timeo = TG3_TX_TIMEOUT; @@ -13488,6 +13494,17 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, goto err_out_iounmap; } + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) + dev->netdev_ops = &tg3_netdev_ops; + else + dev->netdev_ops = &tg3_netdev_ops_dma_bug; + + /* The EPB bridge inside 5714, 5715, and 5780 and any * device behind the EPB cannot support DMA addresses > 40-bit. * On 64-bit systems with IOMMU, use 40-bit dma_mask. 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b4c941444756..fd0b11ea5562 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -308,13 +308,14 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops tun_netdev_ops = { .ndo_open = tun_net_open, .ndo_stop = tun_net_close, + .ndo_start_xmit = tun_net_xmit, .ndo_change_mtu = tun_net_change_mtu, - }; static const struct net_device_ops tap_netdev_ops = { .ndo_open = tun_net_open, .ndo_stop = tun_net_close, + .ndo_start_xmit = tun_net_xmit, .ndo_change_mtu = tun_net_change_mtu, .ndo_set_multicast_list = tun_net_mclist, .ndo_set_mac_address = eth_mac_addr, @@ -691,7 +692,6 @@ static void tun_setup(struct net_device *dev) tun->owner = -1; tun->group = -1; - dev->hard_start_xmit = tun_net_xmit; dev->ethtool_ops = &tun_ethtool_ops; dev->destructor = free_netdev; dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 4f93a55aaaa5..852d0e7c4e62 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -265,6 +265,7 @@ static void veth_dev_free(struct net_device *dev) static const struct net_device_ops veth_netdev_ops = { .ndo_init = veth_dev_init, .ndo_open = veth_open, + .ndo_start_xmit = veth_xmit, .ndo_get_stats = veth_get_stats, }; @@ -273,7 +274,6 @@ static void veth_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &veth_netdev_ops; - dev->hard_start_xmit = veth_xmit; dev->ethtool_ops = &veth_ethtool_ops; dev->features |= NETIF_F_LLTX; dev->destructor = veth_dev_free; diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 033e63a68436..58e25d090ae0 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -852,6 +852,7 @@ static int velocity_soft_reset(struct velocity_info *vptr) static const struct net_device_ops velocity_netdev_ops = { .ndo_open = velocity_open, .ndo_stop = velocity_close, + .ndo_start_xmit = velocity_xmit, .ndo_get_stats = velocity_get_stats, 
.ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = velocity_set_multi, @@ -971,7 +972,6 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi vptr->phy_id = MII_GET_PHY_ID(vptr->mac_regs); dev->irq = pdev->irq; - dev->hard_start_xmit = velocity_xmit; dev->netdev_ops = &velocity_netdev_ops; dev->ethtool_ops = &velocity_ethtool_ops; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 981a089d5149..d8fb23679ee3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -454,8 +454,8 @@ struct netdev_queue { /* * This structure defines the management hooks for network devices. - * The following hooks can bed defined and are optonal (can be null) - * unless otherwise noted. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); * This function is called once when network device is registered. @@ -475,6 +475,15 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * + * int (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * Called when a packet needs to be transmitted. + * Must return NETDEV_TX_OK, NETDEV_TX_BUSY, or NETDEV_TX_LOCKED. + * Required; cannot be NULL. + * + * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); + * Called to decide which queue to use when the device supports multiple + * transmit queues. + * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make * changes to configuration when multicast or promiscious is enabled. @@ -508,7 +517,7 @@ struct netdev_queue { * of a device. If not defined, any request to change MTU will * will return an error.
* - * void (*ndo_tx_timeout) (struct net_device *dev); + * void (*ndo_tx_timeout)(struct net_device *dev); * Callback uses when the transmitter has not made any progress * for dev->watchdog ticks. * @@ -538,6 +547,10 @@ struct net_device_ops { void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); + int (*ndo_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*ndo_select_queue)(struct net_device *dev, + struct sk_buff *skb); #define HAVE_CHANGE_RX_FLAGS void (*ndo_change_rx_flags)(struct net_device *dev, int flags); @@ -557,8 +570,10 @@ struct net_device_ops { int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); #define HAVE_CHANGE_MTU - int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); - + int (*ndo_change_mtu)(struct net_device *dev, + int new_mtu); + int (*ndo_neigh_setup)(struct net_device *dev, + struct neigh_parms *); #define HAVE_TX_TIMEOUT void (*ndo_tx_timeout) (struct net_device *dev); @@ -761,18 +776,12 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; - /* Map buffer to appropriate transmit queue */ - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); - unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* * One part is mostly used on xmit path (device) */ void *priv; /* pointer to private data */ - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); /* These may be needed for future network-power-down code. 
*/ unsigned long trans_start; /* Time (in jiffies) of last Tx */ @@ -800,8 +809,6 @@ struct net_device /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev); - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); - #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif @@ -842,6 +849,10 @@ struct net_device void (*uninit)(struct net_device *dev); int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); void (*change_rx_flags)(struct net_device *dev, int flags); void (*set_rx_mode)(struct net_device *dev); @@ -854,6 +865,8 @@ struct net_device int (*set_config)(struct net_device *dev, struct ifmap *map); int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, + struct neigh_parms *); void (*tx_timeout) (struct net_device *dev); struct net_device_stats* (*get_stats)(struct net_device *dev); void (*vlan_rx_register)(struct net_device *dev, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 920ce3348398..18538d7460d7 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -163,10 +163,11 @@ static const struct ethtool_ops br_ethtool_ops = { static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, - .ndo_set_mac_address = br_set_mac_address, - .ndo_set_multicast_list = br_dev_set_multicast_list, - .ndo_change_mtu = br_change_mtu, - .ndo_do_ioctl = br_dev_ioctl, + .ndo_start_xmit = br_dev_xmit, + .ndo_set_mac_address = br_set_mac_address, + .ndo_set_multicast_list = br_dev_set_multicast_list, + .ndo_change_mtu = br_change_mtu, + .ndo_do_ioctl = br_dev_ioctl, }; void br_dev_setup(struct net_device *dev) @@ -175,7 +176,6 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - 
dev->hard_start_xmit = br_dev_xmit; dev->destructor = free_netdev; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->tx_queue_len = 0; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ee3a8dd13f55..727c5c510a60 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -373,7 +373,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) return -EINVAL; - if (dev->hard_start_xmit == br_dev_xmit) + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) return -ELOOP; if (dev->br_port != NULL) diff --git a/net/core/dev.c b/net/core/dev.c index 8843f4e3f5e1..4615e9a443aa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1660,6 +1660,9 @@ static int dev_gso_segment(struct sk_buff *skb) int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { + const struct net_device_ops *ops = dev->netdev_ops; + + prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); @@ -1671,7 +1674,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return dev->hard_start_xmit(skb, dev); + return ops->ndo_start_xmit(skb, dev); } gso: @@ -1681,7 +1684,7 @@ gso: skb->next = nskb->next; nskb->next = NULL; - rc = dev->hard_start_xmit(nskb, dev); + rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc)) { nskb->next = skb->next; skb->next = nskb; @@ -1755,10 +1758,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { + const struct net_device_ops *ops = dev->netdev_ops; u16 queue_index = 0; - if (dev->select_queue) - queue_index = dev->select_queue(dev, skb); + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) queue_index = simple_tx_hash(dev, skb); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 
cca6a55909eb..9c3717a23cf7 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1327,9 +1327,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { struct neigh_parms *p, *ref; - struct net *net; + struct net *net = dev_net(dev); + const struct net_device_ops *ops = dev->netdev_ops; - net = dev_net(dev); ref = lookup_neigh_params(tbl, net, 0); if (!ref) return NULL; @@ -1341,7 +1341,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); - if (dev->neigh_setup && dev->neigh_setup(dev, p)) { + if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { kfree(p); return NULL; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 630df6034444..96fb0519eb7a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,6 +58,7 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { @@ -71,7 +72,7 @@ static void queue_process(struct work_struct *work) __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); local_irq_restore(flags); @@ -273,6 +274,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) int status = NETDEV_TX_BUSY; unsigned long tries; struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netpoll_info *npinfo = np->dev->npinfo; if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { @@ -293,7 +295,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if 
(!netif_tx_queue_stopped(txq)) - status = dev->hard_start_xmit(skb, dev); + status = ops->ndo_start_xmit(skb, dev); __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4e77914c4d42..15e0c2c7aacf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3352,14 +3352,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { - struct net_device *odev = NULL; + struct net_device *odev = pkt_dev->odev; + int (*xmit)(struct sk_buff *, struct net_device *) + = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; __u64 idle_start = 0; u16 queue_map; int ret; - odev = pkt_dev->odev; - if (pkt_dev->delay_us || pkt_dev->delay_ns) { u64 now; @@ -3440,7 +3440,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) atomic_inc(&(pkt_dev->skb->users)); retry_now: - ret = odev->hard_start_xmit(pkt_dev->skb, odev); + ret = (*xmit)(pkt_dev->skb, odev); if (likely(ret == NETDEV_TX_OK)) { pkt_dev->last_ok = 1; pkt_dev->sofar++; -- cgit v1.2.3 From 4db0acf3c0afbbbb2ae35a65f8896ca6655a47ec Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 24 Nov 2008 15:48:05 -0800 Subject: net: gen_estimator: Fix gen_kill_estimator() lookups gen_kill_estimator() linear lists lookups are very slow, and e.g. while deleting a large number of HTB classes soft lockups were reported. Here is another try to fix this problem: this time internally, with rbtree, so similarly to Jamal's hashing idea IIRC. (Looking for next hits could be still optimized, but it's really fast as it is.) Reported-by: Badalian Vyacheslav Reported-by: Denys Fedoryshchenko Signed-off-by: Jarek Poplawski Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/core/gen_estimator.c | 76 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 20 deletions(-) (limited to 'net/core') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 57abe8266be1..80aa160877e9 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -89,6 +90,7 @@ struct gen_estimator u32 avpps; u32 avbps; struct rcu_head e_rcu; + struct rb_node node; }; struct gen_estimator_head @@ -102,6 +104,9 @@ static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; /* Protects against NULL dereference */ static DEFINE_RWLOCK(est_lock); +/* Protects against soft lockup during large deletion */ +static struct rb_root est_root = RB_ROOT; + static void est_timer(unsigned long arg) { int idx = (int)arg; @@ -139,6 +144,45 @@ skip: rcu_read_unlock(); } +static void gen_add_node(struct gen_estimator *est) +{ + struct rb_node **p = &est_root.rb_node, *parent = NULL; + + while (*p) { + struct gen_estimator *e; + + parent = *p; + e = rb_entry(parent, struct gen_estimator, node); + + if (est->bstats > e->bstats) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&est->node, parent, p); + rb_insert_color(&est->node, &est_root); +} + +static struct gen_estimator *gen_find_node(struct gnet_stats_basic *bstats, + struct gnet_stats_rate_est *rate_est) +{ + struct rb_node *p = est_root.rb_node; + + while (p) { + struct gen_estimator *e; + + e = rb_entry(p, struct gen_estimator, node); + + if (bstats > e->bstats) + p = p->rb_right; + else if (bstats < e->bstats || rate_est != e->rate_est) + p = p->rb_left; + else + return e; + } + return NULL; +} + /** * gen_new_estimator - create a new rate estimator * @bstats: basic statistics @@ -194,6 +238,8 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx)); list_add_rcu(&est->list, &elist[idx].list); + 
gen_add_node(est); + return 0; } @@ -209,34 +255,24 @@ static void __gen_kill_estimator(struct rcu_head *head) * @bstats: basic statistics * @rate_est: rate estimator statistics * - * Removes the rate estimator specified by &bstats and &rate_est - * and deletes the timer. + * Removes the rate estimator specified by &bstats and &rate_est. * * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est) + struct gnet_stats_rate_est *rate_est) { - int idx; - struct gen_estimator *e, *n; - - for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - - /* Skip non initialized indexes */ - if (!elist[idx].timer.function) - continue; + struct gen_estimator *e; - list_for_each_entry_safe(e, n, &elist[idx].list, list) { - if (e->rate_est != rate_est || e->bstats != bstats) - continue; + while ((e = gen_find_node(bstats, rate_est))) { + rb_erase(&e->node, &est_root); - write_lock_bh(&est_lock); - e->bstats = NULL; - write_unlock_bh(&est_lock); + write_lock_bh(&est_lock); + e->bstats = NULL; + write_unlock_bh(&est_lock); - list_del_rcu(&e->list); - call_rcu(&e->e_rcu, __gen_kill_estimator); - } + list_del_rcu(&e->list); + call_rcu(&e->e_rcu, __gen_kill_estimator); } } -- cgit v1.2.3 From 832d11c5cd076abc0aa1eaf7be96c81d1a59ce41 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:20:15 -0800 Subject: tcp: Try to restore large SKBs while SACK processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During SACK processing, most of the benefits of TSO are eaten by the SACK blocks that one-by-one fragment SKBs to MSS sized chunks. Then we're in problems when cleanup work for them has to be done when a large cumulative ACK comes. Try to return back to pre-split state already while more and more SACK info gets discovered by combining newly discovered SACK areas with the previous skb if that's SACKed as well. 
This approach has a number of benefits: 1) The processing overhead is spread more equally over the RTT 2) Write queue has fewer skbs to process (affects everything which has to walk in the queue past the sacked areas) 3) Write queue is consistent the whole time, so no other part of TCP has to be aware of this (this was not the case with some other approach that was, well, quite intrusive all around). 4) Clean_rtx_queue can release most of the pages using a single put_page instead of the previous PAGE_SIZE/mss+1 calls In case a hole is fully filled by the new SACK block, we attempt to combine the next skb too, which allows construction of skbs that are even larger than what tso split them to, and it handles the hole-at-every-nth-segment patterns that often occur during slow start overshoot pretty nicely. Though for this to be really useful, a retransmission would also have to get lost, since cumulative ACKs advance one hole at a time in the most typical case. TODO: handle upwards-only merging. That should be rather easy when the segment is fully sacked, but I'm leaving that as a future work item (it won't make a very large difference anyway since this current approach already covers quite a lot of normal cases). I was earlier thinking of some sophisticated way of tracking timestamps of the first and the last segment but later on realized that it won't be necessary at all to store the timestamp of the last segment. The cases that can occur are basically either: 1) ambiguous => no sensible measurement can be taken anyway 2) non-ambiguous is due to reordering => having the timestamp of the last segment there would skew things more than it would help, since the ack got triggered by one of the holes (besides some subtle issues that would make determining the right hole/skb an even harder problem). Anyway, it has nothing to do with this change then.
I chose to route some abnormal-looking cases with goto noop; some could be handled differently (e.g., by stopping the walking at that skb but again). In general, they either shouldn't happen at all or are rare enough to make no difference in practice. In theory this change (as a whole) could cause some macroscale regression (global) because of cache misses that are taken over the round-trip time, but it very likely gets better because of far fewer (local) cache misses for the other write queue walkers and the big recovery clearing cumulative ack. It is worth noting that these benefits would be very easy to get also without TSO/GSO being on, as long as the data is in pages so that we can merge them. Currently I won't let that happen because DSACK splitting at a fragment would mess up pcounts due to sk_can_gso in tcp_set_skb_tso_segs. Once DSACK fragments get avoided, we have some conditions that can be made less strict. TODO: I will probably have to convert the excessive pointer passing to struct sacktag_state... :-) My testing revealed that a considerable number of skbs couldn't be shifted because they were cloned (most likely still awaiting tx reclaim)... [The rest is considered future work instead, since I reproducibly got EFAULT in tcpdump's recvfrom when I added pskb_expand_head to deal with clones, so I separated that into another, later patch] ...To counter that, I gave up on the fifth advantage: 5) When growing the previous SACK block, fewer allocs for new skbs are done; basically a new alloc is needed only when a new hole is detected and when the previous skb runs out of frags space ...which now only happens if reclaim is fast enough to dispose of the clone before the SACK block comes in (the window is RTT long), otherwise we'll have to alloc some.
With clones being handled I got these numbers (will be somewhat worse without that), taken with fine-grained mibs: TCPSackShifted 398 TCPSackMerged 877 TCPSackShiftFallback 320 TCPSACKCOLLAPSEFALLBACKGSO 0 TCPSACKCOLLAPSEFALLBACKSKBBITS 0 TCPSACKCOLLAPSEFALLBACKSKBDATA 0 TCPSACKCOLLAPSEFALLBACKBELOW 0 TCPSACKCOLLAPSEFALLBACKFIRST 1 TCPSACKCOLLAPSEFALLBACKPREVBITS 318 TCPSACKCOLLAPSEFALLBACKMSS 1 TCPSACKCOLLAPSEFALLBACKNOHEAD 0 TCPSACKCOLLAPSEFALLBACKSHIFT 0 TCPSACKCOLLAPSENOOPSEQ 0 TCPSACKCOLLAPSENOOPSMALLPCOUNT 0 TCPSACKCOLLAPSENOOPSMALLLEN 0 TCPSACKCOLLAPSEHOLE 12 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 33 +++++++ include/net/tcp.h | 5 + net/core/skbuff.c | 140 +++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 256 +++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 427 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a01b6f84e3bc..acf17af45af9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -492,6 +492,19 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, return (skb->next == (struct sk_buff *) list); } +/** + * skb_queue_is_first - check if skb is the first entry in the queue + * @list: queue head + * @skb: buffer + * + * Returns true if @skb is the first buffer on the list. + */ +static inline bool skb_queue_is_first(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + return (skb->prev == (struct sk_buff *) list); +} + /** * skb_queue_next - return the next packet in the queue * @list: queue head @@ -510,6 +523,24 @@ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, return skb->next; } +/** + * skb_queue_prev - return the prev packet in the queue + * @list: queue head + * @skb: current buffer + * + * Return the prev packet in @list before @skb. It is only valid to + * call this if skb_queue_is_first() evaluates to false. 
+ */ +static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + /* This BUG_ON may seem severe, but if we just return then we + * are going to dereference garbage. + */ + BUG_ON(skb_queue_is_first(list, skb)); + return skb->prev; +} + /** * skb_get - reference buffer * @skb: buffer to reference @@ -1652,6 +1683,8 @@ extern int skb_splice_bits(struct sk_buff *skb, extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, + int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); diff --git a/include/net/tcp.h b/include/net/tcp.h index 90b4c3b4c336..265392470b26 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1192,6 +1192,11 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_bu return skb_queue_next(&sk->sk_write_queue, skb); } +static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb) +{ + return skb_queue_prev(&sk->sk_write_queue, skb); +} + #define tcp_for_write_queue(skb, sk) \ skb_queue_walk(&(sk)->sk_write_queue, skb) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 267185a848f6..844b8abeb18c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2018,6 +2018,146 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } +/* Shifting from/to a cloned skb is a no-go. 
+ * + * TODO: handle cloned skbs by using pskb_expand_head() + */ +static int skb_prepare_for_shift(struct sk_buff *skb) +{ + return skb_cloned(skb); +} + +/** + * skb_shift - Shifts paged data partially from skb to another + * @tgt: buffer into which tail data gets added + * @skb: buffer from which the paged data comes + * @shiftlen: shift up to this many bytes + * + * Attempts to shift up to shiftlen worth of bytes, which may be less than + * the length of the skb, from skb to tgt. Returns the number of bytes shifted. + * It's up to the caller to free skb if everything was shifted. + * + * If @tgt runs out of frags, the whole operation is aborted. + * + * Skb cannot include anything else but paged data while tgt is allowed + * to have non-paged data as well. + * + * TODO: full sized shift could be optimized but that would need + * specialized skb free'er to handle frags without up-to-date nr_frags. + */ +int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) +{ + int from, to, merge, todo; + struct skb_frag_struct *fragfrom, *fragto; + + BUG_ON(shiftlen > skb->len); + BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ + + todo = shiftlen; + from = 0; + to = skb_shinfo(tgt)->nr_frags; + fragfrom = &skb_shinfo(skb)->frags[from]; + + /* Actual merge is delayed until the point when we know we can + * commit all, so that we don't have to undo partial changes + */ + if (!to || + !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + merge = -1; + } else { + merge = to - 1; + + todo -= fragfrom->size; + if (todo < 0) { + if (skb_prepare_for_shift(skb) || + skb_prepare_for_shift(tgt)) + return 0; + + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += shiftlen; + fragfrom->size -= shiftlen; + fragfrom->page_offset += shiftlen; + + goto onlymerged; + } + + from++; + } + + /* Skip full, not-fitting skb to avoid expensive operations */ + if ((shiftlen == skb->len) && + (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) + return 0;
+ + if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) + return 0; + + while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { + if (to == MAX_SKB_FRAGS) + return 0; + + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[to]; + + if (todo >= fragfrom->size) { + *fragto = *fragfrom; + todo -= fragfrom->size; + from++; + to++; + + } else { + get_page(fragfrom->page); + fragto->page = fragfrom->page; + fragto->page_offset = fragfrom->page_offset; + fragto->size = todo; + + fragfrom->page_offset += todo; + fragfrom->size -= todo; + todo = 0; + + to++; + break; + } + } + + /* Ready to "commit" this state change to tgt */ + skb_shinfo(tgt)->nr_frags = to; + + if (merge >= 0) { + fragfrom = &skb_shinfo(skb)->frags[0]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += fragfrom->size; + put_page(fragfrom->page); + } + + /* Reposition in the original skb */ + to = 0; + while (from < skb_shinfo(skb)->nr_frags) + skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; + skb_shinfo(skb)->nr_frags = to; + + BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); + +onlymerged: + /* Most likely the tgt won't ever need its checksum anymore, skb on + * the other hand might need it if it needs to be resent + */ + tgt->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_PARTIAL; + + /* Yak, is it really working this way? Some helper please? */ + skb->len -= shiftlen; + skb->data_len -= shiftlen; + skb->truesize -= shiftlen; + tgt->len += shiftlen; + tgt->data_len += shiftlen; + tgt->truesize += shiftlen; + + return shiftlen; +} + /** * skb_prepare_seq_read - Prepare a sequential read of skb data * @skb: the buffer to read diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3c8e297e2c39..97d57676b8ee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1242,6 +1242,8 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, * aligned portion of it that matches. 
Therefore we might need to fragment * which may fail and creates some hassle (caller must handle error case * returns). + * + * FIXME: this could be merged to shift decision code */ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, u32 start_seq, u32 end_seq) @@ -1353,9 +1355,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; - - if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - tcp_advance_highest_sack(sk, skb); } /* D-SACK. We can detect redundant retransmission in S|R and plain R @@ -1370,12 +1369,231 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, return flag; } +static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, + struct sk_buff *skb, unsigned int pcount, + int shifted, int fack_count, int *reord, + int *flag, int mss) +{ + struct tcp_sock *tp = tcp_sk(sk); + u8 dummy_sacked = TCP_SKB_CB(skb)->sacked; /* We discard results */ + + BUG_ON(!pcount); + + TCP_SKB_CB(prev)->end_seq += shifted; + TCP_SKB_CB(skb)->seq += shifted; + + skb_shinfo(prev)->gso_segs += pcount; + BUG_ON(skb_shinfo(skb)->gso_segs < pcount); + skb_shinfo(skb)->gso_segs -= pcount; + + /* When we're adding to gso_segs == 1, gso_size will be zero, + * in theory this shouldn't be necessary but as long as DSACK + * code can come after this skb later on it's better to keep + * setting gso_size to something. + */ + if (!skb_shinfo(prev)->gso_size) { + skb_shinfo(prev)->gso_size = mss; + skb_shinfo(prev)->gso_type = sk->sk_gso_type; + } + + /* CHECKME: To clear or not to clear? Mimics normal skb currently */ + if (skb_shinfo(skb)->gso_segs <= 1) { + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; + } + + *flag |= tcp_sacktag_one(skb, sk, reord, 0, fack_count, &dummy_sacked, + pcount); + + /* Difference in this won't matter, both ACKed by the same cumul. 
ACK */ + TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); + + tcp_clear_all_retrans_hints(tp); + + if (skb->len > 0) { + BUG_ON(!tcp_skb_pcount(skb)); + return 0; + } + + /* Whole SKB was eaten :-) */ + + TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; + if (skb == tcp_highest_sack(sk)) + tcp_advance_highest_sack(sk, skb); + + tcp_unlink_write_queue(skb, sk); + sk_wmem_free_skb(sk, skb); + + return 1; +} + +/* I wish gso_size would have a bit more sane initialization than + * something-or-zero which complicates things + */ +static int tcp_shift_mss(struct sk_buff *skb) +{ + int mss = tcp_skb_mss(skb); + + if (!mss) + mss = skb->len; + + return mss; +} + +/* Shifting pages past head area doesn't work */ +static int skb_can_shift(struct sk_buff *skb) +{ + return !skb_headlen(skb) && skb_is_nonlinear(skb); +} + +/* Try collapsing SACK blocks spanning across multiple skbs to a single + * skb. + */ +static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, + u32 start_seq, u32 end_seq, + int dup_sack, int *fack_count, + int *reord, int *flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev; + int mss; + int pcount = 0; + int len; + int in_sack; + + if (!sk_can_gso(sk)) + goto fallback; + + /* Normally R but no L won't result in plain S */ + if (!dup_sack && + (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) == TCPCB_SACKED_RETRANS) + goto fallback; + if (!skb_can_shift(skb)) + goto fallback; + /* This frame is about to be dropped (was ACKed). 
*/ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + goto fallback; + + /* Can only happen with delayed DSACK + discard craziness */ + if (unlikely(skb == tcp_write_queue_head(sk))) + goto fallback; + prev = tcp_write_queue_prev(sk, skb); + + if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto fallback; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + + if (in_sack) { + len = skb->len; + pcount = tcp_skb_pcount(skb); + mss = tcp_shift_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_shift_mss(prev)) + goto fallback; + } else { + if (!after(TCP_SKB_CB(skb)->end_seq, start_seq)) + goto noop; + /* CHECKME: This is non-MSS split case only?, this will + * cause skipped skbs due to advancing loop btw, original + * has that feature too + */ + if (tcp_skb_pcount(skb) <= 1) + goto noop; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); + if (!in_sack) { + /* TODO: head merge to next could be attempted here + * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)), + * though it might not be worth of the additional hassle + * + * ...we can probably just fallback to what was done + * previously. We could try merging non-SACKed ones + * as well but it probably isn't going to buy off + * because later SACKs might again split them, and + * it would make skb timestamp tracking considerably + * harder problem. + */ + goto fallback; + } + + len = end_seq - TCP_SKB_CB(skb)->seq; + BUG_ON(len < 0); + BUG_ON(len > skb->len); + + /* MSS boundaries should be honoured or else pcount will + * severely break even though it makes things bit trickier. 
+ * Optimize common case to avoid most of the divides + */ + mss = tcp_skb_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_shift_mss(prev)) + goto fallback; + + if (len == mss) { + pcount = 1; + } else if (len < mss) { + goto noop; + } else { + pcount = len / mss; + len = pcount * mss; + } + } + + if (!skb_shift(prev, skb, len)) + goto fallback; + if (!tcp_shifted_skb(sk, prev, skb, pcount, len, *fack_count, reord, + flag, mss)) + goto out; + + /* Hole filled allows collapsing with the next as well, this is very + * useful when hole on every nth skb pattern happens + */ + if (prev == tcp_write_queue_tail(sk)) + goto out; + skb = tcp_write_queue_next(sk, prev); + + if (!skb_can_shift(skb)) + goto out; + if (skb == tcp_send_head(sk)) + goto out; + if ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto out; + + len = skb->len; + if (skb_shift(prev, skb, len)) { + pcount += tcp_skb_pcount(skb); + tcp_shifted_skb(sk, prev, skb, tcp_skb_pcount(skb), len, + *fack_count, reord, flag, mss); + } + +out: + *fack_count += pcount; + return prev; + +noop: + return skb; + +fallback: + return NULL; +} + static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, u32 start_seq, u32 end_seq, int dup_sack_in, int *fack_count, int *reord, int *flag) { + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *tmp; + tcp_for_write_queue_from(skb, sk) { int in_sack = 0; int dup_sack = dup_sack_in; @@ -1396,18 +1614,42 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack = 1; } - if (in_sack <= 0) - in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, - end_seq); + /* skb reference here is a bit tricky to get right, since + * shifting can eat and free both this skb and the next, + * so not even _safe variant of the loop is enough. 
+ */ + if (in_sack <= 0) { + tmp = tcp_shift_skb_data(sk, skb, start_seq, + end_seq, dup_sack, + fack_count, reord, flag); + if (tmp != NULL) { + if (tmp != skb) { + skb = tmp; + continue; + } + + in_sack = 0; + } else { + in_sack = tcp_match_skb_to_sack(sk, skb, + start_seq, + end_seq); + } + } + if (unlikely(in_sack < 0)) break; - if (in_sack) + if (in_sack) { *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count, &(TCP_SKB_CB(skb)->sacked), tcp_skb_pcount(skb)); + if (!before(TCP_SKB_CB(skb)->seq, + tcp_highest_sack_seq(tp))) + tcp_advance_highest_sack(sk, skb); + } + *fack_count += tcp_skb_pcount(skb); } return skb; -- cgit v1.2.3 From 0ace285605314c54339710484b54814945a60df8 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:30:21 -0800 Subject: tcp: handle shift/merge of cloned skbs too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This caused me to get repeatably: tcpdump: pcap_loop: recvfrom: Bad address Happens occasionally when I tcpdump my for-looped test xfers: while [ : ]; do echo -n "$(date '+%s.%N') "; ./sendfile; sleep 20; done Rest of the relevant commands: ethtool -K eth0 tso off tc qdisc add dev eth0 root netem drop 4% tcpdump -n -s0 -i eth0 -w sacklog.all Running net-next under kvm, connection goes to the same host (basically just out of kvm). The connection itself works ok and data gets sent without corruption even with a large number of tests while tcpdump fails usually within less than 5 tests. Whether it only happens because of this change or not, I don't know for sure but it's the only thing with which I've seen that error. The non-cloned variant works w/o it for much longer time. I'm yet to debug where the error actually comes from. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 844b8abeb18c..57555a4525da 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2018,13 +2018,10 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } -/* Shifting from/to a cloned skb is a no-go. - * - * TODO: handle cloned skbs by using pskb_expand_head() - */ +/* Shifting from/to a cloned skb is a no-go. */ static int skb_prepare_for_shift(struct sk_buff *skb) { - return skb_cloned(skb); + return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } /** -- cgit v1.2.3 From 9f782db3f5ceee9aa8de6f853969fbec1b8c6e65 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 25 Nov 2008 13:57:01 -0800 Subject: tcp: skb_shift cannot cache frag ptrs past pskb_expand_head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since pskb_expand_head creates copy of the shared area we cannot keep any frag ptr past de-cloning. This fixes the tcpdump recvfrom -EFAULT problem. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/core/skbuff.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 57555a4525da..e03d77d4c1c9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2018,7 +2018,10 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } -/* Shifting from/to a cloned skb is a no-go. */ +/* Shifting from/to a cloned skb is a no-go. + * + * Caller cannot keep skb_shinfo related pointers past calling here! 
+ */ static int skb_prepare_for_shift(struct sk_buff *skb) { return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); @@ -2070,6 +2073,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) skb_prepare_for_shift(tgt)) return 0; + /* All previous frag pointers might be stale! */ + fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[merge]; fragto->size += shiftlen; -- cgit v1.2.3 From 09bb52175bf4d6a46fc8502e76be29206d9a677a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 25 Nov 2008 16:46:37 -0800 Subject: netns: filter out uevent not belonging to init_net This patch will filter out the uevent not related to the init_net. Without this patch if a network device is created in a network namespace with the same name as one network device belonging to the initial network namespace (eg. eth0), when the network namespace will die and the network device will be destroyed, an event will be sent and caught by the udevd daemon. That will result in the real network device being shut down because the udevd/uevent are not namespace aware. Signed-off-by: Daniel Lezcano Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index afd42d717320..6ac29a46e23e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -427,6 +427,9 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) struct net_device *dev = to_net_dev(d); int retval; + if (!net_eq(dev_net(dev), &init_net)) + return 0; + /* pass interface to uevent. */ retval = add_uevent_var(env, "INTERFACE=%s", dev->name); if (retval) -- cgit v1.2.3 From 5447c5e401c49aba0c36bb1066f2d25b152553b7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:31:51 -0800 Subject: netns xfrm: finding states in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 7 ++--- net/core/pktgen.c | 3 ++- net/key/af_key.c | 6 ++--- net/xfrm/xfrm_state.c | 73 +++++++++++++++++++++++++++------------------------ net/xfrm/xfrm_user.c | 4 +-- 5 files changed, 49 insertions(+), 44 deletions(-) (limited to 'net/core') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 15136c5e2622..4cbd0557c698 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1315,7 +1315,8 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family); -extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr, +extern struct xfrm_state * xfrm_stateonly_find(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid); @@ -1361,7 +1362,7 @@ struct xfrmk_spdinfo { u32 spdhmcnt; }; -extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); +extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); @@ -1446,7 +1447,7 @@ struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err); int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); -struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, +struct xfrm_state * xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 15e0c2c7aacf..65498483325a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2165,7 +2165,8 @@ static 
void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) struct xfrm_state *x = pkt_dev->flows[flow].x; if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, + x = xfrm_stateonly_find(&init_net, + (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, pkt_dev->ipsmode, diff --git a/net/key/af_key.c b/net/key/af_key.c index 449a5d03e283..4ef0827009e9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1348,7 +1348,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(&init_net, hdr->sadb_msg_seq); if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1356,7 +1356,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (!x) - x = xfrm_find_acq(mode, reqid, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(&init_net, mode, reqid, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1404,7 +1404,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg * if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = xfrm_find_acq_byseq(hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(&init_net, hdr->sadb_msg_seq); if (x == NULL) return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cd51e4e3d023..0d974fc9dd6c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -765,6 +765,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { + struct net *net = xp_net(pol); unsigned int h; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; @@ -775,8 +776,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(&init_net, daddr, saddr, 
tmpl->reqid, family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -820,13 +821,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(&init_net, daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(net, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; goto out; } - x = xfrm_state_alloc(&init_net); + x = xfrm_state_alloc(net); if (x == NULL) { error = -ENOMEM; goto out; @@ -845,19 +846,19 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add(&x->km.all, &init_net.xfrm.state_all); - hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); - h = xfrm_src_hash(&init_net, daddr, saddr, family); - hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&init_net, &x->id.daddr, x->id.spi, x->id.proto, family); - hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - init_net.xfrm.state_num++; - xfrm_hash_grow_check(&init_net, x->bydst.next != NULL); + net->xfrm.state_num++; + xfrm_hash_grow_check(net, x->bydst.next != NULL); } else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ 
-877,7 +878,8 @@ out: } struct xfrm_state * -xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_stateonly_find(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { unsigned int h; @@ -885,8 +887,8 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct hlist_node *entry; spin_lock(&xfrm_state_lock); - h = xfrm_dst_hash(&init_net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + h = xfrm_dst_hash(net, daddr, saddr, reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -972,13 +974,13 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(&init_net, daddr, saddr, reqid, family); + unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1010,7 +1012,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re if (!create) return NULL; - x = xfrm_state_alloc(&init_net); + x = xfrm_state_alloc(net); if (likely(x)) { switch (family) { case AF_INET: @@ -1045,23 +1047,24 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re 
xfrm_state_hold(x); x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - list_add(&x->km.all, &init_net.xfrm.state_all); - hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); - h = xfrm_src_hash(&init_net, daddr, saddr, family); - hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); - init_net.xfrm.state_num++; + net->xfrm.state_num++; - xfrm_hash_grow_check(&init_net, x->bydst.next != NULL); + xfrm_hash_grow_check(net, x->bydst.next != NULL); } return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq); int xfrm_state_add(struct xfrm_state *x) { + struct net *net = xs_net(x); struct xfrm_state *x1, *to_put; int family; int err; @@ -1082,7 +1085,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(x->km.seq); + x1 = __xfrm_find_acq_byseq(net, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; @@ -1091,7 +1094,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x1 = __find_acq_core(net, family, x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -1390,14 +1393,14 @@ xfrm_state_lookup_byaddr(struct net *net, EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(u8 mode, u32 reqid, u8 proto, +xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(net, family, mode, 
reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); return x; @@ -1444,15 +1447,15 @@ EXPORT_SYMBOL(xfrm_state_sort); /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) { int i; - for (i = 0; i <= init_net.xfrm.state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); @@ -1463,12 +1466,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(seq); + x = __xfrm_find_acq_byseq(net, seq); spin_unlock_bh(&xfrm_state_lock); return x; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e02ef3361190..3d577440b673 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -837,7 +837,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, x = NULL; if (p->info.seq) { - x = xfrm_find_acq_byseq(p->info.seq); + x = xfrm_find_acq_byseq(&init_net, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -845,7 +845,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(p->info.mode, p->info.reqid, + x = xfrm_find_acq(&init_net, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); -- cgit v1.2.3 From 52479b623d3d41df84c499325b6a8c7915413032 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:35:18 -0800 Subject: netns xfrm: lookup in netns Pass netns to 
xfrm_lookup()/__xfrm_lookup(). For that pass netns to flow_cache_lookup() and resolver callback. Take it from socket or netdevice. Stub DECnet to init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/dst.h | 16 ++++++++-------- include/net/flow.h | 9 +++++---- net/core/flow.c | 4 ++-- net/dccp/ipv6.c | 10 +++++----- net/decnet/dn_route.c | 6 +++--- net/ipv4/icmp.c | 4 ++-- net/ipv4/netfilter.c | 4 ++-- net/ipv4/route.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 3 ++- net/ipv6/icmp.c | 6 +++--- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_tunnel.c | 5 +++-- net/ipv6/mcast.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/netfilter.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/raw.c | 3 ++- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 11 ++++++----- net/ipv6/udp.c | 3 ++- net/xfrm/xfrm_policy.c | 38 ++++++++++++++++++++------------------ 22 files changed, 75 insertions(+), 67 deletions(-) (limited to 'net/core') diff --git a/include/net/dst.h b/include/net/dst.h index 6c778799bf10..6be3b082a070 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -291,21 +291,21 @@ enum { struct flowi; #ifndef CONFIG_XFRM -static inline int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags) { return 0; } -static inline int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags) { return 0; } #else -extern int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags); -extern int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags); +extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int 
flags); +extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags); #endif #endif diff --git a/include/net/flow.h b/include/net/flow.h index b45a5e4fcadd..809970b7dfee 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -84,12 +84,13 @@ struct flowi { #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 +struct net; struct sock; -typedef int (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, - void **objp, atomic_t **obj_refp); +typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family, + u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver); +extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, + u8 dir, flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/net/core/flow.c b/net/core/flow.c index d323388dd1ba..96015871ecea 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -165,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -225,7 +225,7 @@ nocache: void *obj; atomic_t *obj_ref; - err = resolver(key, family, dir, &obj, &obj_ref); + err = resolver(net, key, family, dir, &obj, &obj_ref); if (fle && !err) { fle->genid = atomic_read(&flow_cache_genid); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f033e845bb07..b963f35c65f6 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -168,7 +168,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); if (err < 0) { sk->sk_err_soft = -err; goto out; @@ -279,7 +279,7 @@ static int 
dccp_v6_send_response(struct sock *sk, struct request_sock *req) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); if (err < 0) goto done; @@ -343,7 +343,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) { - if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, skb, &fl, NULL, 0); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); @@ -569,7 +569,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } @@ -1004,7 +1004,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT); + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 768df000523b..eeaa3d819f9c 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1184,7 +1184,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f err = __dn_route_output_key(pprt, flp, flags); if (err == 0 && flp->proto) { - err = xfrm_lookup(pprt, flp, NULL, 0); + err = xfrm_lookup(&init_net, pprt, flp, NULL, 0); } return err; } @@ -1195,8 +1195,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); if (err == 0 && fl->proto) { - err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ? 
- 0 : XFRM_LOOKUP_WAIT); + err = xfrm_lookup(&init_net, pprt, fl, sk, + (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT); } return err; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7b88be9803b1..705b33b184a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -562,7 +562,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* No need to clone since we're just using its address. */ rt2 = rt; - err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); + err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); switch (err) { case 0: if (rt != rt2) @@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (err) goto relookup_failed; - err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, + err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); switch (err) { case 0: diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6efdb70b3eb2..c99eecf89da5 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -66,7 +66,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif @@ -97,7 +97,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) + if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) return -1; dst_release(skb->dst); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4e6959c29819..77bfba975959 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2761,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, + err = 
__xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) err = ipv4_dst_blackhole(net, rp, flp); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 01edac888510..437b750b98fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,7 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; return err; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e44deb8d4df2..e2bdc6d83a43 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,7 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a77b8d103804..4f433847d95f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -427,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, /* No need to clone since we're just using its address. 
*/ dst2 = dst; - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); switch (err) { case 0: if (dst != dst2) @@ -446,7 +446,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (ip6_dst_lookup(sk, &dst2, &fl)) goto relookup_failed; - err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); @@ -552,7 +552,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 16d43f20b32f..3c3732d50c1a 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -219,7 +219,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; kfree_skb(skb); return err; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ef249ab5c93c..58e2b0d93758 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -846,6 +846,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { + struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -861,9 +862,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(dev_net(dev), NULL, fl); + dst = ip6_route_output(net, NULL, fl); - if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0) + if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) goto tx_err_link_failure; } diff 
--git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 870a1d64605a..0f3896032830 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1466,7 +1466,7 @@ static void mld_sendpack(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; @@ -1831,7 +1831,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index af6705f03b5c..e4acc212345e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -524,7 +524,7 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err < 0) { kfree_skb(skb); return; @@ -1524,7 +1524,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (dst == NULL) return; - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err) return; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index fd5b3a4e3329..627e21db65df 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -29,7 +29,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET6) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0981b4ccb8b1..5a2d0a41694a 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -97,7 +97,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) dst = ip6_route_output(net, NULL, &fl); if 
(dst == NULL) return; - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) + if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2ba04d41dc25..61f6827e5906 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -860,7 +860,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 676c80b5b14b..711175e0571f 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -259,7 +259,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a5d750acd793..f259c9671f3e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -260,7 +260,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -390,7 +391,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } @@ -492,7 +493,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) goto done; if (final_p) ipv6_addr_copy(&fl.fl6_dst, 
final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; skb = tcp_make_synack(sk, dst, req); @@ -1018,7 +1019,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * namespace */ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) @@ -1316,7 +1317,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fd2d9ad4a8a3..38390dd19636 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -849,7 +849,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6165218fd7c2..7c88a25c7af5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -940,7 +940,8 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, return ret; } -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + struct flowi *fl, u16 family, u8 dir) { int err; @@ -956,7 +957,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, return NULL; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&init_net, daddr, saddr, family, dir); + chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = 
NULL; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -973,7 +974,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &init_net.xfrm.policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { @@ -996,14 +997,14 @@ fail: return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1011,7 +1012,7 @@ static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, if (pol || err) goto end; #endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1537,7 +1538,7 @@ static int stale_bundle(struct dst_entry *dst); * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; @@ -1575,10 +1576,10 @@ restart: if (!policy) { /* To accelerate a bit... 
*/ if ((dst_orig->flags & DST_NOXFRM) || - !init_net.xfrm.policy_count[XFRM_POLICY_OUT]) + !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - policy = flow_cache_lookup(fl, dst_orig->ops->family, + policy = flow_cache_lookup(net, fl, dst_orig->ops->family, dir, xfrm_policy_lookup); err = PTR_ERR(policy); if (IS_ERR(policy)) { @@ -1635,7 +1636,8 @@ restart: #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, + XFRM_POLICY_TYPE_MAIN, fl, family, XFRM_POLICY_OUT); if (pols[1]) { @@ -1683,11 +1685,11 @@ restart: if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&init_net.xfrm.km_waitq, &wait); + add_wait_queue(&net->xfrm.km_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); set_current_state(TASK_RUNNING); - remove_wait_queue(&init_net.xfrm.km_waitq, &wait); + remove_wait_queue(&net->xfrm.km_waitq, &wait); nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); @@ -1781,10 +1783,10 @@ nopol: } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - int err = __xfrm_lookup(dst_p, fl, sk, flags); + int err = __xfrm_lookup(net, dst_p, fl, sk, flags); if (err == -EREMOTE) { dst_release(*dst_p); @@ -1936,7 +1938,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, + pol = flow_cache_lookup(&init_net, &fl, family, fl_dir, xfrm_policy_lookup); if (IS_ERR(pol)) { @@ -1959,7 +1961,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(&init_net, 
XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { @@ -2049,7 +2051,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + return xfrm_lookup(&init_net, &skb->dst, &fl, NULL, 0) == 0; } EXPORT_SYMBOL(__xfrm_route_forward); -- cgit v1.2.3 From b27aeadb5948d400df83db4d29590fb9862ba49d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 18:00:48 -0800 Subject: netns xfrm: per-netns sysctls Make net.core.xfrm_aevent_etime net.core.xfrm_acq_expires net.core.xfrm_aevent_rseqth net.core.xfrm_larval_drop sysctls per-netns. For that make net_core_path[] global, register it to prevent two /proc/net/core entries and change initcall position -- xfrm_init() is called from fs_initcall, so this one should be fs_initcall at least. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/ip.h | 1 + include/net/netns/xfrm.h | 10 ++++++ include/net/xfrm.h | 14 +++++--- net/core/sysctl_net_core.c | 42 +++-------------------- net/xfrm/Makefile | 4 +-- net/xfrm/xfrm_policy.c | 10 ++++-- net/xfrm/xfrm_state.c | 16 +++------ net/xfrm/xfrm_sysctl.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_user.c | 4 +-- 9 files changed, 125 insertions(+), 61 deletions(-) create mode 100644 net/xfrm/xfrm_sysctl.c (limited to 'net/core') diff --git a/include/net/ip.h b/include/net/ip.h index ddef10c22e3a..10868139e656 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -187,6 +187,7 @@ extern void inet_get_local_port_range(int *low, int *high); extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; +extern struct ctl_path net_core_path[]; extern struct ctl_path net_ipv4_ctl_path[]; /* From inetpeer.c */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 09f3060e9d18..1ba912749caa 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,6 +6,8 @@ #include #include +struct ctl_table_header; 
+ struct xfrm_policy_hash { struct hlist_head *table; unsigned int hmask; @@ -41,6 +43,14 @@ struct netns_xfrm { struct work_struct policy_hash_work; struct sock *nlsk; + + u32 sysctl_aevent_etime; + u32 sysctl_aevent_rseqth; + int sysctl_larval_drop; + u32 sysctl_acq_expires; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_hdr; +#endif }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1554ccd0c940..2e9f5c0018ae 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -47,11 +47,6 @@ #define XFRM_INC_STATS_USER(net, field) ((void)(net)) #endif -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; -extern int sysctl_xfrm_larval_drop; -extern u32 sysctl_xfrm_acq_expires; - extern struct mutex xfrm_cfg_mutex; /* Organization of SPD aka "XFRM rules" @@ -1310,6 +1305,15 @@ extern int xfrm_proc_init(struct net *net); extern void xfrm_proc_fini(struct net *net); #endif +extern int xfrm_sysctl_init(struct net *net); +#ifdef CONFIG_SYSCTL +extern void xfrm_sysctl_fini(struct net *net); +#else +static inline void xfrm_sysctl_fini(struct net *net) +{ +} +#endif + extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 2bc0384b0448..83d3398559ea 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -12,7 +12,6 @@ #include #include #include -#include static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET @@ -89,40 +88,6 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, -#ifdef CONFIG_XFRM - { - .ctl_name = NET_CORE_AEVENT_ETIME, - .procname = "xfrm_aevent_etime", - .data = &sysctl_xfrm_aevent_etime, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = NET_CORE_AEVENT_RSEQTH, - .procname = 
"xfrm_aevent_rseqth", - .data = &sysctl_xfrm_aevent_rseqth, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xfrm_larval_drop", - .data = &sysctl_xfrm_larval_drop, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xfrm_acq_expires", - .data = &sysctl_xfrm_acq_expires, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { .ctl_name = NET_CORE_BUDGET, @@ -155,7 +120,7 @@ static struct ctl_table netns_core_table[] = { { .ctl_name = 0 } }; -static __net_initdata struct ctl_path net_core_path[] = { +__net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, { }, @@ -207,8 +172,11 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + static struct ctl_table empty[1]; + + register_sysctl_paths(net_core_path, empty); register_net_sysctl_rotable(net_core_path, net_core_table); return register_pernet_subsys(&sysctl_core_ops); } -__initcall(sysctl_core_init); +fs_initcall(sysctl_core_init); diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 0f439a72ccab..c631047e1b27 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,8 +3,8 @@ # obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ - xfrm_input.o xfrm_output.o xfrm_algo.o + xfrm_input.o xfrm_output.o xfrm_algo.o \ + xfrm_sysctl.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o - diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 38822b34ba7d..393cc65dbfa4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -34,8 +34,6 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly = 1; - DEFINE_MUTEX(xfrm_cfg_mutex); 
EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -1671,7 +1669,7 @@ restart: if (unlikely(nx<0)) { err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { + if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { /* EREMOTE tells the caller to generate * a one-shot blackhole route. */ @@ -2504,8 +2502,13 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; + rv = xfrm_sysctl_init(net); + if (rv < 0) + goto out_sysctl; return 0; +out_sysctl: + xfrm_policy_fini(net); out_policy: xfrm_state_fini(net); out_state: @@ -2516,6 +2519,7 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { + xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); xfrm_statistics_fini(net); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 662e47b0bcc3..2fd57f8f77c1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -24,14 +24,6 @@ #include "xfrm_hash.h" -u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; -EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); - -u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; -EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); - -u32 sysctl_xfrm_acq_expires __read_mostly = 30; - /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. 
(input,ctl) @@ -851,8 +843,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; + x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; add_timer(&x->timer); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); @@ -1040,9 +1032,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; add_timer(&x->timer); list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c new file mode 100644 index 000000000000..2e6ffb66f06f --- /dev/null +++ b/net/xfrm/xfrm_sysctl.c @@ -0,0 +1,85 @@ +#include +#include +#include + +static void __xfrm_sysctl_init(struct net *net) +{ + net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME; + net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE; + net->xfrm.sysctl_larval_drop = 1; + net->xfrm.sysctl_acq_expires = 30; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table xfrm_table[] = { + { + .ctl_name = NET_CORE_AEVENT_ETIME, + .procname = "xfrm_aevent_etime", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = NET_CORE_AEVENT_RSEQTH, + .procname = "xfrm_aevent_rseqth", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = 
"xfrm_larval_drop", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_acq_expires", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + {} +}; + +int __net_init xfrm_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + __xfrm_sysctl_init(net); + + table = kmemdup(xfrm_table, sizeof(xfrm_table), GFP_KERNEL); + if (!table) + goto out_kmemdup; + table[0].data = &net->xfrm.sysctl_aevent_etime; + table[1].data = &net->xfrm.sysctl_aevent_rseqth; + table[2].data = &net->xfrm.sysctl_larval_drop; + table[3].data = &net->xfrm.sysctl_acq_expires; + + net->xfrm.sysctl_hdr = register_net_sysctl_table(net, net_core_path, table); + if (!net->xfrm.sysctl_hdr) + goto out_register; + return 0; + +out_register: + kfree(table); +out_kmemdup: + return -ENOMEM; +} + +void xfrm_sysctl_fini(struct net *net) +{ + struct ctl_table *table; + + table = net->xfrm.sysctl_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->xfrm.sysctl_hdr); + kfree(table); +} +#else +int __net_init xfrm_sysctl_init(struct net *net) +{ + __xfrm_sysctl_init(net); + return 0; +} +#endif diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b7240d5b77ad..38ffaf33312e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -368,9 +368,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; x->km.seq = p->seq; - x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; + x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ - x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; + x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; x->preplay.bitmap = 0; x->preplay.seq = x->replay.seq+x->replay_maxdiff; x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; -- cgit v1.2.3 From c1b56878fb68e9c14070939ea4537ad4db79ffae Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Nov 2008 21:14:06 
-0800 Subject: tc: policing requires a rate estimator Found that while trying average rate policing, it was possible to request average rate policing without a rate estimator. This results in no policing which is harmless but incorrect. Since policing could be setup in two steps, need to check in the kernel. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/gen_stats.h | 1 + net/core/gen_estimator.c | 30 +++++++++++++++++++++++++++--- net/sched/act_police.c | 6 ++++++ 3 files changed, 34 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 8cd8185fa2ed..dcf5bfa7d4f1 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -45,5 +45,6 @@ extern void gen_kill_estimator(struct gnet_stats_basic *bstats, extern int gen_replace_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); +extern int gen_estimator_active(const struct gnet_stats_rate_est *rate_est); #endif diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 80aa160877e9..3885550f0187 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -242,6 +242,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, return 0; } +EXPORT_SYMBOL(gen_new_estimator); static void __gen_kill_estimator(struct rcu_head *head) { @@ -275,6 +276,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats, call_rcu(&e->e_rcu, __gen_kill_estimator); } } +EXPORT_SYMBOL(gen_kill_estimator); /** * gen_replace_estimator - replace rate estimator configuration @@ -295,8 +297,30 @@ int gen_replace_estimator(struct gnet_stats_basic *bstats, gen_kill_estimator(bstats, rate_est); return gen_new_estimator(bstats, rate_est, stats_lock, opt); } +EXPORT_SYMBOL(gen_replace_estimator); + +/** + * gen_estimator_active - test if estimator is currently in use + * @rate_est: rate estimator statistics + * + * Returns 1 if estimator is 
active, and 0 if not. + */ +int gen_estimator_active(const struct gnet_stats_rate_est *rate_est) +{ + int idx; + struct gen_estimator *e; + ASSERT_RTNL(); -EXPORT_SYMBOL(gen_kill_estimator); -EXPORT_SYMBOL(gen_new_estimator); -EXPORT_SYMBOL(gen_replace_estimator); + for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { + if (!elist[idx].timer.function) + continue; + + list_for_each_entry(e, &elist[idx].list, list) { + if (e->rate_est == rate_est) + return 1; + } + } + return 0; +} +EXPORT_SYMBOL(gen_estimator_active); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index e19a0261144a..c39f60cea6ee 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -182,6 +182,12 @@ override: R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); if (R_tab == NULL) goto failure; + + if (!est && !gen_estimator_active(&police->tcf_rate_est)) { + err = -EINVAL; + goto failure; + } + if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE]); -- cgit v1.2.3 From 1748376b6626acf59c24e9592ac67b3fe2a0e026 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 21:16:35 -0800 Subject: net: Use a percpu_counter for sockets_allocated Instead of using one atomic_t per protocol, use a percpu_counter for "sockets_allocated", to reduce cache line contention on heavy duty network servers. Note : We revert commit (248969ae31e1b3276fc4399d67ce29a5d81e6fd9 net: af_unix can make unix_nr_socks visbile in /proc), since it is not anymore used after sock_prot_inuse_add() addition Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sctp/sctp.h | 1 + include/net/sock.h | 2 +- include/net/tcp.h | 2 +- net/core/sock.c | 10 +++++++--- net/ipv4/proc.c | 3 ++- net/ipv4/tcp.c | 8 ++++++-- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 2 +- net/sctp/protocol.c | 6 +++++- net/sctp/socket.c | 6 +++--- net/unix/af_unix.c | 1 - 11 files changed, 29 insertions(+), 16 deletions(-) (limited to 'net/core') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 23797506f593..bbb7742195b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -138,6 +138,7 @@ void sctp_write_space(struct sock *sk); unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); +extern struct percpu_counter sctp_sockets_allocated; /* * sctp/primitive.c diff --git a/include/net/sock.h b/include/net/sock.h index 00cd486d362f..a2a3890959c4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -649,7 +649,7 @@ struct proto { /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); atomic_t *memory_allocated; /* Current allocated memory. */ - atomic_t *sockets_allocated; /* Current number of sockets. */ + struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. 
diff --git a/include/net/tcp.h b/include/net/tcp.h index e8ae90a8c35e..cbca3b8a133d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -238,7 +238,7 @@ extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; extern atomic_t tcp_memory_allocated; -extern atomic_t tcp_sockets_allocated; +extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; /* diff --git a/net/core/sock.c b/net/core/sock.c index a4e840e5a053..7a081b647bf9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) - atomic_inc(newsk->sk_prot->sockets_allocated); + percpu_counter_inc(newsk->sk_prot->sockets_allocated); } out: return newsk; @@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) } if (prot->memory_pressure) { - if (!*prot->memory_pressure || - prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * + int alloc; + + if (!*prot->memory_pressure) + return 1; + alloc = percpu_counter_read_positive(prot->sockets_allocated); + if (prot->sysctl_mem[2] > alloc * sk_mem_pages(sk->sk_wmem_queued + atomic_read(&sk->sk_rmem_alloc) + sk->sk_forward_alloc)) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 731789bb499f..4944b47ad628 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -55,7 +55,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", sock_prot_inuse_get(net, &tcp_prot), atomic_read(&tcp_orphan_count), - tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), + tcp_death_row.tw_count, + (int)percpu_counter_sum_positive(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 044224a341eb..e6fade9ebf62 100644 --- a/net/ipv4/tcp.c +++ 
b/net/ipv4/tcp.c @@ -290,9 +290,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_t tcp_memory_allocated; /* Current allocated memory. */ -atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ - EXPORT_SYMBOL(tcp_memory_allocated); + +/* + * Current number of TCP sockets. + */ +struct percpu_counter tcp_sockets_allocated; EXPORT_SYMBOL(tcp_sockets_allocated); /* @@ -2685,6 +2688,7 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + percpu_counter_init(&tcp_sockets_allocated, 0); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cab2458f86fd..26b9030747cc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1797,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } @@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } - atomic_dec(&tcp_sockets_allocated); + percpu_counter_dec(&tcp_sockets_allocated); } EXPORT_SYMBOL(tcp_v4_destroy_sock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f259c9671f3e..8702b06cb60a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1830,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a8ca743241ee..d5ea232c9126 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -102,6 +102,8 @@ struct sock *sctp_get_ctl_sock(void) /* Set up the proc fs entry for the SCTP protocol. 
*/ static __init int sctp_proc_init(void) { + if (percpu_counter_init(&sctp_sockets_allocated, 0)) + goto out_nomem; #ifdef CONFIG_PROC_FS if (!proc_net_sctp) { struct proc_dir_entry *ent; @@ -110,7 +112,7 @@ static __init int sctp_proc_init(void) ent->owner = THIS_MODULE; proc_net_sctp = ent; } else - goto out_nomem; + goto out_free_percpu; } if (sctp_snmp_proc_init()) @@ -135,6 +137,8 @@ out_snmp_proc_init: proc_net_sctp = NULL; remove_proc_entry("sctp", init_net.proc_net); } +out_free_percpu: + percpu_counter_destroy(&sctp_sockets_allocated); out_nomem: return -ENOMEM; #else diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ba81fe3ccab8..a2de585888d0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -114,7 +114,7 @@ extern int sysctl_sctp_wmem[3]; static int sctp_memory_pressure; static atomic_t sctp_memory_allocated; -static atomic_t sctp_sockets_allocated; +struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) { @@ -3613,7 +3613,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - atomic_inc(&sctp_sockets_allocated); + percpu_counter_inc(&sctp_sockets_allocated); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -3632,7 +3632,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. 
*/ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - atomic_dec(&sctp_sockets_allocated); + percpu_counter_dec(&sctp_sockets_allocated); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3a35a6e8bf91..5aaf23e43f1d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -571,7 +571,6 @@ static const struct proto_ops unix_seqpacket_ops = { static struct proto unix_proto = { .name = "UNIX", .owner = THIS_MODULE, - .sockets_allocated = &unix_nr_socks, .obj_size = sizeof(struct unix_sock), }; -- cgit v1.2.3 From 70355602879229c6f8bd694ec9c0814222bc4936 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Nov 2008 01:08:18 -0800 Subject: net: release skb->dst in sock_queue_rcv_skb() When queuing a skb to sk->sk_receive_queue, we can release its dst, not anymore needed. Since current cpu did the dst_hold(), refcount is probably still hot int this cpu caches. This avoids readers to access the original dst to decrement its refcount, possibly a long time after packet reception. This should speedup UDP and RAW receive path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 7a081b647bf9..b28764558a7d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -289,7 +289,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; skb_set_owner_r(skb, sk); - + /* + * release dst right now while its hot + */ + dst_release(skb->dst); + skb->dst = NULL; /* Cache the SKB length before we tack it onto the receive * queue. 
Once it is added it no longer belongs to us and * may be freed by other threads of control pulling packets -- cgit v1.2.3 From 244e6c2d0724bc4908a1995804704bdee3b31528 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 26 Nov 2008 15:24:32 -0800 Subject: pkt_sched: gen_estimator: Optimize gen_estimator_active() Since all other gen_estimator functions use bstats and rate_est params together, and searching for them is optimized now, let's use this also in gen_estimator_active(). The return type of gen_estimator_active() is changed to bool, and gen_find_node() parameters to const, btw. In tcf_act_police_locate() a check for ACT_P_CREATED is added before calling gen_estimator_active(). Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/gen_stats.h | 4 ++-- net/core/gen_estimator.c | 25 ++++++++----------------- net/sched/act_police.c | 4 +++- 3 files changed, 13 insertions(+), 20 deletions(-) (limited to 'net/core') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index dcf5bfa7d4f1..d136b5240ef2 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -45,6 +45,6 @@ extern void gen_kill_estimator(struct gnet_stats_basic *bstats, extern int gen_replace_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); -extern int gen_estimator_active(const struct gnet_stats_rate_est *rate_est); - +extern bool gen_estimator_active(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est); #endif diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 3885550f0187..9cc9f95b109e 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -163,8 +163,9 @@ static void gen_add_node(struct gen_estimator *est) rb_insert_color(&est->node, &est_root); } -static struct gen_estimator *gen_find_node(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est) +static +struct gen_estimator 
*gen_find_node(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est) { struct rb_node *p = est_root.rb_node; @@ -301,26 +302,16 @@ EXPORT_SYMBOL(gen_replace_estimator); /** * gen_estimator_active - test if estimator is currently in use + * @bstats: basic statistics * @rate_est: rate estimator statistics * - * Returns 1 if estimator is active, and 0 if not. + * Returns true if estimator is active, and false if not. */ -int gen_estimator_active(const struct gnet_stats_rate_est *rate_est) +bool gen_estimator_active(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est) { - int idx; - struct gen_estimator *e; - ASSERT_RTNL(); - for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - if (!elist[idx].timer.function) - continue; - - list_for_each_entry(e, &elist[idx].list, list) { - if (e->rate_est == rate_est) - return 1; - } - } - return 0; + return gen_find_node(bstats, rate_est) != NULL; } EXPORT_SYMBOL(gen_estimator_active); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index c39f60cea6ee..5c72a116b1a4 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -183,7 +183,9 @@ override: if (R_tab == NULL) goto failure; - if (!est && !gen_estimator_active(&police->tcf_rate_est)) { + if (!est && (ret == ACT_P_CREATED || + !gen_estimator_active(&police->tcf_bstats, + &police->tcf_rate_est))) { err = -EINVAL; goto failure; } -- cgit v1.2.3 From b74ca3a896b9ab5f952bc440154758e708c48884 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Mon, 8 Dec 2008 01:14:16 -0800 Subject: netdevice: Kill netdev->priv This is the last shoot of this series. After I removing all directly reference of netdev->priv, I am killing "priv" of "struct net_device" and fixing relative comments/docs. Anyone will not be allowed to reference netdev->priv directly. If you want to reference the memory of private data, use netdev_priv() instead. 
If the private data is not allocted when alloc_netdev(), use netdev->ml_priv to point that memory after you creating that private data. Signed-off-by: Wang Chen Signed-off-by: David S. Miller --- Documentation/networking/driver.txt | 2 +- Documentation/networking/netdevices.txt | 2 +- drivers/net/3c501.h | 2 +- drivers/net/atp.c | 2 +- drivers/net/eexpress.c | 2 +- drivers/net/forcedeth.c | 4 ++-- drivers/net/lance.c | 2 +- drivers/net/myri_sbus.c | 2 +- drivers/net/pci-skeleton.c | 2 +- drivers/net/sun3_82586.c | 2 +- drivers/net/sunbmac.c | 2 +- drivers/net/tokenring/3c359.c | 5 +++-- drivers/net/via-rhine.c | 9 +++++---- drivers/net/wireless/strip.c | 2 +- include/linux/hdlc.h | 2 +- include/linux/netdevice.h | 1 - net/atm/mpc.c | 4 ++-- net/core/dev.c | 6 ------ 18 files changed, 24 insertions(+), 29 deletions(-) (limited to 'net/core') diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt index ea72d2e66ca8..03283daa64fe 100644 --- a/Documentation/networking/driver.txt +++ b/Documentation/networking/driver.txt @@ -13,7 +13,7 @@ Transmit path guidelines: static int drv_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct drv *dp = dev->priv; + struct drv *dp = netdev_priv(dev); lock_tx(dp); ... diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index d0f71fc7f782..a2ab6a0b116d 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -18,7 +18,7 @@ There are routines in net_init.c to handle the common cases of alloc_etherdev, alloc_netdev. These reserve extra space for driver private data which gets freed when the network device is freed. If separately allocated data is attached to the network device -(dev->priv) then it is up to the module exit handler to free that. +(netdev_priv(dev)) then it is up to the module exit handler to free that. 
MTU === diff --git a/drivers/net/3c501.h b/drivers/net/3c501.h index cfec64efff78..f40b0493337a 100644 --- a/drivers/net/3c501.h +++ b/drivers/net/3c501.h @@ -23,7 +23,7 @@ static const struct ethtool_ops netdev_ethtool_ops; static int el_debug = EL_DEBUG; /* - * Board-specific info in dev->priv. + * Board-specific info in netdev_priv(dev). */ struct net_local diff --git a/drivers/net/atp.c b/drivers/net/atp.c index 7028b276dfd3..1d6b74c5d6c9 100644 --- a/drivers/net/atp.c +++ b/drivers/net/atp.c @@ -420,7 +420,7 @@ static unsigned short __init eeprom_op(long ioaddr, u32 cmd) registers that "should" only need to be set once at boot, so that there is non-reboot way to recover if something goes wrong. - This is an attachable device: if there is no dev->priv entry then it wasn't + This is an attachable device: if there is no private entry then it wasn't probed for at boot-time, and we need to probe for it again. */ static int net_open(struct net_device *dev) diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index a125e41240f5..9ff3f2f5e382 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -1046,7 +1046,7 @@ static void eexp_hw_tx_pio(struct net_device *dev, unsigned short *buf, /* * Sanity check the suspected EtherExpress card * Read hardware address, reset card, size memory and initialize buffer - * memory pointers. These are held in dev->priv, in case someone has more + * memory pointers. These are held in netdev_priv(), in case someone has more * than one card in a machine. 
*/ diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 12384df8cb2b..1f2b24743ee9 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -712,12 +712,12 @@ struct nv_skb_map { /* * SMP locking: - * All hardware access under dev->priv->lock, except the performance + * All hardware access under netdev_priv(dev)->lock, except the performance * critical parts: * - rx is (pseudo-) lockless: it relies on the single-threading provided * by the arch code for interrupts. * - tx setup is lockless: it relies on netif_tx_lock. Actual submission - * needs dev->priv->lock :-( + * needs netdev_priv(dev)->lock :-( * - set_multicast_list: preparation lockless, relies on netif_tx_lock. */ diff --git a/drivers/net/lance.c b/drivers/net/lance.c index e81b6113ed94..d7afb938ea62 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -519,7 +519,7 @@ static int __init lance_probe1(struct net_device *dev, int ioaddr, int irq, int } } - /* We can't allocate dev->priv from alloc_etherdev() because it must + /* We can't allocate private data from alloc_etherdev() because it must a ISA DMA-able region. 
*/ chipname = chip_table[lance_version].name; printk("%s: %s at %#3x, ", dev->name, chipname, ioaddr); diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c index 6833f65f8aec..899ed065a147 100644 --- a/drivers/net/myri_sbus.c +++ b/drivers/net/myri_sbus.c @@ -1091,7 +1091,7 @@ static int __devinit myri_sbus_probe(struct of_device *op, const struct of_devic err_free_irq: free_irq(dev->irq, dev); err: - /* This will also free the co-allocated 'dev->priv' */ + /* This will also free the co-allocated private data*/ free_netdev(dev); return -ENODEV; } diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c index b23b5c397b1d..c95fd72c3bb9 100644 --- a/drivers/net/pci-skeleton.c +++ b/drivers/net/pci-skeleton.c @@ -781,7 +781,7 @@ static int __devinit netdrv_init_one (struct pci_dev *pdev, dev->irq = pdev->irq; dev->base_addr = (unsigned long) ioaddr; - /* dev->priv/tp zeroed and aligned in alloc_etherdev */ + /* netdev_priv()/tp zeroed and aligned in alloc_etherdev */ tp = netdev_priv(dev); /* note: tp->chipset set in netdrv_init_board */ diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c index e8f97d5c9c23..e0d84772771c 100644 --- a/drivers/net/sun3_82586.c +++ b/drivers/net/sun3_82586.c @@ -209,7 +209,7 @@ static int sun3_82586_open(struct net_device *dev) static int check586(struct net_device *dev,char *where,unsigned size) { struct priv pb; - struct priv *p = /* (struct priv *) dev->priv*/ &pb; + struct priv *p = &pb; char *iscp_addr; int i; diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c index 977b3e08bbfc..7f69c7f176c4 100644 --- a/drivers/net/sunbmac.c +++ b/drivers/net/sunbmac.c @@ -1233,7 +1233,7 @@ fail_and_cleanup: bp->bmac_block, bp->bblock_dvma); - /* This also frees the co-located 'dev->priv' */ + /* This also frees the co-located private data */ free_netdev(dev); return -ENODEV; } diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c index e7a944657cf8..43853e3b210e 100644 --- 
a/drivers/net/tokenring/3c359.c +++ b/drivers/net/tokenring/3c359.c @@ -296,8 +296,9 @@ static int __devinit xl_probe(struct pci_dev *pdev, } ; /* - * Allowing init_trdev to allocate the dev->priv structure will align xl_private - * on a 32 bytes boundary which we need for the rx/tx descriptors + * Allowing init_trdev to allocate the private data will align + * xl_private on a 32 bytes boundary which we need for the rx/tx + * descriptors */ dev = alloc_trdev(sizeof(struct xl_private)) ; diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index 93b74b7b7077..8d405c83df8b 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -191,12 +191,13 @@ IIId. Synchronization The driver runs as two independent, single-threaded flows of control. One is the send-packet routine, which enforces single-threaded use by the -dev->priv->lock spinlock. The other thread is the interrupt handler, which -is single threaded by the hardware and interrupt handling software. +netdev_priv(dev)->lock spinlock. The other thread is the interrupt handler, +which is single threaded by the hardware and interrupt handling software. The send packet thread has partial control over the Tx ring. It locks the -dev->priv->lock whenever it's queuing a Tx packet. If the next slot in the ring -is not available it stops the transmit queue by calling netif_stop_queue. +netdev_priv(dev)->lock whenever it's queuing a Tx packet. If the next slot in +the ring is not available it stops the transmit queue by +calling netif_stop_queue. The interrupt handler has exclusive control over the Rx ring and records stats from the Tx ring. 
After reaping the stats, it marks the Tx queue entry as diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 692e6c5e009a..dd0de3a9ed4e 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -2494,7 +2494,7 @@ static void strip_dev_setup(struct net_device *dev) dev->type = ARPHRD_METRICOM; /* dtang */ dev->hard_header_len = sizeof(STRIP_Header); /* - * dev->priv Already holds a pointer to our struct strip + * netdev_priv(dev) Already holds a pointer to our struct strip */ *(MetricomAddress *) & dev->broadcast = broadcast_address; diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e960faac609d..fd47a151665e 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -43,7 +43,7 @@ struct hdlc_proto { }; -/* Pointed to by dev->priv */ +/* Pointed to by netdev_priv(dev) */ typedef struct hdlc_device { /* used by HDLC layer to take control over HDLC device from hw driver*/ int (*attach)(struct net_device *dev, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0df0db068ac3..47e731528315 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -785,7 +785,6 @@ struct net_device /* * One part is mostly used on xmit path (device) */ - void *priv; /* pointer to private data */ /* These may be needed for future network-power-down code. */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 12e9ea371db1..039d5cc72c3d 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -341,8 +341,8 @@ static const char *mpoa_device_type_string(char type) } /* - * lec device calls this via its dev->priv->lane2_ops->associate_indicator() - * when it sees a TLV in LE_ARP packet. + * lec device calls this via its netdev_priv(dev)->lane2_ops + * ->associate_indicator() when it sees a TLV in LE_ARP packet. 
* We fill in the pointer above when we see a LANE2 lec initializing * See LANE2 spec 3.1.5 * diff --git a/net/core/dev.c b/net/core/dev.c index 4615e9a443aa..f54cac76438a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4378,12 +4378,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; - if (sizeof_priv) { - dev->priv = ((char *)dev + - ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST)); - } - dev->gso_max_size = GSO_MAX_SIZE; netdev_init_queues(dev); -- cgit v1.2.3 From 7b363e440021a1cf9ed76944b2685f48dacefb3e Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 9 Dec 2008 23:22:26 -0800 Subject: netpoll: fix race on poll_list resulting in garbage entry A few months back a race was discused between the netpoll napi service path, and the fast path through net_rx_action: http://kerneltrap.org/mailarchive/linux-netdev/2007/10/16/345470 A patch was submitted for that bug, but I think we missed a case. Consider the following scenario: INITIAL STATE CPU0 has one napi_struct A on its poll_list CPU1 is calling netpoll_send_skb and needs to call poll_napi on the same napi_struct A that CPU0 has on its list CPU0 CPU1 net_rx_action poll_napi !list_empty (returns true) locks poll_lock for A poll_one_napi napi->poll netif_rx_complete __napi_complete (removes A from poll_list) list_entry(list->next) In the above scenario, net_rx_action assumes that the per-cpu poll_list is exclusive to that cpu. netpoll of course violates that, and because the netpoll path can dequeue from the poll list, its possible for CPU0 to detect a non-empty list at the top of the while loop in net_rx_action, but have it become empty by the time it calls list_entry. Since the poll_list isn't surrounded by any other structure, the returned data from that list_entry call in this situation is garbage, and any number of crashes can result based on what exactly that garbage is. 
Given that it's not feasible for performance reasons to place exclusive locks around each cpu's poll list to provide that mutual exclusion, I think the best solution is to modify the netpoll path in such a way that we continue to guarantee that the poll_list for a cpu is in fact exclusive to that cpu. To do this I've implemented the patch below. It adds an additional bit to the state field in the napi_struct. When executing napi->poll from the netpoll path, this bit will be set. When a driver calls netif_rx_complete, if that bit is set, it will not remove the napi_struct from the poll_list. That work will be saved for the next iteration of net_rx_action. I've tested this and it seems to work well. About the biggest drawback I can see to it is the fact that it might result in an extra loop through net_rx_action in the event that the device is actually contended for (i.e. the netpoll path actually performs all the needed work on the device, and the call to net_rx_action winds up doing nothing, except removing the napi_struct from the poll_list). However I think this is probably a small price to pay, given that the alternative is a crash. Signed-off-by: Neil Horman Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 7 +++++++ net/core/netpoll.c | 2 ++ 2 files changed, 9 insertions(+) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d77b1d7dca8..e26f54952892 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -319,6 +319,7 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ + NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ }; extern void __napi_schedule(struct napi_struct *n); @@ -1497,6 +1498,12 @@ static inline void netif_rx_complete(struct net_device *dev, { unsigned long flags; + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) + return; local_irq_save(flags); __netif_rx_complete(dev, napi); local_irq_restore(flags); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6c7af390be0a..dadac6281f20 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -133,9 +133,11 @@ static int poll_one_napi(struct netpoll_info *npinfo, npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); + set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + clear_bit(NAPI_STATE_NPSVC, &napi->state); atomic_dec(&trapped); npinfo->rx_flags &= ~NETPOLL_RX_DROP; -- cgit v1.2.3 From 89319d3801d1d3ac29c7df1f067038986f267d29 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:26:06 -0800 Subject: net: Add frag_list support to skb_segment This patch adds limited support for handling frag_list packets in skb_segment. The intention is to support GRO (Generic Receive Offload) packets which will be constructed by chaining normal packets using frag_list. As such we require all frag_list members terminate on exact MSS boundaries. This is checked using BUG_ON. 
As there should only be one producer in the kernel of such packets, namely GRO, this requirement should not be difficult to maintain. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 73 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 14 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b1f628741f4c..18e224af05a6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2428,6 +2428,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; + struct sk_buff *fskb = skb_shinfo(skb)->frag_list; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2447,7 +2448,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) struct sk_buff *nskb; skb_frag_t *frag; int hsize; - int k; int size; len = skb->len - offset; @@ -2460,9 +2460,36 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) if (hsize > len || !sg) hsize = len; - nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC); - if (unlikely(!nskb)) - goto err; + if (!hsize && i >= nfrags) { + BUG_ON(fskb->len != len); + + pos += len; + nskb = skb_clone(fskb, GFP_ATOMIC); + fskb = fskb->next; + + if (unlikely(!nskb)) + goto err; + + hsize = skb_end_pointer(nskb) - nskb->head; + if (skb_cow_head(nskb, doffset + headroom)) { + kfree_skb(nskb); + goto err; + } + + nskb->truesize += skb_end_pointer(nskb) - nskb->head - + hsize; + skb_release_head_state(nskb); + __skb_push(nskb, doffset); + } else { + nskb = alloc_skb(hsize + doffset + headroom, + GFP_ATOMIC); + + if (unlikely(!nskb)) + goto err; + + skb_reserve(nskb, headroom); + __skb_put(nskb, doffset); + } if (segs) tail->next = nskb; @@ -2473,13 +2500,15 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) __copy_skb_header(nskb, skb); nskb->mac_len = skb->mac_len; - 
skb_reserve(nskb, headroom); skb_reset_mac_header(nskb); skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, skb_put(nskb, doffset), - doffset); + skb_copy_from_linear_data(skb, nskb->data, doffset); + + if (pos >= offset + len) + continue; + if (!sg) { nskb->ip_summed = CHECKSUM_NONE; nskb->csum = skb_copy_and_csum_bits(skb, offset, @@ -2489,14 +2518,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) } frag = skb_shinfo(nskb)->frags; - k = 0; skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); - while (pos < offset + len) { - BUG_ON(i >= nfrags); - + while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; get_page(frag->page); size = frag->size; @@ -2506,20 +2532,39 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) frag->size -= offset - pos; } - k++; + skb_shinfo(nskb)->nr_frags++; if (pos + size <= offset + len) { i++; pos += size; } else { frag->size -= pos + size - (offset + len); - break; + goto skip_fraglist; } frag++; } - skb_shinfo(nskb)->nr_frags = k; + if (pos < offset + len) { + struct sk_buff *fskb2 = fskb; + + BUG_ON(pos + fskb->len != offset + len); + + pos += fskb->len; + fskb = fskb->next; + + if (fskb2->next) { + fskb2 = skb_clone(fskb2, GFP_ATOMIC); + if (!fskb2) + goto err; + } else + skb_get(fskb2); + + BUG_ON(skb_shinfo(nskb)->frag_list); + skb_shinfo(nskb)->frag_list = fskb2; + } + +skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; -- cgit v1.2.3 From 1a881f27c50b4fbd6858a8696a189263621136b0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:27:47 -0800 Subject: net: Add frag_list support to GSO This patch allows GSO to handle frag_list in a limited way for the purposes of allowing packets merged by GRO to be refragmented on output. 
Most hardware won't (and aren't expected to) support handling GRO frag_list packets directly. Therefore we will perform GSO in software for those cases. However, for drivers that can support it (such as virtual NICs) we may not have to segment the packets at all. Whether the added overhead of GRO/GSO is worthwhile for bridges and routers when weighed against the benefit of potentially increasing the MTU within the host is still an open question. However, for the case of host nodes this is undoubtedly a win. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b60c26b7d31c..bdf5465deb91 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1858,6 +1858,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || + (skb_shinfo(skb)->frag_list && + !(dev->features & NETIF_F_FRAGLIST)) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } diff --git a/net/core/dev.c b/net/core/dev.c index f54cac76438a..e415f0b0d0d0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1533,8 +1533,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) __be16 type = skb->protocol; int err; - BUG_ON(skb_shinfo(skb)->frag_list); - skb_reset_mac_header(skb); skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); -- cgit v1.2.3 From d565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:38:52 -0800 Subject: net: Add Generic Receive Offload infrastructure This patch adds the top-level GRO (Generic Receive Offload) infrastructure. This is pretty similar to LRO except that this is protocol-independent. Instead of holding packets in an lro_mgr structure, they're now held in napi_struct. 
For drivers that intend to use this, they can set the NETIF_F_GRO bit and call napi_gro_receive instead of netif_receive_skb or just call netif_rx. The latter will call napi_receive_skb automatically. When napi_gro_receive is used, the driver must either call napi_complete/napi_rx_complete, or call napi_gro_flush in softirq context if the driver uses the primitives __napi_complete/__napi_rx_complete. Protocols will set the gro_receive and gro_complete function pointers in order to participate in this scheme. In addition to the packet, gro_receive will get a list of currently held packets. Each packet in the list has a same_flow field which is non-zero if it is a potential match for the new packet. For each packet that may match, they also have a flush field which is non-zero if the held packet must not be merged with the new packet. Once gro_receive has determined that the new skb matches a held packet, the held packet may be processed immediately if the new skb cannot be merged with it. In this case gro_receive should return the pointer to the existing skb in gro_list. Otherwise the new skb should be merged into the existing packet and NULL should be returned, unless the new skb makes it impossible for any further merges to be made (e.g., FIN packet) where the merged skb should be returned. Whenever the skb is merged into an existing entry, the gro_receive function should set NAPI_GRO_CB(skb)->same_flow. Note that if an skb merely matches an existing entry but can't be merged with it, then this shouldn't be set. If gro_receive finds it pointless to hold the new skb for future merging, it should set NAPI_GRO_CB(skb)->flush. Held packets will be flushed by napi_gro_flush which is called by napi_complete and napi_rx_complete. Currently held packets are stored in a singly linked list just like LRO. The list is limited to a maximum of 8 entries. In future, this may be expanded to use a hash table to allow more flows to be held for merging.
Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 80 +++++++------------ include/linux/netpoll.h | 5 -- net/core/dev.c | 193 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 219 insertions(+), 59 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bdf5465deb91..58856b6737fb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,8 +314,9 @@ struct napi_struct { spinlock_t poll_lock; int poll_owner; struct net_device *dev; - struct list_head dev_list; #endif + struct list_head dev_list; + struct sk_buff *gro_list; }; enum @@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi) * * Mark NAPI processing as complete. */ -static inline void __napi_complete(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - list_del(&n->poll_list); - smp_mb__before_clear_bit(); - clear_bit(NAPI_STATE_SCHED, &n->state); -} - -static inline void napi_complete(struct napi_struct *n) -{ - unsigned long flags; - - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); -} +extern void __napi_complete(struct napi_struct *n); +extern void napi_complete(struct napi_struct *n); /** * napi_disable - prevent NAPI from scheduling @@ -640,9 +627,7 @@ struct net_device unsigned long state; struct list_head dev_list; -#ifdef CONFIG_NETPOLL struct list_head napi_list; -#endif /* Net device features */ unsigned long features; @@ -661,6 +646,7 @@ struct net_device #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. 
Please */ /* do not use LLTX in new drivers */ #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ +#define NETIF_F_GRO 16384 /* Generic receive offload */ #define NETIF_F_LRO 32768 /* large receive offload */ /* Segmentation offload features */ @@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev) * netif_napi_add() must be used to initialize a napi context prior to calling * *any* of the other napi related functions. */ -static inline void netif_napi_add(struct net_device *dev, - struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), - int weight) -{ - INIT_LIST_HEAD(&napi->poll_list); - napi->poll = poll; - napi->weight = weight; -#ifdef CONFIG_NETPOLL - napi->dev = dev; - list_add(&napi->dev_list, &dev->napi_list); - spin_lock_init(&napi->poll_lock); - napi->poll_owner = -1; -#endif - set_bit(NAPI_STATE_SCHED, &napi->state); -} +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight); /** * netif_napi_del - remove a napi context @@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev, * * netif_napi_del() removes a napi context from the network device napi list */ -static inline void netif_napi_del(struct napi_struct *napi) -{ -#ifdef CONFIG_NETPOLL - list_del(&napi->dev_list); -#endif -} +void netif_napi_del(struct napi_struct *napi); + +struct napi_gro_cb { + /* This is non-zero if the packet may be of the same flow. */ + int same_flow; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + int flush; + + /* Number of segments aggregated. */ + int count; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) struct packet_type { __be16 type; /* This is really htons(ether_type). 
*/ @@ -1024,6 +1004,9 @@ struct packet_type { struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); int (*gso_send_check)(struct sk_buff *skb); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); void *af_packet_priv; struct list_head list; }; @@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); +extern int napi_gro_receive(struct napi_struct *napi, + struct sk_buff *skb); extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); @@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev, static inline void netif_rx_complete(struct net_device *dev, struct napi_struct *napi) { - unsigned long flags; - - /* - * don't let napi dequeue from the cpu poll list - * just in case its running on a different cpu - */ - if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) - return; - local_irq_save(flags); - __netif_rx_complete(dev, napi); - local_irq_restore(flags); + napi_complete(napi); } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e3d79593fb3a..e38d3c9dccda 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have) rcu_read_unlock(); } -static inline void netpoll_netdev_init(struct net_device *dev) -{ - INIT_LIST_HEAD(&dev->napi_list); -} - #else static inline int netpoll_rx(struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index e415f0b0d0d0..d8d7d1fccde4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,9 @@ #include "net-sysfs.h" +/* Instead of increasing 
this, you should create a hash table. */ +#define MAX_GRO_SKBS 8 + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg) } } +static int napi_gro_complete(struct sk_buff *skb) +{ + struct packet_type *ptype; + __be16 type = skb->protocol; + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + int err = -ENOENT; + + if (!skb_shinfo(skb)->frag_list) + goto out; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + if (ptype->type != type || ptype->dev || !ptype->gro_complete) + continue; + + err = ptype->gro_complete(skb); + break; + } + rcu_read_unlock(); + + if (err) { + WARN_ON(&ptype->list == head); + kfree_skb(skb); + return NET_RX_SUCCESS; + } + +out: + __skb_push(skb, -skb_network_offset(skb)); + return netif_receive_skb(skb); +} + +void napi_gro_flush(struct napi_struct *napi) +{ + struct sk_buff *skb, *next; + + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + napi_gro_complete(skb); + } + + napi->gro_list = NULL; +} +EXPORT_SYMBOL(napi_gro_flush); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct packet_type *ptype; + __be16 type = skb->protocol; + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + int count = 0; + int mac_len; + + if (!(skb->dev->features & NETIF_F_GRO)) + goto normal; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + struct sk_buff *p; + + if (ptype->type != type || ptype->dev || !ptype->gro_receive) + continue; + + skb_reset_network_header(skb); + mac_len = skb->network_header - skb->mac_header; + skb->mac_len = mac_len; + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 0; + + for (p = napi->gro_list; p; p = p->next) { + count++; + NAPI_GRO_CB(p)->same_flow = + p->mac_len == mac_len && + !memcmp(skb_mac_header(p), skb_mac_header(skb), + mac_len); 
+ NAPI_GRO_CB(p)->flush = 0; + } + + pp = ptype->gro_receive(&napi->gro_list, skb); + break; + } + rcu_read_unlock(); + + if (&ptype->list == head) + goto normal; + + if (pp) { + struct sk_buff *nskb = *pp; + + *pp = nskb->next; + nskb->next = NULL; + napi_gro_complete(nskb); + count--; + } + + if (NAPI_GRO_CB(skb)->same_flow) + goto ok; + + if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { + __skb_push(skb, -skb_network_offset(skb)); + goto normal; + } + + NAPI_GRO_CB(skb)->count = 1; + skb->next = napi->gro_list; + napi->gro_list = skb; + +ok: + return NET_RX_SUCCESS; + +normal: + return netif_receive_skb(skb); +} +EXPORT_SYMBOL(napi_gro_receive); + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; @@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota) } local_irq_enable(); - netif_receive_skb(skb); + napi_gro_receive(napi, skb); } while (++work < quota && jiffies == start_time); + napi_gro_flush(napi); + return work; } @@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +void __napi_complete(struct napi_struct *n) +{ + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); + BUG_ON(n->gro_list); + + list_del(&n->poll_list); + smp_mb__before_clear_bit(); + clear_bit(NAPI_STATE_SCHED, &n->state); +} +EXPORT_SYMBOL(__napi_complete); + +void napi_complete(struct napi_struct *n) +{ + unsigned long flags; + + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) + return; + + napi_gro_flush(n); + local_irq_save(flags); + __napi_complete(n); + local_irq_restore(flags); +} +EXPORT_SYMBOL(napi_complete); + +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + INIT_LIST_HEAD(&napi->poll_list); + napi->gro_list = NULL; + napi->poll = poll; + napi->weight = weight; + 
list_add(&napi->dev_list, &dev->napi_list); +#ifdef CONFIG_NETPOLL + napi->dev = dev; + spin_lock_init(&napi->poll_lock); + napi->poll_owner = -1; +#endif + set_bit(NAPI_STATE_SCHED, &napi->state); +} +EXPORT_SYMBOL(netif_napi_add); + +void netif_napi_del(struct napi_struct *napi) +{ + struct sk_buff *skb, *next; + + list_del(&napi->dev_list); + + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + kfree_skb(skb); + } + + napi->gro_list = NULL; +} +EXPORT_SYMBOL(netif_napi_del); + static void net_rx_action(struct softirq_action *h) { @@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); - netpoll_netdev_init(dev); + INIT_LIST_HEAD(&dev->napi_list); setup(dev); strcpy(dev->name, name); return dev; @@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { + struct napi_struct *p, *n; + release_net(dev_net(dev)); kfree(dev->_tx); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) + netif_napi_del(p); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4949,6 +5137,7 @@ static int __init net_dev_init(void) queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; + queue->backlog.gro_list = NULL; } dev_boot_phase = 0; -- cgit v1.2.3 From 71d93b39e52e92aea35f1058d957cf12250d0b75 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:42:33 -0800 Subject: net: Add skb_gro_receive This patch adds the helper skb_gro_receive to merge packets for GRO. The current method is to allocate a new header skb and then chain the original packets to its frag_list. This is done to make it easier to integrate into the existing GSO framework. In future as GSO is moved into the drivers, we can undo this and simply chain the original packets together. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 ++ net/core/skbuff.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acf17af45af9..cf2cb50f77d1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1687,6 +1687,8 @@ extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); +extern int skb_gro_receive(struct sk_buff **head, + struct sk_buff *skb); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 18e224af05a6..b8d0abb26433 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2582,6 +2582,65 @@ err: EXPORT_SYMBOL_GPL(skb_segment); +int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff *p = *head; + struct sk_buff *nskb; + unsigned int headroom; + unsigned int hlen = p->data - skb_mac_header(p); + + if (hlen + p->len + skb->len >= 65536) + return -E2BIG; + + if (skb_shinfo(p)->frag_list) + goto merge; + + headroom = skb_headroom(p); + nskb = netdev_alloc_skb(p->dev, headroom); + if (unlikely(!nskb)) + return -ENOMEM; + + __copy_skb_header(nskb, p); + nskb->mac_len = p->mac_len; + + skb_reserve(nskb, headroom); + + skb_set_mac_header(nskb, -hlen); + skb_set_network_header(nskb, skb_network_offset(p)); + skb_set_transport_header(nskb, skb_transport_offset(p)); + + memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); + + *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); + skb_shinfo(nskb)->frag_list = p; + skb_header_release(p); + nskb->prev = p; + + nskb->data_len += p->len; + nskb->truesize += p->len; + nskb->len += p->len; + + *head = nskb; + nskb->next = p->next; + p->next = NULL; + + p = nskb; + +merge: + NAPI_GRO_CB(p)->count++; + p->prev->next = skb; + p->prev = skb; + skb_header_release(skb); + + 
p->data_len += skb->len; + p->truesize += skb->len; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + return 0; +} +EXPORT_SYMBOL_GPL(skb_gro_receive); + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", -- cgit v1.2.3 From b240a0e5644eb817c4a397098a40e1ad42a615bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:44:31 -0800 Subject: ethtool: Add GGRO and SGRO ops This patch adds the ethtool ops to enable and disable GRO. It also makes GRO depend on RX checksum offload much the same as how TSO depends on SG support. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ net/core/ethtool.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b4b038b89ee6..27c67a542235 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -467,6 +467,8 @@ struct ethtool_ops { #define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */ #define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */ +#define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */ +#define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 14ada537f895..947710a36ced 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -528,6 +528,22 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } +static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (!dev->ethtool_ops->set_rx_csum) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + if (!edata.data && dev->ethtool_ops->set_sg) + dev->features 
&= ~NETIF_F_GRO; + + return dev->ethtool_ops->set_rx_csum(dev, edata.data); +} + static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -599,6 +615,34 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) return 0; } +static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GGRO }; + + edata.data = dev->features & NETIF_F_GRO; + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + if (edata.data) { + if (!dev->ethtool_ops->get_rx_csum || + !dev->ethtool_ops->get_rx_csum(dev)) + return -EINVAL; + dev->features |= NETIF_F_GRO; + } else + dev->features &= ~NETIF_F_GRO; + + return 0; +} + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -932,8 +976,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) dev->ethtool_ops->get_rx_csum); break; case ETHTOOL_SRXCSUM: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_rx_csum); + rc = ethtool_set_rx_csum(dev, useraddr); break; case ETHTOOL_GTXCSUM: rc = ethtool_get_value(dev, useraddr, ethcmd, @@ -1014,6 +1057,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFH: rc = ethtool_set_rxhash(dev, useraddr); break; + case ETHTOOL_GGRO: + rc = ethtool_get_gro(dev, useraddr); + break; + case ETHTOOL_SGRO: + rc = ethtool_set_gro(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From 2d91d78b68606ff7ce52ea70e187dee7831aa2f6 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 17 Dec 2008 15:47:29 -0800 Subject: Phonet: allocate a non-Ethernet ARP type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also leave some room for 
more 802.11 types. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 2 ++ net/core/dev.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 4d3401812e6c..11df77ab2dbb 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -87,6 +87,8 @@ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ +#define ARPHRD_PHONET 820 /* PhoNet media type */ + #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/net/core/dev.c b/net/core/dev.c index d8d7d1fccde4..15aab0c46d6d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -283,8 +283,8 @@ static const unsigned short netdev_lock_type[] = ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, - ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, - ARPHRD_NONE}; + ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, + ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -300,8 +300,8 @@ static const char *netdev_lock_name[] = "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", - "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", - "_xmit_NONE"}; + "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", + "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; -- cgit v1.2.3 From 57c81fffc863fb4c1804bc963bcbfb82d736c6df Mon Sep 17 
00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 17 Dec 2008 15:47:48 -0800 Subject: Phonet: allocate separate ARP type for GPRS over a Phonet pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A separate xmit lock class supports GPRS over a Phonet pipe over a TUN device (type ARPHRD_NONE). Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 1 + net/core/dev.c | 4 ++-- net/phonet/pep-gprs.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 11df77ab2dbb..5ff89809a581 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -88,6 +88,7 @@ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ #define ARPHRD_PHONET 820 /* PhoNet media type */ +#define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/net/core/dev.c b/net/core/dev.c index 15aab0c46d6d..048cf1197872 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -284,7 +284,7 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_VOID, ARPHRD_NONE}; + ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -301,7 +301,7 @@ static const char *netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_VOID", "_xmit_NONE"}; + "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static 
struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 0b640b0fce0c..a2873203dff2 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -260,7 +260,7 @@ static int gprs_set_mtu(struct net_device *dev, int new_mtu) static void gprs_setup(struct net_device *dev) { dev->features = NETIF_F_FRAGLIST; - dev->type = ARPHRD_NONE; + dev->type = ARPHRD_PHONET_PIPE; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = GPRS_DEFAULT_MTU; dev->hard_header_len = 0; -- cgit v1.2.3 From 49ad9599d42da4787d5b3a19263440e0fcd4d1fc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 Dec 2008 22:11:38 -0800 Subject: Revert "net: release skb->dst in sock_queue_rcv_skb()" This reverts commit 70355602879229c6f8bd694ec9c0814222bc4936. As pointed out by Mark McLoughlin IP_PKTINFO cmsg data is one post-queueing user, so this optimization is not valid right now. Signed-off-by: David S. Miller --- net/core/sock.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index ac4f0e79226b..f3a0d08cbb48 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -289,11 +289,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; skb_set_owner_r(skb, sk); - /* - * release dst right now while its hot - */ - dst_release(skb->dst); - skb->dst = NULL; + /* Cache the SKB length before we tack it onto the receive * queue. Once it is added it no longer belongs to us and * may be freed by other threads of control pulling packets -- cgit v1.2.3 From 5f2f6da76c429c42d54f73807f00b8fd761a7d68 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 22 Dec 2008 19:35:28 -0800 Subject: net: Fix oops in dev_ifsioc() A command like this: "brctl addif br1 eth1" issued as a user gave me an oops when bridge module wasn't loaded. It's caused by using a dev pointer before checking for NULL. 
Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 048cf1197872..daca72e6b37b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3745,11 +3745,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_ops *ops; if (!dev) return -ENODEV; + ops = dev->netdev_ops; + switch (cmd) { case SIOCSIFFLAGS: /* Set interface flags */ return dev_change_flags(dev, ifr->ifr_flags); -- cgit v1.2.3 From d7b06636be162d3f74c9ce5d6d0d9ea4e5d362c8 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 26 Dec 2008 01:35:35 -0800 Subject: net: Init NAPI dev_list on napi_del The recent GRO patches introduced the NAPI removal of devices in free_netdev. For drivers that can change the number of queues during driver operation, the NAPI infrastructure doesn't allow the freeing and re-addition of NAPI entities without reloading the driver. This change reinitializes the dev_list in each NAPI struct on delete, instead of just deleting it (and assigning the list pointers to POISON). Drivers that wish to remove/re-add NAPI will need to re-initialize the netdev napi_list after removing all NAPI instances, before re-adding NAPI devices again. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index daca72e6b37b..536a8ac189c8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2548,7 +2548,7 @@ void netif_napi_del(struct napi_struct *napi) { struct sk_buff *skb, *next; - list_del(&napi->dev_list); + list_del_init(&napi->dev_list); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; -- cgit v1.2.3 From 0da2afd59653d2edf5c8e0f09b23f367ab5bc80f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Dec 2008 14:57:42 -0800 Subject: gro: Fix potential use after free The initial skb may have been freed after napi_gro_complete in napi_gro_receive if it was merged into an existing packet. Thus we cannot check same_flow (which indicates whether it was merged) after calling napi_gro_complete. This patch fixes this by saving the same_flow status before the call to napi_gro_complete. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 536a8ac189c8..303e984ee6a6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2390,6 +2390,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) __be16 type = skb->protocol; struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int count = 0; + int same_flow; int mac_len; if (!(skb->dev->features & NETIF_F_GRO)) @@ -2425,6 +2426,8 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (&ptype->list == head) goto normal; + same_flow = NAPI_GRO_CB(skb)->same_flow; + if (pp) { struct sk_buff *nskb = *pp; @@ -2434,7 +2437,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) count--; } - if (NAPI_GRO_CB(skb)->same_flow) + if (same_flow) goto ok; if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { -- cgit v1.2.3 From 8eb79863962bbf18ebf648335e329bfd468432fa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 29 Dec 2008 18:21:48 -0800 Subject: netns: foreach_netdev_safe is insufficient in default_device_exit During network namespace teardown we either move or delete all of the network devices associated with a network namespace. In the case of veth devices deleting one will also delete its pair device. If both devices are in the same network namespace then for_each_netdev_safe is insufficient as next may point to the second veth device we have deleted. To avoid problems I do what we do in __rtnl_kill_links and restart the scan of the device list, after we have deleted a device. Currently dev_change_netnamespace does not appear to suffer from this problem, but wireless devices are also paired and likely should be moved between network namespaces together. So I have erred on the side of caution and restart the scan of the network devices in that case as well. Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 446424027d24..09c66a449da6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5066,13 +5066,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = { static void __net_exit default_device_exit(struct net *net) { - struct net_device *dev, *next; + struct net_device *dev; /* * Push all migratable of the network devices back to the * initial network namespace */ rtnl_lock(); - for_each_netdev_safe(net, dev, next) { +restart: + for_each_netdev(net, dev) { int err; char fb_name[IFNAMSIZ]; @@ -5083,7 +5084,7 @@ static void __net_exit default_device_exit(struct net *net) /* Delete virtual devices */ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { dev->rtnl_link_ops->dellink(dev); - continue; + goto restart; } /* Push remaing network devices to init_net */ @@ -5094,6 +5095,7 @@ static void __net_exit default_device_exit(struct net *net) __func__, dev->name, err); BUG(); } + goto restart; } rtnl_unlock(); } -- cgit v1.2.3 From 0f23174aa8c1aa7a2a6050a72a60d290ef9ee578 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 29 Dec 2008 12:23:42 +0000 Subject: cpumask: prepare for iterators to only go to nr_cpu_ids/nr_cpumask_bits: net In future all cpumask ops will only be valid (in general) for bit numbers < nr_cpu_ids. So use that instead of NR_CPUS in iterators and other comparisons. This is always safe: no cpu number can be >= nr_cpu_ids, and nr_cpu_ids is initialized to NR_CPUS at boot. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Ingo Molnar Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 4 ++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 ++-- net/ipv4/route.c | 4 ++-- net/netfilter/nf_conntrack_standalone.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9c3717a23cf7..f66c58df8953 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2414,7 +2414,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -2429,7 +2429,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct neigh_table *tbl = pde->data; int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 313ebf00ee36..6ba5c557690c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -291,7 +291,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -306,7 +306,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct net *net = seq_file_net(seq); int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 77bfba975959..97f71153584f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -429,7 +429,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) 
if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -442,7 +442,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f37b9b74c6a8..4da54b0b9233 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -200,7 +200,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; @@ -215,7 +215,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct net *net = seq_file_net(seq); int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; -- cgit v1.2.3