From 1080d709fb9d8cd4392f93476ee46a9d6ea05a5b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 27 Oct 2008 12:28:25 -0700 Subject: net: implement emergency route cache rebulds when gc_elasticity is exceeded This is a patch to provide on demand route cache rebuilding. Currently, our route cache is rebulid periodically regardless of need. This introduced unneeded periodic latency. This patch offers a better approach. Using code provided by Eric Dumazet, we compute the standard deviation of the average hash bucket chain length while running rt_check_expire. Should any given chain length grow to larger that average plus 4 standard deviations, we trigger an emergency hash table rebuild for that net namespace. This allows for the common case in which chains are well behaved and do not grow unevenly to not incur any latency at all, while those systems (which may be being maliciously attacked), only rebuild when the attack is detected. This patch take 2 other factors into account: 1) chains with multiple entries that differ by attributes that do not affect the hash value are only counted once, so as not to unduly bias system to rebuilding if features like QOS are heavily used 2) if rebuilding crosses a certain threshold (which is adjustable via the added sysctl in this patch), route caching is disabled entirely for that net namespace, since constant rebuilding is less efficient that no caching at all Tested successfully by me. Signed-off-by: Neil Horman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 132 ++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/sysctl_net_ipv4.c | 12 +++++ 2 files changed, 142 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2ea6dcc3e2cc..21ce7e1b2284 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; +static int rt_chain_length_max __read_mostly = 20; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); @@ -145,6 +146,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static int rt_garbage_collect(struct dst_ops *ops); +static void rt_emergency_hash_rebuild(struct net *net); static struct dst_ops ipv4_dst_ops = { @@ -201,6 +203,7 @@ const __u8 ip_tos2prio[16] = { struct rt_hash_bucket { struct rtable *chain; }; + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ defined(CONFIG_PROVE_LOCKING) /* @@ -674,6 +677,20 @@ static inline u32 rt_score(struct rtable *rt) return score; } +static inline bool rt_caching(const struct net *net) +{ + return net->ipv4.current_rt_cache_rebuild_count <= + net->ipv4.sysctl_rt_cache_rebuild_count; +} + +static inline bool compare_hash_inputs(const struct flowi *fl1, + const struct flowi *fl2) +{ + return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | + (fl1->iif ^ fl2->iif)) == 0); +} + static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | @@ -753,11 +770,24 @@ static void rt_do_flush(int process_context) } } +/* + * While freeing expired entries, we compute average chain length + * and standard deviation, using fixed-point arithmetic. + * This to have an estimation of rt_chain_length_max + * rt_chain_length_max = max(elasticity, AVG + 4*SD) + * We use 3 bits for frational part, and 29 (or 61) for magnitude. + */ + +#define FRACT_BITS 3 +#define ONE (1UL << FRACT_BITS) + static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; struct rtable *rth, **rthp; + unsigned long length = 0, samples = 0; + unsigned long sum = 0, sum2 = 0; u64 mult; mult = ((u64)ip_rt_gc_interval) << rt_hash_log; @@ -766,6 +796,7 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; + length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; @@ -775,6 +806,8 @@ static void rt_check_expire(void) if (need_resched()) cond_resched(); + samples++; + if (*rthp == NULL) continue; spin_lock_bh(rt_hash_lock_addr(i)); @@ -789,11 +822,29 @@ static void rt_check_expire(void) if (time_before_eq(jiffies, rth->u.dst.expires)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; + /* + * Only bump our length if the hash + * inputs on entries n and n+1 are not + * the same, we only count entries on + * a chain with equal hash inputs once + * so that entries for different QOS + * levels, and other non-hash input + * attributes don't unfairly skew + * the length computation + */ + if ((*rthp == NULL) || + !compare_hash_inputs(&(*rthp)->fl, + &rth->fl)) + length += ONE; continue; } } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; + if ((*rthp == NULL) || + !compare_hash_inputs(&(*rthp)->fl, + &rth->fl)) + length += ONE; continue; } @@ -802,6 +853,15 @@ static void rt_check_expire(void) rt_free(rth); } spin_unlock_bh(rt_hash_lock_addr(i)); + sum += length; + sum2 += length*length; + } + if (samples) { + unsigned long avg = sum / samples; + unsigned long sd = int_sqrt(sum2 / samples - avg*avg); + rt_chain_length_max = max_t(unsigned long, + ip_rt_gc_elasticity, + (avg + 4*sd) >> FRACT_BITS); } rover = i; } @@ -851,6 +911,26 @@ static void rt_secret_rebuild(unsigned long __net) mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } +static void rt_secret_rebuild_oneshot(struct net *net) +{ + del_timer_sync(&net->ipv4.rt_secret_timer); + rt_cache_invalidate(net); + if (ip_rt_secret_interval) { + net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + } +} + +static void rt_emergency_hash_rebuild(struct net *net) +{ + if (net_ratelimit()) { + printk(KERN_WARNING "Route hash chain too long!\n"); + printk(KERN_WARNING "Adjust your secret_interval!\n"); + } + + rt_secret_rebuild_oneshot(net); +} + /* Short description of GC goals. @@ -989,6 +1069,7 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; + struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1002,7 +1083,13 @@ restart: candp = NULL; now = jiffies; + if (!rt_caching(dev_net(rt->u.dst.dev))) { + rt_drop(rt); + return 0; + } + rthp = &rt_hash_table[hash].chain; + rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1048,6 +1135,17 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; + + /* + * check to see if the next entry in the chain + * contains the same hash input values as rt. If it does + * This is where we will insert into the list, instead of + * at the head. This groups entries that differ by aspects not + * relvant to the hash function together, which we use to adjust + * our chain length + */ + if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) + rthi = rth; } if (cand) { @@ -1061,6 +1159,16 @@ restart: *candp = cand->u.dst.rt_next; rt_free(cand); } + } else { + if (chain_length > rt_chain_length_max) { + struct net *net = dev_net(rt->u.dst.dev); + int num = ++net->ipv4.current_rt_cache_rebuild_count; + if (!rt_caching(dev_net(rt->u.dst.dev))) { + printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", + rt->u.dst.dev->name, num); + } + rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); + } } /* Try to bind route to arp only if it is output @@ -1098,7 +1206,11 @@ restart: } } - rt->u.dst.rt_next = rt_hash_table[hash].chain; + if (rthi) + rt->u.dst.rt_next = rthi->u.dst.rt_next; + else + rt->u.dst.rt_next = rt_hash_table[hash].chain; + #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { struct rtable *trt; @@ -1114,7 +1226,11 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + if (rthi) + rcu_assign_pointer(rthi->u.dst.rt_next, rt); + else + rcu_assign_pointer(rt_hash_table[hash].chain, rt); + spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; return 0; @@ -1217,6 +1333,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, || ipv4_is_zeronet(new_gw)) goto reject_redirect; + if (!rt_caching(net)) + goto reject_redirect; + if (!IN_DEV_SHARED_MEDIA(in_dev)) { if (!inet_addr_onlink(in_dev, new_gw, old_gw)) goto reject_redirect; @@ -2130,6 +2249,10 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct net *net; net = dev_net(dev); + + if (!rt_caching(net)) + goto skip_cache; + tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif, rt_genid(net)); @@ -2154,6 +2277,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, } rcu_read_unlock(); +skip_cache: /* Multicast recognition logic is moved from route cache to here. The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting @@ -2539,6 +2663,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, unsigned hash; struct rtable *rth; + if (!rt_caching(net)) + goto slow_output; + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); @@ -2563,6 +2690,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, } rcu_read_unlock_bh(); +slow_output: return ip_route_output_slow(net, rp, flp); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1bb10df8ce7d..0cc8d31f9ac0 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -795,6 +795,14 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rt_cache_rebuild_count", + .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, { } }; @@ -827,8 +835,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) &net->ipv4.sysctl_icmp_ratelimit; table[5].data = &net->ipv4.sysctl_icmp_ratemask; + table[6].data = + &net->ipv4.sysctl_rt_cache_rebuild_count; } + net->ipv4.sysctl_rt_cache_rebuild_count = 4; + net->ipv4.ipv4_hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); if (net->ipv4.ipv4_hdr == NULL) -- cgit v1.2.3 From def8b4faff5ca349beafbbfeb2c51f3602a6ef3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Oct 2008 13:24:06 -0700 Subject: net: reduce structures when XFRM=n ifdef out * struct sk_buff::sp (pointer) * struct dst_entry::xfrm (pointer) * struct sock::sk_policy (2 pointers) Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 ++++++++++++++- include/net/dst.h | 3 ++- include/net/sock.h | 2 ++ include/net/xfrm.h | 4 ++++ net/core/skbuff.c | 2 +- net/ipv4/icmp.c | 3 ++- net/ipv4/ip_forward.c | 2 +- net/ipv4/route.c | 2 ++ net/ipv6/icmp.c | 3 ++- net/ipv6/ip6_output.c | 2 +- security/selinux/hooks.c | 4 ++-- 11 files changed, 33 insertions(+), 9 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2725f4e5a9bf..487e34507b41 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -269,8 +269,9 @@ struct sk_buff { struct dst_entry *dst; struct rtable *rtable; }; +#ifdef CONFIG_XFRM struct sec_path *sp; - +#endif /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you @@ -1864,6 +1865,18 @@ static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_bu to->queue_mapping = from->queue_mapping; } +#ifdef CONFIG_XFRM +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return skb->sp; +} +#else +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return NULL; +} +#endif + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/include/net/dst.h b/include/net/dst.h index 8a8b71e5f3f1..f96c4ba4dd32 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -59,8 +59,9 @@ struct dst_entry struct neighbour *neighbour; struct hh_cache *hh; +#ifdef CONFIG_XFRM struct xfrm_state *xfrm; - +#endif int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); diff --git a/include/net/sock.h b/include/net/sock.h index ada50c04d09f..d6b750a25078 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -229,7 +229,9 @@ struct sock { } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; +#ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; +#endif rwlock_t sk_dst_lock; atomic_t sk_rmem_alloc; atomic_t sk_wmem_alloc; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 11c890ad8ebb..f2c5ba28a428 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -882,6 +882,7 @@ struct xfrm_dst u32 path_cookie; }; +#ifdef CONFIG_XFRM static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) { dst_release(xdst->route); @@ -894,6 +895,7 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) xdst->partner = NULL; #endif } +#endif extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); @@ -1536,9 +1538,11 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n) } #endif +#ifdef CONFIG_XFRM static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +#endif #endif /* _NET_XFRM_H */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e22e3a35359..cdfe473181af 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -489,7 +489,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET +#ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif memcpy(new->cb, old->cb, sizeof(old->cb)); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 72b2de76f1cd..e9d6ea0b49ca 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -976,9 +976,10 @@ int icmp_rcv(struct sk_buff *skb) struct net *net = dev_net(rt->u.dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 450016b89a18..df3fe50bbf0d 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -106,7 +106,7 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp) + if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 21ce7e1b2284..ffb2c5705432 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1399,7 +1399,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.path = &rt->u.dst; rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; +#ifdef CONFIG_XFRM rt->u.dst.xfrm = NULL; +#endif rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9b7d19ae5ced..508a713ac045 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -646,9 +646,10 @@ static int icmpv6_rcv(struct sk_buff *skb) int type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop_no_count; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c77db0b95e26..7d92fd97cfb9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -490,7 +490,7 @@ int ip6_forward(struct sk_buff *skb) We don't send redirects to frames decapsulated from IPsec. */ if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb->sp) { + !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3e3fde7c1d2b..aedf02b1345a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4626,7 +4626,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, * as fast and as clean as possible. */ if (selinux_compat_net || !selinux_policycap_netpeer) return selinux_ip_postroute_compat(skb, ifindex, family); - +#ifdef CONFIG_XFRM /* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec * packet transformation so allow the packet to pass without any checks * since we'll have another chance to perform access control checks @@ -4635,7 +4635,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, * is NULL, in this case go ahead and apply access control. */ if (skb->dst != NULL && skb->dst->xfrm != NULL) return NF_ACCEPT; - +#endif secmark_active = selinux_secmark_enabled(); peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); if (!secmark_active && !peerlbl_active) -- cgit v1.2.3 From 93adcc80f3288f1827baf6f821af818f6eeef7f9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Oct 2008 13:25:09 -0700 Subject: net: don't use INIT_RCU_HEAD call_rcu() will unconditionally rewrite RCU head anyway. Applies to struct neigh_parms struct neigh_table struct net struct cipso_v4_doi struct in_ifaddr struct in_device rt->u.dst Signed-off-by: Alexey Dobriyan Acked-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 -- net/core/net_namespace.c | 2 -- net/ipv4/cipso_ipv4.c | 1 - net/ipv4/devinet.c | 9 +-------- net/ipv4/route.c | 1 - 5 files changed, 1 insertion(+), 14 deletions(-) (limited to 'net/ipv4') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1dc728b38589..b337a937ea52 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1340,7 +1340,6 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); - INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); @@ -1412,7 +1411,6 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->parms.net = &init_net; #endif atomic_set(&tbl->parms.refcnt, 1); - INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f1d07b5c1e17..861b4cbf40db 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -47,7 +47,6 @@ static __net_init int setup_net(struct net *net) goto out; ng->len = INITIAL_NET_GEN_PTRS; - INIT_RCU_HEAD(&ng->rcu); rcu_assign_pointer(net->gen, ng); error = 0; @@ -446,7 +445,6 @@ int net_assign_generic(struct net *net, int id, void *data) */ ng->len = id; - INIT_RCU_HEAD(&ng->rcu); memcpy(&ng->ptr, &old_ng->ptr, old_ng->len); rcu_assign_pointer(net->gen, ng); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 490e035c6d90..4bcec7f77251 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -490,7 +490,6 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) } atomic_set(&doi_def->refcount, 1); - INIT_RCU_HEAD(&doi_def->rcu); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi) != NULL) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 56fce3ab6c55..0bff576d2918 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -112,13 +112,7 @@ static inline void devinet_sysctl_unregister(struct in_device *idev) static struct in_ifaddr *inet_alloc_ifa(void) { - struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL); - - if (ifa) { - INIT_RCU_HEAD(&ifa->rcu_head); - } - - return ifa; + return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); } static void inet_rcu_free_ifa(struct rcu_head *head) @@ -161,7 +155,6 @@ static struct in_device *inetdev_init(struct net_device *dev) in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); if (!in_dev) goto out; - INIT_RCU_HEAD(&in_dev->rcu_head); memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ffb2c5705432..e59b4dcf6778 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1386,7 +1386,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, /* Copy all the information. */ *rt = *rth; - INIT_RCU_HEAD(&rt->u.dst.rcu_head); rt->u.dst.__use = 1; atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.child = NULL; -- cgit v1.2.3 From 0c6ce78abf6e228d44c3840edb8a4ae0c1299825 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 16:09:23 -0700 Subject: net: replace uses of NIP6_FMT with %p6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/sunrpc/svc_xprt.h | 4 +-- include/net/ip_vs.h | 4 +-- include/net/netfilter/nf_conntrack_tuple.h | 6 ++--- include/net/sctp/sctp.h | 4 +-- net/ipv4/tcp_input.c | 4 +-- net/ipv4/tcp_timer.c | 4 +-- net/ipv6/addrlabel.c | 34 +++++++++----------------- net/ipv6/ah6.c | 4 +-- net/ipv6/esp6.c | 4 +-- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 4 +-- net/ipv6/ip6mr.c | 5 ++-- net/ipv6/ipcomp6.c | 4 +-- net/ipv6/ndisc.c | 7 ++---- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 5 ++-- net/ipv6/tcp_ipv6.c | 8 +++--- 17 files changed, 43 insertions(+), 62 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 6fd7b016517f..42e01c93c7ea 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -145,8 +145,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr, break; case AF_INET6: - snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", - NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), + snprintf(buf, len, "%p6, port=%u", + &((struct sockaddr_in6 *)addr)->sin6_addr, ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fe9fcf73c85e..6a6692067092 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -87,8 +87,8 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, int len; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]", - NIP6(addr->in6)) + 1; + len = snprintf(&buf[*idx], buf_len - *idx, "[%p6]", + &addr->in6) + 1; else #endif len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT, diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index a6874ba22d54..303efaf68d08 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -122,10 +122,10 @@ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) { #ifdef DEBUG - printk("tuple %p: %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", + printk("tuple %p: %u %p6 %hu -> %p6 %hu\n", t, t->dst.protonum, - NIP6(*(struct in6_addr *)t->src.u3.all), ntohs(t->src.u.all), - NIP6(*(struct in6_addr *)t->dst.u3.all), ntohs(t->dst.u.all)); + t->src.u3.all, ntohs(t->src.u.all), + t->dst.u3.all, ntohs(t->dst.u.all)); #endif } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ed71b110edf7..a84c3976e1b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -285,9 +285,9 @@ extern int sctp_debug_flag; if (sctp_debug_flag) { \ if (saddr->sa.sa_family == AF_INET6) { \ printk(KERN_DEBUG \ - lead NIP6_FMT trail, \ + lead "%p6" trail, \ leadparm, \ - NIP6(saddr->v6.sin6_addr), \ + &saddr->v6.sin6_addr, \ otherparms); \ } else { \ printk(KERN_DEBUG \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d77c0d29e239..191c06bb0f67 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2346,9 +2346,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n", + printk(KERN_DEBUG "Undo %s %p6/%u c%u l%u ss%u/%u p%u\n", msg, - NIP6(np->daddr), ntohs(inet->dport), + &np->daddr, ntohs(inet->dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6b6dff1164b9..4e6ee5205237 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -306,8 +306,8 @@ static void tcp_retransmit_timer(struct sock *sk) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", - NIP6(np->daddr), ntohs(inet->dport), + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %p6:%u/%u shrinks window %u:%u. Repaired.\n", + &np->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } #endif diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 08909039d87b..d28036659d27 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -186,10 +186,8 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", - __func__, - NIP6(*addr), type, ifindex, - label); + ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n", + __func__, addr, type, ifindex, label); return label; } @@ -203,11 +201,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex, - (unsigned int)label); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -294,12 +289,9 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex, - (unsigned int)label, - replace); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label, + replace); newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) @@ -321,10 +313,8 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *pos, *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + __func__, prefix, prefixlen, ifindex); hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && @@ -347,10 +337,8 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&ip6addrlbl_table.lock); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2ff0c8233e47..9bc43f2527ce 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -419,8 +419,8 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" NIP6_FMT "\n", - ntohl(ah->spi), NIP6(iph->daddr)); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%p6\n", + ntohl(ah->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b181b08fb761..ec4be188c341 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -367,8 +367,8 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/" NIP6_FMT "\n", - ntohl(esph->spi), NIP6(iph->daddr)); + printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%p6\n", + ntohl(esph->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6bfffec2371c..a89051468359 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); + KERN_DEBUG "hao is not an unicast addr: %p6\n", &hao->addr); goto discard; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 508a713ac045..b3fa38e40dc4 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -681,8 +681,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n", - NIP6(*saddr), NIP6(*daddr)); + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%p6 > %p6]\n", + saddr, daddr); goto discard_it; } } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 182f8a177e7f..798e6a29dd83 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,9 +297,8 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, - NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld", - NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin), + seq_printf(seq, "%p6 %p6 %-3d %8ld %8ld %8ld", + &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent, mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 4545e4306862..9566d8f73140 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -67,8 +67,8 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIP6_FMT "\n", - spi, NIP6(iph->daddr)); + printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%p6\n", + spi, &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 172438320eec..191bb0722a7c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -647,11 +647,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state & NUD_VALID)) { - ND_PRINTK1(KERN_DEBUG - "%s(): trying to ucast probe in NUD_INVALID: " - NIP6_FMT "\n", - __func__, - NIP6(*target)); + ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %p6\n", + __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); } else if ((probes -= neigh->parms->app_probes) < 0) { diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index caa441d09567..a61ce3010007 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - printk("SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr)); + printk("SRC=%p6 DST=%p6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index e91db16611d9..b165a273c6c0 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -56,9 +56,8 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=" NIP6_FMT " dst=" NIP6_FMT " ", - NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), - NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); + return seq_printf(s, "src=%p6 dst=%p6 ", + tuple->src.u3.ip6, tuple->dst.u3.ip6); } /* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b6b356b7912a..483550cfdf33 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -872,12 +872,10 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", + printk(KERN_INFO "MD5 Hash %s for (%p6, %u)->(%p6, %u)\n", genhash ? "failed" : "mismatch", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); + &ip6h->saddr, ntohs(th->source), + &ip6h->daddr, ntohs(th->dest)); } return 1; } -- cgit v1.2.3 From 645ca708f936b2fbeb79e52d7823e3eb2c0905f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 01:41:45 -0700 Subject: udp: introduce struct udp_table and multiple spinlocks UDP sockets are hashed in a 128 slots hash table. This hash table is protected by *one* rwlock. This rwlock is readlocked each time an incoming UDP message is handled. This rwlock is writelocked each time a socket must be inserted in hash table (bind time), or deleted from this table (close time) This is not scalable on SMP machines : 1) Even in read mode, lock() and unlock() are atomic operations and must dirty a contended cache line, shared by all cpus. 2) A writer might be starved if many readers are 'in flight'. This can happen on a machine with some NIC receiving many UDP messages. User process can be delayed a long time at socket creation/dismantle time. This patch prepares RCU migration, by introducing 'struct udp_table and struct udp_hslot', and using one spinlock per chain, to reduce contention on central rwlock. Introducing one spinlock per chain reduces latencies, for port randomization on heavily loaded UDP servers. This also speedup bindings to specific ports. udp_lib_unhash() was uninlined, becoming to big. Some cleanups were done to ease review of following patch (RCUification of UDP Unicast lookups) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- include/net/udp.h | 25 +++--- include/net/udplite.h | 2 +- net/ipv4/udp.c | 207 ++++++++++++++++++++++++++++++-------------------- net/ipv4/udp_impl.h | 4 +- net/ipv4/udplite.c | 13 ++-- net/ipv6/udp.c | 112 +++++++++++++++------------ net/ipv6/udp_impl.h | 4 +- net/ipv6/udplite.c | 8 +- 9 files changed, 214 insertions(+), 163 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/sock.h b/include/net/sock.h index d6b750a25078..d200dfbe1ef6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -599,7 +599,7 @@ struct proto { union { struct inet_hashinfo *hashinfo; - struct hlist_head *udp_hash; + struct udp_table *udp_table; struct raw_hashinfo *raw_hash; } h; diff --git a/include/net/udp.h b/include/net/udp.h index 1e205095ea68..df2bfe545374 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -50,8 +50,15 @@ struct udp_skb_cb { }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) -extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; -extern rwlock_t udp_hash_lock; +struct udp_hslot { + struct hlist_head head; + spinlock_t lock; +} __attribute__((aligned(2 * sizeof(long)))); +struct udp_table { + struct udp_hslot hash[UDP_HTABLE_SIZE]; +}; +extern struct udp_table udp_table; +extern void udp_table_init(struct udp_table *); /* Note: this must match 'valbool' in sock_setsockopt */ @@ -110,15 +117,7 @@ static inline void udp_lib_hash(struct sock *sk) BUG(); } -static inline void udp_lib_unhash(struct sock *sk) -{ - write_lock_bh(&udp_hash_lock); - if (sk_del_node_init(sk)) { - inet_sk(sk)->num = 0; - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - } - write_unlock_bh(&udp_hash_lock); -} +extern void udp_lib_unhash(struct sock *sk); static inline void udp_lib_close(struct sock *sk, long timeout) { @@ -187,7 +186,7 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct udp_seq_afinfo { char *name; sa_family_t family; - struct hlist_head *hashtable; + struct udp_table *udp_table; struct file_operations seq_fops; struct seq_operations seq_ops; }; @@ -196,7 +195,7 @@ struct udp_iter_state { struct seq_net_private p; sa_family_t family; int bucket; - struct hlist_head *hashtable; + struct udp_table *udp_table; }; #ifdef CONFIG_PROC_FS diff --git a/include/net/udplite.h b/include/net/udplite.h index b76b2e377af4..afdffe607b24 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -11,7 +11,7 @@ #define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ extern struct proto udplite_prot; -extern struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; +extern struct udp_table udplite_table; /* * Checksum computation is all in software, hence simpler getfrag. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2095abc3caba..2a6c491f97d7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -104,12 +104,8 @@ #include #include "udp_impl.h" -/* - * Snmp MIB for the UDP layer - */ - -struct hlist_head udp_hash[UDP_HTABLE_SIZE]; -DEFINE_RWLOCK(udp_hash_lock); +struct udp_table udp_table; +EXPORT_SYMBOL(udp_table); int sysctl_udp_mem[3] __read_mostly; int sysctl_udp_rmem_min __read_mostly; @@ -123,7 +119,7 @@ atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); static int udp_lib_lport_inuse(struct net *net, __u16 num, - const struct hlist_head udptable[], + const struct udp_hslot *hslot, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) @@ -131,7 +127,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, struct sock *sk2; struct hlist_node *node; - sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) + sk_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_hash == num && @@ -154,12 +150,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2 ) ) { - struct hlist_head *udptable = sk->sk_prot->h.udp_hash; + struct udp_hslot *hslot; + struct udp_table *udptable = sk->sk_prot->h.udp_table; int error = 1; struct net *net = sock_net(sk); - write_lock_bh(&udp_hash_lock); - if (!snum) { int low, high, remaining; unsigned rand; @@ -171,26 +166,34 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, rand = net_random(); snum = first = rand % remaining + low; rand |= 1; - while (udp_lib_lport_inuse(net, snum, udptable, sk, - saddr_comp)) { + for (;;) { + hslot = &udptable->hash[udp_hashfn(net, snum)]; + spin_lock_bh(&hslot->lock); + if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + break; + spin_unlock_bh(&hslot->lock); do { snum = snum + rand; } while (snum < low || snum > high); if (snum == first) goto fail; } - } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) - goto fail; - + } else { + hslot = &udptable->hash[udp_hashfn(net, snum)]; + spin_lock_bh(&hslot->lock); + if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + goto fail_unlock; + } inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); + sk_add_node(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; +fail_unlock: + spin_unlock_bh(&hslot->lock); fail: - write_unlock_bh(&udp_hash_lock); return error; } @@ -208,63 +211,73 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); } +static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, + unsigned short hnum, + __be16 sport, __be32 daddr, __be16 dport, int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { + struct inet_sock *inet = inet_sk(sk); + + score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + return -1; + score += 2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + return -1; + score += 2; + } + if (inet->dport) { + if (inet->dport != sport) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 2; + } + } + return score; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) + int dif, struct udp_table *udptable) { struct sock *sk, *result = NULL; struct hlist_node *node; unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && - !ipv6_only_sock(sk)) { - int score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + unsigned int hash = udp_hashfn(net, hnum); + struct udp_hslot *hslot = &udptable->hash[hash]; + int score, badness = -1; + + spin_lock(&hslot->lock); + sk_for_each(sk, node, &hslot->head) { + score = compute_score(sk, net, saddr, hnum, sport, + daddr, dport, dif); + if (score > badness) { + result = sk; + badness = score; } } if (result) sock_hold(result); - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return result; } static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; const struct iphdr *iph = ip_hdr(skb); @@ -280,7 +293,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -323,7 +336,7 @@ found: * to find the appropriate port. */ -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; @@ -392,7 +405,7 @@ out: void udp_err(struct sk_buff *skb, u32 info) { - __udp4_lib_err(skb, info, udp_hash); + __udp4_lib_err(skb, info, &udp_table); } /* @@ -933,6 +946,21 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +void udp_lib_unhash(struct sock *sk) +{ + struct udp_table *udptable = sk->sk_prot->h.udp_table; + unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); + struct udp_hslot *hslot = &udptable->hash[hash]; + + spin_lock(&hslot->lock); + if (sk_del_node_init(sk)) { + inet_sk(sk)->num = 0; + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + } + spin_unlock(&hslot->lock); +} +EXPORT_SYMBOL(udp_lib_unhash); + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int is_udplite = IS_UDPLITE(sk); @@ -1071,13 +1099,14 @@ drop: static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; + struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; int dif; - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); + spin_lock(&hslot->lock); + sk = sk_head(&hslot->head); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1102,7 +1131,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, } while (sknext); } else kfree_skb(skb); - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return 0; } @@ -1148,7 +1177,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, * All we need to do is get the socket, and then do a checksum. */ -int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], +int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; @@ -1246,7 +1275,7 @@ drop: int udp_rcv(struct sk_buff *skb) { - return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); + return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); } void udp_destroy_sock(struct sock *sk) @@ -1488,7 +1517,7 @@ struct proto udp_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), - .h.udp_hash = udp_hash, + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, @@ -1498,20 +1527,23 @@ struct proto udp_prot = { /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -static struct sock *udp_get_first(struct seq_file *seq) +static struct sock *udp_get_first(struct seq_file *seq, int start) { struct sock *sk; struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { + for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; - sk_for_each(sk, node, state->hashtable + state->bucket) { + struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; + spin_lock_bh(&hslot->lock); + sk_for_each(sk, node, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) goto found; } + spin_unlock_bh(&hslot->lock); } sk = NULL; found: @@ -1525,20 +1557,18 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) do { sk = sk_next(sk); -try_again: - ; } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); - if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { - sk = sk_head(state->hashtable + state->bucket); - goto try_again; + if (!sk) { + spin_unlock(&state->udp_table->hash[state->bucket].lock); + return udp_get_first(seq, state->bucket + 1); } return sk; } static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) { - struct sock *sk = udp_get_first(seq); + struct sock *sk = udp_get_first(seq, 0); if (sk) while (pos && (sk = udp_get_next(seq, sk)) != NULL) @@ -1547,9 +1577,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) } static void *udp_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(udp_hash_lock) { - read_lock(&udp_hash_lock); return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1567,9 +1595,11 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void udp_seq_stop(struct seq_file *seq, void *v) - __releases(udp_hash_lock) { - read_unlock(&udp_hash_lock); + struct udp_iter_state *state = seq->private; + + if (state->bucket < UDP_HTABLE_SIZE) + spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); } static int udp_seq_open(struct inode *inode, struct file *file) @@ -1585,7 +1615,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; - s->hashtable = afinfo->hashtable; + s->udp_table = afinfo->udp_table; return err; } @@ -1657,7 +1687,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) static struct udp_seq_afinfo udp4_seq_afinfo = { .name = "udp", .family = AF_INET, - .hashtable = udp_hash, + .udp_table = &udp_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -1692,10 +1722,21 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +void __init udp_table_init(struct udp_table *table) +{ + int i; + + for (i = 0; i < UDP_HTABLE_SIZE; i++) { + INIT_HLIST_HEAD(&table->hash[i].head); + spin_lock_init(&table->hash[i].lock); + } +} + void __init udp_init(void) { unsigned long limit; + udp_table_init(&udp_table); /* Set the pressure threshold up by the same strategy of TCP. It is a * fraction of global memory that is up to 1/2 at 256 MB, decreasing * toward zero with the amount of memory, with a floor of 128 pages. @@ -1712,8 +1753,6 @@ void __init udp_init(void) } EXPORT_SYMBOL(udp_disconnect); -EXPORT_SYMBOL(udp_hash); -EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 2e9bad2fa1bc..9f4a6165f722 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -5,8 +5,8 @@ #include #include -extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); -extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); +extern int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int ); +extern void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); extern int udp_v4_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3c807964da96..d8ea8e5f5ea3 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -12,16 +12,17 @@ */ #include "udp_impl.h" -struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; +struct udp_table udplite_table; +EXPORT_SYMBOL(udplite_table); static int udplite_rcv(struct sk_buff *skb) { - return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static void udplite_err(struct sk_buff *skb, u32 info) { - __udp4_lib_err(skb, info, udplite_hash); + __udp4_lib_err(skb, info, &udplite_table); } static struct net_protocol udplite_protocol = { @@ -50,7 +51,7 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), - .h.udp_hash = udplite_hash, + .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, @@ -71,7 +72,7 @@ static struct inet_protosw udplite4_protosw = { static struct udp_seq_afinfo udplite4_seq_afinfo = { .name = "udplite", .family = AF_INET, - .hashtable = udplite_hash, + .udp_table = &udplite_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -108,6 +109,7 @@ static inline int udplite4_proc_init(void) void __init udplite4_register(void) { + udp_table_init(&udplite_table); if (proto_register(&udplite_prot, 1)) goto out_register_err; @@ -126,5 +128,4 @@ out_register_err: printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); } -EXPORT_SYMBOL(udplite_hash); EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e51da8c092fa..ccee7244ca0f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,62 +54,73 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); } +static inline int compute_score(struct sock *sk, struct net *net, + unsigned short hnum, + struct in6_addr *saddr, __be16 sport, + struct in6_addr *daddr, __be16 dport, + int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); + + score = 0; + if (inet->dport) { + if (inet->dport != sport) + return -1; + score++; + } + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, saddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *saddr, __be16 sport, struct in6_addr *daddr, __be16 dport, - int dif, struct hlist_head udptable[]) + int dif, struct udp_table *udptable) { struct sock *sk, *result = NULL; struct hlist_node *node; unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && - sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - int score = 0; - if (inet->dport) { - if (inet->dport != sport) - continue; - score++; - } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 4) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + unsigned int hash = udp_hashfn(net, hnum); + struct udp_hslot *hslot = &udptable->hash[hash]; + int score, badness = -1; + + spin_lock(&hslot->lock); + sk_for_each(sk, node, &hslot->head) { + score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + if (score > badness) { + result = sk; + badness = score; } } if (result) sock_hold(result); - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return result; } static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; struct ipv6hdr *iph = ipv6_hdr(skb); @@ -239,7 +250,7 @@ csum_copy_err: void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info, - struct hlist_head udptable[] ) + struct udp_table *udptable) { struct ipv6_pinfo *np; struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; @@ -275,7 +286,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info ) { - __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) @@ -374,14 +385,15 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk, *sk2; const struct udphdr *uh = udp_hdr(skb); + struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; int dif; - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); + spin_lock(&hslot->lock); + sk = sk_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -409,7 +421,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, sk_add_backlog(sk, skb); bh_unlock_sock(sk); out: - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return 0; } @@ -447,7 +459,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, return 0; } -int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], +int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; @@ -544,7 +556,7 @@ discard: static __inline__ int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); + return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); } /* @@ -1008,7 +1020,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) static struct udp_seq_afinfo udp6_seq_afinfo = { .name = "udp6", .family = AF_INET6, - .hashtable = udp_hash, + .udp_table = &udp_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -1050,7 +1062,7 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), - .h.udp_hash = udp_hash, + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 92dd7da766d8..23779208c334 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -7,9 +7,9 @@ #include #include -extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); +extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - int , int , int , __be32 , struct hlist_head []); + int , int , int , __be32 , struct udp_table *); extern int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 3cd1a1ac3d6c..f1e892a99e05 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -15,14 +15,14 @@ static int udplitev6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); } static struct inet6_protocol udplitev6_protocol = { @@ -49,7 +49,7 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), - .h.udp_hash = udplite_hash, + .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, @@ -95,7 +95,7 @@ void udplitev6_exit(void) static struct udp_seq_afinfo udplite6_seq_afinfo = { .name = "udplite6", .family = AF_INET6, - .hashtable = udplite_hash, + .udp_table = &udplite_table, .seq_fops = { .owner = THIS_MODULE, }, -- cgit v1.2.3 From 271b72c7fa82c2c7a795bc16896149933110672d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 02:11:14 -0700 Subject: udp: RCU handling for Unicast packets. Goals are : 1) Optimizing handling of incoming Unicast UDP frames, so that no memory writes should happen in the fast path. Note: Multicasts and broadcasts still will need to take a lock, because doing a full lockless lookup in this case is difficult. 2) No expensive operations in the socket bind/unhash phases : - No expensive synchronize_rcu() calls. - No added rcu_head in socket structure, increasing memory needs, but more important, forcing us to use call_rcu() calls, that have the bad property of making sockets structure cold. (rcu grace period between socket freeing and its potential reuse make this socket being cold in CPU cache). David did a previous patch using call_rcu() and noticed a 20% impact on TCP connection rates. Quoting Cristopher Lameter : "Right. That results in cacheline cooldown. You'd want to recycle the object as they are cache hot on a per cpu basis. That is screwed up by the delayed regular rcu processing. We have seen multiple regressions due to cacheline cooldown. The only choice in cacheline hot sensitive areas is to deal with the complexity that comes with SLAB_DESTROY_BY_RCU or give up on RCU." - Because udp sockets are allocated from dedicated kmem_cache, use of SLAB_DESTROY_BY_RCU can help here. Theory of operation : --------------------- As the lookup is lockfree (using rcu_read_lock()/rcu_read_unlock()), special attention must be taken by readers and writers. Use of SLAB_DESTROY_BY_RCU is tricky too, because a socket can be freed, reused, inserted in a different chain or in worst case in the same chain while readers could do lookups in the same time. In order to avoid loops, a reader must check each socket found in a chain really belongs to the chain the reader was traversing. If it finds a mismatch, lookup must start again at the begining. This *restart* loop is the reason we had to use rdlock for the multicast case, because we dont want to send same message several times to the same socket. We use RCU only for fast path. Thus, /proc/net/udp still takes spinlocks. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 37 ++++++++++++++++++++++++++++++++++++- net/core/sock.c | 3 ++- net/ipv4/udp.c | 35 ++++++++++++++++++++++++++--------- net/ipv4/udplite.c | 1 + net/ipv6/udp.c | 31 ++++++++++++++++++++++++------- net/ipv6/udplite.c | 1 + 6 files changed, 90 insertions(+), 18 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/sock.h b/include/net/sock.h index d200dfbe1ef6..0bea25db5471 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -363,6 +363,27 @@ static __inline__ int sk_del_node_init(struct sock *sk) return rc; } +static __inline__ int __sk_del_node_init_rcu(struct sock *sk) +{ + if (sk_hashed(sk)) { + hlist_del_init_rcu(&sk->sk_node); + return 1; + } + return 0; +} + +static __inline__ int sk_del_node_init_rcu(struct sock *sk) +{ + int rc = __sk_del_node_init_rcu(sk); + + if (rc) { + /* paranoid for a while -acme */ + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); + } + return rc; +} + static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); @@ -374,6 +395,17 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } +static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head_rcu(&sk->sk_node, list); +} + +static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + __sk_add_node_rcu(sk, list); +} + static __inline__ void __sk_del_bind_node(struct sock *sk) { __hlist_del(&sk->sk_bind_node); @@ -387,6 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) +#define sk_for_each_rcu(__sk, node, list) \ + hlist_for_each_entry_rcu(__sk, node, list, sk_node) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) @@ -589,8 +623,9 @@ struct proto { int *sysctl_rmem; int max_header; - struct kmem_cache *slab; + struct kmem_cache *slab; unsigned int obj_size; + int slab_flags; atomic_t *orphan_count; diff --git a/net/core/sock.c b/net/core/sock.c index 5e2a3132a8c9..ded1eb5d2fd4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2042,7 +2042,8 @@ int proto_register(struct proto *prot, int alloc_slab) if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | prot->slab_flags, + NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2a6c491f97d7..0ea974bf7962 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -187,7 +187,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - sk_add_node(sk, &hslot->head); + sk_add_node_rcu(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -253,15 +253,24 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; + struct sock *sk, *result; struct hlist_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; - int score, badness = -1; + int score, badness; - spin_lock(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + sk_for_each_rcu(sk, node, &hslot->head) { + /* + * lockless reader, and SLAB_DESTROY_BY_RCU items: + * We must check this item was not moved to another chain + */ + if (udp_hashfn(net, sk->sk_hash) != hash) + goto begin; score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { @@ -269,9 +278,16 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, badness = score; } } - if (result) - sock_hold(result); - spin_unlock(&hslot->lock); + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, saddr, hnum, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } @@ -953,7 +969,7 @@ void udp_lib_unhash(struct sock *sk) struct udp_hslot *hslot = &udptable->hash[hash]; spin_lock(&hslot->lock); - if (sk_del_node_init(sk)) { + if (sk_del_node_init_rcu(sk)) { inet_sk(sk)->num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } @@ -1517,6 +1533,7 @@ struct proto udp_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index d8ea8e5f5ea3..c784891cb7e5 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -51,6 +51,7 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ccee7244ca0f..1d9790e43dfc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -97,24 +97,40 @@ static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *daddr, __be16 dport, int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; + struct sock *sk, *result; struct hlist_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; - int score, badness = -1; + int score, badness; - spin_lock(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + sk_for_each_rcu(sk, node, &hslot->head) { + /* + * lockless reader, and SLAB_DESTROY_BY_RCU items: + * We must check this item was not moved to another chain + */ + if (udp_hashfn(net, sk->sk_hash) != hash) + goto begin; score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } - if (result) - sock_hold(result); - spin_unlock(&hslot->lock); + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, saddr, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } @@ -1062,6 +1078,7 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f1e892a99e05..ba162a824585 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -49,6 +49,7 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, -- cgit v1.2.3 From 8203efb3c612743fecb1ed67cf1daf9d9c127462 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 02:32:32 -0700 Subject: udp: calculate udp_mem based on low memory instead of all memory This patch mimics commit 57413ebc4e0f1e471a3b4db4aff9a85c083d090e (tcp: calculate tcp_mem based on low memory instead of all memory) The udp_mem array which contains limits on the total amount of memory used by UDP sockets is calculated based on nr_all_pages. On a 32 bits x86 system, we should base this on the number of lowmem pages. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0ea974bf7962..5ba03401b914 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -81,6 +81,8 @@ #include #include #include +#include +#include #include #include #include @@ -1751,15 +1753,16 @@ void __init udp_table_init(struct udp_table *table) void __init udp_init(void) { - unsigned long limit; + unsigned long nr_pages, limit; udp_table_init(&udp_table); /* Set the pressure threshold up by the same strategy of TCP. It is a * fraction of global memory that is up to 1/2 at 256 MB, decreasing * toward zero with the amount of memory, with a floor of 128 pages. */ - limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_udp_mem[0] = limit / 4 * 3; sysctl_udp_mem[1] = limit; -- cgit v1.2.3 From f52b5054ec108aaa9e903850d6b62af8ae3fe6ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 11:19:11 -0700 Subject: udp: udp_get_next() should use spin_unlock_bh() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5ba03401b914..ced820318f94 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1579,7 +1579,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { - spin_unlock(&state->udp_table->hash[state->bucket].lock); + spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); return udp_get_first(seq, state->bucket + 1); } return sk; -- cgit v1.2.3 From 96631ed16c514cf8b28fab991a076985ce378c26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 11:19:58 -0700 Subject: udp: introduce sk_for_each_rcu_safenext() Corey Minyard found a race added in commit 271b72c7fa82c2c7a795bc16896149933110672d (udp: RCU handling for Unicast packets.) "If the socket is moved from one list to another list in-between the time the hash is calculated and the next field is accessed, and the socket has moved to the end of the new list, the traversal will not complete properly on the list it should have, since the socket will be on the end of the new list and there's not a way to tell it's on a new list and restart the list traversal. I think that this can be solved by pre-fetching the "next" field (with proper barriers) before checking the hash." This patch corrects this problem, introducing a new sk_for_each_rcu_safenext() macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist.h | 17 +++++++++++++++++ include/net/sock.h | 4 ++-- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 4 files changed, 23 insertions(+), 6 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3f2c97..3ba2998b22ba 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -383,5 +383,22 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) +/** + * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * @next: the &struct hlist_node to use as a next cursor + * + * Special version of hlist_for_each_entry_rcu that make sure + * each next pointer is fetched before each iteration. + */ +#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference(next)) + #endif /* __KERNEL__ */ #endif diff --git a/include/net/sock.h b/include/net/sock.h index 0bea25db5471..a4f6d3fc0470 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -419,8 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu(__sk, node, list) \ - hlist_for_each_entry_rcu(__sk, node, list, sk_node) +#define sk_for_each_rcu_safenext(__sk, node, list, next) \ + hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ced820318f94..c3ecec8a9e1c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -256,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node; + struct hlist_node *node, *next; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -266,7 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, begin: result = NULL; badness = -1; - sk_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { /* * lockless reader, and SLAB_DESTROY_BY_RCU items: * We must check this item was not moved to another chain diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1d9790e43dfc..32d914db6c4f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node; + struct hlist_node *node, *next; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -108,7 +108,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, begin: result = NULL; badness = -1; - sk_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { /* * lockless reader, and SLAB_DESTROY_BY_RCU items: * We must check this item was not moved to another chain -- cgit v1.2.3 From 5b095d98928fdb9e3b75be20a54b7a6cbf6ca9ad Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 29 Oct 2008 12:52:50 -0700 Subject: net: replace %p6 with %pI6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- drivers/firmware/iscsi_ibft.c | 2 +- drivers/infiniband/core/sysfs.c | 2 +- drivers/infiniband/hw/mthca/mthca_mcg.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 +++++------ drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 30 +++++++++++++------------- drivers/infiniband/ulp/srp/ib_srp.c | 6 +++--- drivers/net/mlx4/mcg.c | 4 ++-- drivers/scsi/iscsi_tcp.c | 2 +- fs/lockd/host.c | 2 +- fs/nfs/super.c | 2 +- include/linux/sunrpc/svc_xprt.h | 2 +- include/net/ip_vs.h | 2 +- include/net/netfilter/nf_conntrack_tuple.h | 2 +- include/net/sctp/sctp.h | 2 +- net/bridge/netfilter/ebt_log.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_timer.c | 2 +- net/ipv6/addrlabel.c | 10 ++++----- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6mr.c | 2 +- net/ipv6/ipcomp6.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 4 ++-- net/netfilter/ipvs/ip_vs_core.c | 4 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/ipvs/ip_vs_proto.c | 6 +++--- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 8 +++---- net/netfilter/nf_conntrack_ftp.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 4 ++-- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netlabel/netlabel_addrlist.c | 2 +- net/sctp/ipv6.c | 18 ++++++++-------- net/sctp/sm_statefuns.c | 2 +- net/sunrpc/clnt.c | 2 +- net/sunrpc/rpcb_clnt.c | 2 +- net/sunrpc/svcauth_unix.c | 4 ++-- net/sunrpc/xprtsock.c | 8 +++---- net/xfrm/xfrm_policy.c | 4 ++-- net/xfrm/xfrm_state.c | 4 ++-- security/selinux/avc.c | 2 +- 49 files changed, 100 insertions(+), 100 deletions(-) (limited to 'net/ipv4') diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 0a6472097a81..acb82aff8808 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -290,7 +290,7 @@ static ssize_t sprintf_ipaddr(char *buf, u8 *ip) /* * IPv6 */ - str += sprintf(str, "%p6", ip); + str += sprintf(str, "%pI6", ip); } str += sprintf(str, "\n"); return str - buf; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index e985193d631c..4f4d1bb9f069 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -262,7 +262,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, if (ret) return ret; - return sprintf(buf, "%p6\n", gid.raw); + return sprintf(buf, "%pI6\n", gid.raw); } static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 693bed0b2d1c..d4c81053e439 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -87,7 +87,7 @@ static int find_mgm(struct mthca_dev *dev, } if (0) - mthca_dbg(dev, "Hash for %p6 is %04x\n", gid, *hash); + mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash); *index = *hash; *prev = -1; @@ -254,7 +254,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; if (index == -1) { - mthca_err(dev, "MGID %p6 not found\n", gid->raw); + mthca_err(dev, "MGID %pI6 not found\n", gid->raw); err = -EINVAL; goto out; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index d98d87bfe365..47d588ba2a7f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1128,7 +1128,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, goto err_send_cm; } - ipoib_dbg(priv, "Request connection 0x%x for gid %p6 qpn 0x%x\n", + ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n", p->qp->qp_num, pathrec->dgid.raw, qpn); return 0; @@ -1276,7 +1276,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { list_move(&tx->list, &priv->cm.reap_list); queue_work(ipoib_workqueue, &priv->cm.reap_task); - ipoib_dbg(priv, "Reap connection for gid %p6\n", + ipoib_dbg(priv, "Reap connection for gid %pI6\n", tx->neigh->dgid.raw); tx->neigh = NULL; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e7f4f94c3e92..b3a671895bdc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -359,7 +359,7 @@ void ipoib_mark_paths_invalid(struct net_device *dev) spin_lock_irq(&priv->lock); list_for_each_entry_safe(path, tp, &priv->path_list, list) { - ipoib_dbg(priv, "mark path LID 0x%04x GID %p6 invalid\n", + ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n", be16_to_cpu(path->pathrec.dlid), path->pathrec.dgid.raw); path->valid = 0; @@ -413,10 +413,10 @@ static void path_rec_completion(int status, unsigned long flags; if (!status) - ipoib_dbg(priv, "PathRec LID 0x%04x for GID %p6\n", + ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n", be16_to_cpu(pathrec->dlid), pathrec->dgid.raw); else - ipoib_dbg(priv, "PathRec status %d for GID %p6\n", + ipoib_dbg(priv, "PathRec status %d for GID %pI6\n", status, path->pathrec.dgid.raw); skb_queue_head_init(&skqueue); @@ -527,7 +527,7 @@ static int path_rec_start(struct net_device *dev, { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_dbg(priv, "Start path record lookup for %p6\n", + ipoib_dbg(priv, "Start path record lookup for %pI6\n", path->pathrec.dgid.raw); init_completion(&path->done); @@ -764,7 +764,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { - ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %p6\n", + ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", skb->dst ? "neigh" : "dst", be16_to_cpup((__be16 *) skb->data), IPOIB_QPN(phdr->hwaddr), @@ -844,7 +844,7 @@ static void ipoib_neigh_cleanup(struct neighbour *n) else return; ipoib_dbg(priv, - "neigh_cleanup for %06x %p6\n", + "neigh_cleanup for %06x %pI6\n", IPOIB_QPN(n->ha), n->ha + 4); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 0de79cf4c07c..a2eb3b9789eb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -71,7 +71,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) struct ipoib_neigh *neigh, *tmp; int tx_dropped = 0; - ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %p6\n", + ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", mcast->mcmember.mgid.raw); spin_lock_irq(&priv->lock); @@ -204,7 +204,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { - ipoib_warn(priv, "multicast group %p6 already attached\n", + ipoib_warn(priv, "multicast group %pI6 already attached\n", mcast->mcmember.mgid.raw); return 0; @@ -213,7 +213,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), &mcast->mcmember.mgid, set_qkey); if (ret < 0) { - ipoib_warn(priv, "couldn't attach QP to multicast group %p6\n", + ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n", mcast->mcmember.mgid.raw); clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); @@ -245,7 +245,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, mcast->ah = ah; spin_unlock_irq(&priv->lock); - ipoib_dbg_mcast(priv, "MGID %p6 AV %p, LID 0x%04x, SL %d\n", + ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n", mcast->mcmember.mgid.raw, mcast->ah->ah, be16_to_cpu(mcast->mcmember.mlid), @@ -291,7 +291,7 @@ ipoib_mcast_sendonly_join_complete(int status, if (status) { if (mcast->logcount++ < 20) - ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %p6, status %d\n", + ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n", mcast->mcmember.mgid.raw, status); /* Flush out any queued packets */ @@ -351,7 +351,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", ret); } else { - ipoib_dbg_mcast(priv, "no multicast record for %p6, starting join\n", + ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n", mcast->mcmember.mgid.raw); } @@ -380,7 +380,7 @@ static int ipoib_mcast_join_complete(int status, struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_dbg_mcast(priv, "join completion for %p6 (status %d)\n", + ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", mcast->mcmember.mgid.raw, status); /* We trap for port events ourselves. */ @@ -410,10 +410,10 @@ static int ipoib_mcast_join_complete(int status, if (mcast->logcount++ < 20) { if (status == -ETIMEDOUT) { - ipoib_dbg_mcast(priv, "multicast join failed for %p6, status %d\n", + ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", mcast->mcmember.mgid.raw, status); } else { - ipoib_warn(priv, "multicast join failed for %p6, status %d\n", + ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", mcast->mcmember.mgid.raw, status); } } @@ -446,7 +446,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, ib_sa_comp_mask comp_mask; int ret = 0; - ipoib_dbg_mcast(priv, "joining MGID %p6\n", mcast->mcmember.mgid.raw); + ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw); rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; @@ -631,7 +631,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) ib_sa_free_multicast(mcast->mc); if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { - ipoib_dbg_mcast(priv, "leaving MGID %p6\n", + ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", mcast->mcmember.mgid.raw); /* Remove ourselves from the multicast group */ @@ -663,7 +663,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) mcast = __ipoib_mcast_find(dev, mgid); if (!mcast) { /* Let's create a new send only group now */ - ipoib_dbg_mcast(priv, "setting up send only multicast group for %p6\n", + ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", mgid); mcast = ipoib_mcast_alloc(dev, 0); @@ -797,13 +797,13 @@ void ipoib_mcast_restart_task(struct work_struct *work) /* ignore group which is directly joined by userspace */ if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) && !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) { - ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %p6\n", + ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n", mgid.raw); continue; } /* Not found or send-only group, let's add a new entry */ - ipoib_dbg_mcast(priv, "adding multicast entry for mgid %p6\n", + ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n", mgid.raw); nmcast = ipoib_mcast_alloc(dev, 0); @@ -837,7 +837,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { - ipoib_dbg_mcast(priv, "deleting multicast group %p6\n", + ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n", mcast->mcmember.mgid.raw); rb_erase(&mcast->rb_node, &priv->multicast_tree); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index bc825310c6db..7c13db885bf6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1514,7 +1514,7 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, target->state == SRP_TARGET_REMOVED) return -ENODEV; - return sprintf(buf, "%p6\n", target->path.dgid.raw); + return sprintf(buf, "%pI6\n", target->path.dgid.raw); } static ssize_t show_orig_dgid(struct device *dev, @@ -1526,7 +1526,7 @@ static ssize_t show_orig_dgid(struct device *dev, target->state == SRP_TARGET_REMOVED) return -ENODEV; - return sprintf(buf, "%p6\n", target->orig_dgid); + return sprintf(buf, "%pI6\n", target->orig_dgid); } static ssize_t show_zero_req_lim(struct device *dev, @@ -1867,7 +1867,7 @@ static ssize_t srp_create_target(struct device *dev, shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " - "service_id %016llx dgid %p6\n", + "service_id %016llx dgid %pI6\n", (unsigned long long) be64_to_cpu(target->id_ext), (unsigned long long) be64_to_cpu(target->ioc_guid), be16_to_cpu(target->path.pkey), diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c index 6f79e84a5c9a..b1622062b12d 100644 --- a/drivers/net/mlx4/mcg.c +++ b/drivers/net/mlx4/mcg.c @@ -118,7 +118,7 @@ static int find_mgm(struct mlx4_dev *dev, return err; if (0) - mlx4_dbg(dev, "Hash for %p6 is %04x\n", gid, *hash); + mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash); *index = *hash; *prev = -1; @@ -267,7 +267,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) goto out; if (index == -1) { - mlx4_err(dev, "MGID %p6 not found\n", gid); + mlx4_err(dev, "MGID %pI6 not found\n", gid); err = -EINVAL; goto out; } diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index ef929aef7c12..24d09028a27f 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -1608,7 +1608,7 @@ static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock, case AF_INET6: sin6 = (struct sockaddr_in6 *)addr; spin_lock_bh(&conn->session->lock); - sprintf(buf, "%p6", &sin6->sin6_addr); + sprintf(buf, "%pI6", &sin6->sin6_addr); *port = be16_to_cpu(sin6->sin6_port); spin_unlock_bh(&conn->session->lock); break; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 344e6b475e05..c8ab7d70390d 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -122,7 +122,7 @@ static void nlm_display_address(const struct sockaddr *sap, snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin6->sin6_addr.s6_addr32[3])); else - snprintf(buf, len, "%p6", &sin6->sin6_addr); + snprintf(buf, len, "%pI6", &sin6->sin6_addr); break; default: snprintf(buf, len, "unsupported address family"); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5fe77219df78..eb391d8d70ba 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -468,7 +468,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, } case AF_INET6: { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - seq_printf(m, ",mountaddr=%p6", &sin6->sin6_addr); + seq_printf(m, ",mountaddr=%pI6", &sin6->sin6_addr); break; } default: diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 42e01c93c7ea..51cb75ea42d5 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -145,7 +145,7 @@ static inline char *__svc_print_addr(struct sockaddr *addr, break; case AF_INET6: - snprintf(buf, len, "%p6, port=%u", + snprintf(buf, len, "%pI6, port=%u", &((struct sockaddr_in6 *)addr)->sin6_addr, ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 6a6692067092..af48cada561e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -87,7 +87,7 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, int len; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - len = snprintf(&buf[*idx], buf_len - *idx, "[%p6]", + len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6]", &addr->in6) + 1; else #endif diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 303efaf68d08..42f1fc96f3ec 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -122,7 +122,7 @@ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) { #ifdef DEBUG - printk("tuple %p: %u %p6 %hu -> %p6 %hu\n", + printk("tuple %p: %u %pI6 %hu -> %pI6 %hu\n", t, t->dst.protonum, t->src.u3.all, ntohs(t->src.u.all), t->dst.u3.all, ntohs(t->dst.u.all)); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a84c3976e1b0..e71b0f7ce88e 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -285,7 +285,7 @@ extern int sctp_debug_flag; if (sctp_debug_flag) { \ if (saddr->sa.sa_family == AF_INET6) { \ printk(KERN_DEBUG \ - lead "%p6" trail, \ + lead "%pI6" trail, \ leadparm, \ &saddr->v6.sin6_addr, \ otherparms); \ diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 3654f3ebdb8f..5e7cff3542f2 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -133,7 +133,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, printk(" INCOMPLETE IPv6 header"); goto out; } - printk(" IPv6 SRC=%p6 IPv6 DST=%p6, IPv6 priority=0x%01X, Next Header=%d", + printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); nexthdr = ih->nexthdr; offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 191c06bb0f67..04909e4b3c4c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2346,7 +2346,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - printk(KERN_DEBUG "Undo %s %p6/%u c%u l%u ss%u/%u p%u\n", + printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, &np->daddr, ntohs(inet->dport), tp->snd_cwnd, tcp_left_out(tp), diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4e6ee5205237..979c9d604eb0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %p6:%u/%u shrinks window %u:%u. Repaired.\n", + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %pI6:%u/%u shrinks window %u:%u. Repaired.\n", &np->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index d28036659d27..6ff73c4c126a 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -186,7 +186,7 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n", + ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", __func__, addr, type, ifindex, label); return label; @@ -201,7 +201,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", __func__, prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -289,7 +289,7 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); @@ -313,7 +313,7 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *pos, *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { @@ -337,7 +337,7 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 9bc43f2527ce..7a8a01369e5c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -419,7 +419,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%p6\n", + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); xfrm_state_put(x); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ec4be188c341..c02a6308defe 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -367,7 +367,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%p6\n", + printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a89051468359..1c7f400a3cfe 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: %p6\n", &hao->addr); + KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr); goto discard; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b3fa38e40dc4..3c2821f9b529 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -681,7 +681,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%p6 > %p6]\n", + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", saddr, daddr); goto discard_it; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 798e6a29dd83..c491fb98a5e3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,7 +297,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%p6 %p6 %-3d %8ld %8ld %8ld", + seq_printf(seq, "%pI6 %pI6 %-3d %8ld %8ld %8ld", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent, mfc->mfc_un.res.pkt, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 9566d8f73140..d4576a9c154f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -67,7 +67,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%p6\n", + printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 191bb0722a7c..2a6752dae09d 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -647,7 +647,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state & NUD_VALID)) { - ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %p6\n", + ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index a61ce3010007..02885e8bb69b 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - printk("SRC=%p6 DST=%p6 ", &ih->saddr, &ih->daddr); + printk("SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b165a273c6c0..727b9530448a 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -56,7 +56,7 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%p6 dst=%p6 ", + return seq_printf(s, "src=%pI6 dst=%pI6 ", tuple->src.u3.ip6, tuple->dst.u3.ip6); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 483550cfdf33..984276463a8d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -872,7 +872,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for (%p6, %u)->(%p6, %u)\n", + printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 89bf65be6f2c..60aba45023ff 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -820,7 +820,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) - seq_printf(seq, "%-3s %p6 %04X %p6 %04X %p6 %04X %-11s %7lu\n", + seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %7lu\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), @@ -881,7 +881,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) - seq_printf(seq, "%-3s %p6 %04X %p6 %04X %p6 %04X %-11s %-6s %7lu\n", + seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 9400587a01e7..c3c68443b5b1 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -805,7 +805,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %p6->%p6\n", + IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); @@ -1175,7 +1175,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %p6->%p6\n", + IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 28c47c0d5142..76db27ec9633 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1867,7 +1867,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - seq_printf(seq, "%s [%p6]:%04X %s ", + seq_printf(seq, "%s [%pI6]:%04X %s ", ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), @@ -1895,7 +1895,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (dest->af == AF_INET6) seq_printf(seq, - " -> [%p6]:%04X" + " -> [%pI6]:%04X" " %-7s %-6d %-10d %-10d\n", &dest->addr.in6, ntohs(dest->port), diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index d7ce4f1839c9..54cd67fbfe74 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -203,7 +203,7 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else if (ih->nexthdr == IPPROTO_FRAGMENT) - sprintf(buf, "%s %p6->%p6 frag", + sprintf(buf, "%s %pI6->%pI6 frag", pp->name, &ih->saddr, &ih->daddr); else { __be16 _ports[2], *pptr; @@ -211,10 +211,10 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), sizeof(_ports), _ports); if (pptr == NULL) - sprintf(buf, "%s TRUNCATED %p6->%p6", + sprintf(buf, "%s TRUNCATED %pI6->%pI6", pp->name, &ih->saddr, &ih->daddr); else - sprintf(buf, "%s %p6:%u->%p6:%u", + sprintf(buf, "%s %pI6:%u->%pI6:%u", pp->name, &ih->saddr, ntohs(pptr[0]), &ih->daddr, ntohs(pptr[1])); diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 59f2d11b683e..6ede88812044 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -154,7 +154,7 @@ ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else - sprintf(buf, "%s %p6->%p6", + sprintf(buf, "%s %pI6->%pI6", pp->name, &ih->saddr, &ih->daddr); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index be34c335cabe..fc342dda950a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -141,12 +141,12 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) NULL, &fl); if (!rt) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip6_route_output error, dest: %p6\n", + IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", &dest->addr.in6); return NULL; } __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); - IP_VS_DBG(10, "new dst %p6, refcnt=%d\n", + IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", &dest->addr.in6, atomic_read(&rt->u.dst.__refcnt)); } @@ -166,7 +166,7 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); if (!rt) { - IP_VS_DBG_RL("ip6_route_output error, dest: %p6\n", + IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", &cp->daddr.in6); return NULL; } @@ -300,7 +300,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); if (!rt) { - IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %p6\n", + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n", &iph->daddr); goto tx_error_icmp; } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 05bf82d345ce..8cab6d595909 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -467,7 +467,7 @@ static int help(struct sk_buff *skb, NIPQUAD(cmd.u3.ip), NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); } else { - pr_debug("conntrack_ftp: NOT RECORDING: %p6 != %p6\n", + pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n", cmd.u3.ip6, ct->tuplehash[dir].tuple.src.u3.ip6); } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 29e49b1c80b3..99bc803d1dd1 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -850,7 +850,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { - pr_debug("nf_ct_q931: set destCallSignalAddress %p6:%hu->%p6:%hu\n", + pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3, ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); ret = set_h225_addr(skb, data, dataoff, @@ -866,7 +866,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) { - pr_debug("nf_ct_q931: set sourceCallSignalAddress %p6:%hu->%p6:%hu\n", + pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3, ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); ret = set_h225_addr(skb, data, dataoff, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index f04c6ed43674..6379717f9044 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -904,7 +904,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, ent->rateinfo.cost); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) case NFPROTO_IPV6: - return seq_printf(s, "%ld %p6:%u->%p6:%u %u %u %u\n", + return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip6.src, ntohs(ent->dst.src_port), diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index a377ea333e16..b785727a5bf7 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -426,7 +426,7 @@ static int recent_seq_show(struct seq_file *seq, void *v) "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl, e->stamps[i], e->index); else - seq_printf(seq, "src=%p6 ttl: %u last_seen: %lu oldest_pkt: %u", + seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.in6, e->ttl, e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 614c95ec39df..8b1c58b0820c 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -370,7 +370,7 @@ void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, if (dev != NULL) audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " %s=%p6", dir, addr); + audit_log_format(audit_buf, " %s=%pI6", dir, addr); if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { u32 mask_len = 0; u32 mask_val; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e82668bd2b50..ceaa4aa066ea 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -223,7 +223,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%p6 dst:%p6\n", + SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, &fl.fl6_src, &fl.fl6_dst); @@ -251,18 +251,18 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, fl.oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=%p6 ", __func__, &fl.fl6_dst); + SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); if (saddr) { ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); - SCTP_DEBUG_PRINTK("SRC=%p6 - ", &fl.fl6_src); + SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src); } dst = ip6_route_output(&init_net, NULL, &fl); if (!dst->error) { struct rt6_info *rt; rt = (struct rt6_info *)dst; - SCTP_DEBUG_PRINTK("rt6_dst:%p6 rt6_src:%p6\n", + SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, &rt->rt6i_src.addr); return dst; } @@ -309,7 +309,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, __u8 matchlen = 0; __u8 bmatchlen; - SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%p6 ", + SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%pI6 ", __func__, asoc, dst, &daddr->v6.sin6_addr); if (!asoc) { @@ -318,7 +318,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, &daddr->v6.sin6_addr, inet6_sk(&sk->inet.sk)->srcprefs, &saddr->v6.sin6_addr); - SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %p6\n", + SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n", &saddr->v6.sin6_addr); return; } @@ -347,10 +347,10 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, if (baddr) { memcpy(saddr, baddr, sizeof(union sctp_addr)); - SCTP_DEBUG_PRINTK("saddr: %p6\n", &saddr->v6.sin6_addr); + SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); } else { printk(KERN_ERR "%s: asoc:%p Could not find a valid source " - "address for the dest:%p6\n", + "address for the dest:%pI6\n", __func__, asoc, &daddr->v6.sin6_addr); } @@ -720,7 +720,7 @@ static int sctp_v6_is_ce(const struct sk_buff *skb) /* Dump the v6 addr to the seq file. */ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) { - seq_printf(seq, "%p6 ", &addr->v6.sin6_addr); + seq_printf(seq, "%pI6 ", &addr->v6.sin6_addr); } static void sctp_v6_ecn_capable(struct sock *sk) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9f370964e733..d07b484b873a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1123,7 +1123,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, if (from_addr.sa.sa_family == AF_INET6) { if (net_ratelimit()) printk(KERN_WARNING - "%s association %p could not find address %p6\n", + "%s association %p could not find address %pI6\n", __func__, asoc, &from_addr.v6.sin6_addr); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 26f61fd11eab..8f067497c212 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -278,7 +278,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) case AF_INET6: { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)args->address; - snprintf(servername, sizeof(servername), "%p6", + snprintf(servername, sizeof(servername), "%pI6", &sin->sin6_addr); break; } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 968ec1f66bc3..4c8adadc214d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -305,7 +305,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, snprintf(buf, sizeof(buf), "::.%u.%u", port >> 8, port & 0xff); else - snprintf(buf, sizeof(buf), "%p6.%u.%u", + snprintf(buf, sizeof(buf), "%pI6.%u.%u", &address_to_register->sin6_addr, port >> 8, port & 0xff); map->r_addr = buf; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index eb640c1a1bc9..16f714a247bc 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -168,7 +168,7 @@ static void ip_map_request(struct cache_detail *cd, ntohl(im->m_addr.s6_addr32[3]) >> 8 & 0xff, ntohl(im->m_addr.s6_addr32[3]) >> 0 & 0xff); } else { - snprintf(text_addr, 40, "%p6", &im->m_addr); + snprintf(text_addr, 40, "%pI6", &im->m_addr); } qword_add(bpp, blen, im->m_class); qword_add(bpp, blen, text_addr); @@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m, ntohl(addr.s6_addr32[3]) >> 0 & 0xff, dom); } else { - seq_printf(m, "%s %p6 %s\n", im->m_class, &addr, dom); + seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom); } return 0; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3c9aff584579..f9ce3c9949d5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -341,7 +341,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(40, GFP_KERNEL); if (buf) { - snprintf(buf, 40, "%p6",&addr->sin6_addr); + snprintf(buf, 40, "%pI6",&addr->sin6_addr); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -356,7 +356,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(64, GFP_KERNEL); if (buf) { - snprintf(buf, 64, "addr=%p6 port=%u proto=%s", + snprintf(buf, 64, "addr=%pI6 port=%u proto=%s", &addr->sin6_addr, ntohs(addr->sin6_port), protocol); @@ -378,7 +378,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(50, GFP_KERNEL); if (buf) { - snprintf(buf, 50, "%p6.%u.%u", + snprintf(buf, 50, "%pI6.%u.%u", &addr->sin6_addr, ntohs(addr->sin6_port) >> 8, ntohs(addr->sin6_port) & 0xff); @@ -1407,7 +1407,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) if (port > last) nloop++; } while (err == -EADDRINUSE && nloop != 2); - dprintk("RPC: xs_bind6 %p6:%u: %s (%d)\n", + dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n", &myaddr.sin6_addr, port, err ? "failed" : "ok", err); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f052b069f983..80b13eea30e7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2467,11 +2467,11 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, sel->prefixlen_d); break; case AF_INET6: - audit_log_format(audit_buf, " src=%p6", sel->saddr.a6); + audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); if (sel->prefixlen_s != 128) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); - audit_log_format(audit_buf, " dst=%p6", sel->daddr.a6); + audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); if (sel->prefixlen_d != 128) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7944861fb9bf..304eca4ac970 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2115,7 +2115,7 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x, NIPQUAD(x->id.daddr.a4)); break; case AF_INET6: - audit_log_format(audit_buf, " src=%p6 dst=%p6", + audit_log_format(audit_buf, " src=%pI6 dst=%pI6", x->props.saddr.a6, x->id.daddr.a6); break; } @@ -2140,7 +2140,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, case AF_INET6: iph6 = ipv6_hdr(skb); audit_log_format(audit_buf, - " src=%p6 dst=%p6 flowlbl=0x%x%02x%02x", + " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x", &iph6->saddr,&iph6->daddr, iph6->flow_lbl[0] & 0x0f, iph6->flow_lbl[1], diff --git a/security/selinux/avc.c b/security/selinux/avc.c index c91008f438a2..ed6af12cdf43 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -495,7 +495,7 @@ static inline void avc_print_ipv6_addr(struct audit_buffer *ab, char *name1, char *name2) { if (!ipv6_addr_any(addr)) - audit_log_format(ab, " %s=%p6", name1, addr); + audit_log_format(ab, " %s=%pI6", name1, addr); if (port) audit_log_format(ab, " %s=%d", name2, ntohs(port)); } -- cgit v1.2.3 From c8db3fec5b02f4cefe441903fe1c142ff14e1771 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Oct 2008 14:00:53 -0700 Subject: udp: Should use spin_lock_bh()/spin_unlock_bh() in udp_lib_unhash() Spotted by Alexander Beregalov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c3ecec8a9e1c..f760b86e7530 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -970,12 +970,12 @@ void udp_lib_unhash(struct sock *sk) unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); struct udp_hslot *hslot = &udptable->hash[hash]; - spin_lock(&hslot->lock); + spin_lock_bh(&hslot->lock); if (sk_del_node_init_rcu(sk)) { inet_sk(sk)->num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } - spin_unlock(&hslot->lock); + spin_unlock_bh(&hslot->lock); } EXPORT_SYMBOL(udp_lib_unhash); -- cgit v1.2.3 From cffee385d7f367e80b288abf4261256477f7760e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 31 Oct 2008 00:53:08 -0700 Subject: net: replace NIPQUAD() in net/ipv4/netfilter/ Using NIPQUAD() with NIPQUAD_FMT, %d.%d.%d.%d or %u.%u.%u.%u can be replaced with %pI4 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 16 +++---- net/ipv4/netfilter/ip_tables.c | 12 ++---- net/ipv4/netfilter/ipt_CLUSTERIP.c | 9 ++-- net/ipv4/netfilter/ipt_LOG.c | 7 ++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 15 ++++--- net/ipv4/netfilter/nf_nat_h323.c | 58 ++++++++++++-------------- net/ipv4/netfilter/nf_nat_irc.c | 4 +- net/ipv4/netfilter/nf_nat_rule.c | 6 +-- net/ipv4/netfilter/nf_nat_sip.c | 18 ++++---- net/ipv4/netfilter/nf_nat_snmp_basic.c | 9 ++-- 10 files changed, 69 insertions(+), 85 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8d70d29f1ccf..7ea88b61cb0d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -142,15 +142,15 @@ static inline int arp_packet_match(const struct arphdr *arphdr, ARPT_INV_TGTIP)) { dprintf("Source or target IP address mismatch.\n"); - dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", - NIPQUAD(src_ipaddr), - NIPQUAD(arpinfo->smsk.s_addr), - NIPQUAD(arpinfo->src.s_addr), + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &src_ipaddr, + &arpinfo->smsk.s_addr, + &arpinfo->src.s_addr, arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : ""); - dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", - NIPQUAD(tgt_ipaddr), - NIPQUAD(arpinfo->tmsk.s_addr), - NIPQUAD(arpinfo->tgt.s_addr), + dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &tgt_ipaddr, + &arpinfo->tmsk.s_addr, + &arpinfo->tgt.s_addr, arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : ""); return 0; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 213fb27debc1..ef8b6ca068b2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -94,15 +94,11 @@ ip_packet_match(const struct iphdr *ip, IPT_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); - dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", - NIPQUAD(ip->saddr), - NIPQUAD(ipinfo->smsk.s_addr), - NIPQUAD(ipinfo->src.s_addr), + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr, ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); - dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", - NIPQUAD(ip->daddr), - NIPQUAD(ipinfo->dmsk.s_addr), - NIPQUAD(ipinfo->dst.s_addr), + dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr, ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); return false; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7ac1677419a9..2e4f98b85524 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -168,7 +168,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, char buffer[16]; /* create proc dir entry */ - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); + sprintf(buffer, "%pI4", &ip); c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, clusterip_procdir, &clusterip_proc_fops, c); @@ -373,7 +373,7 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) config = clusterip_config_find_get(e->ip.dst.s_addr, 1); if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { - printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); + printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr); return false; } else { struct net_device *dev; @@ -478,9 +478,8 @@ static void arp_print(struct arp_payload *payload) } hbuffer[--k]='\0'; - printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n", - NIPQUAD(payload->src_ip), hbuffer, - NIPQUAD(payload->dst_ip)); + printk("src %pI4@%s, dst %pI4\n", + &payload->src_ip, hbuffer, &payload->dst_ip); } #endif diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index fc6ce04a3e35..4614a696f1b0 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -54,8 +54,8 @@ static void dump_packet(const struct nf_loginfo *info, /* Important fields: * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ - printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", - NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); + printk("SRC=%pI4 DST=%pI4 ", + &ih->saddr, &ih->daddr); /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", @@ -262,8 +262,7 @@ static void dump_packet(const struct nf_loginfo *info, break; case ICMP_REDIRECT: /* Max length: 24 "GATEWAY=255.255.255.255 " */ - printk("GATEWAY=%u.%u.%u.%u ", - NIPQUAD(ich->un.gateway)); + printk("GATEWAY=%pI4 ", &ich->un.gateway); /* Fall through */ case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4a7c35275396..bddadead6195 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -60,9 +60,8 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv4_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", - NIPQUAD(tuple->src.u3.ip), - NIPQUAD(tuple->dst.u3.ip)); + return seq_printf(s, "src=%pI4 dst=%pI4 ", + &tuple->src.u3.ip, &tuple->dst.u3.ip); } static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, @@ -284,17 +283,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) .tuple.dst.u3.ip; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); - pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", - NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", + &sin.sin_addr.s_addr, ntohs(sin.sin_port)); nf_ct_put(ct); if (copy_to_user(user, &sin, sizeof(sin)) != 0) return -EFAULT; else return 0; } - pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", - NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), - NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); + pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", + &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), + &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); return -ENOENT; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index ee47bf28c825..7e8e6fc75413 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -119,10 +119,9 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, (ntohl(addr.ip) & 0xff000000) == 0x7f000000) i = 0; - pr_debug("nf_nat_ras: set signal address " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(addr.ip), port, - NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.dst.u3.ip, info->sig_port[!dir]); return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. @@ -131,10 +130,9 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && port == info->sig_port[dir]) { /* GK->GW */ - pr_debug("nf_nat_ras: set signal address " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(addr.ip), port, - NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.src.u3.ip, info->sig_port[!dir]); return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. @@ -162,10 +160,9 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && port == ct->tuplehash[dir].tuple.src.u.udp.port) { - pr_debug("nf_nat_ras: set rasAddress " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(addr.ip), ntohs(port), - NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + pr_debug("nf_nat_ras: set rasAddress %pI4:%hu->%pI4:%hu\n", + &addr.ip, ntohs(port), + &ct->tuplehash[!dir].tuple.dst.u3.ip, ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, @@ -257,15 +254,15 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, } /* Success */ - pr_debug("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(rtp_exp->tuple.src.u3.ip), + pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n", + &rtp_exp->tuple.src.u3.ip, ntohs(rtp_exp->tuple.src.u.udp.port), - NIPQUAD(rtp_exp->tuple.dst.u3.ip), + &rtp_exp->tuple.dst.u3.ip, ntohs(rtp_exp->tuple.dst.u.udp.port)); - pr_debug("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(rtcp_exp->tuple.src.u3.ip), + pr_debug("nf_nat_h323: expect RTCP %pI4:%hu->%pI4:%hu\n", + &rtcp_exp->tuple.src.u3.ip, ntohs(rtcp_exp->tuple.src.u.udp.port), - NIPQUAD(rtcp_exp->tuple.dst.u3.ip), + &rtcp_exp->tuple.dst.u3.ip, ntohs(rtcp_exp->tuple.dst.u.udp.port)); return 0; @@ -307,10 +304,10 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, return -1; } - pr_debug("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.u3.ip), + pr_debug("nf_nat_h323: expect T.120 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.u3.ip), + &exp->tuple.dst.u3.ip, ntohs(exp->tuple.dst.u.tcp.port)); return 0; @@ -361,10 +358,10 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, return -1; } - pr_debug("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.u3.ip), + pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.u3.ip), + &exp->tuple.dst.u3.ip, ntohs(exp->tuple.dst.u.tcp.port)); return 0; @@ -455,10 +452,10 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, } /* Success */ - pr_debug("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.u3.ip), + pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.u3.ip), + &exp->tuple.dst.u3.ip, ntohs(exp->tuple.dst.u.tcp.port)); return 0; @@ -524,11 +521,10 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, } /* Success */ - pr_debug("nf_nat_q931: expect Call Forwarding " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.u3.ip), + pr_debug("nf_nat_q931: expect Call Forwarding %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.u3.ip), + &exp->tuple.dst.u3.ip, ntohs(exp->tuple.dst.u.tcp.port)); return 0; diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index fe6f9cef6c85..ea83a886b03e 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -55,8 +55,8 @@ static unsigned int help(struct sk_buff *skb, ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); sprintf(buffer, "%u %u", ip, port); - pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", - buffer, NIPQUAD(ip), port); + pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", + buffer, &ip, port); ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, matchoff, matchlen, buffer, diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index bea54a685109..a4f1c3479e23 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -98,8 +98,7 @@ static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) if (rt->rt_src != srcip && !warned) { printk("NAT: no longer support implicit source local NAT\n"); - printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n", - NIPQUAD(srcip), NIPQUAD(dstip)); + printk("NAT: packet src %pI4 -> dst %pI4\n", &srcip, &dstip); warned = 1; } ip_rt_put(rt); @@ -166,8 +165,7 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) struct nf_nat_range range = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; - pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n", - ct, NIPQUAD(ip)); + pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, &ip); return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 14544320c545..07d61a57613c 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -74,8 +74,7 @@ static int map_addr(struct sk_buff *skb, if (newaddr == addr->ip && newport == port) return 1; - buflen = sprintf(buffer, "%u.%u.%u.%u:%u", - NIPQUAD(newaddr), ntohs(newport)); + buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); return mangle_packet(skb, dptr, datalen, matchoff, matchlen, buffer, buflen); @@ -152,8 +151,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, &addr) > 0 && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { - __be32 ip = ct->tuplehash[!dir].tuple.dst.u3.ip; - buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); + buflen = sprintf(buffer, "%pI4", + &ct->tuplehash[!dir].tuple.dst.u3.ip); if (!mangle_packet(skb, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; @@ -166,8 +165,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, &addr) > 0 && addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - __be32 ip = ct->tuplehash[!dir].tuple.src.u3.ip; - buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); + buflen = sprintf(buffer, "%pI4", + &ct->tuplehash[!dir].tuple.src.u3.ip); if (!mangle_packet(skb, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; @@ -279,8 +278,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, if (exp->tuple.dst.u3.ip != exp->saved_ip || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { - buflen = sprintf(buffer, "%u.%u.%u.%u:%u", - NIPQUAD(newip), port); + buflen = sprintf(buffer, "%pI4:%u", &newip, port); if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, buffer, buflen)) goto err; @@ -345,7 +343,7 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, char buffer[sizeof("nnn.nnn.nnn.nnn")]; unsigned int buflen; - buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); + buflen = sprintf(buffer, "%pI4", &addr->ip); if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, buffer, buflen)) return 0; @@ -380,7 +378,7 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, unsigned int buflen; /* Mangle session description owner and contact addresses */ - buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); + buflen = sprintf(buffer, "%pI4", &addr->ip); if (mangle_sdp_packet(skb, dptr, dataoff, datalen, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 8303e4b406c0..182f845de92f 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -930,8 +930,8 @@ static inline void mangle_address(unsigned char *begin, } if (debug) - printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to " - "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr)); + printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n", + &old, addr); } } @@ -1267,9 +1267,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, */ if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { if (net_ratelimit()) - printk(KERN_WARNING "SNMP: dropping malformed packet " - "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", - NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + printk(KERN_WARNING "SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", + &iph->saddr, &iph->daddr); return NF_DROP; } -- cgit v1.2.3 From 673d57e72398edfedc93fb50ff58048077c9d587 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 31 Oct 2008 00:53:57 -0700 Subject: net: replace NIPQUAD() in net/ipv4/ net/ipv6/ Using NIPQUAD() with NIPQUAD_FMT, %d.%d.%d.%d or %u.%u.%u.%u can be replaced with %pI4 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 7 ++----- net/ipv4/arp.c | 4 ++-- net/ipv4/fib_trie.c | 6 +++--- net/ipv4/icmp.c | 24 ++++++++++-------------- net/ipv4/ip_fragment.c | 5 ++--- net/ipv4/ip_input.c | 6 ++---- net/ipv4/ipcomp.c | 4 ++-- net/ipv4/ipconfig.c | 40 ++++++++++++++++++---------------------- net/ipv4/route.c | 40 +++++++++++++++++----------------------- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_ipv4.c | 13 +++++-------- net/ipv4/tcp_probe.c | 7 +++---- net/ipv4/tcp_timer.c | 4 ++-- net/ipv4/udp.c | 12 ++++++------ net/ipv6/netfilter/ip6t_LOG.c | 5 ++--- 15 files changed, 78 insertions(+), 103 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1fbff5fa4241..e3286814c8d9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1070,11 +1070,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) return 0; if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "%s(): shifting inet->" - "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n", - __func__, - NIPQUAD(old_saddr), - NIPQUAD(new_saddr)); + printk(KERN_INFO "%s(): shifting inet->saddr from %pI4 to %pI4\n", + __func__, &old_saddr, &new_saddr); } inet->saddr = inet->rcv_saddr = new_saddr; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1a9dd66511fc..051a62fc8189 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1308,7 +1308,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) } #endif - sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->primary_key)); + sprintf(tbuf, "%pI4", n->primary_key); seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); @@ -1321,7 +1321,7 @@ static void arp_format_pneigh_entry(struct seq_file *seq, int hatype = dev ? dev->type : 0; char tbuf[16]; - sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->key)); + sprintf(tbuf, "%pI4", n->key); seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", dev ? dev->name : "*"); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5cb72786a8af..ec0ae490f0b6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2399,8 +2399,8 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- " NIPQUAD_FMT "/%d %d %d %d\n", - NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, + seq_printf(seq, " +-- %pI4/%d %d %d %d\n", + &prf, tn->pos, tn->bits, tn->full_children, tn->empty_children); } else { @@ -2410,7 +2410,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) __be32 val = htonl(l->key); seq_indent(seq, iter->depth); - seq_printf(seq, " |-- " NIPQUAD_FMT "\n", NIPQUAD(val)); + seq_printf(seq, " |-- %pI4\n", &val); hlist_for_each_entry_rcu(li, node, &l->list, hlist) { struct fib_alias *fa; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e9d6ea0b49ca..21e497efbd7f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -683,10 +683,8 @@ static void icmp_unreach(struct sk_buff *skb) break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": " - "fragmentation needed " - "and DF set.\n", - NIPQUAD(iph->daddr)); + LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set.\n", + &iph->daddr); } else { info = ip_rt_frag_needed(net, iph, ntohs(icmph->un.frag.mtu), @@ -696,9 +694,8 @@ static void icmp_unreach(struct sk_buff *skb) } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": Source " - "Route Failed.\n", - NIPQUAD(iph->daddr)); + LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed.\n", + &iph->daddr); break; default: break; @@ -729,12 +726,12 @@ static void icmp_unreach(struct sk_buff *skb) if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) - printk(KERN_WARNING NIPQUAD_FMT " sent an invalid ICMP " + printk(KERN_WARNING "%pI4 sent an invalid ICMP " "type %u, code %u " - "error to a broadcast: " NIPQUAD_FMT " on %s\n", - NIPQUAD(ip_hdr(skb)->saddr), + "error to a broadcast: %pI4 on %s\n", + &ip_hdr(skb)->saddr, icmph->type, icmph->code, - NIPQUAD(iph->daddr), + &iph->daddr, skb->dev->name); goto out; } @@ -952,9 +949,8 @@ static void icmp_address_reply(struct sk_buff *skb) break; } if (!ifa && net_ratelimit()) { - printk(KERN_INFO "Wrong address mask " NIPQUAD_FMT " from " - "%s/" NIPQUAD_FMT "\n", - NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src)); + printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", + mp, dev->name, &rt->rt_src); } } rcu_read_unlock(); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e4f81f54befe..8c495e34d563 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -559,9 +559,8 @@ out_nomem: goto out_fail; out_oversize: if (net_ratelimit()) - printk(KERN_INFO - "Oversized IP packet from " NIPQUAD_FMT ".\n", - NIPQUAD(qp->saddr)); + printk(KERN_INFO "Oversized IP packet from %pI4.\n", + &qp->saddr); out_fail: IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); return err; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 861978a4f1a8..1b6475c05eb4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -294,10 +294,8 @@ static inline int ip_rcv_options(struct sk_buff *skb) if (!IN_DEV_SOURCE_ROUTE(in_dev)) { if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_INFO "source route option " - NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); + printk(KERN_INFO "source route option %pI4 -> %pI4\n", + &iph->saddr, &iph->daddr); in_dev_put(in_dev); goto drop; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38ccb6dfb02e..ec8264ae45c2 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -39,8 +39,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) spi, IPPROTO_COMP, AF_INET); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIPQUAD_FMT "\n", - spi, NIPQUAD(iph->daddr)); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", + spi, &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 42065fff46c4..42a0f3dd3fd6 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -374,7 +374,7 @@ static int __init ic_defaults(void) */ if (!ic_host_name_set) - sprintf(init_utsname()->nodename, NIPQUAD_FMT, NIPQUAD(ic_myaddr)); + sprintf(init_utsname()->nodename, "%pI4", &ic_myaddr); if (root_server_addr == NONE) root_server_addr = ic_servaddr; @@ -387,11 +387,11 @@ static int __init ic_defaults(void) else if (IN_CLASSC(ntohl(ic_myaddr))) ic_netmask = htonl(IN_CLASSC_NET); else { - printk(KERN_ERR "IP-Config: Unable to guess netmask for address " NIPQUAD_FMT "\n", - NIPQUAD(ic_myaddr)); + printk(KERN_ERR "IP-Config: Unable to guess netmask for address %pI4\n", + &ic_myaddr); return -1; } - printk("IP-Config: Guessing netmask " NIPQUAD_FMT "\n", NIPQUAD(ic_netmask)); + printk("IP-Config: Guessing netmask %pI4\n", &ic_netmask); } return 0; @@ -979,10 +979,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str ic_myaddr = b->your_ip; ic_servaddr = server_id; #ifdef IPCONFIG_DEBUG - printk("DHCP: Offered address " NIPQUAD_FMT, - NIPQUAD(ic_myaddr)); - printk(" by server " NIPQUAD_FMT "\n", - NIPQUAD(ic_servaddr)); + printk("DHCP: Offered address %pI4 by server %pI4\n", + &ic_myaddr, &ic_servaddr); #endif /* The DHCP indicated server address takes * precedence over the bootp header one if @@ -1177,11 +1175,11 @@ static int __init ic_dynamic(void) return -1; } - printk("IP-Config: Got %s answer from " NIPQUAD_FMT ", ", + printk("IP-Config: Got %s answer from %pI4, ", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - NIPQUAD(ic_servaddr)); - printk("my address is " NIPQUAD_FMT "\n", NIPQUAD(ic_myaddr)); + &ic_servaddr); + printk("my address is %pI4\n", &ic_myaddr); return 0; } @@ -1206,14 +1204,12 @@ static int pnp_seq_show(struct seq_file *seq, void *v) "domain %s\n", ic_domain); for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { if (ic_nameservers[i] != NONE) - seq_printf(seq, - "nameserver " NIPQUAD_FMT "\n", - NIPQUAD(ic_nameservers[i])); + seq_printf(seq, "nameserver %pI4\n", + &ic_nameservers[i]); } if (ic_servaddr != NONE) - seq_printf(seq, - "bootserver " NIPQUAD_FMT "\n", - NIPQUAD(ic_servaddr)); + seq_printf(seq, "bootserver %pI4\n", + &ic_servaddr); return 0; } @@ -1387,13 +1383,13 @@ static int __init ip_auto_config(void) */ printk("IP-Config: Complete:"); printk("\n device=%s", ic_dev->name); - printk(", addr=" NIPQUAD_FMT, NIPQUAD(ic_myaddr)); - printk(", mask=" NIPQUAD_FMT, NIPQUAD(ic_netmask)); - printk(", gw=" NIPQUAD_FMT, NIPQUAD(ic_gateway)); + printk(", addr=%pI4", &ic_myaddr); + printk(", mask=%pI4", &ic_netmask); + printk(", gw=%pI4", &ic_gateway); printk(",\n host=%s, domain=%s, nis-domain=%s", utsname()->nodename, ic_domain, utsname()->domainname); - printk(",\n bootserver=" NIPQUAD_FMT, NIPQUAD(ic_servaddr)); - printk(", rootserver=" NIPQUAD_FMT, NIPQUAD(root_server_addr)); + printk(",\n bootserver=%pI4", &ic_servaddr); + printk(", rootserver=%pI4", &root_server_addr); printk(", rootpath=%s", root_server_path); printk("\n"); #endif /* !SILENT */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e59b4dcf6778..f47b9db0db7f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1214,10 +1214,9 @@ restart: #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { struct rtable *trt; - printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash, - NIPQUAD(rt->rt_dst)); + printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) - printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst)); + printk(" . %pI4", &trt->rt_dst); printk("\n"); } #endif @@ -1444,11 +1443,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, reject_redirect: #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about " - NIPQUAD_FMT " ignored.\n" - " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", - NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), - NIPQUAD(saddr), NIPQUAD(daddr)); + printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n" + " Advised path = %pI4 -> %pI4\n", + &old_gw, dev->name, &new_gw, + &saddr, &daddr); #endif in_dev_put(in_dev); } @@ -1468,9 +1466,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt->fl.oif, rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 - printk(KERN_DEBUG "ipv4_negative_advice: redirect to " - NIPQUAD_FMT "/%02x dropped\n", - NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); + printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", + &rt->rt_dst, rt->fl.fl4_tos); #endif rt_del(hash, rt); ret = NULL; @@ -1534,10 +1531,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (IN_DEV_LOG_MARTIANS(in_dev) && rt->u.dst.rate_tokens == ip_rt_redirect_number && net_ratelimit()) - printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores " - "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n", - NIPQUAD(rt->rt_src), rt->rt_iif, - NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); + printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", + &rt->rt_src, rt->rt_iif, + &rt->rt_dst, &rt->rt_gateway); #endif } out: @@ -1730,8 +1726,8 @@ static void ipv4_link_failure(struct sk_buff *skb) static int ip_rt_bug(struct sk_buff *skb) { - printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n", - NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), + printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, skb->dev ? skb->dev->name : "?"); kfree_skb(skb); return 0; @@ -1908,9 +1904,8 @@ static void ip_handle_martian_source(struct net_device *dev, * RFC1812 recommendation, if source is martian, * the only hint is MAC header. */ - printk(KERN_WARNING "martian source " NIPQUAD_FMT " from " - NIPQUAD_FMT", on dev %s\n", - NIPQUAD(daddr), NIPQUAD(saddr), dev->name); + printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n", + &daddr, &saddr, dev->name); if (dev->hard_header_len && skb_mac_header_was_set(skb)) { int i; const unsigned char *p = skb_mac_header(skb); @@ -2219,9 +2214,8 @@ martian_destination: RT_CACHE_STAT_INC(in_martian_dst); #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from " - NIPQUAD_FMT ", dev %s\n", - NIPQUAD(daddr), NIPQUAD(saddr), dev->name); + printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n", + &daddr, &saddr, dev->name); #endif e_hostunreach: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 04909e4b3c4c..097294b7da3e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2336,9 +2336,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { - printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n", + printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, - NIPQUAD(inet->daddr), ntohs(inet->dport), + &inet->daddr, ntohs(inet->dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5c8fa7f1e327..ef33246e6a6f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1139,10 +1139,9 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash failed for " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest), + printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", + &iph->saddr, ntohs(th->source), + &iph->daddr, ntohs(th->dest), genhash ? " tcp_v4_calc_md5_hash failed" : ""); } return 1; @@ -1297,10 +1296,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * to destinations, already remembered * to the moment of synflood. */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " - "request from " NIPQUAD_FMT "/%u\n", - NIPQUAD(saddr), - ntohs(tcp_hdr(skb)->source)); + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n", + &saddr, ntohs(tcp_hdr(skb)->source)); goto drop_and_release; } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 7ddc30f0744f..25524d4e372a 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -153,12 +153,11 @@ static int tcpprobe_sprint(char *tbuf, int n) = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); return snprintf(tbuf, n, - "%lu.%09lu " NIPQUAD_FMT ":%u " NIPQUAD_FMT ":%u" - " %d %#x %#x %u %u %u %u\n", + "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec, - NIPQUAD(p->saddr), ntohs(p->sport), - NIPQUAD(p->daddr), ntohs(p->dport), + &p->saddr, ntohs(p->sport), + &p->daddr, ntohs(p->dport), p->length, p->snd_nxt, p->snd_una, p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 979c9d604eb0..9843f6cb40a5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -299,8 +299,8 @@ static void tcp_retransmit_timer(struct sock *sk) #ifdef TCP_DEBUG struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIPQUAD_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", - NIPQUAD(inet->daddr), ntohs(inet->dport), + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %pI4:%u/%u shrinks window %u:%u. Repaired.\n", + &inet->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f760b86e7530..bcb5def2b09e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1263,13 +1263,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", - NIPQUAD(saddr), + &saddr, ntohs(uh->source), ulen, skb->len, - NIPQUAD(daddr), + &daddr, ntohs(uh->dest)); goto drop; @@ -1278,11 +1278,11 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", - NIPQUAD(saddr), + &saddr, ntohs(uh->source), - NIPQUAD(daddr), + &daddr, ntohs(uh->dest), ulen); drop: diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 02885e8bb69b..7c668c63f708 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -424,9 +424,8 @@ ip6t_log_packet(u_int8_t pf, if (skb->dev->type == ARPHRD_SIT) { const struct iphdr *iph = (struct iphdr *)skb_mac_header(skb); - printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ", - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); + printk("TUNNEL=%pI4->%pI4 ", + &iph->saddr, &iph->daddr); } } else printk(" "); -- cgit v1.2.3 From c37ccc0d4e2a4ee52f1a40cff1be0049f2104bba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Nov 2008 21:19:18 -0700 Subject: udp: add a missing smp_wmb() in udp_lib_get_port() Corey Minyard spotted a missing memory barrier in udp_lib_get_port() We need to make sure a reader cannot read the new 'sk->sk_next' value and previous value of 'sk->sk_hash'. Or else, an item could be deleted from a chain, and inserted into another chain. If new chain was empty before the move, 'next' pointer is NULL, and lockless reader can not detect it missed following items in original chain. This patch is temporary, since we expect an upcoming patch to introduce another way of handling the problem. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bcb5def2b09e..7e4d9c871153 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -189,6 +189,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { + /* + * We need that previous write to sk->sk_hash committed + * before write to sk->next done in following add_node() variant + */ + smp_wmb(); sk_add_node_rcu(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } -- cgit v1.2.3 From ae27e98a51526595837ab7498b23d6478a198960 Mon Sep 17 00:00:00 2001 From: Sangtae Ha Date: Wed, 29 Oct 2008 00:07:18 -0400 Subject: [TCP] CUBIC v2.3 Signed-off-by: Sangtae Ha Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 109 insertions(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4a1221e5e8ee..ee467ec40c4f 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -1,13 +1,23 @@ /* - * TCP CUBIC: Binary Increase Congestion control for TCP v2.2 + * TCP CUBIC: Binary Increase Congestion control for TCP v2.3 * Home page: * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC * This is from the implementation of CUBIC TCP in - * Injong Rhee, Lisong Xu. - * "CUBIC: A New TCP-Friendly High-Speed TCP Variant - * in PFLDnet 2005 + * Sangtae Ha, Injong Rhee and Lisong Xu, + * "CUBIC: A New TCP-Friendly High-Speed TCP Variant" + * in ACM SIGOPS Operating System Review, July 2008. * Available from: - * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf + * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf + * + * CUBIC integrates a new slow start algorithm, called HyStart. + * The details of HyStart are presented in + * Sangtae Ha and Injong Rhee, + * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008. + * Available from: + * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf + * + * All testing results are available from: + * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing * * Unless CUBIC is enabled and congestion window is large * this behaves the same as the original Reno. @@ -23,12 +33,26 @@ */ #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ +/* Two methods of hybrid slow start */ +#define HYSTART_ACK_TRAIN 0x1 +#define HYSTART_DELAY 0x2 + +/* Number of delay samples for detecting the increase of delay */ +#define HYSTART_MIN_SAMPLES 8 +#define HYSTART_DELAY_MIN (2U<<3) +#define HYSTART_DELAY_MAX (16U<<3) +#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) + static int fast_convergence __read_mostly = 1; static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ static int initial_ssthresh __read_mostly; static int bic_scale __read_mostly = 41; static int tcp_friendliness __read_mostly = 1; +static int hystart __read_mostly = 1; +static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; +static int hystart_low_window __read_mostly = 16; + static u32 cube_rtt_scale __read_mostly; static u32 beta_scale __read_mostly; static u64 cube_factor __read_mostly; @@ -44,6 +68,13 @@ module_param(bic_scale, int, 0444); MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); module_param(tcp_friendliness, int, 0644); MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); +module_param(hystart, int, 0644); +MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); +module_param(hystart_detect, int, 0644); +MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" + " 1: packet-train 2: delay 3: both packet-train and delay"); +module_param(hystart_low_window, int, 0644); +MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); /* BIC TCP Parameters */ struct bictcp { @@ -59,7 +90,13 @@ struct bictcp { u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ #define ACK_RATIO_SHIFT 4 - u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ + u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ + u8 sample_cnt; /* number of samples to decide curr_rtt */ + u8 found; /* the exit point is found? */ + u32 round_start; /* beginning of each round */ + u32 end_seq; /* end_seq of the round */ + u32 last_jiffies; /* last time when the ACK spacing is close */ + u32 curr_rtt; /* the minimum rtt of current round */ }; static inline void bictcp_reset(struct bictcp *ca) @@ -76,12 +113,28 @@ static inline void bictcp_reset(struct bictcp *ca) ca->delayed_ack = 2 << ACK_RATIO_SHIFT; ca->ack_cnt = 0; ca->tcp_cwnd = 0; + ca->found = 0; +} + +static inline void bictcp_hystart_reset(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + ca->round_start = ca->last_jiffies = jiffies; + ca->end_seq = tp->snd_nxt; + ca->curr_rtt = 0; + ca->sample_cnt = 0; } static void bictcp_init(struct sock *sk) { bictcp_reset(inet_csk_ca(sk)); - if (initial_ssthresh) + + if (hystart) + bictcp_hystart_reset(sk); + + if (!hystart && initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } @@ -235,9 +288,11 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (hystart && after(ack, ca->end_seq)) + bictcp_hystart_reset(sk); tcp_slow_start(tp); - else { + } else { bictcp_update(ca, tp->snd_cwnd); /* In dangerous area, increase slowly. @@ -281,8 +336,45 @@ static u32 bictcp_undo_cwnd(struct sock *sk) static void bictcp_state(struct sock *sk, u8 new_state) { - if (new_state == TCP_CA_Loss) + if (new_state == TCP_CA_Loss) { bictcp_reset(inet_csk_ca(sk)); + bictcp_hystart_reset(sk); + } +} + +static void hystart_update(struct sock *sk, u32 delay) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + if (!(ca->found & hystart_detect)) { + u32 curr_jiffies = jiffies; + + /* first detection parameter - ack-train detection */ + if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { + ca->last_jiffies = curr_jiffies; + if (curr_jiffies - ca->round_start >= ca->delay_min>>4) + ca->found |= HYSTART_ACK_TRAIN; + } + + /* obtain the minimum delay of more than sampling packets */ + if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { + if (ca->curr_rtt == 0 || ca->curr_rtt > delay) + ca->curr_rtt = delay; + + ca->sample_cnt++; + } else { + if (ca->curr_rtt > ca->delay_min + + HYSTART_DELAY_THRESH(ca->delay_min>>4)) + ca->found |= HYSTART_DELAY; + } + /* + * Either one of two conditions are met, + * we exit from slow start immediately. + */ + if (ca->found & hystart_detect) + tp->snd_ssthresh = tp->snd_cwnd; + } } /* Track delayed acknowledgment ratio using sliding window @@ -291,6 +383,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) { const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; @@ -314,6 +407,11 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) /* first time call or link delay decreases */ if (ca->delay_min == 0 || ca->delay_min > delay) ca->delay_min = delay; + + /* hystart triggers when cwnd is larger than some threshold */ + if (hystart && tp->snd_cwnd <= tp->snd_ssthresh && + tp->snd_cwnd >= hystart_low_window) + hystart_update(sk, delay); } static struct tcp_congestion_ops cubictcp = { @@ -372,4 +470,4 @@ module_exit(cubictcp_unregister); MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CUBIC TCP"); -MODULE_VERSION("2.2"); +MODULE_VERSION("2.3"); -- cgit v1.2.3 From d9319100c1ad7d0ed4045ded767684ad25670436 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 00:23:42 -0800 Subject: net: clean up net/ipv4/ah4.c esp4.c fib_semantics.c inet_connection_sock.c inetpeer.c ip_output.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 4 ++-- net/ipv4/esp4.c | 4 ++-- net/ipv4/fib_semantics.c | 8 ++++---- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/inetpeer.c | 2 +- net/ipv4/ip_output.c | 6 +++--- 6 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 8219b7e0968d..3f205181712d 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -201,8 +201,8 @@ out: static void ah4_err(struct sk_buff *skb, u32 info) { - struct iphdr *iph = (struct iphdr*)skb->data; - struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2)); + struct iphdr *iph = (struct iphdr *)skb->data; + struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 21515d4c49eb..95a9c65003f8 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -413,8 +413,8 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) static void esp4_err(struct sk_buff *skb, u32 info) { - struct iphdr *iph = (struct iphdr*)skb->data; - struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); + struct iphdr *iph = (struct iphdr *)skb->data; + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ded2ae34eab1..4817dea3bc73 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -63,16 +63,16 @@ static DEFINE_SPINLOCK(fib_multipath_lock); for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ -for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) +for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) #else /* CONFIG_IP_ROUTE_MULTIPATH */ /* Hope, that gcc will optimize it to get rid of dummy loop */ -#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ +#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ for (nhsel=0; nhsel < 1; nhsel++) -#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ +#define change_nexthops(fi) { int nhsel = 0; struct fib_nh * nh = (struct fib_nh *)((fi)->fib_nh); \ for (nhsel=0; nhsel < 1; nhsel++) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -358,7 +358,7 @@ int fib_detect_death(struct fib_info *fi, int order, state = n->nud_state; neigh_release(n); } - if (state==NUD_REACHABLE) + if (state == NUD_REACHABLE) return 0; if ((state&NUD_VALID) && order != dflt) return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bd1278a2d828..36f4cbc7da3a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -323,7 +323,7 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); -struct dst_entry* inet_csk_route_req(struct sock *sk, +struct dst_entry *inet_csk_route_req(struct sock *sk, const struct request_sock *req) { struct rtable *rt; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index a456ceeac3f2..b1fbe18feb5a 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -144,7 +144,7 @@ static void unlink_from_unused(struct inet_peer *p) * _stack is known to be NULL or not at compile time, * so compiler will optimize the if (_stack) tests. */ -#define lookup(_daddr,_stack) \ +#define lookup(_daddr, _stack) \ ({ \ struct inet_peer *u, **v; \ if (_stack != NULL) { \ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d2a8f8bb78a6..46d7be233eac 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -430,7 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) +int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct iphdr *iph; int raw = 0; @@ -720,7 +720,7 @@ static inline int ip_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu,unsigned int flags) + int transhdrlen, int mtu, unsigned int flags) { struct sk_buff *skb; int err; @@ -741,7 +741,7 @@ static inline int ip_ufo_append_data(struct sock *sk, skb_reserve(skb, hh_len); /* create space for UDP/IP header */ - skb_put(skb,fragheaderlen + transhdrlen); + skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ skb_reset_network_header(skb); -- cgit v1.2.3 From 5a5f3a8db9d70c90e9d55b46e02b2d8deb1c2c2e Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 00:24:34 -0800 Subject: net: clean up net/ipv4/ipip.c raw.c tcp.c tcp_minisocks.c tcp_yeah.c xfrm4_policy.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 2 +- net/ipv4/raw.c | 8 ++++---- net/ipv4/tcp.c | 6 +++--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_yeah.c | 4 ++-- net/ipv4/xfrm4_policy.c | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 29609d29df76..b3c3d7b0d116 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -281,7 +281,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ - struct iphdr *iph = (struct iphdr*)skb->data; + struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index cd975743bcd2..998fcffc9e15 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) } if (inet->recverr) { - struct iphdr *iph = (struct iphdr*)skb->data; + struct iphdr *iph = (struct iphdr *)skb->data; u8 *payload = skb->data + (iph->ihl << 2); if (inet->hdrincl) @@ -465,7 +465,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, */ if (msg->msg_namelen) { - struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name; + struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; @@ -851,7 +851,7 @@ struct proto raw_prot = { static struct sock *raw_get_first(struct seq_file *seq) { struct sock *sk; - struct raw_iter_state* state = raw_seq_private(seq); + struct raw_iter_state *state = raw_seq_private(seq); for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { @@ -868,7 +868,7 @@ found: static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) { - struct raw_iter_state* state = raw_seq_private(seq); + struct raw_iter_state *state = raw_seq_private(seq); do { sk = sk_next(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eccb7165a80c..60c28add96b8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1681,7 +1681,7 @@ void tcp_set_state(struct sock *sk, int state) inet_put_port(sk); /* fall through */ default: - if (oldstate==TCP_ESTABLISHED) + if (oldstate == TCP_ESTABLISHED) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); } @@ -1691,7 +1691,7 @@ void tcp_set_state(struct sock *sk, int state) sk->sk_state = state; #ifdef STATE_TRACE - SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]); + SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); #endif } EXPORT_SYMBOL_GPL(tcp_set_state); @@ -2651,7 +2651,7 @@ EXPORT_SYMBOL(tcp_md5_hash_key); void tcp_done(struct sock *sk) { - if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) + if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 779f2e9d0689..f67effbb102b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -491,7 +491,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, * as a request_sock. */ -struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, +struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct request_sock **prev) { diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e03b10183a8b..9ec843a9bbb2 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -83,7 +83,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) else if (!yeah->doing_reno_now) { /* Scalable */ - tp->snd_cwnd_cnt+=yeah->pkts_acked; + tp->snd_cwnd_cnt += yeah->pkts_acked; if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; @@ -224,7 +224,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); } else - reduction = max(tp->snd_cwnd>>1,2U); + reduction = max(tp->snd_cwnd>>1, 2U); yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c63de0a72aba..f9a775b7e796 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -65,7 +65,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) read_lock_bh(&policy->lock); for (dst = policy->bundles; dst; dst = dst->next) { - struct xfrm_dst *xdst = (struct xfrm_dst*)dst; + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && -- cgit v1.2.3 From 6ed2533e55889943c478d11b1f63aaed2fd767cc Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 00:25:16 -0800 Subject: net: clean up net/ipv4/fib_frontend.c fib_hash.c ip_gre.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 10 +++++----- net/ipv4/fib_hash.c | 12 ++++++------ net/ipv4/ip_gre.c | 14 +++++++------- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 65c1503f8cc8..741e4fa3e474 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -578,7 +578,7 @@ errout: return err; } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -600,7 +600,7 @@ errout: return err; } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -903,7 +903,7 @@ static void fib_disable_ip(struct net_device *dev, int force) static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct net_device *dev = ifa->ifa_dev->dev; switch (event) { @@ -964,11 +964,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo } static struct notifier_block fib_inetaddr_notifier = { - .notifier_call =fib_inetaddr_event, + .notifier_call = fib_inetaddr_event, }; static struct notifier_block fib_netdev_notifier = { - .notifier_call =fib_netdev_event, + .notifier_call = fib_netdev_event, }; static int __net_init ip_fib_net_init(struct net *net) diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index c8cac6c7f881..ded8c44fb848 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -247,7 +247,7 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result { int err; struct fn_zone *fz; - struct fn_hash *t = (struct fn_hash*)tb->tb_data; + struct fn_hash *t = (struct fn_hash *)tb->tb_data; read_lock(&fib_hash_lock); for (fz = t->fn_zone_list; fz; fz = fz->fz_next) { @@ -283,7 +283,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib struct fib_node *f; struct fib_info *fi = NULL; struct fib_info *last_resort; - struct fn_hash *t = (struct fn_hash*)tb->tb_data; + struct fn_hash *t = (struct fn_hash *)tb->tb_data; struct fn_zone *fz = t->fn_zones[0]; if (fz == NULL) @@ -548,7 +548,7 @@ out: static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) { - struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_hash *table = (struct fn_hash *)tb->tb_data; struct fib_node *f; struct fib_alias *fa, *fa_to_delete; struct fn_zone *fz; @@ -748,7 +748,7 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin { int m, s_m; struct fn_zone *fz; - struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_hash *table = (struct fn_hash *)tb->tb_data; s_m = cb->args[2]; read_lock(&fib_hash_lock); @@ -845,10 +845,10 @@ static struct fib_alias *fib_get_first(struct seq_file *seq) struct hlist_node *node; struct fib_node *fn; - hlist_for_each_entry(fn,node,iter->hash_head,fn_hash) { + hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { struct fib_alias *fa; - list_for_each_entry(fa,&fn->fn_alias,fa_list) { + list_for_each_entry(fa, &fn->fn_alias, fa_list) { iter->fn = fn; iter->fa = fa; goto out; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 85c487b8572b..191ef7588134 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -371,7 +371,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) by themself??? */ - struct iphdr *iph = (struct iphdr*)skb->data; + struct iphdr *iph = (struct iphdr *)skb->data; __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; @@ -632,7 +632,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; - tiph = (struct iphdr*)skb->data; + tiph = (struct iphdr *)skb->data; } else { gre_hlen = tunnel->hlen; tiph = &tunnel->parms.iph; @@ -660,7 +660,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (neigh == NULL) goto tx_error; - addr6 = (struct in6_addr*)&neigh->primary_key; + addr6 = (struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) { @@ -726,7 +726,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } #ifdef CONFIG_IPV6 else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info*)skb->dst; + struct rt6_info *rt6 = (struct rt6_info *)skb->dst; if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { if ((tunnel->parms.iph.daddr && @@ -800,7 +800,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->ttl = old_iph->ttl; #ifdef CONFIG_IPV6 else if (skb->protocol == htons(ETH_P_IPV6)) - iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; + iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); @@ -962,7 +962,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) break; } } else { - unsigned nflags=0; + unsigned nflags = 0; t = netdev_priv(dev); @@ -1104,7 +1104,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) { - struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); + struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); memcpy(haddr, &iph->saddr, 4); return 4; } -- cgit v1.2.3 From a7e9ff735bd5e3437a8e5ecbbc2db3865974a523 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 00:26:09 -0800 Subject: net: clean up net/ipv4/igmp.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a0d86455c53e..f92733e15c9f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -167,7 +167,7 @@ static __inline__ void igmp_stop_timer(struct ip_mc_list *im) spin_lock_bh(&im->lock); if (del_timer(&im->timer)) atomic_dec(&im->refcnt); - im->tm_running=0; + im->tm_running = 0; im->reporter = 0; im->unsolicit_count = 0; spin_unlock_bh(&im->lock); @@ -176,9 +176,9 @@ static __inline__ void igmp_stop_timer(struct ip_mc_list *im) /* It must be called with locked im->lock */ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) { - int tv=net_random() % max_delay; + int tv = net_random() % max_delay; - im->tm_running=1; + im->tm_running = 1; if (!mod_timer(&im->timer, jiffies+tv+2)) atomic_inc(&im->refcnt); } @@ -207,7 +207,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay) if (del_timer(&im->timer)) { if ((long)(im->timer.expires-jiffies) < max_delay) { add_timer(&im->timer); - im->tm_running=1; + im->tm_running = 1; spin_unlock_bh(&im->lock); return; } @@ -358,7 +358,7 @@ static int igmpv3_sendpack(struct sk_buff *skb) static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) { - return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc,type,gdel,sdel); + return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc, type, gdel, sdel); } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, @@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; @@ -682,11 +682,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, ((u8*)&iph[1])[3] = 0; ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); - ih->type=type; - ih->code=0; - ih->csum=0; - ih->group=group; - ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr)); + ih->type = type; + ih->code = 0; + ih->csum = 0; + ih->group = group; + ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); return ip_local_out(skb); } @@ -728,7 +728,7 @@ static void igmp_timer_expire(unsigned long data) struct in_device *in_dev = im->interface; spin_lock(&im->lock); - im->tm_running=0; + im->tm_running = 0; if (im->unsolicit_count) { im->unsolicit_count--; @@ -997,7 +997,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) --ANK */ if (arp_mc_map(addr, buf, dev, 0) == 0) - dev_mc_add(dev,buf,dev->addr_len,0); + dev_mc_add(dev, buf, dev->addr_len, 0); } /* @@ -1010,7 +1010,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) struct net_device *dev = in_dev->dev; if (arp_mc_map(addr, buf, dev, 0) == 0) - dev_mc_delete(dev,buf,dev->addr_len,0); + dev_mc_delete(dev, buf, dev->addr_len, 0); } #ifdef CONFIG_IP_MULTICAST @@ -1210,10 +1210,10 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) if (!im) goto out; - im->users=1; - im->interface=in_dev; + im->users = 1; + im->interface = in_dev; in_dev_hold(in_dev); - im->multiaddr=addr; + im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; im->sfcount[MCAST_INCLUDE] = 0; @@ -1224,7 +1224,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - im->tm_running=0; + im->tm_running = 0; setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; im->reporter = 0; @@ -1232,8 +1232,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) #endif im->loaded = 0; write_lock_bh(&in_dev->mc_list_lock); - im->next=in_dev->mc_list; - in_dev->mc_list=im; + im->next = in_dev->mc_list; + in_dev->mc_list = im; in_dev->mc_count++; write_unlock_bh(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST @@ -1279,7 +1279,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { - if (i->multiaddr==addr) { + if (i->multiaddr == addr) { if (--i->users == 0) { write_lock_bh(&in_dev->mc_list_lock); *ip = i->next; @@ -1738,7 +1738,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { int err; __be32 addr = imr->imr_multiaddr.s_addr; - struct ip_mc_socklist *iml=NULL, *i; + struct ip_mc_socklist *iml = NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); @@ -1769,7 +1769,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -ENOBUFS; if (count >= sysctl_igmp_max_memberships) goto done; - iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL); + iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (iml == NULL) goto done; -- cgit v1.2.3 From 09cb105ea78d5644570d52085e2149f784575872 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 00:27:11 -0800 Subject: net: clean up net/ipv4/ip_sockglue.c tcp_output.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 32 ++++++++++++++++---------------- net/ipv4/tcp_output.c | 8 ++++---- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 465abf0a9869..e976efeb1456 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -94,7 +94,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) { unsigned char optbuf[sizeof(struct ip_options) + 40]; - struct ip_options * opt = (struct ip_options*)optbuf; + struct ip_options * opt = (struct ip_options *)optbuf; if (IPCB(skb)->opt.optlen == 0) return; @@ -411,7 +411,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { struct inet_sock *inet = inet_sk(sk); - int val=0,err; + int val = 0, err; if (((1< 255) goto e_inval; @@ -573,12 +573,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { - if (copy_from_user(&mreq,optval,sizeof(mreq))) + if (copy_from_user(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); if (optlen >= sizeof(struct in_addr) && - copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr))) + copy_from_user(&mreq.imr_address, optval, sizeof(struct in_addr))) break; } @@ -626,11 +626,11 @@ static int do_ip_setsockopt(struct sock *sk, int level, goto e_inval; err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { - if (copy_from_user(&mreq,optval,sizeof(mreq))) + if (copy_from_user(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); - if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq))) + if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) break; } @@ -808,7 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -ENOBUFS; break; } - gsf = kmalloc(optlen,GFP_KERNEL); + gsf = kmalloc(optlen, GFP_KERNEL); if (!gsf) { err = -ENOBUFS; break; @@ -828,7 +828,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, goto mc_msf_out; } msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); - msf = kmalloc(msize,GFP_KERNEL); + msf = kmalloc(msize, GFP_KERNEL); if (!msf) { err = -ENOBUFS; goto mc_msf_out; @@ -971,9 +971,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; if (ip_mroute_opt(optname)) - return ip_mroute_getsockopt(sk,optname,optval,optlen); + return ip_mroute_getsockopt(sk, optname, optval, optlen); - if (get_user(len,optlen)) + if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; @@ -984,7 +984,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_OPTIONS: { unsigned char optbuf[sizeof(struct ip_options)+40]; - struct ip_options * opt = (struct ip_options*)optbuf; + struct ip_options * opt = (struct ip_options *)optbuf; opt->optlen = 0; if (inet->opt) memcpy(optbuf, inet->opt, @@ -1154,13 +1154,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, len = 1; if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval,&ucval,1)) + if (copy_to_user(optval, &ucval, 1)) return -EFAULT; } else { len = min_t(unsigned int, sizeof(int), len); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval,&val,len)) + if (copy_to_user(optval, &val, len)) return -EFAULT; } return 0; @@ -1178,7 +1178,7 @@ int ip_getsockopt(struct sock *sk, int level, !ip_mroute_opt(optname)) { int len; - if (get_user(len,optlen)) + if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba85d8831893..a524627923ae 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -42,7 +42,7 @@ /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse __read_mostly = 1; -/* People can turn this on to work with those rare, broken TCPs that +/* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; @@ -484,7 +484,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; - if(likely(opts->ws)) + if (likely(opts->ws)) size += TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { @@ -526,7 +526,7 @@ static unsigned tcp_synack_options(struct sock *sk, if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; - if(likely(opts->ws)) + if (likely(opts->ws)) size += TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { @@ -1172,7 +1172,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, static inline int tcp_minshall_check(const struct tcp_sock *tp) { - return after(tp->snd_sml,tp->snd_una) && + return after(tp->snd_sml, tp->snd_una) && !after(tp->snd_sml, tp->snd_nxt); } -- cgit v1.2.3 From c354e1246348e25c714e6b2973f3257183d06e2c Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 00:28:02 -0800 Subject: net: clean up net/ipv4/ipmr.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 130 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 65 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b42e082cc170..05ed336f798a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -331,7 +331,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c) atomic_dec(&cache_resolve_queue_len); - while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) { + while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; @@ -477,13 +477,13 @@ static int vif_add(struct vifctl *vifc, int mrtsock) /* * Fill in the VIF structures */ - v->rate_limit=vifc->vifc_rate_limit; - v->local=vifc->vifc_lcl_addr.s_addr; - v->remote=vifc->vifc_rmt_addr.s_addr; - v->flags=vifc->vifc_flags; + v->rate_limit = vifc->vifc_rate_limit; + v->local = vifc->vifc_lcl_addr.s_addr; + v->remote = vifc->vifc_rmt_addr.s_addr; + v->flags = vifc->vifc_flags; if (!mrtsock) v->flags |= VIFF_STATIC; - v->threshold=vifc->vifc_threshold; + v->threshold = vifc->vifc_threshold; v->bytes_in = 0; v->bytes_out = 0; v->pkt_in = 0; @@ -494,7 +494,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - v->dev=dev; + v->dev = dev; #ifdef CONFIG_IP_PIMSM if (v->flags&VIFF_REGISTER) reg_vif_num = vifi; @@ -507,7 +507,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) { - int line=MFC_HASH(mcastgrp,origin); + int line = MFC_HASH(mcastgrp, origin); struct mfc_cache *c; for (c=mfc_cache_array[line]; c; c = c->next) { @@ -522,8 +522,8 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) */ static struct mfc_cache *ipmr_cache_alloc(void) { - struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c==NULL) + struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); + if (c == NULL) return NULL; c->mfc_un.res.minvif = MAXVIFS; return c; @@ -531,8 +531,8 @@ static struct mfc_cache *ipmr_cache_alloc(void) static struct mfc_cache *ipmr_cache_alloc_unres(void) { - struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); - if (c==NULL) + struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); + if (c == NULL) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10*HZ; @@ -552,7 +552,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) * Play the pending entries through our router */ - while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { + while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); @@ -637,7 +637,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) * Add our header */ - igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); + igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); igmp->type = msg->im_msgtype = assert; igmp->code = 0; @@ -653,7 +653,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) /* * Deliver to mrouted */ - if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) { + if ((ret = sock_queue_rcv_skb(mroute_socket, skb))<0) { if (net_ratelimit()) printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); kfree_skb(skb); @@ -685,7 +685,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&cache_resolve_queue_len)>=10 || + if (atomic_read(&cache_resolve_queue_len) >= 10 || (c=ipmr_cache_alloc_unres())==NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -728,7 +728,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) kfree_skb(skb); err = -ENOBUFS; } else { - skb_queue_tail(&c->mfc_un.unres.unresolved,skb); + skb_queue_tail(&c->mfc_un.unres.unresolved, skb); err = 0; } @@ -745,7 +745,7 @@ static int ipmr_mfc_delete(struct mfcctl *mfc) int line; struct mfc_cache *c, **cp; - line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); + line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && @@ -766,7 +766,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) int line; struct mfc_cache *uc, *c, **cp; - line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); + line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && @@ -787,13 +787,13 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; - c=ipmr_cache_alloc(); - if (c==NULL) + c = ipmr_cache_alloc(); + if (c == NULL) return -ENOMEM; - c->mfc_origin=mfc->mfcc_origin.s_addr; - c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; - c->mfc_parent=mfc->mfcc_parent; + c->mfc_origin = mfc->mfcc_origin.s_addr; + c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; + c->mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; @@ -846,7 +846,7 @@ static void mroute_clean_tables(struct sock *sk) /* * Wipe the cache */ - for (i=0;isk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_IGMP) return -EOPNOTSUPP; - if (optlen!=sizeof(int)) + if (optlen != sizeof(int)) return -ENOPROTOOPT; rtnl_lock(); @@ -930,7 +930,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { write_lock_bh(&mrt_lock); - mroute_socket=sk; + mroute_socket = sk; write_unlock_bh(&mrt_lock); IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; @@ -938,19 +938,19 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt rtnl_unlock(); return ret; case MRT_DONE: - if (sk!=mroute_socket) + if (sk != mroute_socket) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: case MRT_DEL_VIF: - if (optlen!=sizeof(vif)) + if (optlen != sizeof(vif)) return -EINVAL; - if (copy_from_user(&vif,optval,sizeof(vif))) + if (copy_from_user(&vif, optval, sizeof(vif))) return -EFAULT; if (vif.vifc_vifi >= MAXVIFS) return -ENFILE; rtnl_lock(); - if (optname==MRT_ADD_VIF) { + if (optname == MRT_ADD_VIF) { ret = vif_add(&vif, sk==mroute_socket); } else { ret = vif_delete(vif.vifc_vifi, 0); @@ -964,12 +964,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt */ case MRT_ADD_MFC: case MRT_DEL_MFC: - if (optlen!=sizeof(mfc)) + if (optlen != sizeof(mfc)) return -EINVAL; - if (copy_from_user(&mfc,optval, sizeof(mfc))) + if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; rtnl_lock(); - if (optname==MRT_DEL_MFC) + if (optname == MRT_DEL_MFC) ret = ipmr_mfc_delete(&mfc); else ret = ipmr_mfc_add(&mfc, sk==mroute_socket); @@ -1028,12 +1028,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt * Getsock opt support for the multicast routing system. */ -int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen) +int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int olr; int val; - if (optname!=MRT_VERSION && + if (optname != MRT_VERSION && #ifdef CONFIG_IP_PIMSM optname!=MRT_PIM && #endif @@ -1047,17 +1047,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u if (olr < 0) return -EINVAL; - if (put_user(olr,optlen)) + if (put_user(olr, optlen)) return -EFAULT; - if (optname==MRT_VERSION) - val=0x0305; + if (optname == MRT_VERSION) + val = 0x0305; #ifdef CONFIG_IP_PIMSM - else if (optname==MRT_PIM) - val=mroute_do_pim; + else if (optname == MRT_PIM) + val = mroute_do_pim; #endif else - val=mroute_do_assert; - if (copy_to_user(optval,&val,olr)) + val = mroute_do_assert; + if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; } @@ -1075,27 +1075,27 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) switch (cmd) { case SIOCGETVIFCNT: - if (copy_from_user(&vr,arg,sizeof(vr))) + if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.vifi>=maxvif) + if (vr.vifi >= maxvif) return -EINVAL; read_lock(&mrt_lock); vif=&vif_table[vr.vifi]; if (VIF_EXISTS(vr.vifi)) { - vr.icount=vif->pkt_in; - vr.ocount=vif->pkt_out; - vr.ibytes=vif->bytes_in; - vr.obytes=vif->bytes_out; + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; read_unlock(&mrt_lock); - if (copy_to_user(arg,&vr,sizeof(vr))) + if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } read_unlock(&mrt_lock); return -EADDRNOTAVAIL; case SIOCGETSGCNT: - if (copy_from_user(&sr,arg,sizeof(sr))) + if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; read_lock(&mrt_lock); @@ -1106,7 +1106,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) sr.wrong_if = c->mfc_un.res.wrong_if; read_unlock(&mrt_lock); - if (copy_to_user(arg,&sr,sizeof(sr))) + if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } @@ -1130,15 +1130,15 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v=&vif_table[0]; - for (ct=0;ctdev==dev) + for (ct=0; ctdev == dev) vif_delete(ct, 1); } return NOTIFY_DONE; } -static struct notifier_block ip_mr_notifier={ +static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; @@ -1204,7 +1204,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) #ifdef CONFIG_IP_PIMSM if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; - vif->bytes_out+=skb->len; + vif->bytes_out += skb->len; vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); @@ -1254,7 +1254,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) } vif->pkt_out++; - vif->bytes_out+=skb->len; + vif->bytes_out += skb->len; dst_release(skb->dst); skb->dst = &rt->u.dst; @@ -1352,7 +1352,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local } vif_table[vif].pkt_in++; - vif_table[vif].bytes_in+=skb->len; + vif_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1364,7 +1364,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local if (skb2) ipmr_queue_xmit(skb2, cache, psend); } - psend=ct; + psend = ct; } } if (psend != -1) { @@ -1428,7 +1428,7 @@ int ip_mr_input(struct sk_buff *skb) /* * No usable cache entry */ - if (cache==NULL) { + if (cache == NULL) { int vif; if (local) { @@ -1602,13 +1602,13 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) if (dev) RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); - mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); + mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { if (c->mfc_un.res.ttls[ct] < 255) { if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) goto rtattr_failure; - nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; @@ -1634,7 +1634,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) read_lock(&mrt_lock); cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); - if (cache==NULL) { + if (cache == NULL) { struct sk_buff *skb2; struct iphdr *iph; struct net_device *dev; -- cgit v1.2.3 From fd3f8c4cb632c28ef915a535617a0fcddcfe3f80 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 02:47:38 -0800 Subject: net: clean up net/ipv4/ip_fragment.c tcp_timer.c ip_input.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_input.c | 4 ++-- net/ipv4/tcp_timer.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8c495e34d563..1a3c37b5e936 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -56,7 +56,7 @@ struct ipfrag_skb_cb int offset; }; -#define FRAG_CB(skb) ((struct ipfrag_skb_cb*)((skb)->cb)) +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1b6475c05eb4..70bedab03b09 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -340,9 +340,9 @@ static int ip_rcv_finish(struct sk_buff *skb) struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); u32 idx = skb->dst->tclassid; st[idx&0xFF].o_packets++; - st[idx&0xFF].o_bytes+=skb->len; + st[idx&0xFF].o_bytes += skb->len; st[(idx>>16)&0xFF].i_packets++; - st[(idx>>16)&0xFF].i_bytes+=skb->len; + st[(idx>>16)&0xFF].i_bytes += skb->len; } #endif diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 9843f6cb40a5..3df339e3e363 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -171,7 +171,7 @@ static int tcp_write_timeout(struct sock *sk) static void tcp_delack_timer(unsigned long data) { - struct sock *sk = (struct sock*)data; + struct sock *sk = (struct sock *)data; struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -396,7 +396,7 @@ out:; static void tcp_write_timer(unsigned long data) { - struct sock *sk = (struct sock*)data; + struct sock *sk = (struct sock *)data; struct inet_connection_sock *icsk = inet_csk(sk); int event; -- cgit v1.2.3 From f4cca7ffb2700bff5752fbbc28f49d58ed2c5cb3 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 02:48:14 -0800 Subject: net: clean up net/ipv4/pararp.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/arp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 051a62fc8189..957c87dc8e16 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -506,7 +506,7 @@ int arp_bind_neighbour(struct dst_entry *dst) if (dev == NULL) return -EINVAL; if (n == NULL) { - __be32 nexthop = ((struct rtable*)dst)->rt_gateway; + __be32 nexthop = ((struct rtable *)dst)->rt_gateway; if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) nexthop = 0; n = __neigh_lookup_errno( @@ -640,14 +640,14 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp_ptr=(unsigned char *)(arp+1); memcpy(arp_ptr, src_hw, dev->addr_len); - arp_ptr+=dev->addr_len; - memcpy(arp_ptr, &src_ip,4); - arp_ptr+=4; + arp_ptr += dev->addr_len; + memcpy(arp_ptr, &src_ip, 4); + arp_ptr += 4; if (target_hw != NULL) memcpy(arp_ptr, target_hw, dev->addr_len); else memset(arp_ptr, 0, dev->addr_len); - arp_ptr+=dev->addr_len; + arp_ptr += dev->addr_len; memcpy(arp_ptr, &dest_ip, 4); return skb; @@ -823,9 +823,9 @@ static int arp_process(struct sk_buff *skb) int dont_send = 0; if (!dont_send) - dont_send |= arp_ignore(in_dev,sip,tip); + dont_send |= arp_ignore(in_dev, sip, tip); if (!dont_send && IN_DEV_ARPFILTER(in_dev)) - dont_send |= arp_filter(sip,tip,dev); + dont_send |= arp_filter(sip, tip, dev); if (!dont_send) arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); -- cgit v1.2.3 From 539afedfccb39577c9264b29f11ec9556fd45022 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 02:48:48 -0800 Subject: net: clean up net/ipv4/devinet.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0bff576d2918..309997edc8a5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1101,7 +1101,7 @@ out: } static struct notifier_block ip_netdev_notifier = { - .notifier_call =inetdev_event, + .notifier_call = inetdev_event, }; static inline size_t inet_nlmsg_size(void) @@ -1188,7 +1188,7 @@ done: return skb->len; } -static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, +static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 pid) { struct sk_buff *skb; @@ -1255,7 +1255,7 @@ static void inet_forward_change(struct net *net) } static int devinet_conf_proc(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); @@ -1327,7 +1327,7 @@ static int devinet_conf_sysctl(ctl_table *table, } static int devinet_sysctl_forward(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; @@ -1356,7 +1356,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } int ipv4_doint_and_flush(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; -- cgit v1.2.3 From 5799de0b12c773874282444052da401989075df6 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 02:49:10 -0800 Subject: net: clean up net/ipv4/tcp_ipv4.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ef33246e6a6f..d49233f409b5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1874,7 +1874,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct inet_connection_sock *icsk; struct hlist_node *node; struct sock *sk = cur; - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); if (!sk) { @@ -1960,7 +1960,7 @@ static inline int empty_bucket(struct tcp_iter_state *st) static void *established_get_first(struct seq_file *seq) { - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); void *rc = NULL; @@ -2005,7 +2005,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) struct sock *sk = cur; struct inet_timewait_sock *tw; struct hlist_node *node; - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); ++st->num; @@ -2064,7 +2064,7 @@ static void *established_get_idx(struct seq_file *seq, loff_t pos) static void *tcp_get_idx(struct seq_file *seq, loff_t pos) { void *rc; - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; inet_listen_lock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_LISTENING; @@ -2081,7 +2081,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) { - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; st->state = TCP_SEQ_STATE_LISTENING; st->num = 0; return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -2090,7 +2090,7 @@ static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { void *rc = NULL; - struct tcp_iter_state* st; + struct tcp_iter_state *st; if (v == SEQ_START_TOKEN) { rc = tcp_get_idx(seq, 0); @@ -2120,7 +2120,7 @@ out: static void tcp_seq_stop(struct seq_file *seq, void *v) { - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; switch (st->state) { case TCP_SEQ_STATE_OPENREQ: @@ -2281,7 +2281,7 @@ static void get_timewait4_sock(struct inet_timewait_sock *tw, static int tcp4_seq_show(struct seq_file *seq, void *v) { - struct tcp_iter_state* st; + struct tcp_iter_state *st; int len; if (v == SEQ_START_TOKEN) { -- cgit v1.2.3 From 6d9f239a1edb31d6133230f478fd1dc2da338ec5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 3 Nov 2008 18:21:05 -0800 Subject: net: '&' redux I want to compile out proc_* and sysctl_* handlers totally and stub them to NULL depending on config options, however usage of & will prevent this, since taking adress of NULL pointer will break compilation. So, drop & in front of every ->proc_handler and every ->strategy handler, it was never needed in fact. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/802/tr.c | 2 +- net/appletalk/sysctl_net_atalk.c | 14 +- net/ax25/sysctl_net_ax25.c | 56 ++++---- net/bridge/br_netfilter.c | 10 +- net/core/neighbour.c | 48 +++---- net/core/sysctl_net_core.c | 34 ++--- net/decnet/sysctl_net_decnet.c | 44 +++--- net/ipv4/ip_fragment.c | 14 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 12 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 4 +- net/ipv4/route.c | 48 +++---- net/ipv4/sysctl_net_ipv4.c | 178 ++++++++++++------------- net/ipv6/addrconf.c | 62 ++++----- net/ipv6/icmp.c | 4 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +- net/ipv6/reassembly.c | 12 +- net/ipv6/route.c | 34 ++--- net/ipv6/sysctl_net_ipv6.c | 4 +- net/ipx/sysctl_net_ipx.c | 2 +- net/irda/irsysctl.c | 50 +++---- net/llc/sysctl_net_llc.c | 20 +-- net/netfilter/ipvs/ip_vs_ctl.c | 46 +++---- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- net/netfilter/nf_conntrack_acct.c | 2 +- net/netfilter/nf_conntrack_proto_generic.c | 4 +- net/netfilter/nf_conntrack_proto_sctp.c | 28 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 50 +++---- net/netfilter/nf_conntrack_proto_udp.c | 8 +- net/netfilter/nf_conntrack_proto_udplite.c | 4 +- net/netfilter/nf_conntrack_standalone.c | 16 +-- net/netrom/sysctl_net_netrom.c | 48 +++---- net/phonet/sysctl.c | 2 +- net/rose/sysctl_net_rose.c | 40 +++--- net/sctp/sysctl.c | 82 ++++++------ net/unix/sysctl_net_unix.c | 2 +- net/x25/sysctl_net_x25.c | 22 +-- 38 files changed, 509 insertions(+), 509 deletions(-) (limited to 'net/ipv4') diff --git a/net/802/tr.c b/net/802/tr.c index 38f1f290c635..158150fee462 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -635,7 +635,7 @@ static struct ctl_table tr_table[] = { .data = &sysctl_tr_rif_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { 0 }, }; diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index 621805dfa2f4..8d237b15183b 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -17,8 +17,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_expiry_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_ATALK_AARP_TICK_TIME, @@ -26,8 +26,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_tick_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_ATALK_AARP_RETRANSMIT_LIMIT, @@ -35,7 +35,7 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_retransmit_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_ATALK_AARP_RESOLVE_TIME, @@ -43,8 +43,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_resolve_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index f288fc4aef9b..c1d877bb5dff 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -43,8 +43,8 @@ static const ctl_table ax25_param_table[] = { .procname = "ip_default_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ipdefmode, .extra2 = &max_ipdefmode }, @@ -53,8 +53,8 @@ static const ctl_table ax25_param_table[] = { .procname = "ax25_default_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_axdefmode, .extra2 = &max_axdefmode }, @@ -63,8 +63,8 @@ static const ctl_table ax25_param_table[] = { .procname = "backoff_type", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_backoff, .extra2 = &max_backoff }, @@ -73,8 +73,8 @@ static const ctl_table ax25_param_table[] = { .procname = "connect_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_conmode, .extra2 = &max_conmode }, @@ -83,8 +83,8 @@ static const ctl_table ax25_param_table[] = { .procname = "standard_window_size", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, @@ -93,8 +93,8 @@ static const ctl_table ax25_param_table[] = { .procname = "extended_window_size", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ewindow, .extra2 = &max_ewindow }, @@ -103,8 +103,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t1_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, @@ -113,8 +113,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t2_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, @@ -123,8 +123,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t3_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t3, .extra2 = &max_t3 }, @@ -133,8 +133,8 @@ static const ctl_table ax25_param_table[] = { .procname = "idle_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -143,8 +143,8 @@ static const ctl_table ax25_param_table[] = { .procname = "maximum_retry_count", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, @@ -153,8 +153,8 @@ static const ctl_table ax25_param_table[] = { .procname = "maximum_packet_length", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_paclen, .extra2 = &max_paclen }, @@ -163,8 +163,8 @@ static const ctl_table ax25_param_table[] = { .procname = "protocol", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_proto, .extra2 = &max_proto }, @@ -174,8 +174,8 @@ static const ctl_table ax25_param_table[] = { .procname = "dama_slave_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ds_timeout, .extra2 = &max_ds_timeout }, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fa5cda4e552a..db6176d96e71 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -938,35 +938,35 @@ static ctl_table brnf_table[] = { .data = &brnf_call_arptables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-iptables", .data = &brnf_call_iptables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-ip6tables", .data = &brnf_call_ip6tables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-vlan-tagged", .data = &brnf_filter_vlan_tagged, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-pppoe-tagged", .data = &brnf_filter_pppoe_tagged, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .ctl_name = 0 } }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b337a937ea52..d9bbe010e0ee 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2566,128 +2566,128 @@ static struct neigh_sysctl_table { .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_UCAST_SOLICIT, .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_APP_SOLICIT, .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "retrans_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .ctl_name = NET_NEIGH_REACHABLE_TIME, .procname = "base_reachable_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, .procname = "delay_first_probe_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_GC_STALE_TIME, .procname = "gc_stale_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_UNRES_QLEN, .procname = "unres_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_PROXY_QLEN, .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "anycast_delay", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .procname = "proxy_delay", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .procname = "locktime", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .ctl_name = NET_NEIGH_RETRANS_TIME_MS, .procname = "retrans_time_ms", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, .procname = "base_reachable_time_ms", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_NEIGH_GC_INTERVAL, .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_GC_THRESH1, .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_GC_THRESH2, .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_GC_THRESH3, .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {}, }, diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f686467ff12b..2bc0384b0448 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -22,7 +22,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_RMEM_MAX, @@ -30,7 +30,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_rmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_WMEM_DEFAULT, @@ -38,7 +38,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_RMEM_DEFAULT, @@ -46,7 +46,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_rmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_DEV_WEIGHT, @@ -54,7 +54,7 @@ static struct ctl_table net_core_table[] = { .data = &weight_p, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_MAX_BACKLOG, @@ -62,7 +62,7 @@ static struct ctl_table net_core_table[] = { .data = &netdev_max_backlog, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_MSG_COST, @@ -70,8 +70,8 @@ static struct ctl_table net_core_table[] = { .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_CORE_MSG_BURST, @@ -79,7 +79,7 @@ static struct ctl_table net_core_table[] = { .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CORE_OPTMEM_MAX, @@ -87,7 +87,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_optmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #ifdef CONFIG_XFRM { @@ -96,7 +96,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_xfrm_aevent_etime, .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_AEVENT_RSEQTH, @@ -104,7 +104,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_xfrm_aevent_rseqth, .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = CTL_UNNUMBERED, @@ -112,7 +112,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_xfrm_larval_drop, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = CTL_UNNUMBERED, @@ -120,7 +120,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_xfrm_acq_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ @@ -130,7 +130,7 @@ static struct ctl_table net_core_table[] = { .data = &netdev_budget, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_WARNINGS, @@ -138,7 +138,7 @@ static struct ctl_table net_core_table[] = { .data = &net_msg_warn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; @@ -150,7 +150,7 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 36400b266896..2f360a1e5e49 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -354,8 +354,8 @@ static ctl_table dn_table[] = { .data = node_name, .maxlen = 7, .mode = 0644, - .proc_handler = &proc_dostring, - .strategy = &sysctl_string, + .proc_handler = proc_dostring, + .strategy = sysctl_string, }, { .ctl_name = NET_DECNET_DEFAULT_DEVICE, @@ -371,8 +371,8 @@ static ctl_table dn_table[] = { .data = &decnet_time_wait, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_decnet_time_wait, .extra2 = &max_decnet_time_wait }, @@ -382,8 +382,8 @@ static ctl_table dn_table[] = { .data = &decnet_dn_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -393,8 +393,8 @@ static ctl_table dn_table[] = { .data = &decnet_di_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -404,8 +404,8 @@ static ctl_table dn_table[] = { .data = &decnet_dr_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -415,8 +415,8 @@ static ctl_table dn_table[] = { .data = &decnet_dst_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_decnet_dst_gc_interval, .extra2 = &max_decnet_dst_gc_interval }, @@ -426,8 +426,8 @@ static ctl_table dn_table[] = { .data = &decnet_no_fc_max_cwnd, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_decnet_no_fc_max_cwnd, .extra2 = &max_decnet_no_fc_max_cwnd }, @@ -437,8 +437,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_mem, .maxlen = sizeof(sysctl_decnet_mem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_RMEM, @@ -446,8 +446,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_rmem, .maxlen = sizeof(sysctl_decnet_rmem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_WMEM, @@ -455,8 +455,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_wmem, .maxlen = sizeof(sysctl_decnet_wmem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_DEBUG_LEVEL, @@ -464,8 +464,8 @@ static ctl_table dn_table[] = { .data = &decnet_debug_level, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, {0} }; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1a3c37b5e936..6659ac000eeb 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -607,7 +607,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, @@ -615,7 +615,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_IPFRAG_TIME, @@ -623,8 +623,8 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { } }; @@ -636,15 +636,15 @@ static struct ctl_table ip4_frags_ctl_table[] = { .data = &ip4_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .procname = "ipfrag_max_dist", .data = &sysctl_ipfrag_max_dist, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero }, { } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index bddadead6195..b2141e11575e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -197,7 +197,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, @@ -205,7 +205,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, @@ -213,7 +213,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, @@ -221,7 +221,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, @@ -229,8 +229,8 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4e8879220222..1fd3ef7718b6 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -272,7 +272,7 @@ static struct ctl_table icmp_sysctl_table[] = { .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -285,7 +285,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f47b9db0db7f..0dc0c3826763 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3118,7 +3118,7 @@ static ctl_table ipv4_route_table[] = { .data = &ipv4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, @@ -3126,7 +3126,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { /* Deprecated. Use gc_min_interval_ms */ @@ -3136,8 +3136,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, @@ -3145,8 +3145,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, @@ -3154,8 +3154,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, @@ -3163,8 +3163,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, @@ -3172,7 +3172,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_load, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, @@ -3180,7 +3180,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_number, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, @@ -3188,7 +3188,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_silence, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_ERROR_COST, @@ -3196,7 +3196,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_error_cost, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, @@ -3204,7 +3204,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_error_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, @@ -3212,7 +3212,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, @@ -3220,8 +3220,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, @@ -3229,7 +3229,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_min_pmtu, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, @@ -3237,7 +3237,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, @@ -3245,8 +3245,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &ipv4_sysctl_rt_secret_interval, - .strategy = &ipv4_sysctl_rt_secret_interval_strategy, + .proc_handler = ipv4_sysctl_rt_secret_interval, + .strategy = ipv4_sysctl_rt_secret_interval_strategy, }, { .ctl_name = 0 } }; @@ -3274,8 +3274,8 @@ static struct ctl_table ipv4_route_flush_table[] = { .procname = "flush", .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv4_sysctl_rtcache_flush, - .strategy = &ipv4_sysctl_rtcache_flush_strategy, + .proc_handler = ipv4_sysctl_rtcache_flush, + .strategy = ipv4_sysctl_rtcache_flush_strategy, }, { .ctl_name = 0 }, }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0cc8d31f9ac0..4710d219f06a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -195,7 +195,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_timestamps, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, @@ -203,7 +203,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_window_scaling, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_SACK, @@ -211,7 +211,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_sack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_RETRANS_COLLAPSE, @@ -219,7 +219,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retrans_collapse, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_DEFAULT_TTL, @@ -227,8 +227,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_default_ttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, + .proc_handler = ipv4_doint_and_flush, + .strategy = ipv4_doint_and_flush_strategy, .extra2 = &init_net, }, { @@ -237,7 +237,7 @@ static struct ctl_table ipv4_table[] = { .data = &ipv4_config.no_pmtu_disc, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_NONLOCAL_BIND, @@ -245,7 +245,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_nonlocal_bind, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_SYN_RETRIES, @@ -253,7 +253,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_SYNACK_RETRIES, @@ -261,7 +261,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_synack_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_ORPHANS, @@ -269,7 +269,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_max_orphans, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_TW_BUCKETS, @@ -277,7 +277,7 @@ static struct ctl_table ipv4_table[] = { .data = &tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_DYNADDR, @@ -285,7 +285,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_dynaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, @@ -293,8 +293,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_TCP_KEEPALIVE_PROBES, @@ -302,7 +302,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_probes, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_KEEPALIVE_INTVL, @@ -310,8 +310,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_intvl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_TCP_RETRIES1, @@ -319,8 +319,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retries1, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra2 = &tcp_retr1_max }, { @@ -329,7 +329,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retries2, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_FIN_TIMEOUT, @@ -337,8 +337,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_fin_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, #ifdef CONFIG_SYN_COOKIES { @@ -347,7 +347,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syncookies, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -356,7 +356,7 @@ static struct ctl_table ipv4_table[] = { .data = &tcp_death_row.sysctl_tw_recycle, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ABORT_ON_OVERFLOW, @@ -364,7 +364,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_abort_on_overflow, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_STDURG, @@ -372,7 +372,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_stdurg, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_RFC1337, @@ -380,7 +380,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_rfc1337, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_SYN_BACKLOG, @@ -388,7 +388,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_max_syn_backlog, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, @@ -396,8 +396,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_local_ports.range, .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, - .proc_handler = &ipv4_local_port_range, - .strategy = &ipv4_sysctl_local_port_range, + .proc_handler = ipv4_local_port_range, + .strategy = ipv4_sysctl_local_port_range, }, #ifdef CONFIG_IP_MULTICAST { @@ -406,7 +406,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_igmp_max_memberships, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif @@ -416,7 +416,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_igmp_max_msf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, @@ -424,7 +424,7 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_threshold, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_INET_PEER_MINTTL, @@ -432,8 +432,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_minttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_MAXTTL, @@ -441,8 +441,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_maxttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, @@ -450,8 +450,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_gc_mintime, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, @@ -459,8 +459,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_gc_maxtime, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_TCP_ORPHAN_RETRIES, @@ -468,7 +468,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_orphan_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FACK, @@ -476,7 +476,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_fack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_REORDERING, @@ -484,7 +484,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_reordering, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ECN, @@ -492,7 +492,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_ecn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_DSACK, @@ -500,7 +500,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_dsack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MEM, @@ -508,7 +508,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_mem, .maxlen = sizeof(sysctl_tcp_mem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_WMEM, @@ -516,7 +516,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_RMEM, @@ -524,7 +524,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_APP_WIN, @@ -532,7 +532,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_app_win, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ADV_WIN_SCALE, @@ -540,7 +540,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_TW_REUSE, @@ -548,7 +548,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_tw_reuse, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FRTO, @@ -556,7 +556,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_frto, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FRTO_RESPONSE, @@ -564,7 +564,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_frto_response, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_LOW_LATENCY, @@ -572,7 +572,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_NO_METRICS_SAVE, @@ -580,7 +580,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_nometrics_save, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_MODERATE_RCVBUF, @@ -588,7 +588,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_moderate_rcvbuf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_TSO_WIN_DIVISOR, @@ -596,15 +596,15 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_tso_win_divisor, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_CONG_CONTROL, .procname = "tcp_congestion_control", .mode = 0644, .maxlen = TCP_CA_NAME_MAX, - .proc_handler = &proc_tcp_congestion_control, - .strategy = &sysctl_tcp_congestion_control, + .proc_handler = proc_tcp_congestion_control, + .strategy = sysctl_tcp_congestion_control, }, { .ctl_name = NET_TCP_ABC, @@ -612,7 +612,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_abc, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_MTU_PROBING, @@ -620,7 +620,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_BASE_MSS, @@ -628,7 +628,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_base_mss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, @@ -636,7 +636,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_workaround_signed_windows, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #ifdef CONFIG_NET_DMA { @@ -645,7 +645,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_dma_copybreak, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -654,7 +654,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_slow_start_after_idle, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #ifdef CONFIG_NETLABEL { @@ -663,7 +663,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_cache_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, @@ -671,7 +671,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_cache_bucketsize, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_RBM_OPTFMT, @@ -679,7 +679,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_rbm_optfmt, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, @@ -687,22 +687,22 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_rbm_strictvalid, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif /* CONFIG_NETLABEL */ { .procname = "tcp_available_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0444, - .proc_handler = &proc_tcp_available_congestion_control, + .proc_handler = proc_tcp_available_congestion_control, }, { .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, .procname = "tcp_allowed_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0644, - .proc_handler = &proc_allowed_congestion_control, - .strategy = &strategy_allowed_congestion_control, + .proc_handler = proc_allowed_congestion_control, + .strategy = strategy_allowed_congestion_control, }, { .ctl_name = NET_TCP_MAX_SSTHRESH, @@ -710,7 +710,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_max_ssthresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -718,8 +718,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { @@ -728,8 +728,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_rmem_min, .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { @@ -738,8 +738,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_wmem_min, .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { .ctl_name = 0 } @@ -752,7 +752,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, @@ -760,7 +760,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, @@ -768,7 +768,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, @@ -776,7 +776,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_RATELIMIT, @@ -784,8 +784,8 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies }, { .ctl_name = NET_IPV4_ICMP_RATEMASK, @@ -793,7 +793,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratemask, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = CTL_UNNUMBERED, @@ -801,7 +801,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { } }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ff7ae05f72e1..07ee758de9e1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4030,8 +4030,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.forwarding, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &addrconf_sysctl_forward, - .strategy = &addrconf_sysctl_forward_strategy, + .proc_handler = addrconf_sysctl_forward, + .strategy = addrconf_sysctl_forward_strategy, }, { .ctl_name = NET_IPV6_HOP_LIMIT, @@ -4047,7 +4047,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA, @@ -4055,7 +4055,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, @@ -4063,7 +4063,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_redirects, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_AUTOCONF, @@ -4071,7 +4071,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.autoconf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_DAD_TRANSMITS, @@ -4079,7 +4079,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.dad_transmits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_SOLICITS, @@ -4087,7 +4087,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, @@ -4095,8 +4095,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicit_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, @@ -4104,8 +4104,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_FORCE_MLD_VERSION, @@ -4113,7 +4113,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.force_mld_version, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_PRIVACY { @@ -4122,7 +4122,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.use_tempaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_TEMP_VALID_LFT, @@ -4130,7 +4130,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.temp_valid_lft, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, @@ -4138,7 +4138,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.temp_prefered_lft, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_REGEN_MAX_RETRY, @@ -4146,7 +4146,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.regen_max_retry, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, @@ -4154,7 +4154,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.max_desync_factor, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -4163,7 +4163,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.max_addresses, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, @@ -4171,7 +4171,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_defrtr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, @@ -4179,7 +4179,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_pinfo, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_ROUTER_PREF { @@ -4188,7 +4188,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_rtr_pref, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, @@ -4196,8 +4196,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_probe_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { @@ -4206,7 +4206,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif #endif @@ -4216,7 +4216,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, @@ -4224,7 +4224,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_source_route, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { @@ -4233,7 +4233,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.optimistic_dad, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif @@ -4244,7 +4244,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -4253,7 +4253,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.disable_ipv6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -4261,7 +4261,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_dad, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0, /* sentinel */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 3c2821f9b529..be351009fd03 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -956,8 +956,8 @@ ctl_table ipv6_icmp_table_template[] = { .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies }, { .ctl_name = 0 }, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 05726177903f..bd52151d31e9 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -253,7 +253,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9967ac7a01a8..ed4d79a9e4a6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -80,7 +80,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, @@ -88,7 +88,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, @@ -96,7 +96,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index af12de071f4c..3c575118fca5 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -642,7 +642,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, @@ -650,7 +650,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV6_IP6FRAG_TIME, @@ -658,8 +658,8 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { } }; @@ -671,8 +671,8 @@ static struct ctl_table ip6_frags_ctl_table[] = { .data = &ip6_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { } }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d69fa462d3f0..4d40dc214b2d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2499,7 +2499,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv6_sysctl_rtcache_flush + .proc_handler = ipv6_sysctl_rtcache_flush }, { .ctl_name = NET_IPV6_ROUTE_GC_THRESH, @@ -2507,7 +2507,7 @@ ctl_table ipv6_route_table_template[] = { .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, @@ -2515,7 +2515,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, @@ -2523,8 +2523,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, @@ -2532,8 +2532,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, @@ -2541,8 +2541,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, @@ -2550,8 +2550,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, @@ -2559,8 +2559,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, @@ -2568,8 +2568,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, @@ -2577,8 +2577,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 587f8f60c489..9048fe7e7ea7 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -35,7 +35,7 @@ static ctl_table ipv6_table_template[] = { .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; @@ -47,7 +47,7 @@ static ctl_table ipv6_table[] = { .data = &sysctl_mld_max_msf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c index 92fef864e852..633fcab35580 100644 --- a/net/ipx/sysctl_net_ipx.c +++ b/net/ipx/sysctl_net_ipx.c @@ -23,7 +23,7 @@ static struct ctl_table ipx_table[] = { .data = &sysctl_ipx_pprop_broadcasting, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { 0 }, }; diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 9ab3df15425d..57f8817c3979 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -118,8 +118,8 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &do_discovery, - .strategy = &sysctl_intvec + .proc_handler = do_discovery, + .strategy = sysctl_intvec }, { .ctl_name = NET_IRDA_DEVNAME, @@ -127,8 +127,8 @@ static ctl_table irda_table[] = { .data = sysctl_devname, .maxlen = 65, .mode = 0644, - .proc_handler = &do_devname, - .strategy = &sysctl_string + .proc_handler = do_devname, + .strategy = sysctl_string }, #ifdef CONFIG_IRDA_DEBUG { @@ -137,7 +137,7 @@ static ctl_table irda_table[] = { .data = &irda_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif #ifdef CONFIG_IRDA_FAST_RR @@ -147,7 +147,7 @@ static ctl_table irda_table[] = { .data = &sysctl_fast_poll_increase, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -156,8 +156,8 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery_slots, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_discovery_slots, .extra2 = &max_discovery_slots }, @@ -167,7 +167,7 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IRDA_SLOT_TIMEOUT, @@ -175,8 +175,8 @@ static ctl_table irda_table[] = { .data = &sysctl_slot_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_slot_timeout, .extra2 = &max_slot_timeout }, @@ -186,8 +186,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_baud_rate, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_baud_rate, .extra2 = &max_max_baud_rate }, @@ -197,8 +197,8 @@ static ctl_table irda_table[] = { .data = &sysctl_min_tx_turn_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_min_tx_turn_time, .extra2 = &max_min_tx_turn_time }, @@ -208,8 +208,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_tx_data_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_tx_data_size, .extra2 = &max_max_tx_data_size }, @@ -219,8 +219,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_tx_window, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_tx_window, .extra2 = &max_max_tx_window }, @@ -230,8 +230,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_noreply_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_noreply_time, .extra2 = &max_max_noreply_time }, @@ -241,8 +241,8 @@ static ctl_table irda_table[] = { .data = &sysctl_warn_noreply_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_warn_noreply_time, .extra2 = &max_warn_noreply_time }, @@ -252,8 +252,8 @@ static ctl_table irda_table[] = { .data = &sysctl_lap_keepalive_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_lap_keepalive_time, .extra2 = &max_lap_keepalive_time }, diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 5bef1dcf18e3..57b9304d444c 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -20,8 +20,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_ack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_BUSY_TIMEOUT, @@ -29,8 +29,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_busy_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_P_TIMEOUT, @@ -38,8 +38,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_p_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_REJ_TIMEOUT, @@ -47,8 +47,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_rej_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; @@ -60,8 +60,8 @@ static struct ctl_table llc_station_table[] = { .data = &sysctl_llc_station_ack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d0ccdaff4276..e01061f49cdc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1545,7 +1545,7 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_amemthresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IP_VS_DEBUG { @@ -1553,7 +1553,7 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -1561,28 +1561,28 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "drop_entry", .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, { .procname = "drop_packet", .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, { .procname = "secure_tcp", .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, #if 0 { @@ -1590,84 +1590,84 @@ static struct ctl_table vs_vars[] = { .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synsent", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synrecv", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_finwait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_timewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_close", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_closewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_lastack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_listen", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_udp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_icmp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, #endif { @@ -1675,35 +1675,35 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_cache_bypass, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "expire_nodest_conn", .data = &sysctl_ip_vs_expire_nodest_conn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "expire_quiescent_template", .data = &sysctl_ip_vs_expire_quiescent_template, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "sync_threshold", .data = &sysctl_ip_vs_sync_threshold, .maxlen = sizeof(sysctl_ip_vs_sync_threshold), .mode = 0644, - .proc_handler = &proc_do_sync_threshold, + .proc_handler = proc_do_sync_threshold, }, { .procname = "nat_icmp_send", .data = &sysctl_ip_vs_nat_icmp_send, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 4256cfad8d31..9394f539966a 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -116,7 +116,7 @@ static ctl_table vs_vars_table[] = { .data = &sysctl_ip_vs_lblc_expiration, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 } }; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 321b49fa41d8..92dc76a6842c 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -295,7 +295,7 @@ static ctl_table vs_vars_table[] = { .data = &sysctl_ip_vs_lblcr_expiration, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 } }; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index b92df5c1dfcf..9fe8982bd7c9 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -35,7 +35,7 @@ static struct ctl_table acct_sysctl_table[] = { .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {} }; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index dbe680af85d2..4be80d7b8795 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -67,7 +67,7 @@ static struct ctl_table generic_sysctl_table[] = { .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -80,7 +80,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index ae8c2609e230..c2bd457bc2f2 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -548,49 +548,49 @@ static struct ctl_table sctp_sysctl_table[] = { .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_cookie_wait", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_cookie_echoed", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_established", .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_shutdown_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_shutdown_recd", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -604,49 +604,49 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_cookie_wait", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_cookie_echoed", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_established", .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_shutdown_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_shutdown_recd", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index f947ec41e391..a1edb9c1adee 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1192,70 +1192,70 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_established", .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_fin_wait", .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close_wait", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_last_ack", .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_time_wait", .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_unacknowledged", .data = &nf_ct_tcp_timeout_unacknowledged, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, @@ -1263,7 +1263,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, @@ -1271,7 +1271,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, @@ -1279,7 +1279,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 @@ -1293,63 +1293,63 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_established", .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_fin_wait", .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close_wait", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_last_ack", .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_time_wait", .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, @@ -1357,7 +1357,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, @@ -1365,7 +1365,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, @@ -1373,7 +1373,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7c2ca48698be..2b8b1f579f93 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -143,14 +143,14 @@ static struct ctl_table udp_sysctl_table[] = { .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -163,14 +163,14 @@ static struct ctl_table udp_compat_sysctl_table[] = { .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index d22d839e4f94..4579d8de13b1 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -151,7 +151,7 @@ static struct ctl_table udplite_sysctl_table[] = { .data = &nf_ct_udplite_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = CTL_UNNUMBERED, @@ -159,7 +159,7 @@ static struct ctl_table udplite_sysctl_table[] = { .data = &nf_ct_udplite_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 98106d4e89f0..f37b9b74c6a8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -336,7 +336,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_COUNT, @@ -344,7 +344,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_BUCKETS, @@ -352,7 +352,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_CHECKSUM, @@ -360,7 +360,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, @@ -368,8 +368,8 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, @@ -379,7 +379,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; @@ -393,7 +393,7 @@ static ctl_table nf_ct_netfilter_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 34c96c9674df..7b49591fe87c 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -41,8 +41,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_default_path_quality, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_quality, .extra2 = &max_quality }, @@ -52,8 +52,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_obsolescence_count_initialiser, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_obs, .extra2 = &max_obs }, @@ -63,8 +63,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_network_ttl_initialiser, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ttl, .extra2 = &max_ttl }, @@ -74,8 +74,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, @@ -85,8 +85,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_maximum_tries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, @@ -96,8 +96,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_acknowledge_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, @@ -107,8 +107,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_busy_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t4, .extra2 = &max_t4 }, @@ -118,8 +118,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_requested_window_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, @@ -129,8 +129,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -140,8 +140,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_routing_control, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, @@ -151,8 +151,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_link_fails_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_fails, .extra2 = &max_fails }, @@ -162,8 +162,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_reset_circuit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_reset, .extra2 = &max_reset }, diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 600a4309b8c8..0e3347bbdb12 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -89,7 +89,7 @@ static struct ctl_table phonet_table[] = { .data = &local_port_range, .maxlen = sizeof(local_port_range), .mode = 0644, - .proc_handler = &proc_local_port_range, + .proc_handler = proc_local_port_range, .strategy = NULL, }, { .ctl_name = 0 } diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 20be3485a97f..3bfe504faf86 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -31,8 +31,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -42,8 +42,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -53,8 +53,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -64,8 +64,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -75,8 +75,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -86,8 +86,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_ack_hold_back_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -97,8 +97,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_routing_control, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, @@ -108,8 +108,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_link_fail_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ftimer, .extra2 = &max_ftimer }, @@ -119,8 +119,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_maximum_vcs, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_maxvcs, .extra2 = &max_maxvcs }, @@ -130,8 +130,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_window_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 52910697e104..f58e994e6852 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -63,8 +63,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -74,8 +74,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -85,8 +85,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -96,8 +96,8 @@ static ctl_table sctp_table[] = { .data = &sctp_valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -107,8 +107,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero, .extra2 = &int_max }, @@ -118,8 +118,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -129,8 +129,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RCVBUF_POLICY, @@ -138,8 +138,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_PATH_MAX_RETRANS, @@ -147,8 +147,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -158,8 +158,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -169,8 +169,8 @@ static ctl_table sctp_table[] = { .data = &sctp_hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -180,8 +180,8 @@ static ctl_table sctp_table[] = { .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_ALPHA, @@ -189,8 +189,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_alpha, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_BETA, @@ -198,8 +198,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_beta, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_ADDIP_ENABLE, @@ -207,8 +207,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_PRSCTP_ENABLE, @@ -216,8 +216,8 @@ static ctl_table sctp_table[] = { .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_SACK_TIMEOUT, @@ -225,8 +225,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, @@ -236,7 +236,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -244,7 +244,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -252,7 +252,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -260,8 +260,8 @@ static ctl_table sctp_table[] = { .data = &sctp_auth_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = CTL_UNNUMBERED, @@ -269,8 +269,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_noauth, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = 0 } }; diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 1f4040cdadad..83c093077ebc 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -21,7 +21,7 @@ static ctl_table unix_table[] = { .data = &init_net.unx.sysctl_max_dgram_qlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index 6ebda25c24e9..a5d3416522de 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -24,8 +24,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -35,8 +35,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -46,8 +46,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -57,8 +57,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -68,8 +68,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_ack_holdback_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -79,7 +79,7 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_forward, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { 0, }, }; -- cgit v1.2.3 From d4ec52bae739409b2372fea30dba0e7a8d6b9181 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:21:48 +0100 Subject: netfilter: netns-aware ipt_addrtype Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_addrtype.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 88762f02779d..3b216be3bc9f 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -23,24 +23,25 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("Xtables: address type match for IPv4"); -static inline bool match_type(const struct net_device *dev, __be32 addr, - u_int16_t mask) +static inline bool match_type(struct net *net, const struct net_device *dev, + __be32 addr, u_int16_t mask) { - return !!(mask & (1 << inet_dev_addr_type(&init_net, dev, addr))); + return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); } static bool addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); const struct ipt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; if (info->source) - ret &= match_type(NULL, iph->saddr, info->source) ^ + ret &= match_type(net, NULL, iph->saddr, info->source) ^ info->invert_source; if (info->dest) - ret &= match_type(NULL, iph->daddr, info->dest) ^ + ret &= match_type(net, NULL, iph->daddr, info->dest) ^ info->invert_dest; return ret; @@ -49,6 +50,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); const struct ipt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); const struct net_device *dev = NULL; @@ -60,10 +62,10 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) dev = par->out; if (info->source) - ret &= match_type(dev, iph->saddr, info->source) ^ + ret &= match_type(net, dev, iph->saddr, info->source) ^ (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) - ret &= match_type(dev, iph->daddr, info->dest) ^ + ret &= match_type(net, dev, iph->daddr, info->dest) ^ !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); return ret; } -- cgit v1.2.3 From 19223f26d97077da8cf25251458afe00cae20cbb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:22:13 +0100 Subject: netfilter: arptable_filter: merge forward hook It's identical to NF_ARP_IN hook. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arptable_filter.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index bee3d117661a..e091187e864f 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -75,16 +75,6 @@ static unsigned int arpt_out_hook(unsigned int hook, dev_net(out)->ipv4.arptable_filter); } -static unsigned int arpt_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return arpt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.arptable_filter); -} - static struct nf_hook_ops arpt_ops[] __read_mostly = { { .hook = arpt_in_hook, @@ -101,7 +91,7 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_forward_hook, + .hook = arpt_in_hook, .owner = THIS_MODULE, .pf = NFPROTO_ARP, .hooknum = NF_ARP_FORWARD, -- cgit v1.2.3 From 6e3354c1e9946fa585de40e93ad917ec7abd006e Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Tue, 4 Nov 2008 14:35:39 +0100 Subject: netfilter: nf_nat: remove warn_if_extra_mangle In net/ipv4/netfilter/nf_nat_rule.c, the function warn_if_extra_mangle was added in commit 5b1158e909ecbe1a052203e0d8df15633f829930 (2006-12-02). I have a DNAT target in the OUTPUT chain than changes connections with dst 2.0.0.1 to another address which I'll substitute with 66.102.9.99 below. On every boot I get the following message: [ 146.252505] NAT: no longer support implicit source local NAT [ 146.252517] NAT: packet src 66.102.9.99 -> dst 2.0.0.1 As far as I can tell from reading the function doing this, it should warn if the source IP for the route to 66.102.9.99 is different from 2.0.0.1 but that is not the case. It doesn't make sense to check the DNAT target against the local route source. Either the function should be changed to correctly check the route, or it should be removed entirely as it's been nearly 2 years since it was added. Signed-off-by: Simon Arlott Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_rule.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index a4f1c3479e23..cf95469ab9f1 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -86,24 +86,6 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); } -/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) -{ - static int warned = 0; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; - struct rtable *rt; - - if (ip_route_output_key(net, &rt, &fl) != 0) - return; - - if (rt->rt_src != srcip && !warned) { - printk("NAT: no longer support implicit source local NAT\n"); - printk("NAT: packet src %pI4 -> dst %pI4\n", &srcip, &dstip); - warned = 1; - } - ip_rt_put(rt); -} - static unsigned int ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) { @@ -119,11 +101,6 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - if (par->hooknum == NF_INET_LOCAL_OUT && - mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr, - mr->range[0].min_ip); - return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); } -- cgit v1.2.3 From 6bb3ce25d05f2990c8a19adaf427531430267c1f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Nov 2008 17:25:22 -0800 Subject: net: remove struct dst_entry::entry_size Unused after kmem_cache_zalloc() conversion. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/dst.h | 1 - net/decnet/dn_route.c | 1 - net/ipv4/route.c | 2 -- net/ipv4/xfrm4_policy.c | 1 - net/ipv6/route.c | 2 -- net/ipv6/xfrm6_policy.c | 1 - 6 files changed, 8 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/dst.h b/include/net/dst.h index f96c4ba4dd32..65a60fab2f82 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -104,7 +104,6 @@ struct dst_ops void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - int entry_size; atomic_t entries; struct kmem_cache *kmem_cachep; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 821bd1cdec04..768df000523b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -131,7 +131,6 @@ static struct dst_ops dn_dst_ops = { .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, - .entry_size = sizeof(struct dn_route), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0dc0c3826763..4e6959c29819 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -160,7 +160,6 @@ static struct dst_ops ipv4_dst_ops = { .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .local_out = __ip_local_out, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -2701,7 +2700,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .destroy = ipv4_dst_destroy, .check = ipv4_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index f9a775b7e796..84dbb5a03cc2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -246,7 +246,6 @@ static struct dst_ops xfrm4_dst_ops = { .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d40dc214b2d..9da1ece466a2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -108,7 +108,6 @@ static struct dst_ops ip6_dst_ops_template = { .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .local_out = __ip6_local_out, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 604bc0a96c05..3b67ce7786ec 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -277,7 +277,6 @@ static struct dst_ops xfrm6_dst_ops = { .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; -- cgit v1.2.3 From 7a9546ee354ec6f23af403992b8c07baa50a23d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:54:20 -0800 Subject: net: ib_net pointer should depends on CONFIG_NET_NS We can shrink size of "struct inet_bind_bucket" by 50%, using read_pnet() and write_pnet() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 7 +++++++ net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/inet_hashtables.c | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5cc182f9ecae..cb31fbf8ae2a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -77,13 +77,20 @@ struct inet_ehash_bucket { * ports are created in O(1) time? I thought so. ;-) -DaveM */ struct inet_bind_bucket { +#ifdef CONFIG_NET_NS struct net *ib_net; +#endif unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners; }; +static inline struct net *ib_net(struct inet_bind_bucket *ib) +{ + return read_pnet(&ib->ib_net); +} + #define inet_bind_bucket_for_each(tb, node, head) \ hlist_for_each_entry(tb, node, head, node) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 36f4cbc7da3a..05af807ca9b9 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -109,7 +109,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->ib_net == net && tb->port == rover) + if (ib_net(tb) == net && tb->port == rover) goto next; break; next: @@ -137,7 +137,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->ib_net == net && tb->port == snum) + if (ib_net(tb) == net && tb->port == snum) goto tb_found; } tb = NULL; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 44981906fb91..be41ebbec4eb 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { - tb->ib_net = hold_net(net); + write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -51,7 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - release_net(tb->ib_net); + release_net(ib_net(tb)); kmem_cache_free(cachep, tb); } } @@ -449,7 +449,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. */ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->ib_net == net && tb->port == port) { + if (ib_net(tb) == net && tb->port == port) { WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; -- cgit v1.2.3 From d76b0d9b2d87cfc95686e148767cbf7d0e22bdc0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:25 +1100 Subject: CRED: Use creds in file structs Attach creds to file structs and discard f_uid/f_gid. file_operations::open() methods (such as hppfs_open()) should use file->f_cred rather than current_cred(). At the moment file->f_cred will be current_cred() at this point. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- arch/mips/kernel/vpe.c | 4 ++-- drivers/isdn/hysdn/hysdn_procconf.c | 6 ++++-- fs/coda/file.c | 2 +- fs/file_table.c | 7 ++++--- fs/hppfs/hppfs.c | 4 ++-- include/linux/fs.h | 2 +- net/ipv4/netfilter/ipt_LOG.c | 4 ++-- net/ipv6/netfilter/ip6t_LOG.c | 4 ++-- net/netfilter/nfnetlink_log.c | 5 +++-- net/netfilter/xt_owner.c | 16 ++++++++-------- net/sched/cls_flow.c | 4 ++-- 11 files changed, 31 insertions(+), 27 deletions(-) (limited to 'net/ipv4') diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 972b2d2b8401..09786e496375 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -1085,8 +1085,8 @@ static int vpe_open(struct inode *inode, struct file *filp) v->load_addr = NULL; v->len = 0; - v->uid = filp->f_uid; - v->gid = filp->f_gid; + v->uid = filp->f_cred->fsuid; + v->gid = filp->f_cred->fsgid; #ifdef CONFIG_MIPS_APSP_KSPD /* get kspd to tell us when a syscall_exit happens */ diff --git a/drivers/isdn/hysdn/hysdn_procconf.c b/drivers/isdn/hysdn/hysdn_procconf.c index 484299b031f8..8f9f4912de32 100644 --- a/drivers/isdn/hysdn/hysdn_procconf.c +++ b/drivers/isdn/hysdn/hysdn_procconf.c @@ -246,7 +246,8 @@ hysdn_conf_open(struct inode *ino, struct file *filep) } if (card->debug_flags & (LOG_PROC_OPEN | LOG_PROC_ALL)) hysdn_addlog(card, "config open for uid=%d gid=%d mode=0x%x", - filep->f_uid, filep->f_gid, filep->f_mode); + filep->f_cred->fsuid, filep->f_cred->fsgid, + filep->f_mode); if ((filep->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_WRITE) { /* write only access -> write boot file or conf line */ @@ -331,7 +332,8 @@ hysdn_conf_close(struct inode *ino, struct file *filep) } if (card->debug_flags & (LOG_PROC_OPEN | LOG_PROC_ALL)) hysdn_addlog(card, "config close for uid=%d gid=%d mode=0x%x", - filep->f_uid, filep->f_gid, filep->f_mode); + filep->f_cred->fsuid, filep->f_cred->fsgid, + filep->f_mode); if ((filep->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_WRITE) { /* write only access -> write boot file or conf line */ diff --git a/fs/coda/file.c b/fs/coda/file.c index 29137ff3ca67..5a8769985494 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -174,7 +174,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), - coda_flags, coda_file->f_uid); + coda_flags, coda_file->f_cred->fsuid); host_inode = cfi->cfi_container->f_path.dentry->d_inode; cii = ITOC(coda_inode); diff --git a/fs/file_table.c b/fs/file_table.c index bc4563fe791d..0fbcacc3ea75 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -36,7 +36,9 @@ static struct percpu_counter nr_files __cacheline_aligned_in_smp; static inline void file_free_rcu(struct rcu_head *head) { - struct file *f = container_of(head, struct file, f_u.fu_rcuhead); + struct file *f = container_of(head, struct file, f_u.fu_rcuhead); + + put_cred(f->f_cred); kmem_cache_free(filp_cachep, f); } @@ -121,8 +123,7 @@ struct file *get_empty_filp(void) INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = cred->fsuid; - f->f_gid = cred->fsgid; + f->f_cred = get_cred(cred); eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 795e2c130ad7..b278f7f52024 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -426,7 +426,7 @@ static int file_mode(int fmode) static int hppfs_open(struct inode *inode, struct file *file) { - const struct cred *cred = current_cred(); + const struct cred *cred = file->f_cred; struct hppfs_private *data; struct vfsmount *proc_mnt; struct dentry *proc_dentry; @@ -490,7 +490,7 @@ static int hppfs_open(struct inode *inode, struct file *file) static int hppfs_dir_open(struct inode *inode, struct file *file) { - const struct cred *cred = current_cred(); + const struct cred *cred = file->f_cred; struct hppfs_private *data; struct vfsmount *proc_mnt; struct dentry *proc_dentry; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3bfec1327b8d..c0fb6d81d89b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -827,7 +827,7 @@ struct file { fmode_t f_mode; loff_t f_pos; struct fown_struct f_owner; - unsigned int f_uid, f_gid; + const struct cred *f_cred; struct file_ra_state f_ra; u64 f_version; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index fc6ce04a3e35..7b5dbe118c09 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -340,8 +340,8 @@ static void dump_packet(const struct nf_loginfo *info, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u GID=%u ", - skb->sk->sk_socket->file->f_uid, - skb->sk->sk_socket->file->f_gid); + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); } diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index caa441d09567..871d157cec4e 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -364,8 +364,8 @@ static void dump_packet(const struct nf_loginfo *info, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u GID=%u ", - skb->sk->sk_socket->file->f_uid, - skb->sk->sk_socket->file->f_gid); + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 41e0105d3828..38f9efd90e8d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -474,8 +474,9 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) { - __be32 uid = htonl(skb->sk->sk_socket->file->f_uid); - __be32 gid = htonl(skb->sk->sk_socket->file->f_gid); + struct file *file = skb->sk->sk_socket->file; + __be32 uid = htonl(file->f_cred->fsuid); + __be32 gid = htonl(file->f_cred->fsgid); /* need to unlock here since NLA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); NLA_PUT_BE32(inst->skb, NFULA_UID, uid); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index f19ebd9b78f5..22b2a5e881ea 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -34,12 +34,12 @@ owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) return false; if (info->match & IPT_OWNER_UID) - if ((filp->f_uid != info->uid) ^ + if ((filp->f_cred->fsuid != info->uid) ^ !!(info->invert & IPT_OWNER_UID)) return false; if (info->match & IPT_OWNER_GID) - if ((filp->f_gid != info->gid) ^ + if ((filp->f_cred->fsgid != info->gid) ^ !!(info->invert & IPT_OWNER_GID)) return false; @@ -60,12 +60,12 @@ owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par) return false; if (info->match & IP6T_OWNER_UID) - if ((filp->f_uid != info->uid) ^ + if ((filp->f_cred->fsuid != info->uid) ^ !!(info->invert & IP6T_OWNER_UID)) return false; if (info->match & IP6T_OWNER_GID) - if ((filp->f_gid != info->gid) ^ + if ((filp->f_cred->fsgid != info->gid) ^ !!(info->invert & IP6T_OWNER_GID)) return false; @@ -93,14 +93,14 @@ owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) (XT_OWNER_UID | XT_OWNER_GID)) == 0; if (info->match & XT_OWNER_UID) - if ((filp->f_uid >= info->uid_min && - filp->f_uid <= info->uid_max) ^ + if ((filp->f_cred->fsuid >= info->uid_min && + filp->f_cred->fsuid <= info->uid_max) ^ !(info->invert & XT_OWNER_UID)) return false; if (info->match & XT_OWNER_GID) - if ((filp->f_gid >= info->gid_min && - filp->f_gid <= info->gid_max) ^ + if ((filp->f_cred->fsgid >= info->gid_min && + filp->f_cred->fsgid <= info->gid_max) ^ !(info->invert & XT_OWNER_GID)) return false; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 0ebaff637e31..0ef4e3065bcd 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -260,14 +260,14 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb) static u32 flow_get_skuid(const struct sk_buff *skb) { if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_uid; + return skb->sk->sk_socket->file->f_cred->fsuid; return 0; } static u32 flow_get_skgid(const struct sk_buff *skb) { if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_gid; + return skb->sk->sk_socket->file->f_cred->fsgid; return 0; } -- cgit v1.2.3 From 8164f1b79731ad8ad9c713dc53d587a3b746f82f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sun, 16 Nov 2008 19:19:38 -0800 Subject: ipv4: Fix ARP behavior with many mac-vlans Ben Greear wrote: > I have 500 mac-vlans on a system talking to 500 other > mac-vlans. My problem is that the arp-table gets extremely > huge because every time an arp-request comes in on all mac-vlans, > a stale arp entry is added for each mac-vlan. I have filtering > turned on, but that doesn't help because the neigh_event_ns call > below will cause a stale neighbor entry to be created regardless > of whether a replay will be sent or not. > Maybe the neigh_event code should be below the checks for dont_send, > and only create check neigh_event_ns if we are !dont_send? The attached patch makes it work much better for me. The patch will cause the code to NOT create a stale neighbor entry if we are not going to respond to the ARP request. The old code *would* create a stale entry even if we are not going to respond. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/ipv4/arp.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 957c87dc8e16..29a74c01d8de 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -818,18 +818,18 @@ static int arp_process(struct sk_buff *skb) addr_type = rt->rt_type; if (addr_type == RTN_LOCAL) { - n = neigh_event_ns(&arp_tbl, sha, &sip, dev); - if (n) { - int dont_send = 0; - - if (!dont_send) - dont_send |= arp_ignore(in_dev, sip, tip); - if (!dont_send && IN_DEV_ARPFILTER(in_dev)) - dont_send |= arp_filter(sip, tip, dev); - if (!dont_send) - arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + int dont_send = 0; - neigh_release(n); + if (!dont_send) + dont_send |= arp_ignore(in_dev,sip,tip); + if (!dont_send && IN_DEV_ARPFILTER(in_dev)) + dont_send |= arp_filter(sip,tip,dev); + if (!dont_send) { + n = neigh_event_ns(&arp_tbl, sha, &sip, dev); + if (n) { + arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + neigh_release(n); + } } goto out; } else if (IN_DEV_FORWARD(in_dev)) { -- cgit v1.2.3 From e8b2dfe9b4501ed0047459b2756ba26e5a940a69 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sun, 16 Nov 2008 19:32:39 -0800 Subject: TPROXY: implemented IP_RECVORIGDSTADDR socket option In case UDP traffic is redirected to a local UDP socket, the originally addressed destination address/port cannot be recovered with the in-kernel tproxy. This patch adds an IP_RECVORIGDSTADDR sockopt that enables a IP_ORIGDSTADDR ancillary message in recvmsg(). This ancillary message contains the original destination address/port of the packet being received. Signed-off-by: Balazs Scheidler Signed-off-by: David S. Miller --- include/linux/in.h | 4 ++++ net/ipv4/ip_sockglue.c | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/include/linux/in.h b/include/linux/in.h index db458beef19d..d60122a3a088 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -80,6 +80,10 @@ struct in_addr { /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS +/* TProxy original addresses */ +#define IP_ORIGDSTADDR 20 +#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR + /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e976efeb1456..624b534201e2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -48,6 +48,7 @@ #define IP_CMSG_RECVOPTS 8 #define IP_CMSG_RETOPTS 16 #define IP_CMSG_PASSSEC 32 +#define IP_CMSG_ORIGDSTADDR 64 /* * SOL_IP control messages. @@ -126,6 +127,27 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) security_release_secctx(secdata, seclen); } +void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) +{ + struct sockaddr_in sin; + struct iphdr *iph = ip_hdr(skb); + u16 *ports = (u16 *) skb_transport_header(skb); + + if (skb_transport_offset(skb) + 4 > skb->len) + return; + + /* All current transport protocols have the port numbers in the + * first four bytes of the transport header and this function is + * written with this assumption in mind. + */ + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = iph->daddr; + sin.sin_port = ports[1]; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + + put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); +} void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { @@ -160,6 +182,12 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) if (flags & 1) ip_cmsg_recv_security(msg, skb); + + if ((flags>>=1) == 0) + return; + if (flags & 1) + ip_cmsg_recv_dstaddr(msg, skb); + } int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) @@ -421,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -509,6 +538,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_PASSSEC; break; + case IP_RECVORIGDSTADDR: + if (val) + inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; + else + inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~3; @@ -1022,6 +1057,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_PASSSEC: val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; break; + case IP_RECVORIGDSTADDR: + val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; + break; case IP_TOS: val = inet->tos; break; -- cgit v1.2.3 From 88ab1932eac721c6e7336708558fa5ed02c85c80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:39:21 -0800 Subject: udp: Use hlist_nulls in UDP RCU code This is a straightforward patch, using hlist_nulls infrastructure. RCUification already done on UDP two weeks ago. Using hlist_nulls permits us to avoid some memory barriers, both at lookup time and delete time. Patch is large because it adds new macros to include/net/sock.h. These macros will be used by TCP & DCCP in next patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist.h | 17 --------------- include/net/sock.h | 57 +++++++++++++++++++++++++++++++++++++++---------- include/net/udp.h | 2 +- net/ipv4/udp.c | 47 +++++++++++++++++++--------------------- net/ipv6/udp.c | 26 +++++++++++----------- 5 files changed, 83 insertions(+), 66 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3ba2998b22ba..e649bd3f2c97 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -383,22 +383,5 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) -/** - * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * @next: the &struct hlist_node to use as a next cursor - * - * Special version of hlist_for_each_entry_rcu that make sure - * each next pointer is fetched before each iteration. - */ -#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference(next)) - #endif /* __KERNEL__ */ #endif diff --git a/include/net/sock.h b/include/net/sock.h index 8b2b82131b67..0a638948868d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -52,6 +53,7 @@ #include #include +#include #include #include @@ -106,6 +108,7 @@ struct net; * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count * @skc_hash: hash value used with various protocol lookup tables @@ -120,7 +123,10 @@ struct sock_common { volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; - struct hlist_node skc_node; + union { + struct hlist_node skc_node; + struct hlist_nulls_node skc_nulls_node; + }; struct hlist_node skc_bind_node; atomic_t skc_refcnt; unsigned int skc_hash; @@ -206,6 +212,7 @@ struct sock { #define sk_reuse __sk_common.skc_reuse #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_node __sk_common.skc_node +#define sk_nulls_node __sk_common.skc_nulls_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash @@ -300,12 +307,30 @@ static inline struct sock *sk_head(const struct hlist_head *head) return hlist_empty(head) ? NULL : __sk_head(head); } +static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head) +{ + return hlist_nulls_entry(head->first, struct sock, sk_nulls_node); +} + +static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head) +{ + return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head); +} + static inline struct sock *sk_next(const struct sock *sk) { return sk->sk_node.next ? hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; } +static inline struct sock *sk_nulls_next(const struct sock *sk) +{ + return (!is_a_nulls(sk->sk_nulls_node.next)) ? + hlist_nulls_entry(sk->sk_nulls_node.next, + struct sock, sk_nulls_node) : + NULL; +} + static inline int sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); @@ -321,6 +346,11 @@ static __inline__ void sk_node_init(struct hlist_node *node) node->pprev = NULL; } +static __inline__ void sk_nulls_node_init(struct hlist_nulls_node *node) +{ + node->pprev = NULL; +} + static __inline__ void __sk_del_node(struct sock *sk) { __hlist_del(&sk->sk_node); @@ -367,18 +397,18 @@ static __inline__ int sk_del_node_init(struct sock *sk) return rc; } -static __inline__ int __sk_del_node_init_rcu(struct sock *sk) +static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) { if (sk_hashed(sk)) { - hlist_del_init_rcu(&sk->sk_node); + hlist_nulls_del_init_rcu(&sk->sk_nulls_node); return 1; } return 0; } -static __inline__ int sk_del_node_init_rcu(struct sock *sk) +static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk) { - int rc = __sk_del_node_init_rcu(sk); + int rc = __sk_nulls_del_node_init_rcu(sk); if (rc) { /* paranoid for a while -acme */ @@ -399,15 +429,15 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } -static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - hlist_add_head_rcu(&sk->sk_node, list); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } -static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +static __inline__ void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { sock_hold(sk); - __sk_add_node_rcu(sk, list); + __sk_nulls_add_node_rcu(sk, list); } static __inline__ void __sk_del_bind_node(struct sock *sk) @@ -423,11 +453,16 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu_safenext(__sk, node, list, next) \ - hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next) +#define sk_nulls_for_each(__sk, node, list) \ + hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) +#define sk_nulls_for_each_rcu(__sk, node, list) \ + hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) +#define sk_nulls_for_each_from(__sk, node) \ + if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ + hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) #define sk_for_each_continue(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_continue(__sk, node, sk_node) diff --git a/include/net/udp.h b/include/net/udp.h index df2bfe545374..90e6ce56be65 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -51,7 +51,7 @@ struct udp_skb_cb { #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) struct udp_hslot { - struct hlist_head head; + struct hlist_nulls_head head; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); struct udp_table { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 54badc9a019d..fea2d873dd41 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -127,9 +127,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct sock *sk2)) { struct sock *sk2; - struct hlist_node *node; + struct hlist_nulls_node *node; - sk_for_each(sk2, node, &hslot->head) + sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_hash == num && @@ -189,12 +189,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - /* - * We need that previous write to sk->sk_hash committed - * before write to sk->next done in following add_node() variant - */ - smp_wmb(); - sk_add_node_rcu(sk, &hslot->head); + sk_nulls_add_node_rcu(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -261,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node, *next; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -271,13 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, begin: result = NULL; badness = -1; - sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { - /* - * lockless reader, and SLAB_DESTROY_BY_RCU items: - * We must check this item was not moved to another chain - */ - if (udp_hashfn(net, sk->sk_hash) != hash) - goto begin; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { @@ -285,6 +274,14 @@ begin: badness = score; } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; @@ -325,11 +322,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 rmt_port, __be32 rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short hnum = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (!net_eq(sock_net(s), net) || @@ -977,7 +974,7 @@ void udp_lib_unhash(struct sock *sk) struct udp_hslot *hslot = &udptable->hash[hash]; spin_lock_bh(&hslot->lock); - if (sk_del_node_init_rcu(sk)) { + if (sk_nulls_del_node_init_rcu(sk)) { inet_sk(sk)->num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } @@ -1130,7 +1127,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif; spin_lock(&hslot->lock); - sk = sk_head(&hslot->head); + sk = sk_nulls_head(&hslot->head); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1139,7 +1136,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, + sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, uh->source, saddr, dif); if (sknext) @@ -1560,10 +1557,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) struct net *net = seq_file_net(seq); for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; spin_lock_bh(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + sk_nulls_for_each(sk, node, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) @@ -1582,7 +1579,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) struct net *net = seq_file_net(seq); do { - sk = sk_next(sk); + sk = sk_nulls_next(sk); } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { @@ -1753,7 +1750,7 @@ void __init udp_table_init(struct udp_table *table) int i; for (i = 0; i < UDP_HTABLE_SIZE; i++) { - INIT_HLIST_HEAD(&table->hash[i].head); + INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); spin_lock_init(&table->hash[i].lock); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8dafa36b1ba5..fd2d9ad4a8a3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node, *next; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -108,19 +108,21 @@ static struct sock *__udp6_lib_lookup(struct net *net, begin: result = NULL; badness = -1; - sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { - /* - * lockless reader, and SLAB_DESTROY_BY_RCU items: - * We must check this item was not moved to another chain - */ - if (udp_hashfn(net, sk->sk_hash) != hash) - goto begin; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; @@ -374,11 +376,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, __be16 rmt_port, struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short num = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (!net_eq(sock_net(s), net)) @@ -423,7 +425,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif; spin_lock(&hslot->lock); - sk = sk_head(&hslot->head); + sk = sk_nulls_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -432,7 +434,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, } sk2 = sk; - while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { -- cgit v1.2.3 From 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:40:17 -0800 Subject: net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls RCU was added to UDP lookups, using a fast infrastructure : - sockets kmem_cache use SLAB_DESTROY_BY_RCU and dont pay the price of call_rcu() at freeing time. - hlist_nulls permits to use few memory barriers. This patch uses same infrastructure for TCP/DCCP established and timewait sockets. Thanks to SLAB_DESTROY_BY_RCU, no slowdown for applications using short lived TCP connections. A followup patch, converting rwlocks to spinlocks will even speedup this case. __inet_lookup_established() is pretty fast now we dont have to dirty a contended cache line (read_lock/read_unlock) Only established and timewait hashtable are converted to RCU (bind table and listen table are still using traditional locking) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 4 +-- include/net/inet_timewait_sock.h | 10 +++--- net/core/sock.c | 4 ++- net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/dccp/proto.c | 4 +-- net/ipv4/inet_diag.c | 7 ++-- net/ipv4/inet_hashtables.c | 78 ++++++++++++++++++++++++++++------------ net/ipv4/inet_timewait_sock.c | 26 ++++++++------ net/ipv4/tcp.c | 4 +-- net/ipv4/tcp_ipv4.c | 25 ++++++------- net/ipv6/inet6_hashtables.c | 70 ++++++++++++++++++++++++------------ net/ipv6/tcp_ipv6.c | 1 + 13 files changed, 151 insertions(+), 84 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index cb31fbf8ae2a..481896045111 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -41,8 +41,8 @@ * I'll experiment with dynamic table growth later. */ struct inet_ehash_bucket { - struct hlist_head chain; - struct hlist_head twchain; + struct hlist_nulls_head chain; + struct hlist_nulls_head twchain; }; /* There are a few simple rules, which allow for local port reuse by diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 80e4977631b8..4b8ece22b8e9 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -110,7 +110,7 @@ struct inet_timewait_sock { #define tw_state __tw_common.skc_state #define tw_reuse __tw_common.skc_reuse #define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node +#define tw_node __tw_common.skc_nulls_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash @@ -137,10 +137,10 @@ struct inet_timewait_sock { struct hlist_node tw_death_node; }; -static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, - struct hlist_head *list) +static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) { - hlist_add_head(&tw->tw_node, list); + hlist_nulls_add_head_rcu(&tw->tw_node, list); } static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -175,7 +175,7 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) } #define inet_twsk_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) + hlist_nulls_for_each_entry(tw, node, head, tw_node) #define inet_twsk_for_each_inmate(tw, node, jail) \ hlist_for_each_entry(tw, node, jail, tw_death_node) diff --git a/net/core/sock.c b/net/core/sock.c index ded1eb5d2fd4..38de9c3f563b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2082,7 +2082,9 @@ int proto_register(struct proto *prot, int alloc_slab) prot->twsk_prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, prot->twsk_prot->twsk_obj_size, - 0, SLAB_HWCACHE_ALIGN, + 0, + SLAB_HWCACHE_ALIGN | + prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 528baa2e5be4..d1dd95289b89 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -938,6 +938,7 @@ static struct proto dccp_v4_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4aa1148cdb20..f033e845bb07 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1140,6 +1140,7 @@ static struct proto dccp_v6_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 46cb3490d48e..1117d4d8c8f1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1090,8 +1090,8 @@ static int __init dccp_init(void) } for (i = 0; i < dccp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&dccp_hashinfo)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 564230dabcb8..41b36720e977 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -778,18 +778,19 @@ skip_listen_ht: struct inet_ehash_bucket *head = &hashinfo->ehash[i]; rwlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; num = 0; - if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) + if (hlist_nulls_empty(&head->chain) && + hlist_nulls_empty(&head->twchain)) continue; if (i > s_i) s_num = 0; read_lock_bh(lock); - sk_for_each(sk, node, &head->chain) { + sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index be41ebbec4eb..fd269cfef0ec 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -223,35 +223,65 @@ struct sock * __inet_lookup_established(struct net *net, INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { if (INET_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (unlikely(!INET_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begin; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begin; +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each_rcu(sk, node, &head->twchain) { if (INET_TW_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begintw; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begintw; sk = NULL; out: - read_unlock(lock); + rcu_read_unlock(); return sk; -hit: - sock_hold(sk); - goto out; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -272,14 +302,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); write_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, net, hash, acookie, @@ -293,7 +323,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... */ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET_MATCH(sk2, net, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; @@ -306,7 +336,7 @@ unique: inet->sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); @@ -338,7 +368,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; + struct hlist_nulls_head *list; rwlock_t *lock; struct inet_ehash_bucket *head; @@ -350,7 +380,7 @@ void __inet_hash_nolisten(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock(lock); - __sk_add_node(sk, list); + __sk_nulls_add_node_rcu(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } @@ -400,13 +430,15 @@ void inet_unhash(struct sock *sk) local_bh_disable(); inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; + if (__sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock_bh(lock); + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(lock); out: if (sk->sk_state == TCP_LISTEN) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1c5fd38f8824..60689951ecdb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -23,12 +23,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); write_lock(lock); - if (hlist_unhashed(&tw->tw_node)) { + if (hlist_nulls_unhashed(&tw->tw_node)) { write_unlock(lock); return; } - __hlist_del(&tw->tw_node); - sk_node_init(&tw->tw_node); + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); write_unlock(lock); /* Disassociate with bind bucket. */ @@ -92,13 +92,17 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, write_lock(lock); - /* Step 2: Remove SK from established hash. */ - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - - /* Step 3: Hash TW into TIMEWAIT chain. */ - inet_twsk_add_node(tw, &ehead->twchain); + /* + * Step 2: Hash TW into TIMEWAIT chain. + * Should be done before removing sk from established chain + * because readers are lockless and search established first. + */ atomic_inc(&tw->tw_refcnt); + inet_twsk_add_node_rcu(tw, &ehead->twchain); + + /* Step 3: Remove SK from established hash. */ + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock(lock); } @@ -416,7 +420,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, { struct inet_timewait_sock *tw; struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; int h; local_bh_disable(); @@ -426,7 +430,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, rwlock_t *lock = inet_ehash_lockp(hashinfo, h); restart: write_lock(lock); - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each(sk, node, &head->twchain) { tw = inet_twsk(sk); if (!net_eq(twsk_net(tw), net) || diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f60a5917e54d..044224a341eb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2707,8 +2707,8 @@ void __init tcp_init(void) thash_entries ? 0 : 512 * 1024); tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; for (i = 0; i < tcp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&tcp_hashinfo)) panic("TCP: failed to alloc ehash_locks"); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d49233f409b5..b2e3ab2287ba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1857,16 +1857,16 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) { - return hlist_empty(head) ? NULL : + return hlist_nulls_empty(head) ? NULL : list_entry(head->first, struct inet_timewait_sock, tw_node); } static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) { - return tw->tw_node.next ? - hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; + return !is_a_nulls(tw->tw_node.next) ? + hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } static void *listening_get_next(struct seq_file *seq, void *cur) @@ -1954,8 +1954,8 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) static inline int empty_bucket(struct tcp_iter_state *st) { - return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && - hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); + return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && + hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } static void *established_get_first(struct seq_file *seq) @@ -1966,7 +1966,7 @@ static void *established_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct inet_timewait_sock *tw; rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); @@ -1975,7 +1975,7 @@ static void *established_get_first(struct seq_file *seq) continue; read_lock_bh(lock); - sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || !net_eq(sock_net(sk), net)) { continue; @@ -2004,7 +2004,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; struct inet_timewait_sock *tw; - struct hlist_node *node; + struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); @@ -2032,11 +2032,11 @@ get_tw: return NULL; read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); + sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); } else - sk = sk_next(sk); + sk = sk_nulls_next(sk); - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) goto found; } @@ -2375,6 +2375,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1646a5658255..c1b4d401fd95 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,24 +25,28 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { + struct hlist_head *list; + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; lock = &hashinfo->lhash_lock; inet_listen_wlock(hashinfo); + __sk_add_node(sk, list); } else { unsigned int hash; + struct hlist_nulls_head *list; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); + __sk_nulls_add_node_rcu(sk, list); } - __sk_add_node(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } @@ -63,33 +67,53 @@ struct sock *__inet6_lookup_established(struct net *net, const int dif) { struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begin; + } + goto out; + } } + if (get_nulls_value(node) != slot) + goto begin; + +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { - if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; + sk_nulls_for_each_rcu(sk, node, &head->twchain) { + if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begintw; + } + goto out; + } } - read_unlock(lock); - return NULL; - -hit: - sock_hold(sk); - read_unlock(lock); + if (get_nulls_value(node) != slot) + goto begintw; + sk = NULL; +out: + rcu_read_unlock(); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); @@ -172,14 +196,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); write_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { @@ -192,7 +216,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... */ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) goto not_unique; } @@ -203,7 +227,7 @@ unique: inet->num = lport; inet->sport = htons(lport); WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 984276463a8d..b35787056313 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2043,6 +2043,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, -- cgit v1.2.3 From a7a0d6a87b70f7b2bab5281fc0fd443772bd0795 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Nov 2008 15:43:27 -0800 Subject: net: inet_diag_handler structs can be const Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/diag.c | 2 +- net/ipv4/tcp_diag.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/dccp/diag.c b/net/dccp/diag.c index d8a3509b26f6..d1e100395efb 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -45,7 +45,7 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, dccp_get_info(sk, _info); } -static struct inet_diag_handler dccp_diag_handler = { +static const struct inet_diag_handler dccp_diag_handler = { .idiag_hashinfo = &dccp_hashinfo, .idiag_get_info = dccp_diag_get_info, .idiag_type = DCCPDIAG_GETSOCK, diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 838d491dfda7..fcbcd4ff6c5f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,7 +34,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, tcp_get_info(sk, info); } -static struct inet_diag_handler tcp_diag_handler = { +static const struct inet_diag_handler tcp_diag_handler = { .idiag_hashinfo = &tcp_hashinfo, .idiag_get_info = tcp_diag_get_info, .idiag_type = TCPDIAG_GETSOCK, -- cgit v1.2.3 From 07f0757a6808f2f36a0e58c3a54867ccffdb8dc9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 19 Nov 2008 15:44:53 -0800 Subject: include/net net/ - csum_partial - remove unnecessary casts The first argument to csum_partial is const void * casts to char/u8 * are not necessary Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/checksum.h | 2 +- include/net/ip_vs.h | 6 +++--- net/core/netpoll.c | 2 +- net/ipv4/inet_lro.c | 4 ++-- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/icmp.c | 4 ++-- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 4 ++-- net/ipv6/tcp_ipv6.c | 6 +++--- net/unix/af_unix.c | 4 ++-- 10 files changed, 19 insertions(+), 19 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/checksum.h b/include/net/checksum.h index 07602b7fa218..ba55d8b8c87c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -98,7 +98,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __be32 diff[] = { ~from, to }; - *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum))); + *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); } static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8f6abf4883e3..ab9b003ab671 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -913,7 +913,7 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum) { __be32 diff[2] = { ~old, new }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #ifdef CONFIG_IP_VS_IPV6 @@ -923,7 +923,7 @@ static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new, __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0], new[3], new[2], new[1], new[0] }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #endif @@ -931,7 +931,7 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) { __be16 diff[2] = { ~old, new }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #endif /* __KERNEL__ */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 34f5d072f168..fc4e28e23b89 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -343,7 +343,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->check = csum_tcpudp_magic(htonl(np->local_ip), htonl(np->remote_ip), udp_len, IPPROTO_UDP, - csum_partial((unsigned char *)udph, udp_len, 0)); + csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cfd034a2b96e..6a667dae315e 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -120,7 +120,7 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); tcph->check = 0; - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, lro_desc->ip_tot_len - @@ -135,7 +135,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) __wsum tcp_ps_hdr_csum; tcp_csum = ~csum_unfold(tcph->check); - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, len + TCP_HDR_LEN(tcph), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b2e3ab2287ba..5559fea61e87 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -492,7 +492,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = tcp_v4_check(len, inet->saddr, inet->daddr, - csum_partial((char *)th, + csum_partial(th, th->doff << 2, skb->csum)); } @@ -726,7 +726,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, th->check = tcp_v4_check(skb->len, ireq->loc_addr, ireq->rmt_addr, - csum_partial((char *)th, skb->len, + csum_partial(th, skb->len, skb->csum)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index be351009fd03..a77b8d103804 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -233,7 +233,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { - skb->csum = csum_partial((char *)icmp6h, + skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, @@ -246,7 +246,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_add(tmp_csum, skb->csum); } - tmp_csum = csum_partial((char *)icmp6h, + tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a76199ecad23..870a1d64605a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1817,7 +1817,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, len, 0)); + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fbf451c0d77a..af6705f03b5c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -491,7 +491,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, + csum_partial(hdr, len, 0)); return skb; @@ -1612,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, len, IPPROTO_ICMPV6, - csum_partial((u8 *) icmph, len, 0)); + csum_partial(icmph, len, 0)); buff->dst = dst; idev = in6_dev_get(dst->dev); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b35787056313..a5d750acd793 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -501,7 +501,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) th->check = tcp_v6_check(th, skb->len, &treq->loc_addr, &treq->rmt_addr, - csum_partial((char *)th, skb->len, skb->csum)); + csum_partial(th, skb->len, skb->csum)); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); @@ -915,7 +915,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, - csum_partial((char *)th, th->doff<<2, + csum_partial(th, th->doff<<2, skb->csum)); } } @@ -997,7 +997,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial((char *)t1, tot_len, 0); + buff->csum = csum_partial(t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f2cf3f583f62..ebc4a9a4b1f7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -216,7 +216,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) return len; } - *hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0)); + *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); return len; } @@ -686,7 +686,7 @@ static int unix_autobind(struct socket *sock) retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); - addr->hash = unix_hash_fold(csum_partial((void *)addr->name, addr->len, 0)); + addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; -- cgit v1.2.3 From 5bc3eb7e2f0026f246d939851109df99e8e9f64a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:52:05 -0800 Subject: ip: convert to net_device_ops for ioctl Convert to net_device_ops function table pointer for ioctl. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 25 +++++++++++++++++-------- net/ipv6/addrconf.c | 13 +++++++++---- 2 files changed, 26 insertions(+), 12 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 05ed336f798a..ed372f3983e5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -124,8 +124,8 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { + const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; memset(&p, 0, sizeof(p)); @@ -137,9 +137,13 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) sprintf(p.name, "dvmrp%d", v->vifc_vifi); ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); + set_fs(oldfs); + } } } @@ -151,9 +155,9 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { + const struct net_device_ops *ops = dev->netdev_ops; int err; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; struct in_device *in_dev; @@ -166,9 +170,14 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) sprintf(p.name, "dvmrp%d", v->vifc_vifi); ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + } else + err = -EOPNOTSUPP; dev = NULL; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e41f1be6dc9..e92ad8455c63 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2031,8 +2031,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT) { + const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; err = -EADDRNOTAVAIL; @@ -2048,9 +2048,14 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) p.iph.ttl = 64; ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + } else + err = -EOPNOTSUPP; if (err == 0) { err = -ENOBUFS; -- cgit v1.2.3 From 5caea4ea7088e80ac5410d04660346094608b909 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 00:40:07 -0800 Subject: net: listening_hash get a spinlock per bucket This patch prepares RCU migration of listening_hash table for TCP/DCCP protocols. listening_hash table being small (32 slots per protocol), we add a spinlock for each slot, instead of a single rwlock for whole table. This should reduce hold time of readers, and writers concurrency. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 45 ++++++++-------------- net/dccp/proto.c | 8 +--- net/ipv4/inet_diag.c | 12 +++--- net/ipv4/inet_hashtables.c | 86 ++++++++++++++++--------------------------- net/ipv4/tcp_ipv4.c | 24 ++++++------ net/ipv6/inet6_hashtables.c | 23 ++++++------ 6 files changed, 79 insertions(+), 119 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 481896045111..62d2dd0d7860 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -99,6 +99,11 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +struct inet_listen_hashbucket { + spinlock_t lock; + struct hlist_head head; +}; + /* This is for listening sockets, thus all sockets which possess wildcards. */ #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ @@ -123,22 +128,21 @@ struct inet_hashinfo { unsigned int bhash_size; /* Note : 4 bytes padding on 64 bit arches */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. - */ - struct hlist_head listening_hash[INET_LHTABLE_SIZE]; + struct kmem_cache *bind_bucket_cachep; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. * * Now align to a new cache line as all the following members - * are often dirty. + * might be often dirty. + */ + /* All sockets in TCP_LISTEN state will be in here. This is the only + * table where wildcard'd TCP sockets can exist. Hash function here + * is just local port number. */ - rwlock_t lhash_lock ____cacheline_aligned; - atomic_t lhash_users; - wait_queue_head_t lhash_wait; - struct kmem_cache *bind_bucket_cachep; + struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] + ____cacheline_aligned_in_smp; + }; static inline struct inet_ehash_bucket *inet_ehash_bucket( @@ -236,26 +240,7 @@ extern void __inet_inherit_port(struct sock *sk, struct sock *child); extern void inet_put_port(struct sock *sk); -extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); - -/* - * - We may sleep inside this lock. - * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&inet_hashinfo.lhash_lock). - */ -static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) -{ - /* read_lock synchronizes to candidates to writers */ - read_lock(&hashinfo->lhash_lock); - atomic_inc(&hashinfo->lhash_users); - read_unlock(&hashinfo->lhash_lock); -} - -static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) -{ - if (atomic_dec_and_test(&hashinfo->lhash_users)) - wake_up(&hashinfo->lhash_wait); -} +void inet_hashinfo_init(struct inet_hashinfo *h); extern void __inet_hash_nolisten(struct sock *sk); extern void inet_hash(struct sock *sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index bdf784c422b5..8b63394ec24c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -44,12 +44,7 @@ atomic_t dccp_orphan_count = ATOMIC_INIT(0); EXPORT_SYMBOL_GPL(dccp_orphan_count); -struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), -}; - +struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); /* the maximum queue length for tx in packets. 0 is no limit */ @@ -1030,6 +1025,7 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 41b36720e977..1cb154ed75ad 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -718,13 +718,15 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) goto skip_listen_ht; - inet_listen_lock(hashinfo); for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; + struct inet_listen_hashbucket *ilb; num = 0; - sk_for_each(sk, node, &hashinfo->listening_hash[i]) { + ilb = &hashinfo->listening_hash[i]; + spin_lock_bh(&ilb->lock); + sk_for_each(sk, node, &ilb->head) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { @@ -742,7 +744,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto syn_recv; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - inet_listen_unlock(hashinfo); + spin_unlock_bh(&ilb->lock); goto done; } @@ -751,7 +753,7 @@ syn_recv: goto next_listen; if (inet_diag_dump_reqs(skb, sk, cb) < 0) { - inet_listen_unlock(hashinfo); + spin_unlock_bh(&ilb->lock); goto done; } @@ -760,12 +762,12 @@ next_listen: cb->args[4] = 0; ++num; } + spin_unlock_bh(&ilb->lock); s_num = 0; cb->args[3] = 0; cb->args[4] = 0; } - inet_listen_unlock(hashinfo); skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fd269cfef0ec..377d004e5723 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -110,35 +110,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) EXPORT_SYMBOL_GPL(__inet_inherit_port); -/* - * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. - * Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines (wake up each - * exclusive lock release). It should be ifdefed really. - */ -void inet_listen_wlock(struct inet_hashinfo *hashinfo) - __acquires(hashinfo->lhash_lock) -{ - write_lock(&hashinfo->lhash_lock); - - if (atomic_read(&hashinfo->lhash_users)) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait_exclusive(&hashinfo->lhash_wait, - &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&hashinfo->lhash_users)) - break; - write_unlock_bh(&hashinfo->lhash_lock); - schedule(); - write_lock_bh(&hashinfo->lhash_lock); - } - - finish_wait(&hashinfo->lhash_wait, &wait); - } -} - /* * Don't inline this cruft. Here are some nice properties to exploit here. The * BSD API does not allow a listening sock to specify the remote port nor the @@ -191,25 +162,25 @@ struct sock *__inet_lookup_listener(struct net *net, const int dif) { struct sock *sk = NULL; - const struct hlist_head *head; + struct inet_listen_hashbucket *ilb; - read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - if (!hlist_empty(head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; + spin_lock(&ilb->lock); + if (!hlist_empty(&ilb->head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head))); if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) goto sherry_cache; - sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif); } if (sk) { sherry_cache: sock_hold(sk); } - read_unlock(&hashinfo->lhash_lock); + spin_unlock(&ilb->lock); return sk; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -389,8 +360,7 @@ EXPORT_SYMBOL_GPL(__inet_hash_nolisten); static void __inet_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; + struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) { __inet_hash_nolisten(sk); @@ -398,14 +368,12 @@ static void __inet_hash(struct sock *sk) } WARN_ON(!sk_unhashed(sk)); - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - inet_listen_wlock(hashinfo); - __sk_add_node(sk, list); + spin_lock(&ilb->lock); + __sk_add_node(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); - wake_up(&hashinfo->lhash_wait); + spin_unlock(&ilb->lock); } void inet_hash(struct sock *sk) @@ -420,29 +388,27 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { - rwlock_t *lock; struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; if (sk_unhashed(sk)) - goto out; + return; if (sk->sk_state == TCP_LISTEN) { - local_bh_disable(); - inet_listen_wlock(hashinfo); - lock = &hashinfo->lhash_lock; + struct inet_listen_hashbucket *ilb; + + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + spin_lock_bh(&ilb->lock); if (__sk_del_node_init(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_unlock_bh(&ilb->lock); } else { - lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + write_lock_bh(lock); if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(lock); } - - write_unlock_bh(lock); -out: - if (sk->sk_state == TCP_LISTEN) - wake_up(&hashinfo->lhash_wait); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -556,3 +522,13 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, } EXPORT_SYMBOL_GPL(inet_hash_connect); + +void inet_hashinfo_init(struct inet_hashinfo *h) +{ + int i; + + for (i = 0; i < INET_LHTABLE_SIZE; i++) + spin_lock_init(&h->listening_hash[i].lock); +} + +EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5559fea61e87..330b08a12274 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,11 +97,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) } #endif -struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), -}; +struct inet_hashinfo tcp_hashinfo; static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { @@ -1874,15 +1870,18 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct inet_connection_sock *icsk; struct hlist_node *node; struct sock *sk = cur; + struct inet_listen_hashbucket *ilb; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); if (!sk) { st->bucket = 0; - sk = sk_head(&tcp_hashinfo.listening_hash[0]); + ilb = &tcp_hashinfo.listening_hash[0]; + spin_lock_bh(&ilb->lock); + sk = sk_head(&ilb->head); goto get_sk; } - + ilb = &tcp_hashinfo.listening_hash[st->bucket]; ++st->num; if (st->state == TCP_SEQ_STATE_OPENREQ) { @@ -1932,8 +1931,11 @@ start_req: } read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } + spin_unlock_bh(&ilb->lock); if (++st->bucket < INET_LHTABLE_SIZE) { - sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); + ilb = &tcp_hashinfo.listening_hash[st->bucket]; + spin_lock_bh(&ilb->lock); + sk = sk_head(&ilb->head); goto get_sk; } cur = NULL; @@ -2066,12 +2068,10 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) void *rc; struct tcp_iter_state *st = seq->private; - inet_listen_lock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_idx(seq, &pos); if (!rc) { - inet_listen_unlock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); } @@ -2103,7 +2103,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) case TCP_SEQ_STATE_LISTENING: rc = listening_get_next(seq, v); if (!rc) { - inet_listen_unlock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); } @@ -2130,7 +2129,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - inet_listen_unlock(&tcp_hashinfo); + spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); break; case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: @@ -2405,6 +2404,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { + inet_hashinfo_init(&tcp_hashinfo); if (register_pernet_device(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index c1b4d401fd95..21544b9be259 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,30 +25,30 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - struct hlist_head *list; + struct inet_listen_hashbucket *ilb; - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; - inet_listen_wlock(hashinfo); - __sk_add_node(sk, list); + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + spin_lock(&ilb->lock); + __sk_add_node(sk, &ilb->head); + spin_unlock(&ilb->lock); } else { unsigned int hash; struct hlist_nulls_head *list; + rwlock_t *lock; sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); __sk_nulls_add_node_rcu(sk, list); + write_unlock(lock); } sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL(__inet6_hash); @@ -126,10 +126,11 @@ struct sock *inet6_lookup_listener(struct net *net, const struct hlist_node *node; struct sock *result = NULL; int score, hiscore = 0; + struct inet_listen_hashbucket *ilb; - read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, - &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { + ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; + spin_lock(&ilb->lock); + sk_for_each(sk, node, &ilb->head) { if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -157,7 +158,7 @@ struct sock *inet6_lookup_listener(struct net *net, } if (result) sock_hold(result); - read_unlock(&hashinfo->lhash_lock); + spin_unlock(&ilb->lock); return result; } -- cgit v1.2.3 From 21d1a161f6b1fff154e3614a6b5a2111fa7a4191 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 20 Nov 2008 01:54:27 -0800 Subject: net: ip_sockglue.c add static, annotate ports' endianness Fixes sparse warnings: net/ipv4/ip_sockglue.c:146:15: warning: incorrect type in assignment (different base types) net/ipv4/ip_sockglue.c:146:15: expected restricted __be16 [assigned] [usertype] sin_port net/ipv4/ip_sockglue.c:146:15: got unsigned short [unsigned] [short] [usertype] net/ipv4/ip_sockglue.c:130:6: warning: symbol 'ip_cmsg_recv_dstaddr' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 624b534201e2..43c05854d752 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -127,11 +127,11 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) security_release_secctx(secdata, seclen); } -void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) +static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; struct iphdr *iph = ip_hdr(skb); - u16 *ports = (u16 *) skb_transport_header(skb); + __be16 *ports = (__be16 *)skb_transport_header(skb); if (skb_transport_offset(skb) + 4 > skb->len) return; -- cgit v1.2.3 From 007c3838d9fdcc8fdaea87e4879ec3759f016ed5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:28:35 -0800 Subject: ipmr: convert ipmr virtual interface to net_device_ops Convert to new network device ops interface. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 6 +++++- net/ipv6/ip6mr.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ee58bcbc99a5..77fc4d3fdf61 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -222,12 +222,16 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static const struct net_device_ops reg_vif_netdev_ops = { + .ndo_start_xmit = reg_vif_xmit, +}; + static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; - dev->hard_start_xmit = reg_vif_xmit; + dev->netdev_ops = ®_vif_netdev_ops, dev->destructor = free_netdev; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 363ae258ee1e..dfba9fd0c248 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -416,12 +416,16 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static const struct net_device_ops reg_vif_netdev_ops = { + .ndo_start_xmit = reg_vif_xmit, +}; + static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; - dev->hard_start_xmit = reg_vif_xmit; + dev->netdev_ops = ®_vif_netdev_ops; dev->destructor = free_netdev; } -- cgit v1.2.3 From 23a12b14715e2dcd34dc8002927263ad3437344c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:33:21 -0800 Subject: ipip: convert to net_device_ops Convert to network device ops. Needed to change to directly call the init routine since two sides share same ops. In the process found by inspection a device ref count leak if register_netdevice failed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index b3c3d7b0d116..9eb437cb821a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -130,8 +130,8 @@ struct ipip_net { struct net_device *fb_tunnel_dev; }; -static int ipip_fb_tunnel_init(struct net_device *dev); -static int ipip_tunnel_init(struct net_device *dev); +static void ipip_fb_tunnel_init(struct net_device *dev); +static void ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); static DEFINE_RWLOCK(ipip_lock); @@ -245,9 +245,10 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, } nt = netdev_priv(dev); - dev->init = ipip_tunnel_init; nt->parms = *parms; + ipip_tunnel_init(dev); + if (register_netdevice(dev) < 0) goto failed_free; @@ -691,12 +692,17 @@ static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static const struct net_device_ops ipip_netdev_ops = { + .ndo_uninit = ipip_tunnel_uninit, + .ndo_start_xmit = ipip_tunnel_xmit, + .ndo_do_ioctl = ipip_tunnel_ioctl, + .ndo_change_mtu = ipip_tunnel_change_mtu, + +}; + static void ipip_tunnel_setup(struct net_device *dev) { - dev->uninit = ipip_tunnel_uninit; - dev->hard_start_xmit = ipip_tunnel_xmit; - dev->do_ioctl = ipip_tunnel_ioctl; - dev->change_mtu = ipip_tunnel_change_mtu; + dev->netdev_ops = &ipip_netdev_ops; dev->destructor = free_netdev; dev->type = ARPHRD_TUNNEL; @@ -708,11 +714,9 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipip_tunnel_init(struct net_device *dev) +static void ipip_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); @@ -721,11 +725,9 @@ static int ipip_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip_tunnel_bind_dev(dev); - - return 0; } -static int ipip_fb_tunnel_init(struct net_device *dev) +static void ipip_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -740,7 +742,6 @@ static int ipip_fb_tunnel_init(struct net_device *dev) dev_hold(dev); ipn->tunnels_wc[0] = tunnel; - return 0; } static struct xfrm_tunnel ipip_handler = { @@ -793,7 +794,7 @@ static int ipip_init_net(struct net *net) goto err_alloc_dev; } - ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init; + ipip_fb_tunnel_init(ipn->fb_tunnel_dev); dev_net_set(ipn->fb_tunnel_dev, net); if ((err = register_netdev(ipn->fb_tunnel_dev))) -- cgit v1.2.3 From b8c26a33c8b6f0a150e9cb38ed80b890be55395c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:34:29 -0800 Subject: ipgre: convert to netdevice_ops Convert ipgre tunnel to netdevice ops. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 191ef7588134..bf5b338f1343 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -126,8 +126,6 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev); /* Fallback tunnel: no source, no destination, no key, no options */ -static int ipgre_fb_tunnel_init(struct net_device *dev); - #define HASH_SIZE 16 static int ipgre_net_id; @@ -1142,6 +1140,7 @@ static int ipgre_open(struct net_device *dev) static int ipgre_close(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); + if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; in_dev = inetdev_by_index(dev_net(dev), t->mlink); @@ -1155,14 +1154,22 @@ static int ipgre_close(struct net_device *dev) #endif +static const struct net_device_ops ipgre_netdev_ops = { + .ndo_init = ipgre_tunnel_init, + .ndo_uninit = ipgre_tunnel_uninit, +#ifdef CONFIG_NET_IPGRE_BROADCAST + .ndo_open = ipgre_open, + .ndo_stop = ipgre_close, +#endif + .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_do_ioctl = ipgre_tunnel_ioctl, + .ndo_change_mtu = ipgre_tunnel_change_mtu, +}; + static void ipgre_tunnel_setup(struct net_device *dev) { - dev->init = ipgre_tunnel_init; - dev->uninit = ipgre_tunnel_uninit; + dev->netdev_ops = &ipgre_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->do_ioctl = ipgre_tunnel_ioctl; - dev->change_mtu = ipgre_tunnel_change_mtu; dev->type = ARPHRD_IPGRE; dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; @@ -1194,8 +1201,6 @@ static int ipgre_tunnel_init(struct net_device *dev) return -EINVAL; dev->flags = IFF_BROADCAST; dev->header_ops = &ipgre_header_ops; - dev->open = ipgre_open; - dev->stop = ipgre_close; } #endif } else @@ -1204,7 +1209,7 @@ static int ipgre_tunnel_init(struct net_device *dev) return 0; } -static int ipgre_fb_tunnel_init(struct net_device *dev) +static void ipgre_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -1220,7 +1225,6 @@ static int ipgre_fb_tunnel_init(struct net_device *dev) dev_hold(dev); ign->tunnels_wc[0] = tunnel; - return 0; } @@ -1265,7 +1269,7 @@ static int ipgre_init_net(struct net *net) goto err_alloc_dev; } - ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; + ipgre_fb_tunnel_init(ign->fb_tunnel_dev); dev_net_set(ign->fb_tunnel_dev, net); ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; @@ -1397,16 +1401,22 @@ static int ipgre_tap_init(struct net_device *dev) return 0; } +static const struct net_device_ops ipgre_tap_netdev_ops = { + .ndo_init = ipgre_tap_init, + .ndo_uninit = ipgre_tunnel_uninit, + .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ipgre_tunnel_change_mtu, +}; + static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); - dev->init = ipgre_tap_init; - dev->uninit = ipgre_tunnel_uninit; + dev->netdev_ops = &ipgre_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->change_mtu = ipgre_tunnel_change_mtu; dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; -- cgit v1.2.3 From 9db66bdcc83749affe61c61eb8ff3cf08f42afec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 20:39:09 -0800 Subject: net: convert TCP/DCCP ehash rwlocks to spinlocks Now TCP & DCCP use RCU lookups, we can convert ehash rwlocks to spinlocks. /proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'. This should speedup writers, since spin_lock()/spin_unlock() only use one atomic operation instead of two for write_lock()/write_unlock() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 14 +++++++------- net/ipv4/inet_hashtables.c | 21 ++++++++++----------- net/ipv4/inet_timewait_sock.c | 22 +++++++++++----------- net/ipv4/tcp_ipv4.c | 12 ++++++------ net/ipv6/inet6_hashtables.c | 15 +++++++-------- 5 files changed, 41 insertions(+), 43 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 62d2dd0d7860..28b3ee3e8d6d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -116,7 +116,7 @@ struct inet_hashinfo { * TIME_WAIT sockets use a separate chain (twchain). */ struct inet_ehash_bucket *ehash; - rwlock_t *ehash_locks; + spinlock_t *ehash_locks; unsigned int ehash_size; unsigned int ehash_locks_mask; @@ -152,7 +152,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } -static inline rwlock_t *inet_ehash_lockp( +static inline spinlock_t *inet_ehash_lockp( struct inet_hashinfo *hashinfo, unsigned int hash) { @@ -177,16 +177,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) size = 4096; if (sizeof(rwlock_t) != 0) { #ifdef CONFIG_NUMA - if (size * sizeof(rwlock_t) > PAGE_SIZE) - hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); + if (size * sizeof(spinlock_t) > PAGE_SIZE) + hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); else #endif - hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), + hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t), GFP_KERNEL); if (!hashinfo->ehash_locks) return ENOMEM; for (i = 0; i < size; i++) - rwlock_init(&hashinfo->ehash_locks[i]); + spin_lock_init(&hashinfo->ehash_locks[i]); } hashinfo->ehash_locks_mask = size - 1; return 0; @@ -197,7 +197,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) if (hashinfo->ehash_locks) { #ifdef CONFIG_NUMA unsigned int size = (hashinfo->ehash_locks_mask + 1) * - sizeof(rwlock_t); + sizeof(spinlock_t); if (size > PAGE_SIZE) vfree(hashinfo->ehash_locks); else diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 377d004e5723..4c273a9981a6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -271,13 +271,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ sk_nulls_for_each(sk2, node, &head->twchain) { @@ -308,8 +307,8 @@ unique: sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp) { *twp = tw; @@ -325,7 +324,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } @@ -340,7 +339,7 @@ void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; - rwlock_t *lock; + spinlock_t *lock; struct inet_ehash_bucket *head; WARN_ON(!sk_unhashed(sk)); @@ -350,10 +349,10 @@ void __inet_hash_nolisten(struct sock *sk) list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock(lock); + spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); @@ -402,12 +401,12 @@ void inet_unhash(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(&ilb->lock); } else { - rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock_bh(lock); + spin_lock_bh(lock); if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(lock); + spin_unlock_bh(lock); } } EXPORT_SYMBOL_GPL(inet_unhash); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 60689951ecdb..8554d0ea1719 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - write_lock(lock); + spin_lock(lock); if (hlist_nulls_unhashed(&tw->tw_node)) { - write_unlock(lock); + spin_unlock(lock); return; } hlist_nulls_del_rcu(&tw->tw_node); sk_nulls_node_init(&tw->tw_node); - write_unlock(lock); + spin_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, @@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -90,7 +90,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); - write_lock(lock); + spin_lock(lock); /* * Step 2: Hash TW into TIMEWAIT chain. @@ -104,7 +104,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock(lock); + spin_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); @@ -427,9 +427,9 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, for (h = 0; h < (hashinfo->ehash_size); h++) { struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, h); - rwlock_t *lock = inet_ehash_lockp(hashinfo, h); + spinlock_t *lock = inet_ehash_lockp(hashinfo, h); restart: - write_lock(lock); + spin_lock(lock); sk_nulls_for_each(sk, node, &head->twchain) { tw = inet_twsk(sk); @@ -438,13 +438,13 @@ restart: continue; atomic_inc(&tw->tw_refcnt); - write_unlock(lock); + spin_unlock(lock); inet_twsk_deschedule(tw, twdr); inet_twsk_put(tw); goto restart; } - write_unlock(lock); + spin_unlock(lock); } local_bh_enable(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 330b08a12274..a81caa1be0cf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1970,13 +1970,13 @@ static void *established_get_first(struct seq_file *seq) struct sock *sk; struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); + spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); /* Lockless fast path for the common case of empty buckets */ if (empty_bucket(st)) continue; - read_lock_bh(lock); + spin_lock_bh(lock); sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || !net_eq(sock_net(sk), net)) { @@ -1995,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock_bh(lock); + spin_unlock_bh(lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2023,7 +2023,7 @@ get_tw: cur = tw; goto out; } - read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ @@ -2033,7 +2033,7 @@ get_tw: if (st->bucket >= tcp_hashinfo.ehash_size) return NULL; - read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); } else sk = sk_nulls_next(sk); @@ -2134,7 +2134,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); break; } } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 21544b9be259..e0fd68187f83 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -38,14 +38,14 @@ void __inet6_hash(struct sock *sk) } else { unsigned int hash; struct hlist_nulls_head *list; - rwlock_t *lock; + spinlock_t *lock; sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); - write_lock(lock); + spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); - write_unlock(lock); + spin_unlock(lock); } sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -195,13 +195,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ sk_nulls_for_each(sk2, node, &head->twchain) { @@ -230,8 +229,8 @@ unique: WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp != NULL) { *twp = tw; @@ -246,7 +245,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } -- cgit v1.2.3 From 7e3aab4a9cd7d37f80eee75bebb6a71347f82476 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Nov 2008 16:39:19 -0800 Subject: inet_diag: Missed conversion after changing inet ehash lockl to spinlocks. They are no longer a rwlocks. Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1cb154ed75ad..998a78f169ff 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -778,7 +778,7 @@ skip_listen_ht: for (i = s_i; i < hashinfo->ehash_size; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; - rwlock_t *lock = inet_ehash_lockp(hashinfo, i); + spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; struct hlist_nulls_node *node; @@ -791,7 +791,7 @@ skip_listen_ht: if (i > s_i) s_num = 0; - read_lock_bh(lock); + spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); @@ -806,7 +806,7 @@ skip_listen_ht: r->id.idiag_dport) goto next_normal; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - read_unlock_bh(lock); + spin_unlock_bh(lock); goto done; } next_normal: @@ -828,14 +828,14 @@ next_normal: r->id.idiag_dport) goto next_dying; if (inet_twsk_diag_dump(tw, skb, cb) < 0) { - read_unlock_bh(lock); + spin_unlock_bh(lock); goto done; } next_dying: ++num; } } - read_unlock_bh(lock); + spin_unlock_bh(lock); } done: -- cgit v1.2.3 From 04f258ce7f085dd69422fa01d41c8f0194a0e270 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 15:42:23 -0800 Subject: net: some optimizations in af_inet 1) Use eq_net() in inet_netns_ok() to speedup socket creation if !CONFIG_NET_NS 2) Reorder the tests about inet_ehash_secret generation (once only) Use the unlikely() macro when testing if inet_ehash_secret already generated. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b1462e8c64cf..fe03048c130d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -245,7 +245,7 @@ static inline int inet_netns_ok(struct net *net, int protocol) int hash; struct net_protocol *ipprot; - if (net == &init_net) + if (net_eq(net, &init_net)) return 1; hash = protocol & (MAX_INET_PROTOS - 1); @@ -272,10 +272,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; - if (sock->type != SOCK_RAW && - sock->type != SOCK_DGRAM && - !inet_ehash_secret) - build_ehash_secret(); + if (unlikely(!inet_ehash_secret)) + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) + build_ehash_secret(); sock->state = SS_UNCONNECTED; -- cgit v1.2.3 From c25eb3bfb97294d0543a81230fbc237046b4b84c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 17:22:55 -0800 Subject: net: Convert TCP/DCCP listening hash tables to use RCU This is the last step to be able to perform full RCU lookups in __inet_lookup() : After established/timewait tables, we add RCU lookups to listening hash table. The only trick here is that a socket of a given type (TCP ipv4, TCP ipv6, ...) can now flight between two different tables (established and listening) during a RCU grace period, so we must use different 'nulls' end-of-chain values for two tables. We define a large value : #define LISTENING_NULLS_BASE (1U << 29) So that slots in listening table are guaranteed to have different end-of-chain values than slots in established table. A reader can still detect it finished its lookup in the right chain. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 9 ++- net/ipv4/inet_diag.c | 4 +- net/ipv4/inet_hashtables.c | 148 +++++++++++++++++++++--------------------- net/ipv4/tcp_ipv4.c | 8 +-- net/ipv6/inet6_hashtables.c | 94 +++++++++++++++++---------- 5 files changed, 147 insertions(+), 116 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ec7ee2e46d8c..f44bb5c77a70 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -99,9 +99,16 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +/* + * Sockets can be hashed in established or listening table + * We must use different 'nulls' end-of-chain value for listening + * hash table, or we might find a socket that was closed and + * reallocated/inserted into established hash table + */ +#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - struct hlist_head head; + struct hlist_nulls_head head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 998a78f169ff..588a7796e3e3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -720,13 +720,13 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct inet_listen_hashbucket *ilb; num = 0; ilb = &hashinfo->listening_hash[i]; spin_lock_bh(&ilb->lock); - sk_for_each(sk, node, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->head) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 4c273a9981a6..11fcb87a1fdd 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -110,78 +110,79 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) EXPORT_SYMBOL_GPL(__inet_inherit_port); +static inline int compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, const __be32 daddr, + const int dif) +{ + int score = -1; + struct inet_sock *inet = inet_sk(sk); + + if (net_eq(sock_net(sk), net) && inet->num == hnum && + !ipv6_only_sock(sk)) { + __be32 rcv_saddr = inet->rcv_saddr; + score = sk->sk_family == PF_INET ? 1 : 0; + if (rcv_saddr) { + if (rcv_saddr != daddr) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 2; + } + } + return score; +} + /* * Don't inline this cruft. Here are some nice properties to exploit here. The * BSD API does not allow a listening sock to specify the remote port nor the * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *inet_lookup_listener_slow(struct net *net, - const struct hlist_head *head, - const __be32 daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *result = NULL, *sk; - const struct hlist_node *node; - int hiscore = -1; - - sk_for_each(sk, node, head) { - const struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && inet->num == hnum && - !ipv6_only_sock(sk)) { - const __be32 rcv_saddr = inet->rcv_saddr; - int score = sk->sk_family == PF_INET ? 1 : 0; - - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; - score += 2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score += 2; - } - if (score == 5) - return sk; - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - return result; -} -/* Optimize the common listener case. */ + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, const int dif) { - struct sock *sk = NULL; - struct inet_listen_hashbucket *ilb; + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + int score, hiscore; - ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - spin_lock(&ilb->lock); - if (!hlist_empty(&ilb->head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) - goto sherry_cache; - sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif); + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each_rcu(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + result = sk; + hiscore = score; + } } - if (sk) { -sherry_cache: - sock_hold(sk); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; + } } - spin_unlock(&ilb->lock); - return sk; + rcu_read_unlock(); + return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -370,7 +371,7 @@ static void __inet_hash(struct sock *sk) ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock); - __sk_add_node(sk, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); spin_unlock(&ilb->lock); } @@ -388,26 +389,22 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + spinlock_t *lock; + int done; if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) { - struct inet_listen_hashbucket *ilb; + if (sk->sk_state == TCP_LISTEN) + lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; + else + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - spin_lock_bh(&ilb->lock); - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock_bh(&ilb->lock); - } else { - spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - - spin_lock_bh(lock); - if (__sk_nulls_del_node_init_rcu(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock_bh(lock); - } + spin_lock_bh(lock); + done =__sk_nulls_del_node_init_rcu(sk); + spin_unlock_bh(lock); + if (done) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -526,8 +523,11 @@ void inet_hashinfo_init(struct inet_hashinfo *h) { int i; - for (i = 0; i < INET_LHTABLE_SIZE; i++) + for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, + i + LISTENING_NULLS_BASE); + } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a81caa1be0cf..cab2458f86fd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1868,7 +1868,7 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) static void *listening_get_next(struct seq_file *seq, void *cur) { struct inet_connection_sock *icsk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *sk = cur; struct inet_listen_hashbucket *ilb; struct tcp_iter_state *st = seq->private; @@ -1878,7 +1878,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) st->bucket = 0; ilb = &tcp_hashinfo.listening_hash[0]; spin_lock_bh(&ilb->lock); - sk = sk_head(&ilb->head); + sk = sk_nulls_head(&ilb->head); goto get_sk; } ilb = &tcp_hashinfo.listening_hash[st->bucket]; @@ -1914,7 +1914,7 @@ get_req: sk = sk_next(sk); } get_sk: - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { cur = sk; goto out; @@ -1935,7 +1935,7 @@ start_req: if (++st->bucket < INET_LHTABLE_SIZE) { ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); - sk = sk_head(&ilb->head); + sk = sk_nulls_head(&ilb->head); goto get_sk; } cur = NULL; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index e0fd68187f83..8fe267feb81e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -33,7 +33,7 @@ void __inet6_hash(struct sock *sk) ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock); - __sk_add_node(sk, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->head); spin_unlock(&ilb->lock); } else { unsigned int hash; @@ -118,47 +118,71 @@ out: } EXPORT_SYMBOL(__inet6_lookup_established); +static int inline compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, + const struct in6_addr *daddr, + const int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && + sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + + score = 1; + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; - const struct hlist_node *node; - struct sock *result = NULL; - int score, hiscore = 0; - struct inet_listen_hashbucket *ilb; - - ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - spin_lock(&ilb->lock); - sk_for_each(sk, node, &ilb->head) { - if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && - sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - - score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 3) { - result = sk; - break; - } - if (score > hiscore) { - hiscore = score; - result = sk; - } + const struct hlist_nulls_node *node; + struct sock *result; + int score, hiscore; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + hiscore = score; + result = sk; } } - if (result) - sock_hold(result); - spin_unlock(&ilb->lock); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } -- cgit v1.2.3 From be77e5930725c3e77bcc0fb1def28e016080d0a1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 23 Nov 2008 17:26:26 -0800 Subject: net: fix tunnels in netns after ndo_ changes dev_net_set() should be the very first thing after alloc_netdev(). "ndo_" changes turned simple assignment (which is OK to do before netns assignment) into quite non-trivial operation (which is not OK, init_net was used). This leads to incomplete initialisation of tunnel device in netns. BUG: unable to handle kernel NULL pointer dereference at 00000004 IP: [] ip6_tnl_exit_net+0x37/0x4f *pde = 00000000 Oops: 0000 [#1] PREEMPT DEBUG_PAGEALLOC last sysfs file: /sys/class/net/lo/operstate Pid: 10, comm: netns Not tainted (2.6.28-rc6 #1) EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at ip6_tnl_exit_net+0x37/0x4f EAX: 00000000 EBX: 00000020 ECX: 00000000 EDX: 00000003 ESI: c5caef30 EDI: c782bbe8 EBP: c7909f50 ESP: c7909f48 DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 Process netns (pid: 10, ti=c7908000 task=c7905780 task.ti=c7908000) Stack: c03e75e0 c7390bc8 c7909f60 c0245448 c7390bd8 c7390bf0 c7909fa8 c012577a 00000000 00000002 00000000 c0125736 c782bbe8 c7909f90 c0308fe3 c782bc04 c7390bd4 c0245406 c084b718 c04f0770 c03ad785 c782bbe8 c782bc04 c782bc0c Call Trace: [] ? cleanup_net+0x42/0x82 [] ? run_workqueue+0xd6/0x1ae [] ? run_workqueue+0x92/0x1ae [] ? schedule+0x275/0x285 [] ? cleanup_net+0x0/0x82 [] ? worker_thread+0x81/0x8d [] ? autoremove_wake_function+0x0/0x33 [] ? worker_thread+0x0/0x8d [] ? kthread+0x39/0x5e [] ? kthread+0x0/0x5e [] ? kernel_thread_helper+0x7/0x10 Code: db e8 05 ff ff ff 89 c6 e8 dc 04 f6 ff eb 08 8b 40 04 e8 38 89 f5 ff 8b 44 9e 04 85 c0 75 f0 43 83 fb 20 75 f2 8b 86 84 00 00 00 <8b> 40 04 e8 1c 89 f5 ff e8 98 04 f6 ff 89 f0 e8 f8 63 e6 ff 5b EIP: [] ip6_tnl_exit_net+0x37/0x4f SS:ESP 0068:c7909f48 ---[ end trace 6c2f2328fccd3e0c ]--- Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/sit.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index bf5b338f1343..0101521f366b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1268,9 +1268,9 @@ static int ipgre_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } + dev_net_set(ign->fb_tunnel_dev, net); ipgre_fb_tunnel_init(ign->fb_tunnel_dev); - dev_net_set(ign->fb_tunnel_dev, net); ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; if ((err = register_netdev(ign->fb_tunnel_dev))) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9eb437cb821a..5079dfbc6f38 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -793,9 +793,9 @@ static int ipip_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } + dev_net_set(ipn->fb_tunnel_dev, net); ipip_fb_tunnel_init(ipn->fb_tunnel_dev); - dev_net_set(ipn->fb_tunnel_dev, net); if ((err = register_netdev(ipn->fb_tunnel_dev))) goto err_reg_dev; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 358f001d848d..ef249ab5c93c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1428,9 +1428,9 @@ static int ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; + dev_net_set(ip6n->fb_tnl_dev, net); ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); - dev_net_set(ip6n->fb_tnl_dev, net); err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1ebf99a6776e..d3467e563f02 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1025,9 +1025,9 @@ static int sit_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } + dev_net_set(sitn->fb_tunnel_dev, net); ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - dev_net_set(sitn->fb_tunnel_dev, net); if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; -- cgit v1.2.3 From 920de804bca61f88643bc9171bcd06f1a56c6258 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Nov 2008 00:09:29 -0800 Subject: net: Make sure BHs are disabled in sock_prot_inuse_add() The rule of calling sock_prot_inuse_add() is that BHs must be disabled. Some new calls were added where this was not true and this tiggers warnings as reported by Ilpo. Fix this by adding explicit BH disabling around those call sites, or moving sock_prot_inuse_add() call inside an existing BH disabled section. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 +- net/packet/af_packet.c | 4 ++-- net/unix/af_unix.c | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 11fcb87a1fdd..6a1045da48d2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -402,9 +402,9 @@ void inet_unhash(struct sock *sk) spin_lock_bh(lock); done =__sk_nulls_del_node_init_rcu(sk); - spin_unlock_bh(lock); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_unlock_bh(lock); } EXPORT_SYMBOL_GPL(inet_unhash); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b4870a34c435..5f94db2f3e9e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -872,6 +872,7 @@ static int packet_release(struct socket *sock) write_lock_bh(&net->packet.sklist_lock); sk_del_node_init(sk); + sock_prot_inuse_add(net, sk->sk_prot, -1); write_unlock_bh(&net->packet.sklist_lock); /* @@ -910,7 +911,6 @@ static int packet_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); sk_refcnt_debug_release(sk); - sock_prot_inuse_add(net, sk->sk_prot, -1); sock_put(sk); return 0; } @@ -1085,8 +1085,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol) write_lock_bh(&net->packet.sklist_lock); sk_add_node(sk, &net->packet.sklist); - write_unlock_bh(&net->packet.sklist_lock); sock_prot_inuse_add(net, &packet_proto, 1); + write_unlock_bh(&net->packet.sklist_lock); return(0); out: return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a45a9f7369ed..3a35a6e8bf91 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -615,9 +615,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) out: if (sk == NULL) atomic_dec(&unix_nr_socks); - else + else { + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - + local_bh_enable(); + } return sk; } -- cgit v1.2.3 From 2e77d89b2fa8e3f8325b8ce7893ec3645f41aff5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Nov 2008 15:52:46 -0800 Subject: net: avoid a pair of dst_hold()/dst_release() in ip_append_data() We can reduce pressure on dst entry refcount that slowdown UDP transmit path on SMP machines. This pressure is visible on RTP servers when delivering content to mediagateways, especially big ones, handling thousand of streams. Several cpus send UDP frames to the same destination, hence use the same dst entry. This patch makes ip_append_data() eventually steal the refcount its callers had to take on the dst entry. This doesnt avoid all refcounting, but still gives speedups on SMP, on UDP/RAW transmit path Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/icmp.c | 8 ++++---- net/ipv4/ip_output.c | 11 ++++++++--- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- 5 files changed, 15 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/ip.h b/include/net/ip.h index bc026ecb513f..ddef10c22e3a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -110,7 +110,7 @@ extern int ip_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int len, int protolen, struct ipcm_cookie *ipc, - struct rtable *rt, + struct rtable **rt, unsigned int flags); extern int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); extern ssize_t ip_append_page(struct sock *sk, struct page *page, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 21e497efbd7f..7b88be9803b1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -321,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, } static void icmp_push_reply(struct icmp_bxm *icmp_param, - struct ipcm_cookie *ipc, struct rtable *rt) + struct ipcm_cookie *ipc, struct rtable **rt) { struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net(rt->u.dst.dev)); + sk = icmp_sk(dev_net((*rt)->u.dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) } if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code)) - icmp_push_reply(icmp_param, &ipc, rt); + icmp_push_reply(icmp_param, &ipc, &rt); ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); @@ -635,7 +635,7 @@ route_done: icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &ipc, rt); + icmp_push_reply(&icmp_param, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 46d7be233eac..5516825a0751 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm_cookie *ipc, struct rtable *rt, + struct ipcm_cookie *ipc, struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); @@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk, int offset = 0; unsigned int maxfraglen, fragheaderlen; int csummode = CHECKSUM_NONE; + struct rtable *rt; if (flags&MSG_PROBE) return 0; @@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk, inet->cork.flags |= IPCORK_OPT; inet->cork.addr = ipc->addr; } - dst_hold(&rt->u.dst); + rt = *rtp; + /* + * We steal reference to this route, caller should not release it + */ + *rtp = NULL; inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); @@ -1391,7 +1396,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, - &ipc, rt, MSG_DONTWAIT); + &ipc, &rt, MSG_DONTWAIT); if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { if (arg->csumoffset >= 0) *((__sum16 *)skb_transport_header(skb) + diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 998fcffc9e15..dff8bc4e0fac 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -572,7 +572,7 @@ back_from_confirm: ipc.addr = rt->rt_dst; lock_sock(sk); err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, - &ipc, rt, msg->msg_flags); + &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index da869ce041d9..549114472db3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -719,7 +719,7 @@ do_append_data: up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, + sizeof(struct udphdr), &ipc, &rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_flush_pending_frames(sk); -- cgit v1.2.3 From a21bba945430f3f5e00c349665f88cdacdb32a8d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Nov 2008 16:07:50 -0800 Subject: net: avoid a pair of dst_hold()/dst_release() in ip_push_pending_frames() We can reduce pressure on dst entry refcount that slowdown UDP transmit path on SMP machines. This pressure is visible on RTP servers when delivering content to mediagateways, especially big ones, handling thousand of streams. Several cpus send UDP frames to the same destination, hence use the same dst entry. This patch makes ip_push_pending_frames() steal the refcount its callers had to take when filling inet->cork.dst. This doesnt avoid all refcounting, but still gives speedups on SMP, on UDP/RAW transmit path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5516825a0751..8ebe86dd72af 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1284,7 +1284,12 @@ int ip_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb->dst = dst_clone(&rt->u.dst); + /* + * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec + * on dst refcount + */ + inet->cork.dst = NULL; + skb->dst = &rt->u.dst; if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) -- cgit v1.2.3 From 4a17fc3add594fcc1c778e93a95b6ecf47f630e5 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:03:43 -0800 Subject: tcp: collapse more than two on retransmission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I always had thought that collapsing up to two at a time was intentional decision to avoid excessive processing if 1 byte sized skbs are to be combined for a full mtu, and consecutive retransmissions would make the size of the retransmittee double each round anyway, but some recent discussion made me to understand that was not the case. Thus make collapse work more and wait less. It would be possible to take advantage of the shifting machinery (added in the later patch) in the case of paged data but that can be implemented on top of this change. tcp_skb_is_last check is now provided by the loop. I tested a bit (ss-after-idle-off, fill 4096x4096B xfer, 10s sleep + 4096 x 1byte writes while dropping them for some a while with netem): . 16774097:16775545(1448) ack 1 win 46 . 16775545:16776993(1448) ack 1 win 46 . ack 16759617 win 2399 P 16776993:16777217(224) ack 1 win 46 . ack 16762513 win 2399 . ack 16765409 win 2399 . ack 16768305 win 2399 . ack 16771201 win 2399 . ack 16774097 win 2399 . ack 16776993 win 2399 . ack 16777217 win 2399 P 16777217:16777257(40) ack 1 win 46 . ack 16777257 win 2399 P 16777257:16778705(1448) ack 1 win 46 P 16778705:16780153(1448) ack 1 win 46 FP 16780153:16781313(1160) ack 1 win 46 . ack 16778705 win 2399 . ack 16780153 win 2399 F 1:1(0) ack 16781314 win 2399 While without drop-all period I get this: . 16773585:16775033(1448) ack 1 win 46 . ack 16764897 win 9367 . ack 16767793 win 9367 . ack 16770689 win 9367 . ack 16773585 win 9367 . 16775033:16776481(1448) ack 1 win 46 P 16776481:16777217(736) ack 1 win 46 . ack 16776481 win 9367 . ack 16777217 win 9367 P 16777217:16777218(1) ack 1 win 46 P 16777218:16777219(1) ack 1 win 46 P 16777219:16777220(1) ack 1 win 46 ... P 16777247:16777248(1) ack 1 win 46 . ack 16777218 win 9367 . ack 16777219 win 9367 ... . ack 16777233 win 9367 . ack 16777248 win 9367 P 16777248:16778696(1448) ack 1 win 46 P 16778696:16780144(1448) ack 1 win 46 FP 16780144:16781313(1169) ack 1 win 46 . ack 16780144 win 9367 F 1:1(0) ack 16781314 win 9367 The window seems to be 30-40 segments, which were successfully combined into: P 16777217:16777257(40) ack 1 win 46 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 96 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 37 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a524627923ae..86ef98975e94 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1766,46 +1766,22 @@ u32 __tcp_select_window(struct sock *sk) return window; } -/* Attempt to collapse two adjacent SKB's during retransmission. */ -static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, - int mss_now) +/* Collapses two adjacent SKB's during retransmission. */ +static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); int skb_size, next_skb_size; u16 flags; - /* The first test we must make is that neither of these two - * SKB's are still referenced by someone else. - */ - if (skb_cloned(skb) || skb_cloned(next_skb)) - return; - skb_size = skb->len; next_skb_size = next_skb->len; flags = TCP_SKB_CB(skb)->flags; - /* Also punt if next skb has been SACK'd. */ - if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) - return; - - /* Next skb is out of window. */ - if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp))) - return; - - /* Punt if not enough space exists in the first SKB for - * the data in the second, or the total combined payload - * would exceed the MSS. - */ - if ((next_skb_size > skb_tailroom(skb)) || - ((skb_size + next_skb_size) > mss_now)) - return; - BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); tcp_highest_sack_combine(sk, next_skb, skb); - /* Ok. We will be able to collapse the packet. */ tcp_unlink_write_queue(next_skb, sk); skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), @@ -1847,6 +1823,62 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, sk_wmem_free_skb(sk, next_skb); } +static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) +{ + if (tcp_skb_pcount(skb) > 1) + return 0; + /* TODO: SACK collapsing could be used to remove this condition */ + if (skb_shinfo(skb)->nr_frags != 0) + return 0; + if (skb_cloned(skb)) + return 0; + if (skb == tcp_send_head(sk)) + return 0; + /* Some heurestics for collapsing over SACK'd could be invented */ + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + return 0; + + return 1; +} + +static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, + int space) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb = to, *tmp; + int first = 1; + + if (!sysctl_tcp_retrans_collapse) + return; + if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) + return; + + tcp_for_write_queue_from_safe(skb, tmp, sk) { + if (!tcp_can_collapse(sk, skb)) + break; + + space -= skb->len; + + if (first) { + first = 0; + continue; + } + + if (space < 0) + break; + /* Punt if not enough space exists in the first SKB for + * the data in the second + */ + if (skb->len > skb_tailroom(to)) + break; + + if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) + break; + + tcp_collapse_retrans(sk, to); + } +} + /* Do a simple retransmit without using the backoff mechanisms in * tcp_timer. This is used for path mtu discovery. * The socket is already locked here. @@ -1946,17 +1978,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; /* We'll try again later. */ } - /* Collapse two adjacent packets if worthwhile and we can. */ - if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && - (skb->len < (cur_mss >> 1)) && - (!tcp_skb_is_last(sk, skb)) && - (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && - (skb_shinfo(skb)->nr_frags == 0 && - skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && - (tcp_skb_pcount(skb) == 1 && - tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && - (sysctl_tcp_retrans_collapse != 0)) - tcp_retrans_try_collapse(sk, skb, cur_mss); + tcp_retrans_try_collapse(sk, skb, cur_mss); /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. So strip it off -- cgit v1.2.3 From e1aa680fa40e7492260a09cb57d94002245cc8fe Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:11:55 -0800 Subject: tcp: move tcp_simple_retransmit to tcp_input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- net/ipv4/tcp_input.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/tcp_output.c | 50 ------------------------------------------------ 3 files changed, 52 insertions(+), 53 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8f26b28fb407..90b4c3b4c336 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -472,8 +472,6 @@ extern void tcp_send_delayed_ack(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); -extern void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, - struct sk_buff *skb); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 097294b7da3e..8085704863fb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1002,7 +1002,8 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) } } -void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, + struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); @@ -2559,6 +2560,56 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } +/* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. This is used for path mtu discovery. + * The socket is already locked here. + */ +void tcp_simple_retransmit(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + unsigned int mss = tcp_current_mss(sk, 0); + u32 prior_lost = tp->lost_out; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + if (skb->len > mss && + !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + } + tcp_skb_mark_lost_uncond_verify(tp, skb); + } + } + + tcp_clear_retrans_hints_partial(tp); + + if (prior_lost == tp->lost_out) + return; + + if (tcp_is_reno(tp)) + tcp_limit_reno_sacked(tp); + + tcp_verify_left_out(tp); + + /* Don't muck with the congestion window here. + * Reason is that we do not increase amount of _data_ + * in network, but units changed and effective + * cwnd/ssthresh really reduced now. + */ + if (icsk->icsk_ca_state != TCP_CA_Loss) { + tp->high_seq = tp->snd_nxt; + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->prior_ssthresh = 0; + tp->undo_marker = 0; + tcp_set_ca_state(sk, TCP_CA_Loss); + } + tcp_xmit_retransmit_queue(sk); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 86ef98975e94..c069ecb81ea5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1879,56 +1879,6 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, } } -/* Do a simple retransmit without using the backoff mechanisms in - * tcp_timer. This is used for path mtu discovery. - * The socket is already locked here. - */ -void tcp_simple_retransmit(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0); - u32 prior_lost = tp->lost_out; - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (skb->len > mss && - !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_skb_mark_lost_uncond_verify(tp, skb); - } - } - - tcp_clear_retrans_hints_partial(tp); - - if (prior_lost == tp->lost_out) - return; - - if (tcp_is_reno(tp)) - tcp_limit_reno_sacked(tp); - - tcp_verify_left_out(tp); - - /* Don't muck with the congestion window here. - * Reason is that we do not increase amount of _data_ - * in network, but units changed and effective - * cwnd/ssthresh really reduced now. - */ - if (icsk->icsk_ca_state != TCP_CA_Loss) { - tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->prior_ssthresh = 0; - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Loss); - } - tcp_xmit_retransmit_queue(sk); -} - /* This retransmits one SKB. Policy decisions and retransmit queue * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. -- cgit v1.2.3 From e8bae275d9354104f7ae24a48a90d1a6286e7bd9 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:12:28 -0800 Subject: tcp: more aggressive skipping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I knew already when rewriting the sacktag that this condition was too conservative, change it now since it prevent lot of useless work (especially in the sack shifter decision code that is being added by a later patch). This shouldn't change anything really, just save some processing regardless of the shifter. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8085704863fb..3f26599ddc88 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1402,7 +1402,7 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, if (skb == tcp_send_head(sk)) break; - if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) + if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) break; *fack_count += tcp_skb_pcount(skb); -- cgit v1.2.3 From adb92db857ee2a0a2b925ccfbd560203c3f88aae Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:13:50 -0800 Subject: tcp: Make SACK code to split only at mss boundaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sadly enough, this adds possible divide though we try to avoid it by checking one mss as common case. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f26599ddc88..ca46eb9151f8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1248,20 +1248,39 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, { int in_sack, err; unsigned int pkt_len; + unsigned int mss; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); if (tcp_skb_pcount(skb) > 1 && !in_sack && after(TCP_SKB_CB(skb)->end_seq, start_seq)) { - + mss = tcp_skb_mss(skb); in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); - if (!in_sack) + if (!in_sack) { pkt_len = start_seq - TCP_SKB_CB(skb)->seq; - else + if (pkt_len < mss) + pkt_len = mss; + } else { pkt_len = end_seq - TCP_SKB_CB(skb)->seq; - err = tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size); + if (pkt_len < mss) + return -EINVAL; + } + + /* Round if necessary so that SACKs cover only full MSSes + * and/or the remaining small portion (if present) + */ + if (pkt_len > mss) { + unsigned int new_len = (pkt_len / mss) * mss; + if (!in_sack && new_len < pkt_len) { + new_len += mss; + if (new_len > skb->len) + return 0; + } + pkt_len = new_len; + } + err = tcp_fragment(sk, skb, pkt_len, mss); if (err < 0) return err; } -- cgit v1.2.3 From f58b22fd3c16444edc393a217a74208f1894b601 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:14:43 -0800 Subject: tcp: make tcp_sacktag_one able to handle partial skb too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is preparatory work for SACK combiner patch which may have to count TCP state changes for only a part of the skb because it will intentionally avoids splitting skb to SACKed and not sacked parts. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ca46eb9151f8..3c8e297e2c39 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1289,7 +1289,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, } static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, - int *reord, int dup_sack, int fack_count) + int *reord, int dup_sack, int fack_count, + u8 *sackedto, int pcount) { struct tcp_sock *tp = tcp_sk(sk); u8 sacked = TCP_SKB_CB(skb)->sacked; @@ -1314,10 +1315,9 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, * that retransmission is still in flight. */ if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= - ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); - tp->lost_out -= tcp_skb_pcount(skb); - tp->retrans_out -= tcp_skb_pcount(skb); + *sackedto &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); + tp->lost_out -= pcount; + tp->retrans_out -= pcount; } } else { if (!(sacked & TCPCB_RETRANS)) { @@ -1334,22 +1334,22 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, } if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - tp->lost_out -= tcp_skb_pcount(skb); + *sackedto &= ~TCPCB_LOST; + tp->lost_out -= pcount; } } - TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; + *sackedto |= TCPCB_SACKED_ACKED; flag |= FLAG_DATA_SACKED; - tp->sacked_out += tcp_skb_pcount(skb); + tp->sacked_out += pcount; - fack_count += tcp_skb_pcount(skb); + fack_count += pcount; /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) - tp->lost_cnt_hint += tcp_skb_pcount(skb); + tp->lost_cnt_hint += pcount; if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; @@ -1362,9 +1362,9 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, * frames and clear it. undo_retrans is decreased above, L|R frames * are accounted above as well. */ - if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); + if (dup_sack && (*sackedto & TCPCB_SACKED_RETRANS)) { + *sackedto &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= pcount; } return flag; @@ -1404,7 +1404,9 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, if (in_sack) *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, - *fack_count); + *fack_count, + &(TCP_SKB_CB(skb)->sacked), + tcp_skb_pcount(skb)); *fack_count += tcp_skb_pcount(skb); } -- cgit v1.2.3 From 832d11c5cd076abc0aa1eaf7be96c81d1a59ce41 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:20:15 -0800 Subject: tcp: Try to restore large SKBs while SACK processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During SACK processing, most of the benefits of TSO are eaten by the SACK blocks that one-by-one fragment SKBs to MSS sized chunks. Then we're in problems when cleanup work for them has to be done when a large cumulative ACK comes. Try to return back to pre-split state already while more and more SACK info gets discovered by combining newly discovered SACK areas with the previous skb if that's SACKed as well. This approach has a number of benefits: 1) The processing overhead is spread more equally over the RTT 2) Write queue has less skbs to process (affect everything which has to walk in the queue past the sacked areas) 3) Write queue is consistent whole the time, so no other parts of TCP has to be aware of this (this was not the case with some other approach that was, well, quite intrusive all around). 4) Clean_rtx_queue can release most of the pages using single put_page instead of previous PAGE_SIZE/mss+1 calls In case a hole is fully filled by the new SACK block, we attempt to combine the next skb too which allows construction of skbs that are even larger than what tso split them to and it handles hole per on every nth patterns that often occur during slow start overshoot pretty nicely. Though this to be really useful also a retransmission would have to get lost since cumulative ACKs advance one hole at a time in the most typical case. TODO: handle upwards only merging. That should be rather easy when segment is fully sacked but I'm leaving that as future work item (it won't make very large difference anyway since this current approach already covers quite a lot of normal cases). I was earlier thinking of some sophisticated way of tracking timestamps of the first and the last segment but later on realized that it won't be that necessary at all to store the timestamp of the last segment. The cases that can occur are basically either: 1) ambiguous => no sensible measurement can be taken anyway 2) non-ambiguous is due to reordering => having the timestamp of the last segment there is just skewing things more off than does some good since the ack got triggered by one of the holes (besides some substle issues that would make determining right hole/skb even harder problem). Anyway, it has nothing to do with this change then. I choose to route some abnormal looking cases with goto noop, some could be handled differently (eg., by stopping the walking at that skb but again). In general, they either shouldn't happen at all or are rare enough to make no difference in practice. In theory this change (as whole) could cause some macroscale regression (global) because of cache misses that are taken over the round-trip time but it gets very likely better because of much less (local) cache misses per other write queue walkers and the big recovery clearing cumulative ack. Worth to note that these benefits would be very easy to get also without TSO/GSO being on as long as the data is in pages so that we can merge them. Currently I won't let that happen because DSACK splitting at fragment that would mess up pcounts due to sk_can_gso in tcp_set_skb_tso_segs. Once DSACKs fragments gets avoided, we have some conditions that can be made less strict. TODO: I will probably have to convert the excessive pointer passing to struct sacktag_state... :-) My testing revealed that considerable amount of skbs couldn't be shifted because they were cloned (most likely still awaiting tx reclaim)... [The rest is considering future work instead since I got repeatably EFAULT to tcpdump's recvfrom when I added pskb_expand_head to deal with clones, so I separated that into another, later patch] ...To counter that, I gave up on the fifth advantage: 5) When growing previous SACK block, less allocs for new skbs are done, basically a new alloc is needed only when new hole is detected and when the previous skb runs out of frags space ...which now only happens of if reclaim is fast enough to dispose the clone before the SACK block comes in (the window is RTT long), otherwise we'll have to alloc some. With clones being handled I got these numbers (will be somewhat worse without that), taken with fine-grained mibs: TCPSackShifted 398 TCPSackMerged 877 TCPSackShiftFallback 320 TCPSACKCOLLAPSEFALLBACKGSO 0 TCPSACKCOLLAPSEFALLBACKSKBBITS 0 TCPSACKCOLLAPSEFALLBACKSKBDATA 0 TCPSACKCOLLAPSEFALLBACKBELOW 0 TCPSACKCOLLAPSEFALLBACKFIRST 1 TCPSACKCOLLAPSEFALLBACKPREVBITS 318 TCPSACKCOLLAPSEFALLBACKMSS 1 TCPSACKCOLLAPSEFALLBACKNOHEAD 0 TCPSACKCOLLAPSEFALLBACKSHIFT 0 TCPSACKCOLLAPSENOOPSEQ 0 TCPSACKCOLLAPSENOOPSMALLPCOUNT 0 TCPSACKCOLLAPSENOOPSMALLLEN 0 TCPSACKCOLLAPSEHOLE 12 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 33 +++++++ include/net/tcp.h | 5 + net/core/skbuff.c | 140 +++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 256 +++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 427 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a01b6f84e3bc..acf17af45af9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -492,6 +492,19 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, return (skb->next == (struct sk_buff *) list); } +/** + * skb_queue_is_first - check if skb is the first entry in the queue + * @list: queue head + * @skb: buffer + * + * Returns true if @skb is the first buffer on the list. + */ +static inline bool skb_queue_is_first(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + return (skb->prev == (struct sk_buff *) list); +} + /** * skb_queue_next - return the next packet in the queue * @list: queue head @@ -510,6 +523,24 @@ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, return skb->next; } +/** + * skb_queue_prev - return the prev packet in the queue + * @list: queue head + * @skb: current buffer + * + * Return the prev packet in @list before @skb. It is only valid to + * call this if skb_queue_is_first() evaluates to false. + */ +static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + /* This BUG_ON may seem severe, but if we just return then we + * are going to dereference garbage. + */ + BUG_ON(skb_queue_is_first(list, skb)); + return skb->prev; +} + /** * skb_get - reference buffer * @skb: buffer to reference @@ -1652,6 +1683,8 @@ extern int skb_splice_bits(struct sk_buff *skb, extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, + int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); diff --git a/include/net/tcp.h b/include/net/tcp.h index 90b4c3b4c336..265392470b26 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1192,6 +1192,11 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_bu return skb_queue_next(&sk->sk_write_queue, skb); } +static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb) +{ + return skb_queue_prev(&sk->sk_write_queue, skb); +} + #define tcp_for_write_queue(skb, sk) \ skb_queue_walk(&(sk)->sk_write_queue, skb) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 267185a848f6..844b8abeb18c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2018,6 +2018,146 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } +/* Shifting from/to a cloned skb is a no-go. + * + * TODO: handle cloned skbs by using pskb_expand_head() + */ +static int skb_prepare_for_shift(struct sk_buff *skb) +{ + return skb_cloned(skb); +} + +/** + * skb_shift - Shifts paged data partially from skb to another + * @tgt: buffer into which tail data gets added + * @skb: buffer from which the paged data comes from + * @shiftlen: shift up to this many bytes + * + * Attempts to shift up to shiftlen worth of bytes, which may be less than + * the length of the skb, from tgt to skb. Returns number bytes shifted. + * It's up to caller to free skb if everything was shifted. + * + * If @tgt runs out of frags, the whole operation is aborted. + * + * Skb cannot include anything else but paged data while tgt is allowed + * to have non-paged data as well. + * + * TODO: full sized shift could be optimized but that would need + * specialized skb free'er to handle frags without up-to-date nr_frags. + */ +int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) +{ + int from, to, merge, todo; + struct skb_frag_struct *fragfrom, *fragto; + + BUG_ON(shiftlen > skb->len); + BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ + + todo = shiftlen; + from = 0; + to = skb_shinfo(tgt)->nr_frags; + fragfrom = &skb_shinfo(skb)->frags[from]; + + /* Actual merge is delayed until the point when we know we can + * commit all, so that we don't have to undo partial changes + */ + if (!to || + !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + merge = -1; + } else { + merge = to - 1; + + todo -= fragfrom->size; + if (todo < 0) { + if (skb_prepare_for_shift(skb) || + skb_prepare_for_shift(tgt)) + return 0; + + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += shiftlen; + fragfrom->size -= shiftlen; + fragfrom->page_offset += shiftlen; + + goto onlymerged; + } + + from++; + } + + /* Skip full, not-fitting skb to avoid expensive operations */ + if ((shiftlen == skb->len) && + (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) + return 0; + + if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) + return 0; + + while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { + if (to == MAX_SKB_FRAGS) + return 0; + + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[to]; + + if (todo >= fragfrom->size) { + *fragto = *fragfrom; + todo -= fragfrom->size; + from++; + to++; + + } else { + get_page(fragfrom->page); + fragto->page = fragfrom->page; + fragto->page_offset = fragfrom->page_offset; + fragto->size = todo; + + fragfrom->page_offset += todo; + fragfrom->size -= todo; + todo = 0; + + to++; + break; + } + } + + /* Ready to "commit" this state change to tgt */ + skb_shinfo(tgt)->nr_frags = to; + + if (merge >= 0) { + fragfrom = &skb_shinfo(skb)->frags[0]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += fragfrom->size; + put_page(fragfrom->page); + } + + /* Reposition in the original skb */ + to = 0; + while (from < skb_shinfo(skb)->nr_frags) + skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; + skb_shinfo(skb)->nr_frags = to; + + BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); + +onlymerged: + /* Most likely the tgt won't ever need its checksum anymore, skb on + * the other hand might need it if it needs to be resent + */ + tgt->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_PARTIAL; + + /* Yak, is it really working this way? Some helper please? */ + skb->len -= shiftlen; + skb->data_len -= shiftlen; + skb->truesize -= shiftlen; + tgt->len += shiftlen; + tgt->data_len += shiftlen; + tgt->truesize += shiftlen; + + return shiftlen; +} + /** * skb_prepare_seq_read - Prepare a sequential read of skb data * @skb: the buffer to read diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3c8e297e2c39..97d57676b8ee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1242,6 +1242,8 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, * aligned portion of it that matches. Therefore we might need to fragment * which may fail and creates some hassle (caller must handle error case * returns). + * + * FIXME: this could be merged to shift decision code */ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, u32 start_seq, u32 end_seq) @@ -1353,9 +1355,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; - - if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - tcp_advance_highest_sack(sk, skb); } /* D-SACK. We can detect redundant retransmission in S|R and plain R @@ -1370,12 +1369,231 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, return flag; } +static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, + struct sk_buff *skb, unsigned int pcount, + int shifted, int fack_count, int *reord, + int *flag, int mss) +{ + struct tcp_sock *tp = tcp_sk(sk); + u8 dummy_sacked = TCP_SKB_CB(skb)->sacked; /* We discard results */ + + BUG_ON(!pcount); + + TCP_SKB_CB(prev)->end_seq += shifted; + TCP_SKB_CB(skb)->seq += shifted; + + skb_shinfo(prev)->gso_segs += pcount; + BUG_ON(skb_shinfo(skb)->gso_segs < pcount); + skb_shinfo(skb)->gso_segs -= pcount; + + /* When we're adding to gso_segs == 1, gso_size will be zero, + * in theory this shouldn't be necessary but as long as DSACK + * code can come after this skb later on it's better to keep + * setting gso_size to something. + */ + if (!skb_shinfo(prev)->gso_size) { + skb_shinfo(prev)->gso_size = mss; + skb_shinfo(prev)->gso_type = sk->sk_gso_type; + } + + /* CHECKME: To clear or not to clear? Mimics normal skb currently */ + if (skb_shinfo(skb)->gso_segs <= 1) { + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; + } + + *flag |= tcp_sacktag_one(skb, sk, reord, 0, fack_count, &dummy_sacked, + pcount); + + /* Difference in this won't matter, both ACKed by the same cumul. ACK */ + TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); + + tcp_clear_all_retrans_hints(tp); + + if (skb->len > 0) { + BUG_ON(!tcp_skb_pcount(skb)); + return 0; + } + + /* Whole SKB was eaten :-) */ + + TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; + if (skb == tcp_highest_sack(sk)) + tcp_advance_highest_sack(sk, skb); + + tcp_unlink_write_queue(skb, sk); + sk_wmem_free_skb(sk, skb); + + return 1; +} + +/* I wish gso_size would have a bit more sane initialization than + * something-or-zero which complicates things + */ +static int tcp_shift_mss(struct sk_buff *skb) +{ + int mss = tcp_skb_mss(skb); + + if (!mss) + mss = skb->len; + + return mss; +} + +/* Shifting pages past head area doesn't work */ +static int skb_can_shift(struct sk_buff *skb) +{ + return !skb_headlen(skb) && skb_is_nonlinear(skb); +} + +/* Try collapsing SACK blocks spanning across multiple skbs to a single + * skb. + */ +static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, + u32 start_seq, u32 end_seq, + int dup_sack, int *fack_count, + int *reord, int *flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev; + int mss; + int pcount = 0; + int len; + int in_sack; + + if (!sk_can_gso(sk)) + goto fallback; + + /* Normally R but no L won't result in plain S */ + if (!dup_sack && + (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) == TCPCB_SACKED_RETRANS) + goto fallback; + if (!skb_can_shift(skb)) + goto fallback; + /* This frame is about to be dropped (was ACKed). */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + goto fallback; + + /* Can only happen with delayed DSACK + discard craziness */ + if (unlikely(skb == tcp_write_queue_head(sk))) + goto fallback; + prev = tcp_write_queue_prev(sk, skb); + + if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto fallback; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + + if (in_sack) { + len = skb->len; + pcount = tcp_skb_pcount(skb); + mss = tcp_shift_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_shift_mss(prev)) + goto fallback; + } else { + if (!after(TCP_SKB_CB(skb)->end_seq, start_seq)) + goto noop; + /* CHECKME: This is non-MSS split case only?, this will + * cause skipped skbs due to advancing loop btw, original + * has that feature too + */ + if (tcp_skb_pcount(skb) <= 1) + goto noop; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); + if (!in_sack) { + /* TODO: head merge to next could be attempted here + * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)), + * though it might not be worth of the additional hassle + * + * ...we can probably just fallback to what was done + * previously. We could try merging non-SACKed ones + * as well but it probably isn't going to buy off + * because later SACKs might again split them, and + * it would make skb timestamp tracking considerably + * harder problem. + */ + goto fallback; + } + + len = end_seq - TCP_SKB_CB(skb)->seq; + BUG_ON(len < 0); + BUG_ON(len > skb->len); + + /* MSS boundaries should be honoured or else pcount will + * severely break even though it makes things bit trickier. + * Optimize common case to avoid most of the divides + */ + mss = tcp_skb_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_shift_mss(prev)) + goto fallback; + + if (len == mss) { + pcount = 1; + } else if (len < mss) { + goto noop; + } else { + pcount = len / mss; + len = pcount * mss; + } + } + + if (!skb_shift(prev, skb, len)) + goto fallback; + if (!tcp_shifted_skb(sk, prev, skb, pcount, len, *fack_count, reord, + flag, mss)) + goto out; + + /* Hole filled allows collapsing with the next as well, this is very + * useful when hole on every nth skb pattern happens + */ + if (prev == tcp_write_queue_tail(sk)) + goto out; + skb = tcp_write_queue_next(sk, prev); + + if (!skb_can_shift(skb)) + goto out; + if (skb == tcp_send_head(sk)) + goto out; + if ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto out; + + len = skb->len; + if (skb_shift(prev, skb, len)) { + pcount += tcp_skb_pcount(skb); + tcp_shifted_skb(sk, prev, skb, tcp_skb_pcount(skb), len, + *fack_count, reord, flag, mss); + } + +out: + *fack_count += pcount; + return prev; + +noop: + return skb; + +fallback: + return NULL; +} + static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, u32 start_seq, u32 end_seq, int dup_sack_in, int *fack_count, int *reord, int *flag) { + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *tmp; + tcp_for_write_queue_from(skb, sk) { int in_sack = 0; int dup_sack = dup_sack_in; @@ -1396,18 +1614,42 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack = 1; } - if (in_sack <= 0) - in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, - end_seq); + /* skb reference here is a bit tricky to get right, since + * shifting can eat and free both this skb and the next, + * so not even _safe variant of the loop is enough. + */ + if (in_sack <= 0) { + tmp = tcp_shift_skb_data(sk, skb, start_seq, + end_seq, dup_sack, + fack_count, reord, flag); + if (tmp != NULL) { + if (tmp != skb) { + skb = tmp; + continue; + } + + in_sack = 0; + } else { + in_sack = tcp_match_skb_to_sack(sk, skb, + start_seq, + end_seq); + } + } + if (unlikely(in_sack < 0)) break; - if (in_sack) + if (in_sack) { *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count, &(TCP_SKB_CB(skb)->sacked), tcp_skb_pcount(skb)); + if (!before(TCP_SKB_CB(skb)->seq, + tcp_highest_sack_seq(tp))) + tcp_advance_highest_sack(sk, skb); + } + *fack_count += tcp_skb_pcount(skb); } return skb; -- cgit v1.2.3 From 92ee76b6d99bfcdab6162816c9025541ef7248eb Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:26:56 -0800 Subject: tcp: Make shifting not clear the hints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The earlier version was just very basic one which is "playing safe" by always clearing the hints. However, clearing of a hint is extremely costly operation with large windows, so it must be avoided at all cost whenever possible, there is a way with shifting too achieve not-clearing. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 97d57676b8ee..e6291dde3348 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1379,6 +1379,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, BUG_ON(!pcount); + /* Tweak before seqno plays */ + if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && + !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) + tp->lost_cnt_hint += pcount; + TCP_SKB_CB(prev)->end_seq += shifted; TCP_SKB_CB(skb)->seq += shifted; @@ -1408,8 +1413,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); - tcp_clear_all_retrans_hints(tp); - if (skb->len > 0) { BUG_ON(!tcp_skb_pcount(skb)); return 0; @@ -1417,6 +1420,15 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, /* Whole SKB was eaten :-) */ + if (skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = prev; + if (skb == tp->scoreboard_skb_hint) + tp->scoreboard_skb_hint = prev; + if (skb == tp->lost_skb_hint) { + tp->lost_skb_hint = prev; + tp->lost_cnt_hint -= tcp_skb_pcount(prev); + } + TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); -- cgit v1.2.3 From 111cc8b913b42ef07793648b1699288332f273e1 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:27:22 -0800 Subject: tcp: add some mibs to track collapsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/snmp.h | 3 +++ net/ipv4/proc.c | 3 +++ net/ipv4/tcp_input.c | 4 ++++ 3 files changed, 10 insertions(+) (limited to 'net/ipv4') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 7a6e6bba4a71..aee3f1e1d1ce 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -216,6 +216,9 @@ enum LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ + LINUX_MIB_SACKSHIFTED, + LINUX_MIB_SACKMERGED, + LINUX_MIB_SACKSHIFTFALLBACK, __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a631a1f110ca..731789bb499f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -234,6 +234,9 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), + SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), + SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), + SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e6291dde3348..9f8a80ba17bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1415,6 +1415,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, if (skb->len > 0) { BUG_ON(!tcp_skb_pcount(skb)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); return 0; } @@ -1436,6 +1437,8 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); + return 1; } @@ -1594,6 +1597,7 @@ noop: return skb; fallback: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); return NULL; } -- cgit v1.2.3 From 6daad37230ab02bb593d179d704079d4b5912bd7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 01:05:09 -0800 Subject: ah4/ah6: remove useless NULL assignments struct will be kfreed in a moment, so... Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 2 -- net/ipv6/ah6.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 3f205181712d..992ecd8662e2 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -293,9 +293,7 @@ static void ah_destroy(struct xfrm_state *x) return; kfree(ahp->work_icv); - ahp->work_icv = NULL; crypto_free_hash(ahp->tfm); - ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7a8a01369e5c..13e330d89178 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -509,9 +509,7 @@ static void ah6_destroy(struct xfrm_state *x) return; kfree(ahp->work_icv); - ahp->work_icv = NULL; crypto_free_hash(ahp->tfm); - ahp->tfm = NULL; kfree(ahp); } -- cgit v1.2.3 From fb7e06748c29c08a9f5ca057a780b65acbb91c27 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 01:05:54 -0800 Subject: xfrm: remove useless forward declarations Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/xfrm4_state.c | 2 -- net/ipv6/xfrm6_state.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 55dc6beab9aa..1ef1366a0a03 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -13,8 +13,6 @@ #include #include -static struct xfrm_state_afinfo xfrm4_state_afinfo; - static int xfrm4_init_flags(struct xfrm_state *x) { if (ipv4_config.no_pmtu_disc) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 60c78cfc2737..0e685b05496e 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -19,8 +19,6 @@ #include #include -static struct xfrm_state_afinfo xfrm6_state_afinfo; - static void __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, -- cgit v1.2.3 From 5f145e44ae09f629d25536b2947a91e9c01bddcb Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 25 Nov 2008 12:15:16 +0100 Subject: netfilter: nfmark routing in OUTPUT, mangle, NFQUEUE This patch let nfmark to be evaluated for routing decision for OUTPUT packet, in mangle table, when process paquet in NFQUEUE Until now, only change (in NFQUEUE process) on fields src_addr, dest_addr and tos could make netfilter to reevalute the routing. From: Laurent Licour Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/ipv4/netfilter.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6efdb70b3eb2..7c145d76384d 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -125,6 +125,7 @@ struct ip_rt_info { __be32 daddr; __be32 saddr; u_int8_t tos; + u_int32_t mark; }; static void nf_ip_saveroute(const struct sk_buff *skb, @@ -138,6 +139,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, rt_info->tos = iph->tos; rt_info->daddr = iph->daddr; rt_info->saddr = iph->saddr; + rt_info->mark = skb->mark; } } @@ -150,6 +152,7 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); if (!(iph->tos == rt_info->tos + && skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) return ip_route_me_harder(skb, RTN_UNSPEC); -- cgit v1.2.3 From 8eecaba900e89643029fd2c253ad8ebb60761165 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 25 Nov 2008 13:45:29 -0800 Subject: tcp: tcp_limit_reno_sacked can become static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- net/ipv4/tcp_input.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index 265392470b26..e8ae90a8c35e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -761,8 +761,6 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } -extern int tcp_limit_reno_sacked(struct tcp_sock *tp); - /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9f8a80ba17bd..d67b6e9cc540 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1940,7 +1940,7 @@ out: /* Limits sacked_out so that sum with lost_out isn't ever larger than * packets_out. Returns zero if sacked_out adjustement wasn't necessary. */ -int tcp_limit_reno_sacked(struct tcp_sock *tp) +static int tcp_limit_reno_sacked(struct tcp_sock *tp) { u32 holes; -- cgit v1.2.3 From 723b46108f8ee75b61ce703d0c9225e4f537bc46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 13:55:15 -0800 Subject: net: udp_unhash() can test if sk is hashed Impact: Optimization Like done in inet_unhash(), we can avoid taking a chain lock if socket is not hashed in udp_unhash() Triggered by close(socket(AF_INET, SOCK_DGRAM, 0)); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 549114472db3..cf5ab0581eba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -970,16 +970,18 @@ int udp_disconnect(struct sock *sk, int flags) void udp_lib_unhash(struct sock *sk) { - struct udp_table *udptable = sk->sk_prot->h.udp_table; - unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); - struct udp_hslot *hslot = &udptable->hash[hash]; + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); + struct udp_hslot *hslot = &udptable->hash[hash]; - spin_lock_bh(&hslot->lock); - if (sk_nulls_del_node_init_rcu(sk)) { - inet_sk(sk)->num = 0; - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_lock_bh(&hslot->lock); + if (sk_nulls_del_node_init_rcu(sk)) { + inet_sk(sk)->num = 0; + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + } + spin_unlock_bh(&hslot->lock); } - spin_unlock_bh(&hslot->lock); } EXPORT_SYMBOL(udp_lib_unhash); -- cgit v1.2.3 From 673c09be457bb23aa0eaaa79804cbb342210d195 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:15:16 -0800 Subject: netns xfrm: add struct xfrm_state::xs_net To avoid unnecessary complications with passing netns around. * set once, very early after allocating * once set, never changes For a while create every xfrm_state in init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 +++++++++- net/ipv4/ipcomp.c | 2 +- net/ipv6/ipcomp6.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_state.c | 9 +++++---- net/xfrm/xfrm_user.c | 4 ++-- 6 files changed, 19 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9107d6f5c297..9da89039832c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -130,6 +130,9 @@ struct xfrm_state_walk { /* Full description of state of transformer. */ struct xfrm_state { +#ifdef CONFIG_NET_NS + struct net *xs_net; +#endif union { struct hlist_node gclist; struct hlist_node bydst; @@ -223,6 +226,11 @@ struct xfrm_state void *data; }; +static inline struct net *xs_net(struct xfrm_state *x) +{ + return read_pnet(&x->xs_net); +} + /* xflags - make enum if more show up */ #define XFRM_TIME_DEFER 1 @@ -1296,7 +1304,7 @@ extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); -extern struct xfrm_state *xfrm_state_alloc(void); +extern struct xfrm_state *xfrm_state_alloc(struct net *net); extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index ec8264ae45c2..0a35f1b6f22c 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -49,7 +49,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (t == NULL) goto out; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index d4576a9c154f..c369638e208a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -76,7 +76,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (!t) goto out; diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b22e011653b..bde8aad4cc93 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1122,7 +1122,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t))) return ERR_PTR(-EINVAL); - x = xfrm_state_alloc(); + x = xfrm_state_alloc(&init_net); if (x == NULL) return ERR_PTR(-ENOBUFS); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 268fe3f9e498..81bde76d049c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -504,13 +504,14 @@ out: static void xfrm_replay_timer_handler(unsigned long data); -struct xfrm_state *xfrm_state_alloc(void) +struct xfrm_state *xfrm_state_alloc(struct net *net) { struct xfrm_state *x; x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC); if (x) { + write_pnet(&x->xs_net, net); atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->km.all); @@ -835,7 +836,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, error = -EEXIST; goto out; } - x = xfrm_state_alloc(); + x = xfrm_state_alloc(&init_net); if (x == NULL) { error = -ENOMEM; goto out; @@ -1017,7 +1018,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re if (!create) return NULL; - x = xfrm_state_alloc(); + x = xfrm_state_alloc(&init_net); if (likely(x)) { switch (family) { case AF_INET: @@ -1125,7 +1126,7 @@ EXPORT_SYMBOL(xfrm_state_add); static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) { int err = -ENOMEM; - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(&init_net); if (!x) goto error; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ee15d5dd6544..65cdaa5c2280 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -320,7 +320,7 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(&init_net); int err = -ENOMEM; if (!x) @@ -1663,7 +1663,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *rt = attrs[XFRMA_TMPL]; struct xfrm_user_acquire *ua = nlmsg_data(nlh); - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(&init_net); int err = -ENOMEM; if (!x) -- cgit v1.2.3 From 221df1ed33c9284fc7a6f6e47ca7f8d5f3665d43 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:30:50 -0800 Subject: netns xfrm: state lookup in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ipcomp.c | 4 ++-- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 4 ++-- net/ipv6/xfrm6_input.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_state.c | 34 +++++++++++++++++++--------------- net/xfrm/xfrm_user.c | 12 ++++++------ 12 files changed, 38 insertions(+), 34 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e4bb67225610..15136c5e2622 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1323,8 +1323,8 @@ extern int xfrm_state_check_expire(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); -extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); -extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); #ifdef CONFIG_XFRM_SUB_POLICY extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 992ecd8662e2..750426b0a276 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -209,7 +209,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 95a9c65003f8..35950128aa94 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -421,7 +421,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 0a35f1b6f22c..3262ce06294c 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -35,7 +35,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; @@ -85,7 +85,7 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) int err = 0; struct xfrm_state *t; - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 13e330d89178..6ae014b86b69 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -415,7 +415,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c02a6308defe..68f2af8c15c0 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -364,7 +364,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index c369638e208a..3a0b3be7ece5 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -63,7 +63,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; @@ -114,7 +114,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a71c7ddcb41e..b69766a77743 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -100,7 +100,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(&init_net, dst, src, proto, AF_INET6); if (!x) continue; diff --git a/net/key/af_key.c b/net/key/af_key.c index e5d595a60921..449a5d03e283 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -683,7 +683,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void ** if (!xaddr) return NULL; - return xfrm_state_lookup(xaddr, sa->sadb_sa_spi, proto, family); + return xfrm_state_lookup(&init_net, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 75279402ccf4..c08a93e98a36 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -151,7 +151,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - x = xfrm_state_lookup(daddr, spi, nexthdr, family); + x = xfrm_state_lookup(&init_net, daddr, spi, nexthdr, family); if (x == NULL) { XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5f4c5340ba30..afde47498cdc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -670,13 +670,13 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - unsigned int h = xfrm_spi_hash(&init_net, daddr, spi, proto, family); + unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, init_net.xfrm.state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -702,13 +702,13 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(&init_net, daddr, saddr, family); + unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto) continue; @@ -740,11 +740,13 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { + struct net *net = xs_net(x); + if (use_spi) - return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + return __xfrm_state_lookup(net, &x->id.daddr, x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(&x->id.daddr, + return __xfrm_state_lookup_byaddr(net, &x->id.daddr, &x->props.saddr, x->id.proto, family); } @@ -818,7 +820,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(&init_net, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; @@ -1361,26 +1363,27 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, +xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(daddr, spi, proto, family); + x = __xfrm_state_lookup(net, daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * -xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_state_lookup_byaddr(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); + x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } @@ -1486,6 +1489,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { + struct net *net = xs_net(x); unsigned int h; struct xfrm_state *x0; int err = -ENOENT; @@ -1503,7 +1507,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); goto unlock; @@ -1513,7 +1517,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) u32 spi = 0; for (h=0; hid.daddr, htonl(spi), x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 49a7e897ba96..e02ef3361190 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -440,7 +440,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(&init_net, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -451,8 +451,8 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, - p->family); + x = xfrm_state_lookup_byaddr(&init_net, &p->daddr, saddr, + p->proto, p->family); } out: @@ -1468,7 +1468,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family); + x = xfrm_state_lookup(&init_net, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1509,7 +1509,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(&p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + x = xfrm_state_lookup(&init_net, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; @@ -1628,7 +1628,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; - x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(&init_net, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) -- cgit v1.2.3 From 52479b623d3d41df84c499325b6a8c7915413032 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:35:18 -0800 Subject: netns xfrm: lookup in netns Pass netns to xfrm_lookup()/__xfrm_lookup(). For that pass netns to flow_cache_lookup() and resolver callback. Take it from socket or netdevice. Stub DECnet to init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/dst.h | 16 ++++++++-------- include/net/flow.h | 9 +++++---- net/core/flow.c | 4 ++-- net/dccp/ipv6.c | 10 +++++----- net/decnet/dn_route.c | 6 +++--- net/ipv4/icmp.c | 4 ++-- net/ipv4/netfilter.c | 4 ++-- net/ipv4/route.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 3 ++- net/ipv6/icmp.c | 6 +++--- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_tunnel.c | 5 +++-- net/ipv6/mcast.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/netfilter.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/raw.c | 3 ++- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 11 ++++++----- net/ipv6/udp.c | 3 ++- net/xfrm/xfrm_policy.c | 38 ++++++++++++++++++++------------------ 22 files changed, 75 insertions(+), 67 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/dst.h b/include/net/dst.h index 6c778799bf10..6be3b082a070 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -291,21 +291,21 @@ enum { struct flowi; #ifndef CONFIG_XFRM -static inline int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags) { return 0; } -static inline int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags) { return 0; } #else -extern int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags); -extern int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags); +extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags); +extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags); #endif #endif diff --git a/include/net/flow.h b/include/net/flow.h index b45a5e4fcadd..809970b7dfee 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -84,12 +84,13 @@ struct flowi { #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 +struct net; struct sock; -typedef int (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, - void **objp, atomic_t **obj_refp); +typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family, + u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver); +extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, + u8 dir, flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/net/core/flow.c b/net/core/flow.c index d323388dd1ba..96015871ecea 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -165,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -225,7 +225,7 @@ nocache: void *obj; atomic_t *obj_ref; - err = resolver(key, family, dir, &obj, &obj_ref); + err = resolver(net, key, family, dir, &obj, &obj_ref); if (fle && !err) { fle->genid = atomic_read(&flow_cache_genid); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f033e845bb07..b963f35c65f6 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -168,7 +168,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); if (err < 0) { sk->sk_err_soft = -err; goto out; @@ -279,7 +279,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); if (err < 0) goto done; @@ -343,7 +343,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) { - if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, skb, &fl, NULL, 0); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); @@ -569,7 +569,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } @@ -1004,7 +1004,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT); + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 768df000523b..eeaa3d819f9c 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1184,7 +1184,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f err = __dn_route_output_key(pprt, flp, flags); if (err == 0 && flp->proto) { - err = xfrm_lookup(pprt, flp, NULL, 0); + err = xfrm_lookup(&init_net, pprt, flp, NULL, 0); } return err; } @@ -1195,8 +1195,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); if (err == 0 && fl->proto) { - err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ? - 0 : XFRM_LOOKUP_WAIT); + err = xfrm_lookup(&init_net, pprt, fl, sk, + (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT); } return err; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7b88be9803b1..705b33b184a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -562,7 +562,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* No need to clone since we're just using its address. */ rt2 = rt; - err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); + err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); switch (err) { case 0: if (rt != rt2) @@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (err) goto relookup_failed; - err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, + err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); switch (err) { case 0: diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6efdb70b3eb2..c99eecf89da5 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -66,7 +66,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif @@ -97,7 +97,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) + if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) return -1; dst_release(skb->dst); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4e6959c29819..77bfba975959 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2761,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, + err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) err = ipv4_dst_blackhole(net, rp, flp); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 01edac888510..437b750b98fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,7 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; return err; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e44deb8d4df2..e2bdc6d83a43 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,7 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a77b8d103804..4f433847d95f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -427,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, /* No need to clone since we're just using its address. */ dst2 = dst; - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); switch (err) { case 0: if (dst != dst2) @@ -446,7 +446,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (ip6_dst_lookup(sk, &dst2, &fl)) goto relookup_failed; - err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); @@ -552,7 +552,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 16d43f20b32f..3c3732d50c1a 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -219,7 +219,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; kfree_skb(skb); return err; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ef249ab5c93c..58e2b0d93758 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -846,6 +846,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { + struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -861,9 +862,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(dev_net(dev), NULL, fl); + dst = ip6_route_output(net, NULL, fl); - if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0) + if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) goto tx_err_link_failure; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 870a1d64605a..0f3896032830 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1466,7 +1466,7 @@ static void mld_sendpack(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; @@ -1831,7 +1831,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index af6705f03b5c..e4acc212345e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -524,7 +524,7 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err < 0) { kfree_skb(skb); return; @@ -1524,7 +1524,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (dst == NULL) return; - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err) return; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index fd5b3a4e3329..627e21db65df 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -29,7 +29,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET6) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0981b4ccb8b1..5a2d0a41694a 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -97,7 +97,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) + if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2ba04d41dc25..61f6827e5906 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -860,7 +860,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 676c80b5b14b..711175e0571f 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -259,7 +259,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a5d750acd793..f259c9671f3e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -260,7 +260,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -390,7 +391,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } @@ -492,7 +493,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) goto done; if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; skb = tcp_make_synack(sk, dst, req); @@ -1018,7 +1019,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * namespace */ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) @@ -1316,7 +1317,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fd2d9ad4a8a3..38390dd19636 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -849,7 +849,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6165218fd7c2..7c88a25c7af5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -940,7 +940,8 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, return ret; } -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + struct flowi *fl, u16 family, u8 dir) { int err; @@ -956,7 +957,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, return NULL; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&init_net, daddr, saddr, family, dir); + chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -973,7 +974,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &init_net.xfrm.policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { @@ -996,14 +997,14 @@ fail: return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1011,7 +1012,7 @@ static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, if (pol || err) goto end; #endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1537,7 +1538,7 @@ static int stale_bundle(struct dst_entry *dst); * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; @@ -1575,10 +1576,10 @@ restart: if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || - !init_net.xfrm.policy_count[XFRM_POLICY_OUT]) + !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - policy = flow_cache_lookup(fl, dst_orig->ops->family, + policy = flow_cache_lookup(net, fl, dst_orig->ops->family, dir, xfrm_policy_lookup); err = PTR_ERR(policy); if (IS_ERR(policy)) { @@ -1635,7 +1636,8 @@ restart: #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, + XFRM_POLICY_TYPE_MAIN, fl, family, XFRM_POLICY_OUT); if (pols[1]) { @@ -1683,11 +1685,11 @@ restart: if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&init_net.xfrm.km_waitq, &wait); + add_wait_queue(&net->xfrm.km_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); set_current_state(TASK_RUNNING); - remove_wait_queue(&init_net.xfrm.km_waitq, &wait); + remove_wait_queue(&net->xfrm.km_waitq, &wait); nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); @@ -1781,10 +1783,10 @@ nopol: } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - int err = __xfrm_lookup(dst_p, fl, sk, flags); + int err = __xfrm_lookup(net, dst_p, fl, sk, flags); if (err == -EREMOTE) { dst_release(*dst_p); @@ -1936,7 +1938,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, + pol = flow_cache_lookup(&init_net, &fl, family, fl_dir, xfrm_policy_lookup); if (IS_ERR(pol)) { @@ -1959,7 +1961,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(&init_net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { @@ -2049,7 +2051,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + return xfrm_lookup(&init_net, &skb->dst, &fl, NULL, 0) == 0; } EXPORT_SYMBOL(__xfrm_route_forward); -- cgit v1.2.3 From ddcfd79680c1dc74eb5f24aa70785c11bf7eec8f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:37:23 -0800 Subject: netns xfrm: dst garbage-collecting in netns Pass netns pointer to struct xfrm_policy_afinfo::garbage_collect() [This needs more thoughts on what to do with dst_ops] [Currently stub to init_net] Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36c8cffdf4e2..bd2515005ae2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -265,7 +265,7 @@ struct xfrm_dst; struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; - void (*garbage_collect)(void); + void (*garbage_collect)(struct net *net); struct dst_entry *(*dst_lookup)(int tos, xfrm_address_t *saddr, xfrm_address_t *daddr); int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 84dbb5a03cc2..a881ca38dddb 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -187,7 +187,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(); + xfrm4_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 3b67ce7786ec..2df8a78fc3df 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -220,7 +220,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(); + xfrm6_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 49e089826f45..7ebbdd63fca0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2173,9 +2173,9 @@ static int unused_bundle(struct dst_entry *dst) return !atomic_read(&dst->__refcnt); } -static void __xfrm_garbage_collect(void) +static void __xfrm_garbage_collect(struct net *net) { - xfrm_prune_bundles(&init_net, unused_bundle); + xfrm_prune_bundles(net, unused_bundle); } static int xfrm_flush_bundles(struct net *net) -- cgit v1.2.3 From c5b3cf46eabe6e7459125fc6e2033b4222665017 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:51:25 -0800 Subject: netns xfrm: ->dst_lookup in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- net/ipv4/xfrm4_policy.c | 7 ++++--- net/ipv6/xfrm6_policy.c | 7 ++++--- net/xfrm/xfrm_policy.c | 7 ++++--- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f3ea1607c595..b16d4c0b16e0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -266,7 +266,8 @@ struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; void (*garbage_collect)(struct net *net); - struct dst_entry *(*dst_lookup)(int tos, xfrm_address_t *saddr, + struct dst_entry *(*dst_lookup)(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr); int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a881ca38dddb..e1cf9ed07d49 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -18,7 +18,8 @@ static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; -static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = { @@ -36,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) fl.fl4_src = saddr->a4; - err = __ip_route_output_key(&init_net, &rt, &fl); + err = __ip_route_output_key(net, &rt, &fl); dst = &rt->u.dst; if (err) dst = ERR_PTR(err); @@ -48,7 +49,7 @@ static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) struct dst_entry *dst; struct rtable *rt; - dst = xfrm4_dst_lookup(0, NULL, daddr); + dst = xfrm4_dst_lookup(&init_net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 2df8a78fc3df..d7a5b8bc3770 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -27,7 +27,8 @@ static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = {}; @@ -38,7 +39,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); err = dst->error; if (dst->error) { @@ -54,7 +55,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(0, NULL, daddr); + dst = xfrm6_dst_lookup(&init_net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7ebbdd63fca0..2b0a80b6259b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -92,7 +92,7 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } -static inline struct dst_entry *__xfrm_dst_lookup(int tos, +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, xfrm_address_t *saddr, xfrm_address_t *daddr, int family) @@ -104,7 +104,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(int tos, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(tos, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, saddr, daddr); xfrm_policy_put_afinfo(afinfo); @@ -116,6 +116,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, xfrm_address_t *prev_daddr, int family) { + struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; struct dst_entry *dst; @@ -129,7 +130,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(tos, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) -- cgit v1.2.3 From fbda33b2b85941c1ae3a0d89522dec5c1b1bd98c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:56:49 -0800 Subject: netns xfrm: ->get_saddr in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/xfrm6_policy.c | 5 +++-- net/xfrm/xfrm_policy.c | 7 ++++--- 4 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b16d4c0b16e0..d076f3d34278 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -269,7 +269,7 @@ struct xfrm_policy_afinfo { struct dst_entry *(*dst_lookup)(struct net *net, int tos, xfrm_address_t *saddr, xfrm_address_t *daddr); - int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); + int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e1cf9ed07d49..2ad24ba31f9d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -44,12 +44,13 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, return dst; } -static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm4_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct rtable *rt; - dst = xfrm4_dst_lookup(&init_net, 0, NULL, daddr); + dst = xfrm4_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d7a5b8bc3770..97ab068e8ccc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -50,12 +50,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, return dst; } -static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm6_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(&init_net, 0, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2b0a80b6259b..7c7bb54f2265 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1187,7 +1187,7 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, +xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; @@ -1195,7 +1195,7 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(local, remote); + err = afinfo->get_saddr(net, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1207,6 +1207,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); @@ -1225,7 +1226,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, local = &tmpl->saddr; family = tmpl->encap_family; if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(&tmp, remote, family); + error = xfrm_get_saddr(net, &tmp, remote, family); if (error) goto fail; local = &tmp; -- cgit v1.2.3 From 4fb236bac9fc7d51e2267866de6d4c30e549d2f8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:59:27 -0800 Subject: netns xfrm: AH/ESP in netns! Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 4 +++- net/ipv4/esp4.c | 4 +++- net/ipv6/ah6.c | 3 ++- net/ipv6/esp6.c | 3 ++- 4 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 750426b0a276..e878e494296e 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -201,6 +201,7 @@ out: static void ah4_err(struct sk_buff *skb, u32 info) { + struct net *net = dev_net(skb->dev); struct iphdr *iph = (struct iphdr *)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; @@ -209,7 +210,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", @@ -314,6 +315,7 @@ static struct net_protocol ah4_protocol = { .handler = xfrm4_rcv, .err_handler = ah4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init ah4_init(void) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 35950128aa94..18bb383ea393 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -413,6 +413,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) static void esp4_err(struct sk_buff *skb, u32 info) { + struct net *net = dev_net(skb->dev); struct iphdr *iph = (struct iphdr *)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; @@ -421,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", @@ -618,6 +619,7 @@ static struct net_protocol esp4_protocol = { .handler = xfrm4_rcv, .err_handler = esp4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init esp4_init(void) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 6ae014b86b69..52449f7a1b71 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -407,6 +407,7 @@ out: static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); struct xfrm_state *x; @@ -415,7 +416,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 68f2af8c15c0..c2f250150db1 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -356,6 +356,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; @@ -364,7 +365,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", -- cgit v1.2.3 From 1748376b6626acf59c24e9592ac67b3fe2a0e026 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 21:16:35 -0800 Subject: net: Use a percpu_counter for sockets_allocated Instead of using one atomic_t per protocol, use a percpu_counter for "sockets_allocated", to reduce cache line contention on heavy duty network servers. Note : We revert commit (248969ae31e1b3276fc4399d67ce29a5d81e6fd9 net: af_unix can make unix_nr_socks visbile in /proc), since it is not anymore used after sock_prot_inuse_add() addition Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 1 + include/net/sock.h | 2 +- include/net/tcp.h | 2 +- net/core/sock.c | 10 +++++++--- net/ipv4/proc.c | 3 ++- net/ipv4/tcp.c | 8 ++++++-- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 2 +- net/sctp/protocol.c | 6 +++++- net/sctp/socket.c | 6 +++--- net/unix/af_unix.c | 1 - 11 files changed, 29 insertions(+), 16 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 23797506f593..bbb7742195b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -138,6 +138,7 @@ void sctp_write_space(struct sock *sk); unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); +extern struct percpu_counter sctp_sockets_allocated; /* * sctp/primitive.c diff --git a/include/net/sock.h b/include/net/sock.h index 00cd486d362f..a2a3890959c4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -649,7 +649,7 @@ struct proto { /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); atomic_t *memory_allocated; /* Current allocated memory. */ - atomic_t *sockets_allocated; /* Current number of sockets. */ + struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. diff --git a/include/net/tcp.h b/include/net/tcp.h index e8ae90a8c35e..cbca3b8a133d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -238,7 +238,7 @@ extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; extern atomic_t tcp_memory_allocated; -extern atomic_t tcp_sockets_allocated; +extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; /* diff --git a/net/core/sock.c b/net/core/sock.c index a4e840e5a053..7a081b647bf9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) - atomic_inc(newsk->sk_prot->sockets_allocated); + percpu_counter_inc(newsk->sk_prot->sockets_allocated); } out: return newsk; @@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) } if (prot->memory_pressure) { - if (!*prot->memory_pressure || - prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * + int alloc; + + if (!*prot->memory_pressure) + return 1; + alloc = percpu_counter_read_positive(prot->sockets_allocated); + if (prot->sysctl_mem[2] > alloc * sk_mem_pages(sk->sk_wmem_queued + atomic_read(&sk->sk_rmem_alloc) + sk->sk_forward_alloc)) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 731789bb499f..4944b47ad628 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -55,7 +55,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", sock_prot_inuse_get(net, &tcp_prot), atomic_read(&tcp_orphan_count), - tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), + tcp_death_row.tw_count, + (int)percpu_counter_sum_positive(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 044224a341eb..e6fade9ebf62 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -290,9 +290,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_t tcp_memory_allocated; /* Current allocated memory. */ -atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ - EXPORT_SYMBOL(tcp_memory_allocated); + +/* + * Current number of TCP sockets. + */ +struct percpu_counter tcp_sockets_allocated; EXPORT_SYMBOL(tcp_sockets_allocated); /* @@ -2685,6 +2688,7 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + percpu_counter_init(&tcp_sockets_allocated, 0); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cab2458f86fd..26b9030747cc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1797,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } @@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } - atomic_dec(&tcp_sockets_allocated); + percpu_counter_dec(&tcp_sockets_allocated); } EXPORT_SYMBOL(tcp_v4_destroy_sock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f259c9671f3e..8702b06cb60a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1830,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a8ca743241ee..d5ea232c9126 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -102,6 +102,8 @@ struct sock *sctp_get_ctl_sock(void) /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { + if (percpu_counter_init(&sctp_sockets_allocated, 0)) + goto out_nomem; #ifdef CONFIG_PROC_FS if (!proc_net_sctp) { struct proc_dir_entry *ent; @@ -110,7 +112,7 @@ static __init int sctp_proc_init(void) ent->owner = THIS_MODULE; proc_net_sctp = ent; } else - goto out_nomem; + goto out_free_percpu; } if (sctp_snmp_proc_init()) @@ -135,6 +137,8 @@ out_snmp_proc_init: proc_net_sctp = NULL; remove_proc_entry("sctp", init_net.proc_net); } +out_free_percpu: + percpu_counter_destroy(&sctp_sockets_allocated); out_nomem: return -ENOMEM; #else diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ba81fe3ccab8..a2de585888d0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -114,7 +114,7 @@ extern int sysctl_sctp_wmem[3]; static int sctp_memory_pressure; static atomic_t sctp_memory_allocated; -static atomic_t sctp_sockets_allocated; +struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) { @@ -3613,7 +3613,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - atomic_inc(&sctp_sockets_allocated); + percpu_counter_inc(&sctp_sockets_allocated); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -3632,7 +3632,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - atomic_dec(&sctp_sockets_allocated); + percpu_counter_dec(&sctp_sockets_allocated); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3a35a6e8bf91..5aaf23e43f1d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -571,7 +571,6 @@ static const struct proto_ops unix_seqpacket_ops = { static struct proto unix_proto = { .name = "UNIX", .owner = THIS_MODULE, - .sockets_allocated = &unix_nr_socks, .obj_size = sizeof(struct unix_sock), }; -- cgit v1.2.3 From dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 21:17:14 -0800 Subject: net: Use a percpu_counter for orphan_count Instead of using one atomic_t per protocol, use a percpu_counter for "orphan_count", to reduce cache line contention on heavy duty network servers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- include/net/tcp.h | 2 +- net/dccp/dccp.h | 2 +- net/dccp/proto.c | 16 ++++++++++------ net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/proc.c | 2 +- net/ipv4/tcp.c | 12 +++++++----- net/ipv4/tcp_timer.c | 2 +- 8 files changed, 24 insertions(+), 18 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/sock.h b/include/net/sock.h index a2a3890959c4..5a3a151bd730 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -666,7 +666,7 @@ struct proto { unsigned int obj_size; int slab_flags; - atomic_t *orphan_count; + struct percpu_counter *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; diff --git a/include/net/tcp.h b/include/net/tcp.h index cbca3b8a133d..de1e91d959b8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -46,7 +46,7 @@ extern struct inet_hashinfo tcp_hashinfo; -extern atomic_t tcp_orphan_count; +extern struct percpu_counter tcp_orphan_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER (128 + MAX_HEADER) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 031ce350d3c1..33a1127270c1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -49,7 +49,7 @@ extern int dccp_debug; extern struct inet_hashinfo dccp_hashinfo; -extern atomic_t dccp_orphan_count; +extern struct percpu_counter dccp_orphan_count; extern void dccp_time_wait(struct sock *sk, int state, int timeo); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ea85c423cdbd..db225f93cd5a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -40,8 +40,7 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); -atomic_t dccp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter dccp_orphan_count; EXPORT_SYMBOL_GPL(dccp_orphan_count); struct inet_hashinfo dccp_hashinfo; @@ -1000,7 +999,7 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); /* * It is the last release_sock in its life. It will remove backlog. @@ -1064,18 +1063,21 @@ static int __init dccp_init(void) { unsigned long goal; int ehash_order, bhash_order, i; - int rc = -ENOBUFS; + int rc; BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - + rc = percpu_counter_init(&dccp_orphan_count, 0); + if (rc) + goto out; + rc = -ENOBUFS; inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) - goto out; + goto out_free_percpu; /* * Size and allocate the main established and bind bucket @@ -1168,6 +1170,8 @@ out_free_dccp_ehash: out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_hashinfo.bind_bucket_cachep = NULL; +out_free_percpu: + percpu_counter_destroy(&dccp_orphan_count); goto out; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 05af807ca9b9..1ccdbba528be 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - atomic_dec(sk->sk_prot->orphan_count); + percpu_counter_dec(sk->sk_prot->orphan_count); sock_put(sk); } @@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_orphan(child); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); inet_csk_destroy_sock(child); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4944b47ad628..614958b7c276 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -54,7 +54,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", sock_prot_inuse_get(net, &tcp_prot), - atomic_read(&tcp_orphan_count), + (int)percpu_counter_sum_positive(&tcp_orphan_count), tcp_death_row.tw_count, (int)percpu_counter_sum_positive(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e6fade9ebf62..019243408623 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -277,8 +277,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; -atomic_t tcp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); int sysctl_tcp_mem[3] __read_mostly; @@ -1837,7 +1836,7 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); /* It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); @@ -1888,9 +1887,11 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { + int orphan_count = percpu_counter_read_positive( + sk->sk_prot->orphan_count); + sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, - atomic_read(sk->sk_prot->orphan_count))) { + if (tcp_too_many_orphans(sk, orphan_count)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -2689,6 +2690,7 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); percpu_counter_init(&tcp_sockets_allocated, 0); + percpu_counter_init(&tcp_orphan_count, 0); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3df339e3e363..cc4e6d27dedc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = atomic_read(&tcp_orphan_count); + int orphans = percpu_counter_read_positive(&tcp_orphan_count); /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ -- cgit v1.2.3 From 6976a1d6c222c50ac93d2273b9cf57e6fd047e59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Dec 2008 23:37:17 -0800 Subject: net: percpu_counter_inc() should not be called in BH-disabled section Based upon a lockdep report by Alexey Dobriyan. I checked all per_cpu_counter_xxx() usages in network tree, and I think all call sites are BH enabled except one in inet_csk_listen_stop(). commit dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1 (net: Use a percpu_counter for orphan_count) replaced atomic_t orphan_count to a percpu_counter. atomic_inc()/atomic_dec() can be called from any context, while percpu_counter_xxx() should be called from a consistent state. For orphan_count, this context can be the BH-enabled one. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1ccdbba528be..fe32255a9808 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -632,6 +632,8 @@ void inet_csk_listen_stop(struct sock *sk) acc_req = req->dl_next; + percpu_counter_inc(sk->sk_prot->orphan_count); + local_bh_disable(); bh_lock_sock(child); WARN_ON(sock_owned_by_user(child)); @@ -641,8 +643,6 @@ void inet_csk_listen_stop(struct sock *sk) sock_orphan(child); - percpu_counter_inc(sk->sk_prot->orphan_count); - inet_csk_destroy_sock(child); bh_unlock_sock(child); -- cgit v1.2.3 From 1ea472e2dedcf23d5f31c63fc790cccfab93c0de Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 3 Dec 2008 22:21:47 -0800 Subject: net: fix /proc/net/ip_mr_cache display - V2 /proc/net/ip_mr_cache and /proc/net/ip6_mr_cache displays garbage when showing unresolved mfc_cache entries. [root@qemu tests]# cat /proc/net/ip_mr_cache Group Origin Iif Pkts Bytes Wrong Oifs 014C00EF 010014AC 1 10 10050 0 2:1 3:1 024C00EF 010014AC 65535 514 2 -559067475 The first line is correct. It is a resolved cache entry, 10 packets used it... The second line represents an unresolved entry, and the columns Pkts(4th), Bytes(5th) and Wrong(6th) just show garbage. In struct mfc_cache, there's an union to store data for resolved and unresolved cases. And what ipmr_mfc_seq_show() is printing in these columns for the unresolved entries is some bytes from mfc_cache.mfc_un.res. Bad. (eg. In our case -559067475 is in fact 0xdead4ead which is the spinlock magic from mfc_cache.mfc_un.unres.unresolved.lock.magic). This patch replaces the garbage data written in these columns for the unresolved entries by '0' (zeros) which is more correct. This change doesn't break the ABI. Also, mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, mfc->mfc_un.res.wrong_if are unsigned long. It applies on top of net-next-2.6. The patch for net-2.6 is slightly different because of the NIP6_FMT to %pI6 conversion that was made in the seq_printf. Changelog: ========== V2: * Instead of breaking the ABI by suppressing the columns that have no meaning for unresolved entries, fill them with 0 values. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 16 +++++++++++----- net/ipv6/ip6mr.c | 16 +++++++++++----- 2 files changed, 22 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 77fc4d3fdf61..cb3a57d46af6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1879,15 +1879,16 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld", + seq_printf(seq, "%08lX %08lX %-3d", (unsigned long) mfc->mfc_mcastgrp, (unsigned long) mfc->mfc_origin, - mfc->mfc_parent, - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); + mfc->mfc_parent); if (it->cache != &mfc_unres_queue) { + seq_printf(seq, " %8lu %8lu %8lu", + mfc->mfc_un.res.pkt, + mfc->mfc_un.res.bytes, + mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++ ) { if (VIF_EXISTS(n) @@ -1896,6 +1897,11 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]); } + } else { + /* unresolved mfc_caches don't contain + * pkt, bytes and wrong_if values + */ + seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); } seq_putc(seq, '\n'); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index dfba9fd0c248..2dc4b0190878 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,14 +297,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%pI6 %pI6 %-3d %8ld %8ld %8ld", + seq_printf(seq, "%pI6 %pI6 %-3d", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, - mfc->mf6c_parent, - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); + mfc->mf6c_parent); if (it->cache != &mfc_unres_queue) { + seq_printf(seq, " %8lu %8lu %8lu", + mfc->mfc_un.res.pkt, + mfc->mfc_un.res.bytes, + mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++) { if (MIF_EXISTS(n) && @@ -313,6 +314,11 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]); } + } else { + /* unresolved mfc_caches don't contain + * pkt, bytes and wrong_if values + */ + seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); } seq_putc(seq, '\n'); } -- cgit v1.2.3 From 999890b21a8eff7559a140fcbd2cd4b34e685c76 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 3 Dec 2008 22:22:16 -0800 Subject: net: /proc/net/ip_mr_cache, display Iif as a signed short Today, iproute2 fails to show multicast forwarding unresolved cache entries while scanning /proc/net/ip_mr_cache. Indeed, it expects to see -1 in 'Iif' column to identify unresolved entries but the kernel outputs 65535. It's a signed/unsigned issue: 'Iif', the source interface, is retrieved from member mfc_parent in struct mfc_cache. mfc_parent is a vifi_t: unsigned short, but is displayed in ipmr_mfc_seq_show() as "%-3d", signed integer. In unresolevd entries, the 65535 value (0xFFFF) comes from this define: #define ALL_VIFS ((vifi_t)(-1)) That may explains why the guy who added support for this in iproute2 thought a -1 should be expected. I don't know if this must be fixed in kernel or in iproute2. Who is right? What is the correct API? How was it designed originally? I let you decide if it should goes in the kernel or be fixed in iproute2. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6mr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index cb3a57d46af6..244a624e0b18 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1879,7 +1879,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%08lX %08lX %-3d", + seq_printf(seq, "%08lX %08lX %-3hd", (unsigned long) mfc->mfc_mcastgrp, (unsigned long) mfc->mfc_origin, mfc->mfc_parent); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2dc4b0190878..1446bec895a2 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,7 +297,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%pI6 %pI6 %-3d", + seq_printf(seq, "%pI6 %pI6 %-3hd", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent); -- cgit v1.2.3 From f0bc52f38b09308fca85f3aa9300a341364fe9c6 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:40:47 -0800 Subject: tcp: force mss equality with the next skb too. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also make if-goto forest nicer looking. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d67b6e9cc540..63c3ef6d4a1c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1575,11 +1575,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, goto out; skb = tcp_write_queue_next(sk, prev); - if (!skb_can_shift(skb)) - goto out; - if (skb == tcp_send_head(sk)) - goto out; - if ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + if (!skb_can_shift(skb) || + (skb == tcp_send_head(sk)) || + ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) || + (mss != tcp_shift_mss(skb))) goto out; len = skb->len; -- cgit v1.2.3 From 9969ca5f205988fb96461075cb4914c55cf166b5 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:41:06 -0800 Subject: tcp: Fix thinko making the not-shiftable to cover S|R as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit S|R won't result in S if just SACK is received. DSACK is another story (but it is covered correctly already). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 63c3ef6d4a1c..33902f6799c3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1481,7 +1481,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, /* Normally R but no L won't result in plain S */ if (!dup_sack && - (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) == TCPCB_SACKED_RETRANS) + (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS) goto fallback; if (!skb_can_shift(skb)) goto fallback; -- cgit v1.2.3 From 775ffabf77a648d78fe1d20cb3a620e771abb921 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:41:26 -0800 Subject: tcp: make mtu probe failure to not break gso'ed skbs unnecessarily MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I noticed that since skb->len has nothing to do with actual segment length with gso, we need to figure it out separately, reuse a function from the recent shifting stuff (generalize it). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 33902f6799c3..21c670190780 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1445,14 +1445,9 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, /* I wish gso_size would have a bit more sane initialization than * something-or-zero which complicates things */ -static int tcp_shift_mss(struct sk_buff *skb) +static int tcp_skb_seglen(struct sk_buff *skb) { - int mss = tcp_skb_mss(skb); - - if (!mss) - mss = skb->len; - - return mss; + return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); } /* Shifting pages past head area doesn't work */ @@ -1503,12 +1498,12 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if (in_sack) { len = skb->len; pcount = tcp_skb_pcount(skb); - mss = tcp_shift_mss(skb); + mss = tcp_skb_seglen(skb); /* TODO: Fix DSACKs to not fragment already SACKed and we can * drop this restriction as unnecessary */ - if (mss != tcp_shift_mss(prev)) + if (mss != tcp_skb_seglen(prev)) goto fallback; } else { if (!after(TCP_SKB_CB(skb)->end_seq, start_seq)) @@ -1549,7 +1544,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, /* TODO: Fix DSACKs to not fragment already SACKed and we can * drop this restriction as unnecessary */ - if (mss != tcp_shift_mss(prev)) + if (mss != tcp_skb_seglen(prev)) goto fallback; if (len == mss) { @@ -1578,7 +1573,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if (!skb_can_shift(skb) || (skb == tcp_send_head(sk)) || ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) || - (mss != tcp_shift_mss(skb))) + (mss != tcp_skb_seglen(skb))) goto out; len = skb->len; @@ -2853,7 +2848,7 @@ void tcp_simple_retransmit(struct sock *sk) tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; - if (skb->len > mss && + if (tcp_skb_seglen(skb) > mss && !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; -- cgit v1.2.3 From a1197f5a6faa23e5d0c1f8ed97b011deb2a75457 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:42:22 -0800 Subject: tcp: introduce struct tcp_sacktag_state to reduce arg pressure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are just too many args to some sacktag functions. This idea was first proposed by David S. Miller around a year ago, and the current situation is much worse that what it was back then. tcp_sacktag_one can be made a bit simpler by returning the new sacked (it can be achieved with a single variable though the previous code "caching" sacked into a local variable and therefore it is not exactly equal but the results will be the same). codiff on x86_64 tcp_sacktag_one | -15 tcp_shifted_skb | -50 tcp_match_skb_to_sack | -1 tcp_sacktag_walk | -64 tcp_sacktag_write_queue | -59 tcp_urg | +1 tcp_event_data_recv | -1 7 functions changed, 1 bytes added, 190 bytes removed, diff: -189 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 145 ++++++++++++++++++++++++++------------------------- 1 file changed, 74 insertions(+), 71 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 21c670190780..e25827719e70 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1237,6 +1237,12 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, return dup_sack; } +struct tcp_sacktag_state { + int reord; + int fack_count; + int flag; +}; + /* Check if skb is fully within the SACK block. In presence of GSO skbs, * the incoming SACK may not exactly match but we can find smaller MSS * aligned portion of it that matches. Therefore we might need to fragment @@ -1290,25 +1296,25 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, - int *reord, int dup_sack, int fack_count, - u8 *sackedto, int pcount) +static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, + struct tcp_sacktag_state *state, + int dup_sack, int pcount) { struct tcp_sock *tp = tcp_sk(sk); u8 sacked = TCP_SKB_CB(skb)->sacked; - int flag = 0; + int fack_count = state->fack_count; /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) - *reord = min(fack_count, *reord); + state->reord = min(fack_count, state->reord); } /* Nothing to do; acked frame is about to be dropped (was ACKed). */ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - return flag; + return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { if (sacked & TCPCB_SACKED_RETRANS) { @@ -1317,7 +1323,7 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, * that retransmission is still in flight. */ if (sacked & TCPCB_LOST) { - *sackedto &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); + sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); tp->lost_out -= pcount; tp->retrans_out -= pcount; } @@ -1328,21 +1334,22 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, */ if (before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - *reord = min(fack_count, *reord); + state->reord = min(fack_count, + state->reord); /* SACK enhanced F-RTO (RFC4138; Appendix B) */ if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) - flag |= FLAG_ONLY_ORIG_SACKED; + state->flag |= FLAG_ONLY_ORIG_SACKED; } if (sacked & TCPCB_LOST) { - *sackedto &= ~TCPCB_LOST; + sacked &= ~TCPCB_LOST; tp->lost_out -= pcount; } } - *sackedto |= TCPCB_SACKED_ACKED; - flag |= FLAG_DATA_SACKED; + sacked |= TCPCB_SACKED_ACKED; + state->flag |= FLAG_DATA_SACKED; tp->sacked_out += pcount; fack_count += pcount; @@ -1361,21 +1368,20 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, * frames and clear it. undo_retrans is decreased above, L|R frames * are accounted above as well. */ - if (dup_sack && (*sackedto & TCPCB_SACKED_RETRANS)) { - *sackedto &= ~TCPCB_SACKED_RETRANS; + if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) { + sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= pcount; } - return flag; + return sacked; } static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, - struct sk_buff *skb, unsigned int pcount, - int shifted, int fack_count, int *reord, - int *flag, int mss) + struct sk_buff *skb, + struct tcp_sacktag_state *state, + unsigned int pcount, int shifted, int mss) { struct tcp_sock *tp = tcp_sk(sk); - u8 dummy_sacked = TCP_SKB_CB(skb)->sacked; /* We discard results */ BUG_ON(!pcount); @@ -1407,8 +1413,8 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, skb_shinfo(skb)->gso_type = 0; } - *flag |= tcp_sacktag_one(skb, sk, reord, 0, fack_count, &dummy_sacked, - pcount); + /* We discard results */ + tcp_sacktag_one(skb, sk, state, 0, pcount); /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); @@ -1460,9 +1466,9 @@ static int skb_can_shift(struct sk_buff *skb) * skb. */ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, + struct tcp_sacktag_state *state, u32 start_seq, u32 end_seq, - int dup_sack, int *fack_count, - int *reord, int *flag) + int dup_sack) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *prev; @@ -1559,8 +1565,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if (!skb_shift(prev, skb, len)) goto fallback; - if (!tcp_shifted_skb(sk, prev, skb, pcount, len, *fack_count, reord, - flag, mss)) + if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss)) goto out; /* Hole filled allows collapsing with the next as well, this is very @@ -1579,12 +1584,12 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, len = skb->len; if (skb_shift(prev, skb, len)) { pcount += tcp_skb_pcount(skb); - tcp_shifted_skb(sk, prev, skb, tcp_skb_pcount(skb), len, - *fack_count, reord, flag, mss); + tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb), len, + mss); } out: - *fack_count += pcount; + state->fack_count += pcount; return prev; noop: @@ -1597,9 +1602,9 @@ fallback: static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, + struct tcp_sacktag_state *state, u32 start_seq, u32 end_seq, - int dup_sack_in, int *fack_count, - int *reord, int *flag) + int dup_sack_in) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *tmp; @@ -1629,9 +1634,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, * so not even _safe variant of the loop is enough. */ if (in_sack <= 0) { - tmp = tcp_shift_skb_data(sk, skb, start_seq, - end_seq, dup_sack, - fack_count, reord, flag); + tmp = tcp_shift_skb_data(sk, skb, state, + start_seq, end_seq, dup_sack); if (tmp != NULL) { if (tmp != skb) { skb = tmp; @@ -1650,17 +1654,17 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, break; if (in_sack) { - *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, - *fack_count, - &(TCP_SKB_CB(skb)->sacked), - tcp_skb_pcount(skb)); + TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, + state, + dup_sack, + tcp_skb_pcount(skb)); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) tcp_advance_highest_sack(sk, skb); } - *fack_count += tcp_skb_pcount(skb); + state->fack_count += tcp_skb_pcount(skb); } return skb; } @@ -1669,7 +1673,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, * a normal way */ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, - u32 skip_to_seq, int *fack_count) + struct tcp_sacktag_state *state, + u32 skip_to_seq) { tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1678,7 +1683,7 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) break; - *fack_count += tcp_skb_pcount(skb); + state->fack_count += tcp_skb_pcount(skb); } return skb; } @@ -1686,18 +1691,17 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, - u32 skip_to_seq, - int *fack_count, int *reord, - int *flag) + struct tcp_sacktag_state *state, + u32 skip_to_seq) { if (next_dup == NULL) return skb; if (before(next_dup->start_seq, skip_to_seq)) { - skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); - skb = tcp_sacktag_walk(skb, sk, NULL, - next_dup->start_seq, next_dup->end_seq, - 1, fack_count, reord, flag); + skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq); + skb = tcp_sacktag_walk(skb, sk, NULL, state, + next_dup->start_seq, next_dup->end_seq, + 1); } return skb; @@ -1719,16 +1723,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; + struct tcp_sacktag_state state; struct sk_buff *skb; int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); int used_sacks; - int reord = tp->packets_out; - int flag = 0; int found_dup_sack = 0; - int fack_count; int i, j; int first_sack_index; + state.flag = 0; + state.reord = tp->packets_out; + if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) tp->fackets_out = 0; @@ -1738,7 +1743,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); if (found_dup_sack) - flag |= FLAG_DSACKING_ACK; + state.flag |= FLAG_DSACKING_ACK; /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -1807,7 +1812,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, } skb = tcp_write_queue_head(sk); - fack_count = 0; + state.fack_count = 0; i = 0; if (!tp->sacked_out) { @@ -1832,7 +1837,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, /* Event "B" in the comment above. */ if (after(end_seq, tp->high_seq)) - flag |= FLAG_DATA_LOST; + state.flag |= FLAG_DATA_LOST; /* Skip too early cached blocks */ while (tcp_sack_cache_ok(tp, cache) && @@ -1845,13 +1850,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, /* Head todo? */ if (before(start_seq, cache->start_seq)) { - skb = tcp_sacktag_skip(skb, sk, start_seq, - &fack_count); + skb = tcp_sacktag_skip(skb, sk, &state, + start_seq); skb = tcp_sacktag_walk(skb, sk, next_dup, + &state, start_seq, cache->start_seq, - dup_sack, &fack_count, - &reord, &flag); + dup_sack); } /* Rest of the block already fully processed? */ @@ -1859,9 +1864,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, goto advance_sp; skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, - cache->end_seq, - &fack_count, &reord, - &flag); + &state, + cache->end_seq); /* ...tail remains todo... */ if (tcp_highest_sack_seq(tp) == cache->end_seq) { @@ -1869,13 +1873,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (skb == NULL) break; - fack_count = tp->fackets_out; + state.fack_count = tp->fackets_out; cache++; goto walk; } - skb = tcp_sacktag_skip(skb, sk, cache->end_seq, - &fack_count); + skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq); /* Check overlap against next cached too (past this one already) */ cache++; continue; @@ -1885,20 +1888,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (skb == NULL) break; - fack_count = tp->fackets_out; + state.fack_count = tp->fackets_out; } - skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count); + skb = tcp_sacktag_skip(skb, sk, &state, start_seq); walk: - skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq, - dup_sack, &fack_count, &reord, &flag); + skb = tcp_sacktag_walk(skb, sk, next_dup, &state, + start_seq, end_seq, dup_sack); advance_sp: /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct * due to in-order walk */ if (after(end_seq, tp->frto_highmark)) - flag &= ~FLAG_ONLY_ORIG_SACKED; + state.flag &= ~FLAG_ONLY_ORIG_SACKED; i++; } @@ -1915,10 +1918,10 @@ advance_sp: tcp_verify_left_out(tp); - if ((reord < tp->fackets_out) && + if ((state.reord < tp->fackets_out) && ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) - tcp_update_reordering(sk, tp->fackets_out - reord, 0); + tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); out: @@ -1928,7 +1931,7 @@ out: WARN_ON((int)tp->retrans_out < 0); WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif - return flag; + return state.flag; } /* Limits sacked_out so that sum with lost_out isn't ever larger than -- cgit v1.2.3 From 50133161a83c9e5974d430cabd77d6430ca7d579 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:42:41 -0800 Subject: tcp: no need to pass prev skb around, reduces arg pressure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e25827719e70..2d9151c94368 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1376,12 +1376,12 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, return sacked; } -static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, - struct sk_buff *skb, +static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, struct tcp_sacktag_state *state, unsigned int pcount, int shifted, int mss) { struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev = tcp_write_queue_prev(sk, skb); BUG_ON(!pcount); @@ -1565,7 +1565,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if (!skb_shift(prev, skb, len)) goto fallback; - if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss)) + if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss)) goto out; /* Hole filled allows collapsing with the next as well, this is very @@ -1584,8 +1584,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, len = skb->len; if (skb_shift(prev, skb, len)) { pcount += tcp_skb_pcount(skb); - tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb), len, - mss); + tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss); } out: -- cgit v1.2.3 From ee6aac59505bcae5de1422c76956de62ac22170d Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:43:08 -0800 Subject: tcp: drop tcp_bound_rto, merge content of it tcp_set_rto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both are called by the same sites. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d9151c94368..215e38c1a233 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -701,13 +701,10 @@ static inline void tcp_set_rto(struct sock *sk) * all the algo is pure shit and should be replaced * with correct one. It is exactly, which we pretend to do. */ -} -/* NOTE: clamping at TCP_RTO_MIN is not required, current algo - * guarantees that rto is higher. - */ -static inline void tcp_bound_rto(struct sock *sk) -{ + /* NOTE: clamping at TCP_RTO_MIN is not required, current algo + * guarantees that rto is higher. + */ if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) inet_csk(sk)->icsk_rto = TCP_RTO_MAX; } @@ -928,7 +925,6 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } tcp_set_rto(sk); - tcp_bound_rto(sk); if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; tp->snd_cwnd = tcp_init_cwnd(tp, dst); @@ -3081,7 +3077,6 @@ static void tcp_ack_saw_tstamp(struct sock *sk, int flag) tcp_rtt_estimator(sk, seq_rtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; - tcp_bound_rto(sk); } static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) @@ -3101,7 +3096,6 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) tcp_rtt_estimator(sk, seq_rtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; - tcp_bound_rto(sk); } static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, -- cgit v1.2.3 From 41834b7332a1ad3f7b6e8bbd83e6ce63586f0b07 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:43:26 -0800 Subject: tcp: share code through function, not through copy-paste. :-) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 215e38c1a233..99b7ecbe8893 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3052,6 +3052,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tcp_xmit_retransmit_queue(sk); } +static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) +{ + tcp_rtt_estimator(sk, seq_rtt); + tcp_set_rto(sk); + inet_csk(sk)->icsk_backoff = 0; +} + /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Supersedes RFC1323) */ @@ -3073,10 +3080,8 @@ static void tcp_ack_saw_tstamp(struct sock *sk, int flag) * in window is lost... Voila. --ANK (010210) */ struct tcp_sock *tp = tcp_sk(sk); - const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; - tcp_rtt_estimator(sk, seq_rtt); - tcp_set_rto(sk); - inet_csk(sk)->icsk_backoff = 0; + + tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr); } static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) @@ -3093,9 +3098,7 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(sk, seq_rtt); - tcp_set_rto(sk); - inet_csk(sk)->icsk_backoff = 0; + tcp_valid_rtt_meas(sk, seq_rtt); } static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, -- cgit v1.2.3 From 726e07a8a38168266ac95d87736f9501a2d9e7b2 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:43:56 -0800 Subject: tcp: move some parts from tcp_write_xmit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 76f840917bcb..80147ba44141 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1530,13 +1530,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) int cwnd_quota; int result; - /* If we are closed, the bytes will have to remain here. - * In time closedown will finish, we empty the write queue and all - * will be happy. - */ - if (unlikely(sk->sk_state == TCP_CLOSE)) - return 0; - sent_pkts = 0; /* Do MTU probing. */ @@ -1608,10 +1601,18 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, { struct sk_buff *skb = tcp_send_head(sk); - if (skb) { - if (tcp_write_xmit(sk, cur_mss, nonagle)) - tcp_check_probe_timer(sk); - } + if (!skb) + return; + + /* If we are closed, the bytes will have to remain here. + * In time closedown will finish, we empty the write queue and + * all will be happy. + */ + if (unlikely(sk->sk_state == TCP_CLOSE)) + return; + + if (tcp_write_xmit(sk, cur_mss, nonagle)) + tcp_check_probe_timer(sk); } /* Send _single_ skb sitting at the send head. This function requires -- cgit v1.2.3 From d5dd9175bc12015ea4d2c1a9b6b15dfa645a3db9 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:48:55 -0800 Subject: tcp: use tcp_write_xmit also in tcp_push_one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcp_minshall_update is not significant difference since it only checks for not full-sized skb which is BUG'ed on the push_one path anyway. tcp_snd_test is tcp_nagle_test+tcp_cwnd_test+tcp_snd_wnd_test, just the order changed slightly. net/ipv4/tcp_output.c: tcp_snd_test | -89 tcp_mss_split_point | -91 tcp_may_send_now | +53 tcp_cwnd_validate | -98 tso_fragment | -239 __tcp_push_pending_frames | -1340 tcp_push_one | -146 7 functions changed, 53 bytes added, 2003 bytes removed, diff: -1950 net/ipv4/tcp_output.c: tcp_write_xmit | +1772 1 function changed, 1772 bytes added, diff: +1772 tcp_output.o.new: 8 functions changed, 1825 bytes added, 2003 bytes removed, diff: -178 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 54 ++++++++++++++++----------------------------------- 1 file changed, 17 insertions(+), 37 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 59505ce0c8fd..e65c114feea2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1519,7 +1519,8 @@ static int tcp_mtu_probe(struct sock *sk) * Returns 1, if no segments are in flight and we have queued segments, but * cannot send anything now because of SWS or another problem. */ -static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) +static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + int push_one, gfp_t gfp) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1529,11 +1530,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) sent_pkts = 0; - /* Do MTU probing. */ - if ((result = tcp_mtu_probe(sk)) == 0) { - return 0; - } else if (result > 0) { - sent_pkts = 1; + if (!push_one) { + /* Do MTU probing. */ + result = tcp_mtu_probe(sk); + if (!result) { + return 0; + } else if (result > 0) { + sent_pkts = 1; + } } while ((skb = tcp_send_head(sk))) { @@ -1555,7 +1559,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) nonagle : TCP_NAGLE_PUSH)))) break; } else { - if (tcp_tso_should_defer(sk, skb)) + if (!push_one && tcp_tso_should_defer(sk, skb)) break; } @@ -1570,7 +1574,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC))) + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; /* Advance the send_head. This one is sent out. @@ -1580,6 +1584,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) tcp_minshall_update(tp, mss_now, skb); sent_pkts++; + + if (push_one) + break; } if (likely(sent_pkts)) { @@ -1608,7 +1615,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; - if (tcp_write_xmit(sk, cur_mss, nonagle)) + if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) tcp_check_probe_timer(sk); } @@ -1617,38 +1624,11 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { - struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); - unsigned int tso_segs, cwnd_quota; BUG_ON(!skb || skb->len < mss_now); - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); - cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); - - if (likely(cwnd_quota)) { - unsigned int limit; - - BUG_ON(!tso_segs); - - limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) - limit = tcp_mss_split_point(sk, skb, mss_now, - cwnd_quota); - - if (skb->len > limit && - unlikely(tso_fragment(sk, skb, limit, mss_now))) - return; - - /* Send it out now. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; - - if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { - tcp_event_new_data_sent(sk, skb); - tcp_cwnd_validate(sk); - return; - } - } + tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation); } /* This function returns the amount that we can raise the -- cgit v1.2.3 From a2acde07711f7d8b19928245c555bce60f91482a Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:49:37 -0800 Subject: tcp: fix tso_should_defer in 64bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since jiffies is unsigned long, the types get expanded into that and after long enough time the difference will therefore always be > 1 (and that probably happens near boot as well as iirc the first jiffies wrap is scheduler close after boot to find out problems related to that early). This was originally noted by Bill Fink in Dec'07 but nobody never ended fixing it. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e65c114feea2..dda42f0bd7a3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1334,7 +1334,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) /* Defer for less than two clock ticks. */ if (tp->tso_deferred && - ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) + (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) goto send_now; in_flight = tcp_packets_in_flight(tp); -- cgit v1.2.3 From 857a6e0a4d8db0bbee685ccc97c6bd7987e7aede Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 14 Dec 2008 23:13:08 -0800 Subject: icsk: join error paths using goto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fe32255a9808..c7cda1ca8e65 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -344,16 +344,17 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); - return NULL; - } - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { - ip_rt_put(rt); - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); - return NULL; - } + if (ip_route_output_flow(net, &rt, &fl, sk, 0)) + goto no_route; + if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + goto route_err; return &rt->u.dst; + +route_err: + ip_rt_put(rt); +no_route: + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; } EXPORT_SYMBOL_GPL(inet_csk_route_req); -- cgit v1.2.3 From 73cc19f1556b95976934de236fd9043f7208844f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:41:09 -0800 Subject: ipv4: Add GRO infrastructure This patch adds GRO support for IPv4. The criteria for merging is more stringent than LRO, in particular, we require all fields in the IP header to be identical except for the length, ID and checksum. In addition, the ID must form an arithmetic sequence with a difference of one. The ID requirement might seem overly strict, however, most hardware TSO solutions already obey this rule. Linux itself also obeys this whether GSO is in use or not. In future we could relax this rule by storing the IDs (or rather making sure that we don't drop them when pulling the aggregate skb's tail). Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/protocol.h | 3 ++ net/ipv4/af_inet.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) (limited to 'net/ipv4') diff --git a/include/net/protocol.h b/include/net/protocol.h index 8d024d7cb741..cb2965aa1b62 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -39,6 +39,9 @@ struct net_protocol { int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); unsigned int no_policy:1, netns_ok:1; }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe03048c130d..a85595307fa7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -94,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -1241,6 +1242,100 @@ out: return segs; } +static struct sk_buff **inet_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct net_protocol *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct iphdr *iph; + int flush = 1; + int proto; + int id; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = ip_hdr(skb); + proto = iph->protocol & (MAX_INET_PROTOS - 1); + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (!ops || !ops->gro_receive) + goto out_unlock; + + if (iph->version != 4 || iph->ihl != 5) + goto out_unlock; + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto out_unlock; + + flush = ntohs(iph->tot_len) != skb->len || + iph->frag_off != htons(IP_DF); + id = ntohs(iph->id); + + for (p = *head; p; p = p->next) { + struct iphdr *iph2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + iph2 = ip_hdr(p); + + if (iph->protocol != iph2->protocol || + iph->tos != iph2->tos || + memcmp(&iph->saddr, &iph2->saddr, 8)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + /* All fields must match except length and checksum. */ + NAPI_GRO_CB(p)->flush |= + memcmp(&iph->frag_off, &iph2->frag_off, 4) || + (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id; + + NAPI_GRO_CB(p)->flush |= flush; + } + + NAPI_GRO_CB(skb)->flush |= flush; + __skb_pull(skb, sizeof(*iph)); + skb_reset_transport_header(skb); + + pp = ops->gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int inet_gro_complete(struct sk_buff *skb) +{ + struct net_protocol *ops; + struct iphdr *iph = ip_hdr(skb); + int proto = iph->protocol & (MAX_INET_PROTOS - 1); + int err = -ENOSYS; + __be16 newlen = htons(skb->len - skb_network_offset(skb)); + + csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (WARN_ON(!ops || !ops->gro_complete)) + goto out_unlock; + + err = ops->gro_complete(skb); + +out_unlock: + rcu_read_unlock(); + + return err; +} + int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) @@ -1407,6 +1502,8 @@ static struct packet_type ip_packet_type = { .func = ip_rcv, .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, }; static int __init inet_init(void) -- cgit v1.2.3 From bf296b125b21b8d558ceb6ec30bb4eba2730cd6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:43:36 -0800 Subject: tcp: Add GRO support This patch adds the TCP-specific portion of GRO. The criterion for merging is extremely strict (the TCP header must match exactly apart from the checksum) so as to allow refragmentation. Otherwise this is pretty much identical to LRO, except that we support the merging of ECN packets. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++++ net/ipv4/af_inet.c | 2 ++ net/ipv4/tcp.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 35 ++++++++++++++++++ 4 files changed, 143 insertions(+) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index de1e91d959b8..218235de8963 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1358,6 +1358,12 @@ extern void tcp_v4_destroy_sock(struct sock *sk); extern int tcp_v4_gso_send_check(struct sk_buff *skb); extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); +extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern int tcp_gro_complete(struct sk_buff *skb); +extern int tcp4_gro_complete(struct sk_buff *skb); #ifdef CONFIG_PROC_FS extern int tcp4_proc_init(void); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a85595307fa7..664ff0ee1c83 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1410,6 +1410,8 @@ static struct net_protocol tcp_protocol = { .err_handler = tcp_v4_err, .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 019243408623..1f3d52946b3b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2465,6 +2465,106 @@ out: } EXPORT_SYMBOL(tcp_tso_segment); +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int thlen; + unsigned int flags; + unsigned int total; + unsigned int mss = 1; + int flush = 1; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + th = tcp_hdr(skb); + __skb_pull(skb, thlen); + + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (th->source != th2->source || th->dest != th2->dest) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= flags & TCP_FLAG_CWR; + flush |= (flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); + flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; + flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); + + total = p->len; + mss = total; + if (skb_shinfo(p)->frag_list) + mss = skb_shinfo(p)->frag_list->len; + + flush |= skb->len > mss || skb->len <= 0; + flush |= ntohl(th2->seq) + total != ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = skb->len < mss; + flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | + TCP_FLAG_SYN | TCP_FLAG_FIN); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len; + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} + #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; static struct tcp_md5sig_pool **tcp_md5sig_pool; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 26b9030747cc..10172487921b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2346,6 +2346,41 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + /* fall through */ + case CHECKSUM_NONE: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return tcp_gro_receive(head, skb); +} +EXPORT_SYMBOL(tcp4_gro_receive); + +int tcp4_gro_complete(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} +EXPORT_SYMBOL(tcp4_gro_complete); + struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, -- cgit v1.2.3 From b1879204dda06a92a4ff55b681c99ee185180003 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 16 Dec 2008 01:15:11 -0800 Subject: ipmr: merge common code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also removes redundant skb->len < x check which can't be true once pskb_may_pull(skb, x) succeeded. $ diff-funcs pim_rcv ipmr.c ipmr.c pim_rcv_v1 --- ipmr.c:pim_rcv() +++ ipmr.c:pim_rcv_v1() @@ -1,22 +1,27 @@ -static int pim_rcv(struct sk_buff * skb) +int pim_rcv_v1(struct sk_buff * skb) { - struct pimreghdr *pim; + struct igmphdr *pim; struct iphdr *encap; struct net_device *reg_dev = NULL; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; - pim = (struct pimreghdr *)skb_transport_header(skb); - if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || - (pim->flags&PIM_NULL_REGISTER) || - (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && - csum_fold(skb_checksum(skb, 0, skb->len, 0)))) + pim = igmp_hdr(skb); + + if (!mroute_do_pim || + skb->len < sizeof(*pim) + sizeof(*encap) || + pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; - /* check if the inner packet is destined to mcast group */ encap = (struct iphdr *)(skb_transport_header(skb) + - sizeof(struct pimreghdr)); + sizeof(struct igmphdr)); + /* + Check that: + a. packet is really destinted to a multicast group + b. packet is not a NULL-REGISTER + c. packet is not truncated + */ if (!ipv4_is_multicast(encap->daddr) || encap->tot_len == 0 || ntohs(encap->tot_len) + sizeof(*pim) > skb->len) @@ -40,9 +45,9 @@ skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); + skb->dst = NULL; reg_dev->stats.rx_bytes += skb->len; reg_dev->stats.rx_packets++; - skb->dst = NULL; nf_reset(skb); netif_rx(skb); dev_put(reg_dev); $ codiff net/ipv4/ipmr.o.old net/ipv4/ipmr.o.new net/ipv4/ipmr.c: pim_rcv_v1 | -283 pim_rcv | -284 2 functions changed, 567 bytes removed net/ipv4/ipmr.c: __pim_rcv | +307 1 function changed, 307 bytes added net/ipv4/ipmr.o.new: 3 functions changed, 307 bytes added, 567 bytes removed, diff: -260 (Tested on x86_64). It seems that pimlen arg could be left out as well and eq-sizedness of structs trapped with BUILD_BUG_ON but I don't think that's more than a cosmetic flaw since there aren't that many args anyway. Compile tested. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 103 +++++++++++++++++++++----------------------------------- 1 file changed, 39 insertions(+), 64 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 244a624e0b18..14666449dc1c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1482,29 +1482,13 @@ dont_forward: return 0; } -#ifdef CONFIG_IP_PIMSM_V1 -/* - * Handle IGMP messages of PIMv1 - */ - -int pim_rcv_v1(struct sk_buff * skb) +#ifdef CONFIG_IP_PIMSM +static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) { - struct igmphdr *pim; - struct iphdr *encap; - struct net_device *reg_dev = NULL; - - if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) - goto drop; + struct net_device *reg_dev = NULL; + struct iphdr *encap; - pim = igmp_hdr(skb); - - if (!mroute_do_pim || - skb->len < sizeof(*pim) + sizeof(*encap) || - pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) - goto drop; - - encap = (struct iphdr *)(skb_transport_header(skb) + - sizeof(struct igmphdr)); + encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* Check that: a. packet is really destinted to a multicast group @@ -1513,8 +1497,8 @@ int pim_rcv_v1(struct sk_buff * skb) */ if (!ipv4_is_multicast(encap->daddr) || encap->tot_len == 0 || - ntohs(encap->tot_len) + sizeof(*pim) > skb->len) - goto drop; + ntohs(encap->tot_len) + pimlen > skb->len) + return 1; read_lock(&mrt_lock); if (reg_vif_num >= 0) @@ -1524,7 +1508,7 @@ int pim_rcv_v1(struct sk_buff * skb) read_unlock(&mrt_lock); if (reg_dev == NULL) - goto drop; + return 1; skb->mac_header = skb->network_header; skb_pull(skb, (u8*)encap - skb->data); @@ -1540,9 +1524,33 @@ int pim_rcv_v1(struct sk_buff * skb) nf_reset(skb); netif_rx(skb); dev_put(reg_dev); + return 0; - drop: - kfree_skb(skb); +} +#endif + +#ifdef CONFIG_IP_PIMSM_V1 +/* + * Handle IGMP messages of PIMv1 + */ + +int pim_rcv_v1(struct sk_buff * skb) +{ + struct igmphdr *pim; + + if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) + goto drop; + + pim = igmp_hdr(skb); + + if (!mroute_do_pim || + pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) + goto drop; + + if (__pim_rcv(skb, sizeof(*pim))) { +drop: + kfree_skb(skb); + } return 0; } #endif @@ -1551,10 +1559,8 @@ int pim_rcv_v1(struct sk_buff * skb) static int pim_rcv(struct sk_buff * skb) { struct pimreghdr *pim; - struct iphdr *encap; - struct net_device *reg_dev = NULL; - if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) + if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); @@ -1564,41 +1570,10 @@ static int pim_rcv(struct sk_buff * skb) csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; - /* check if the inner packet is destined to mcast group */ - encap = (struct iphdr *)(skb_transport_header(skb) + - sizeof(struct pimreghdr)); - if (!ipv4_is_multicast(encap->daddr) || - encap->tot_len == 0 || - ntohs(encap->tot_len) + sizeof(*pim) > skb->len) - goto drop; - - read_lock(&mrt_lock); - if (reg_vif_num >= 0) - reg_dev = vif_table[reg_vif_num].dev; - if (reg_dev) - dev_hold(reg_dev); - read_unlock(&mrt_lock); - - if (reg_dev == NULL) - goto drop; - - skb->mac_header = skb->network_header; - skb_pull(skb, (u8*)encap - skb->data); - skb_reset_network_header(skb); - skb->dev = reg_dev; - skb->protocol = htons(ETH_P_IP); - skb->ip_summed = 0; - skb->pkt_type = PACKET_HOST; - dst_release(skb->dst); - reg_dev->stats.rx_bytes += skb->len; - reg_dev->stats.rx_packets++; - skb->dst = NULL; - nf_reset(skb); - netif_rx(skb); - dev_put(reg_dev); - return 0; - drop: - kfree_skb(skb); + if (__pim_rcv(skb, sizeof(*pim))) { +drop: + kfree_skb(skb); + } return 0; } #endif -- cgit v1.2.3 From 6086ebca13ddc9cfaaa25248ba8ebef35103fb74 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Thu, 18 Dec 2008 19:54:22 -0800 Subject: tcp: Stop scaring users with "treason uncloaked!" The original message was unhelpful and extremely alarming to our poor users, despite its charm. Make it less frightening. Signed-off-by: Matt Mackall Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cc4e6d27dedc..0170e914f1b0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -299,14 +299,14 @@ static void tcp_retransmit_timer(struct sock *sk) #ifdef TCP_DEBUG struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %pI4:%u/%u shrinks window %u:%u. Repaired.\n", + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", &inet->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %pI6:%u/%u shrinks window %u:%u. Repaired.\n", + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", &np->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } -- cgit v1.2.3 From b4ee07df3d8121060200dbe1c6686a4e0682bee2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 25 Dec 2008 16:42:23 -0800 Subject: netns: igmp: allow IPPROTO_IGMP sockets in netns Looks like everything is already ready. Required for ebtables(8) for one thing. Also, required for ipmr per-netns (coming soon). (Benjamin) Signed-off-by: Alexey Dobriyan Acked-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 664ff0ee1c83..743f5542d65a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1402,6 +1402,7 @@ EXPORT_SYMBOL_GPL(snmp_mib_free); #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, + .netns_ok = 1, }; #endif -- cgit v1.2.3 From 7091e728c570157496e52296d2dbe816a6916836 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 25 Dec 2008 16:42:51 -0800 Subject: netns: igmp: make /proc/net/{igmp,mcfilter} per netns This patch makes the followinf proc entries per-netns: /proc/net/igmp /proc/net/mcfilter Signed-off-by: Alexey Dobriyan Acked-by: Daniel Lezcano Acked-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f92733e15c9f..9eb6219af615 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2275,6 +2275,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p #if defined(CONFIG_PROC_FS) struct igmp_mc_iter_state { + struct seq_net_private p; struct net_device *dev; struct in_device *in_dev; }; @@ -2283,11 +2284,12 @@ struct igmp_mc_iter_state { static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ip_mc_list *im = NULL; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, state->dev) { struct in_device *in_dev; in_dev = in_dev_get(state->dev); if (!in_dev) @@ -2408,7 +2410,7 @@ static const struct seq_operations igmp_mc_seq_ops = { static int igmp_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &igmp_mc_seq_ops, + return seq_open_net(inode, file, &igmp_mc_seq_ops, sizeof(struct igmp_mc_iter_state)); } @@ -2417,10 +2419,11 @@ static const struct file_operations igmp_mc_seq_fops = { .open = igmp_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; struct igmp_mcf_iter_state { + struct seq_net_private p; struct net_device *dev; struct in_device *idev; struct ip_mc_list *im; @@ -2430,13 +2433,14 @@ struct igmp_mcf_iter_state { static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ip_sf_list *psf = NULL; struct ip_mc_list *im = NULL; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); state->idev = NULL; state->im = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, state->dev) { struct in_device *idev; idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2567,7 +2571,7 @@ static const struct seq_operations igmp_mcf_seq_ops = { static int igmp_mcf_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &igmp_mcf_seq_ops, + return seq_open_net(inode, file, &igmp_mcf_seq_ops, sizeof(struct igmp_mcf_iter_state)); } @@ -2576,14 +2580,41 @@ static const struct file_operations igmp_mcf_seq_fops = { .open = igmp_mcf_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -int __init igmp_mc_proc_init(void) +static int igmp_net_init(struct net *net) { - proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops); - proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + struct proc_dir_entry *pde; + + pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + if (!pde) + goto out_igmp; + pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + if (!pde) + goto out_mcfilter; return 0; + +out_mcfilter: + proc_net_remove(net, "igmp"); +out_igmp: + return -ENOMEM; +} + +static void igmp_net_exit(struct net *net) +{ + proc_net_remove(net, "mcfilter"); + proc_net_remove(net, "igmp"); +} + +static struct pernet_operations igmp_net_ops = { + .init = igmp_net_init, + .exit = igmp_net_exit, +}; + +int __init igmp_mc_proc_init(void) +{ + return register_pernet_subsys(&igmp_net_ops); } #endif -- cgit v1.2.3 From 64ff3b938ec6782e6585a83d5459b98b0c3f6eb8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 25 Dec 2008 17:12:58 -0800 Subject: tcp: Always set urgent pointer if it's beyond snd_nxt Our TCP stack does not set the urgent flag if the urgent pointer does not fit in 16 bits, i.e., if it is more than 64K from the sequence number of a packet. This behaviour is different from the BSDs, and clearly contradicts the purpose of urgent mode, which is to send the notification (though not necessarily the associated data) as soon as possible. Our current behaviour may in fact delay the urgent notification indefinitely if the receiver window does not open up. Simply matching BSD however may break legacy applications which incorrectly rely on the out-of-band delivery of urgent data, and conversely the in-band delivery of non-urgent data. Alexey Kuznetsov suggested a safe solution of following BSD only if the urgent pointer itself has not yet been transmitted. This way we guarantee that when the remote end sees the packet with non-urgent data marked as urgent due to wrap-around we would have advanced the urgent pointer beyond, either to the actual urgent data or to an as-yet untransmitted packet. The only potential downside is that applications on the remote end may see multiple SIGURG notifications. However, this would occur anyway with other TCP stacks. More importantly, the outcome of such a duplicate notification is likely to be harmless since the signal itself does not carry any information other than the fact that we're in urgent mode. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dda42f0bd7a3..557fe16cbfb0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = 0; /* The urg_mode check is necessary during a below snd_una win probe */ - if (unlikely(tcp_urg_mode(tp) && - between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { - th->urg_ptr = htons(tp->snd_up - tcb->seq); - th->urg = 1; + if (unlikely(tcp_urg_mode(tp))) { + if (between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF)) { + th->urg_ptr = htons(tp->snd_up - tcb->seq); + th->urg = 1; + } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { + th->urg_ptr = 0xFFFF; + th->urg = 1; + } } tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); -- cgit v1.2.3 From f2712fd0b4097e8385bdb75c0ebd285a057fe299 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Dec 2008 01:31:18 -0800 Subject: ipsec: Remove useless ret variable This patch removes a useless ret variable from the IPv4 ESP/UDP decapsulation code. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/xfrm4_input.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 390dcb1354a5..4ec2162a437e 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -78,7 +78,6 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct udphdr *uh; struct iphdr *iph; int iphlen, len; - int ret; __u8 *udpdata; __be32 *udpdata32; @@ -152,8 +151,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) skb_reset_transport_header(skb); /* process ESP */ - ret = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - return ret; + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); drop: kfree_skb(skb); -- cgit v1.2.3 From 0f23174aa8c1aa7a2a6050a72a60d290ef9ee578 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 29 Dec 2008 12:23:42 +0000 Subject: cpumask: prepare for iterators to only go to nr_cpu_ids/nr_cpumask_bits: net In future all cpumask ops will only be valid (in general) for bit numbers < nr_cpu_ids. So use that instead of NR_CPUS in iterators and other comparisons. This is always safe: no cpu number can be >= nr_cpu_ids, and nr_cpu_ids is initialized to NR_CPUS at boot. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Ingo Molnar Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 ++-- net/ipv4/route.c | 4 ++-- net/netfilter/nf_conntrack_standalone.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9c3717a23cf7..f66c58df8953 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2414,7 +2414,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -2429,7 +2429,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct neigh_table *tbl = pde->data; int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 313ebf00ee36..6ba5c557690c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -291,7 +291,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -306,7 +306,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct net *net = seq_file_net(seq); int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 77bfba975959..97f71153584f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -429,7 +429,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -442,7 +442,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f37b9b74c6a8..4da54b0b9233 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -200,7 +200,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; @@ -215,7 +215,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct net *net = seq_file_net(seq); int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; -- cgit v1.2.3 From eb4dea5853046727bfbb579f0c9a8cae7369f7c6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Dec 2008 23:04:08 -0800 Subject: net: Fix percpu counters deadlock When we converted the protocol atomic counters such as the orphan count and the total socket count deadlocks were introduced due to the mismatch in BH status of the spots that used the percpu counter operations. Based on the diagnosis and patch by Peter Zijlstra, this patch fixes these issues by disabling BH where we may be in process context. Reported-by: Jeff Kirsher Tested-by: Ingo Molnar Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/dccp/proto.c | 3 ++- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/proc.c | 13 +++++++++---- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_ipv4.c | 3 +++ net/ipv6/tcp_ipv6.c | 3 +++ 6 files changed, 21 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index d5c2bacb713c..1747ccae8e8d 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -964,7 +964,6 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - percpu_counter_inc(sk->sk_prot->orphan_count); /* * It is the last release_sock in its life. It will remove backlog. @@ -978,6 +977,8 @@ adjudge_to_death: bh_lock_sock(sk); WARN_ON(sock_owned_by_user(sk)); + percpu_counter_inc(sk->sk_prot->orphan_count); + /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c7cda1ca8e65..f26ab38680de 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -633,8 +633,6 @@ void inet_csk_listen_stop(struct sock *sk) acc_req = req->dl_next; - percpu_counter_inc(sk->sk_prot->orphan_count); - local_bh_disable(); bh_lock_sock(child); WARN_ON(sock_owned_by_user(child)); @@ -644,6 +642,8 @@ void inet_csk_listen_stop(struct sock *sk) sock_orphan(child); + percpu_counter_inc(sk->sk_prot->orphan_count); + inet_csk_destroy_sock(child); bh_unlock_sock(child); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 614958b7c276..eb62e58bff79 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -50,13 +51,17 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; + int orphans, sockets; + + local_bh_disable(); + orphans = percpu_counter_sum_positive(&tcp_orphan_count), + sockets = percpu_counter_sum_positive(&tcp_sockets_allocated), + local_bh_enable(); socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", - sock_prot_inuse_get(net, &tcp_prot), - (int)percpu_counter_sum_positive(&tcp_orphan_count), - tcp_death_row.tw_count, - (int)percpu_counter_sum_positive(&tcp_sockets_allocated), + sock_prot_inuse_get(net, &tcp_prot), orphans, + tcp_death_row.tw_count, sockets, atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1f3d52946b3b..f28acf11fc67 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1836,7 +1836,6 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - percpu_counter_inc(sk->sk_prot->orphan_count); /* It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); @@ -1849,6 +1848,8 @@ adjudge_to_death: bh_lock_sock(sk); WARN_ON(sock_owned_by_user(sk)); + percpu_counter_inc(sk->sk_prot->orphan_count); + /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 10172487921b..9d839fa9331e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -51,6 +51,7 @@ */ +#include #include #include #include @@ -1797,7 +1798,9 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; + local_bh_disable(); percpu_counter_inc(&tcp_sockets_allocated); + local_bh_enable(); return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8702b06cb60a..e8b8337a8310 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -23,6 +23,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include #include #include @@ -1830,7 +1831,9 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; + local_bh_disable(); percpu_counter_inc(&tcp_sockets_allocated); + local_bh_enable(); return 0; } -- cgit v1.2.3