diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-03 07:53:45 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-03 07:53:45 +0400 |
commit | cd6362befe4cc7bf589a5236d2a780af2d47bcc9 (patch) | |
tree | 3bd4e13ec3f92a00dc4f6c3d65e820b54dbfe46e /net/xfrm | |
parent | 0f1b1e6d73cb989ce2c071edc57deade3b084dfe (diff) | |
parent | b1586f099ba897542ece36e8a23c1a62907261ef (diff) | |
download | linux-cd6362befe4cc7bf589a5236d2a780af2d47bcc9.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Here is my initial pull request for the networking subsystem during
this merge window:
1) Support for ESN in AH (RFC 4302) from Fan Du.
2) Add full kernel doc for ethtool command structures, from Ben
Hutchings.
3) Add BCM7xxx PHY driver, from Florian Fainelli.
4) Export computed TCP rate information in netlink socket dumps, from
Eric Dumazet.
5) Allow IPSEC SA to be dumped partially using a filter, from Nicolas
Dichtel.
6) Convert many drivers to pci_enable_msix_range(), from Alexander
Gordeev.
7) Record SKB timestamps more efficiently, from Eric Dumazet.
8) Switch to microsecond resolution for TCP round trip times, also
from Eric Dumazet.
9) Clean up and fix 6lowpan fragmentation handling by making use of
the existing inet_frag api for it's implementation.
10) Add TX grant mapping to xen-netback driver, from Zoltan Kiss.
11) Auto size SKB lengths when composing netlink messages based upon
past message sizes used, from Eric Dumazet.
12) qdisc dumps can take a long time, add a cond_resched(), From Eric
Dumazet.
13) Sanitize netpoll core and drivers wrt. SKB handling semantics.
Get rid of never-used-in-tree netpoll RX handling. From Eric W
Biederman.
14) Support inter-address-family and namespace changing in VTI tunnel
driver(s). From Steffen Klassert.
15) Add Altera TSE driver, from Vince Bridgers.
16) Optimizing csum_replace2() so that it doesn't adjust the checksum
by checksumming the entire header, from Eric Dumazet.
17) Expand BPF internal implementation for faster interpreting, more
direct translations into JIT'd code, and much cleaner uses of BPF
filtering in non-socket ocntexts. From Daniel Borkmann and Alexei
Starovoitov"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1976 commits)
netpoll: Use skb_irq_freeable to make zap_completion_queue safe.
net: Add a test to see if a skb is freeable in irq context
qlcnic: Fix build failure due to undefined reference to `vxlan_get_rx_port'
net: ptp: move PTP classifier in its own file
net: sxgbe: make "core_ops" static
net: sxgbe: fix logical vs bitwise operation
net: sxgbe: sxgbe_mdio_register() frees the bus
Call efx_set_channels() before efx->type->dimension_resources()
xen-netback: disable rogue vif in kthread context
net/mlx4: Set proper build dependancy with vxlan
be2net: fix build dependency on VxLAN
mac802154: make csma/cca parameters per-wpan
mac802154: allow only one WPAN to be up at any given time
net: filter: minor: fix kdoc in __sk_run_filter
netlink: don't compare the nul-termination in nla_strcmp
can: c_can: Avoid led toggling for every packet.
can: c_can: Simplify TX interrupt cleanup
can: c_can: Store dlc private
can: c_can: Reduce register access
can: c_can: Make the code readable
...
Diffstat (limited to 'net/xfrm')
-rw-r--r-- | net/xfrm/xfrm_input.c | 97 | ||||
-rw-r--r-- | net/xfrm/xfrm_policy.c | 40 | ||||
-rw-r--r-- | net/xfrm/xfrm_state.c | 72 | ||||
-rw-r--r-- | net/xfrm/xfrm_user.c | 37 |
4 files changed, 195 insertions, 51 deletions
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 6c7ac016ce3a..85d1d4764612 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -16,6 +16,81 @@ static struct kmem_cache *secpath_cachep __read_mostly; +static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); +static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; + +int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) +{ + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); + if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) + err = -ENOBUFS; + else + rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_input_afinfo_lock); + return err; +} +EXPORT_SYMBOL(xfrm_input_register_afinfo); + +int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) +{ + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); + if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { + if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) + err = -EINVAL; + else + RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL); + } + spin_unlock_bh(&xfrm_input_afinfo_lock); + synchronize_rcu(); + return err; +} +EXPORT_SYMBOL(xfrm_input_unregister_afinfo); + +static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +{ + struct xfrm_input_afinfo *afinfo; + + if (unlikely(family >= NPROTO)) + return NULL; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_input_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); + return afinfo; +} + +static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo) +{ + rcu_read_unlock(); +} + +static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, + int err) +{ + int ret; + struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + + if (!afinfo) + return -EAFNOSUPPORT; + + ret = afinfo->callback(skb, protocol, err); + xfrm_input_put_afinfo(afinfo); + + return ret; +} + void __secpath_destroy(struct sec_path *sp) { int i; @@ -108,7 +183,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) int err; __be32 seq; __be32 seq_hi; - struct xfrm_state *x; + struct xfrm_state *x = NULL; xfrm_address_t *daddr; struct xfrm_mode *inner_mode; unsigned int family; @@ -120,9 +195,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) async = 1; x = xfrm_input_state(skb); seq = XFRM_SKB_CB(skb)->seq.input.low; + family = x->outer_mode->afinfo->family; goto resume; } + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); + family = XFRM_SPI_SKB_CB(skb)->family; + /* Allocate new secpath or COW existing one. */ if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { struct sec_path *sp; @@ -137,10 +217,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp = sp; } - daddr = (xfrm_address_t *)(skb_network_header(skb) + - XFRM_SPI_SKB_CB(skb)->daddroff); - family = XFRM_SPI_SKB_CB(skb)->family; - seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); @@ -162,6 +238,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; + if (xfrm_tunnel_check(skb, x, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + spin_lock(&x->lock); if (unlikely(x->km.state == XFRM_STATE_ACQ)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); @@ -201,7 +282,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (nexthdr == -EINPROGRESS) return 0; - resume: spin_lock(&x->lock); if (nexthdr <= 0) { @@ -263,6 +343,10 @@ resume: } } while (!err); + err = xfrm_rcv_cb(skb, family, x->type->proto, 0); + if (err) + goto drop; + nf_reset(skb); if (decaps) { @@ -276,6 +360,7 @@ resume: drop_unlock: spin_unlock(&x->lock); drop: + xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1); kfree_skb(skb); return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1d5c7bf29938..f02f511b7107 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,8 +39,6 @@ #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 -static struct dst_entry *xfrm_policy_sk_bundles; - static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; @@ -661,7 +659,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); net->xfrm.policy_count[dir]++; - atomic_inc(&flow_cache_genid); + atomic_inc(&net->xfrm.flow_cache_genid); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) @@ -2109,13 +2107,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } - dst_hold(&xdst->u.dst); - - spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); - xdst->u.dst.next = xfrm_policy_sk_bundles; - xfrm_policy_sk_bundles = &xdst->u.dst; - spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); - route = xdst->route; } } @@ -2549,33 +2540,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -static void __xfrm_garbage_collect(struct net *net) -{ - struct dst_entry *head, *next; - - spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); - head = xfrm_policy_sk_bundles; - xfrm_policy_sk_bundles = NULL; - spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); - - while (head) { - next = head->next; - dst_free(head); - head = next; - } -} - void xfrm_garbage_collect(struct net *net) { - flow_cache_flush(); - __xfrm_garbage_collect(net); + flow_cache_flush(net); } EXPORT_SYMBOL(xfrm_garbage_collect); static void xfrm_garbage_collect_deferred(struct net *net) { - flow_cache_flush_deferred(); - __xfrm_garbage_collect(net); + flow_cache_flush_deferred(net); } static void xfrm_init_pmtu(struct dst_entry *dst) @@ -2940,15 +2913,19 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; + rv = flow_cache_init(net); + if (rv < 0) + goto out; /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); rwlock_init(&net->xfrm.xfrm_policy_lock); - spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); return 0; +out: + xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -2961,6 +2938,7 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { + flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 40f1b3e92e78..8e9c781a6bba 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -161,6 +161,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); +bool km_is_alive(const struct km_event *c); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static DEFINE_SPINLOCK(xfrm_type_lock); @@ -788,6 +789,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; unsigned short encap_family = tmpl->encap_family; + struct km_event c; to_put = NULL; @@ -832,6 +834,17 @@ found: error = -EEXIST; goto out; } + + c.net = net; + /* If the KMs have no listeners (yet...), avoid allocating an SA + * for each and every packet - garbage collection might not + * handle the flood. + */ + if (!km_is_alive(&c)) { + error = -ESRCH; + goto out; + } + x = xfrm_state_alloc(net); if (x == NULL) { error = -ENOMEM; @@ -1135,10 +1148,9 @@ out: EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE -static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) +static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) { struct net *net = xs_net(orig); - int err = -ENOMEM; struct xfrm_state *x = xfrm_state_alloc(net); if (!x) goto out; @@ -1192,15 +1204,13 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) } if (orig->replay_esn) { - err = xfrm_replay_clone(x, orig); - if (err) + if (xfrm_replay_clone(x, orig)) goto error; } memcpy(&x->mark, &orig->mark, sizeof(x->mark)); - err = xfrm_init_state(x); - if (err) + if (xfrm_init_state(x) < 0) goto error; x->props.flags = orig->props.flags; @@ -1218,8 +1228,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) error: xfrm_state_put(x); out: - if (errp) - *errp = err; return NULL; } @@ -1274,9 +1282,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m) { struct xfrm_state *xc; - int err; - xc = xfrm_state_clone(x, &err); + xc = xfrm_state_clone(x); if (!xc) return NULL; @@ -1289,7 +1296,7 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, state is to be updated as it is a part of triplet */ xfrm_state_insert(xc); } else { - if ((err = xfrm_state_add(xc)) < 0) + if (xfrm_state_add(xc) < 0) goto error; } @@ -1601,6 +1608,23 @@ unlock: } EXPORT_SYMBOL(xfrm_alloc_spi); +static bool __xfrm_state_filter_match(struct xfrm_state *x, + struct xfrm_address_filter *filter) +{ + if (filter) { + if ((filter->family == AF_INET || + filter->family == AF_INET6) && + x->props.family != filter->family) + return false; + + return addr_match(&x->props.saddr, &filter->saddr, + filter->splen) && + addr_match(&x->id.daddr, &filter->daddr, + filter->dplen); + } + return true; +} + int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) @@ -1623,6 +1647,8 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, state = container_of(x, struct xfrm_state, km); if (!xfrm_id_proto_match(state->id.proto, walk->proto)) continue; + if (!__xfrm_state_filter_match(state, walk->filter)) + continue; err = func(state, walk->seq, data); if (err) { list_move_tail(&walk->all, &x->all); @@ -1641,17 +1667,21 @@ out: } EXPORT_SYMBOL(xfrm_state_walk); -void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto, + struct xfrm_address_filter *filter) { INIT_LIST_HEAD(&walk->all); walk->proto = proto; walk->state = XFRM_STATE_DEAD; walk->seq = 0; + walk->filter = filter; } EXPORT_SYMBOL(xfrm_state_walk_init); void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net) { + kfree(walk->filter); + if (list_empty(&walk->all)) return; @@ -1804,6 +1834,24 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address } EXPORT_SYMBOL(km_report); +bool km_is_alive(const struct km_event *c) +{ + struct xfrm_mgr *km; + bool is_alive = false; + + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + if (km->is_alive && km->is_alive(c)) { + is_alive = true; + break; + } + } + rcu_read_unlock(); + + return is_alive; +} +EXPORT_SYMBOL(km_is_alive); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2f7ddc3a59b4..8f131c10a6f3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -137,7 +137,8 @@ static inline int verify_replay(struct xfrm_usersa_info *p, if (!rt) return 0; - if (p->id.proto != IPPROTO_ESP) + /* As only ESP and AH support ESN feature. */ + if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) return -EINVAL; if (p->replay_window != 0) @@ -881,6 +882,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb) return 0; } +static const struct nla_policy xfrma_policy[XFRMA_MAX+1]; static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -896,8 +898,31 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; if (!cb->args[0]) { + struct nlattr *attrs[XFRMA_MAX+1]; + struct xfrm_address_filter *filter = NULL; + u8 proto = 0; + int err; + cb->args[0] = 1; - xfrm_state_walk_init(walk, 0); + + err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, + xfrma_policy); + if (err < 0) + return err; + + if (attrs[XFRMA_ADDRESS_FILTER]) { + filter = kmalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return -ENOMEM; + + memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]), + sizeof(*filter)); + } + + if (attrs[XFRMA_PROTO]) + proto = nla_get_u8(attrs[XFRMA_PROTO]); + + xfrm_state_walk_init(walk, proto, filter); } (void) xfrm_state_walk(net, walk, dump_one_state, &info); @@ -2303,6 +2328,8 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_TFCPAD] = { .type = NLA_U32 }, [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, + [XFRMA_PROTO] = { .type = NLA_U8 }, + [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, }; static const struct xfrm_link { @@ -2976,6 +3003,11 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); } +static bool xfrm_is_alive(const struct km_event *c) +{ + return (bool)xfrm_acquire_is_on(c->net); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, @@ -2985,6 +3017,7 @@ static struct xfrm_mgr netlink_mgr = { .report = xfrm_send_report, .migrate = xfrm_send_migrate, .new_mapping = xfrm_send_mapping, + .is_alive = xfrm_is_alive, }; static int __net_init xfrm_user_net_init(struct net *net) |