diff options
Diffstat (limited to 'net')
87 files changed, 1407 insertions, 687 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3d59c9bf8feb..3bccdd12a264 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -510,7 +510,8 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); - netif_carrier_on(dev); + if (netif_carrier_ok(real_dev)) + netif_carrier_on(dev); return 0; clear_allmulti: diff --git a/net/Kconfig b/net/Kconfig index e24fa0873f32..e926884c1675 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -213,10 +213,11 @@ source "net/phonet/Kconfig" source "net/ieee802154/Kconfig" source "net/sched/Kconfig" source "net/dcb/Kconfig" +source "net/dns_resolver/Kconfig" config RPS boolean - depends on SMP && SYSFS + depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y menu "Network testing" diff --git a/net/Makefile b/net/Makefile index 41d420070a38..ea60fbce9b1b 100644 --- a/net/Makefile +++ b/net/Makefile @@ -67,3 +67,4 @@ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o endif obj-$(CONFIG_WIMAX) += wimax/ +obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 2ce79df00680..c7d81436213d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -112,8 +112,8 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) if (sk) { sock_hold(sk); ax25_destroy_socket(ax25); - sock_put(sk); bh_unlock_sock(sk); + sock_put(sk); } else ax25_destroy_socket(ax25); return; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2c911c0759c2..137f23259a93 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -162,8 +162,8 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) if (tmp) { memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); atomic_set(&tmp->use, 1); - nf_bridge_put(nf_bridge); } + nf_bridge_put(nf_bridge); nf_bridge = tmp; } return nf_bridge; @@ -761,9 +761,11 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) { if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && - !skb_is_gso(skb)) + !skb_is_gso(skb)) { + /* BUG: Should really parse the IP options here. */ + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); return ip_fragment(skb, br_dev_queue_push_xmit); - else + } else return br_dev_queue_push_xmit(skb); } #else diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 01f238ff2346..c49a6695793a 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -9,7 +9,7 @@ #include <linux/hardirq.h> #include <net/caif/cfpkt.h> -#define PKT_PREFIX 16 +#define PKT_PREFIX 48 #define PKT_POSTFIX 2 #define PKT_LEN_WHEN_EXTENDING 128 #define PKT_ERROR(pkt, errmsg) do { \ diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index eb1602022ac0..9a699242d104 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -7,7 +7,7 @@ #include <linux/stddef.h> #include <linux/spinlock.h> #include <linux/slab.h> -#include <linux/unaligned/le_byteshift.h> +#include <asm/unaligned.h> #include <net/caif/caif_layer.h> #include <net/caif/cfsrvl.h> #include <net/caif/cfpkt.h> diff --git a/net/can/bcm.c b/net/can/bcm.c index 9c65e9deb9c3..08ffe9e4be20 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -60,6 +60,13 @@ #include <net/sock.h> #include <net/net_namespace.h> +/* + * To send multiple CAN frame content within TX_SETUP or to filter + * CAN messages with multiplex index within RX_SETUP, the number of + * different filters is limited to 256 due to the one byte index value. + */ +#define MAX_NFRAMES 256 + /* use of last_frames[index].can_dlc */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ @@ -89,16 +96,16 @@ struct bcm_op { struct list_head list; int ifindex; canid_t can_id; - int flags; + u32 flags; unsigned long frames_abs, frames_filtered; struct timeval ival1, ival2; struct hrtimer timer, thrtimer; struct tasklet_struct tsklet, thrtsklet; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; - int count; - int nframes; - int currframe; + u32 count; + u32 nframes; + u32 currframe; struct can_frame *frames; struct can_frame *last_frames; struct can_frame sframe; @@ -175,7 +182,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, "rx_op: %03X %-5s ", op->can_id, bcm_proc_getifname(ifname, op->ifindex)); - seq_printf(m, "[%d]%c ", op->nframes, + seq_printf(m, "[%u]%c ", op->nframes, (op->flags & RX_CHECK_DLC)?'d':' '); if (op->kt_ival1.tv64) seq_printf(m, "timeo=%lld ", @@ -198,7 +205,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) list_for_each_entry(op, &bo->tx_ops, list) { - seq_printf(m, "tx_op: %03X %s [%d] ", + seq_printf(m, "tx_op: %03X %s [%u] ", op->can_id, bcm_proc_getifname(ifname, op->ifindex), op->nframes); @@ -283,7 +290,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, struct can_frame *firstframe; struct sockaddr_can *addr; struct sock *sk = op->sk; - int datalen = head->nframes * CFSIZ; + unsigned int datalen = head->nframes * CFSIZ; int err; skb = alloc_skb(sizeof(*head) + datalen, gfp_any()); @@ -468,7 +475,7 @@ rx_changed_settime: * bcm_rx_cmp_to_index - (bit)compares the currently received data to formerly * received data stored in op->last_frames[] */ -static void bcm_rx_cmp_to_index(struct bcm_op *op, int index, +static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index, const struct can_frame *rxdata) { /* @@ -554,7 +561,8 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) /* * bcm_rx_do_flush - helper for bcm_rx_thr_flush */ -static inline int bcm_rx_do_flush(struct bcm_op *op, int update, int index) +static inline int bcm_rx_do_flush(struct bcm_op *op, int update, + unsigned int index) { if ((op->last_frames) && (op->last_frames[index].can_dlc & RX_THR)) { if (update) @@ -575,7 +583,7 @@ static int bcm_rx_thr_flush(struct bcm_op *op, int update) int updated = 0; if (op->nframes > 1) { - int i; + unsigned int i; /* for MUX filter we start at index 1 */ for (i = 1; i < op->nframes; i++) @@ -624,7 +632,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) { struct bcm_op *op = (struct bcm_op *)data; const struct can_frame *rxframe = (struct can_frame *)skb->data; - int i; + unsigned int i; /* disable timeout */ hrtimer_cancel(&op->timer); @@ -822,14 +830,15 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, { struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; - int i, err; + unsigned int i; + int err; /* we need a real device to send frames */ if (!ifindex) return -ENODEV; - /* we need at least one can_frame */ - if (msg_head->nframes < 1) + /* check nframes boundaries - we need at least one can_frame */ + if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES) return -EINVAL; /* check the given can_id */ @@ -993,6 +1002,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, msg_head->nframes = 0; } + /* the first element contains the mux-mask => MAX_NFRAMES + 1 */ + if (msg_head->nframes > MAX_NFRAMES + 1) + return -EINVAL; + if ((msg_head->flags & RX_RTR_FRAME) && ((msg_head->nframes != 1) || (!(msg_head->can_id & CAN_RTR_FLAG)))) diff --git a/net/core/dev.c b/net/core/dev.c index 1ae654391442..660dd41aaaa6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2058,16 +2058,16 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { int queue_index; - struct sock *sk = skb->sk; + const struct net_device_ops *ops = dev->netdev_ops; - queue_index = sk_tx_queue_get(sk); - if (queue_index < 0) { - const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } else { + struct sock *sk = skb->sk; + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { - if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); - } else { queue_index = 0; if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); @@ -3143,7 +3143,7 @@ pull: put_page(skb_shinfo(skb)->frags[0].page); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, - --skb_shinfo(skb)->nr_frags); + --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); } } @@ -4845,7 +4845,7 @@ static void rollback_registered_many(struct list_head *head) dev = list_first_entry(head, struct net_device, unreg_list); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - synchronize_net(); + rcu_barrier(); list_for_each_entry(dev, head, unreg_list) dev_put(dev); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9fbe7f7429b0..6743146e4d6b 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -232,7 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); if (!elist[idx].timer.function) { INIT_LIST_HEAD(&elist[idx].list); setup_timer(&elist[idx].timer, est_timer, idx); @@ -243,7 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, list_add_rcu(&est->list, &elist[idx].list); gen_add_node(est); - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); return 0; } @@ -270,7 +270,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, { struct gen_estimator *e; - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); while ((e = gen_find_node(bstats, rate_est))) { rb_erase(&e->node, &est_root); @@ -281,7 +281,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, list_del_rcu(&e->list); call_rcu(&e->e_rcu, __gen_kill_estimator); } - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); } EXPORT_SYMBOL(gen_kill_estimator); @@ -320,9 +320,9 @@ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, ASSERT_RTNL(); - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); res = gen_find_node(bstats, rate_est) != NULL; - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); return res; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c3..c83b421341c0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2573,6 +2573,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) __copy_skb_header(nskb, skb); nskb->mac_len = skb->mac_len; + /* nskb and skb might have different headroom */ + if (nskb->ip_summed == CHECKSUM_PARTIAL) + nskb->csum_start += skb_headroom(nskb) - headroom; + skb_reset_mac_header(nskb); skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + @@ -2703,7 +2707,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) return -E2BIG; headroom = skb_headroom(p); - nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); + nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); if (unlikely(!nskb)) return -ENOMEM; diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig new file mode 100644 index 000000000000..50d49f7e0472 --- /dev/null +++ b/net/dns_resolver/Kconfig @@ -0,0 +1,27 @@ +# +# Configuration for DNS Resolver +# +config DNS_RESOLVER + tristate "DNS Resolver support" + depends on NET && KEYS + help + Saying Y here will include support for the DNS Resolver key type + which can be used to make upcalls to perform DNS lookups in + userspace. + + DNS Resolver is used to query DNS server for information. Examples + being resolving a UNC hostname element to an IP address for CIFS or + performing a DNS query for AFSDB records so that AFS can locate a + cell's volume location database servers. + + DNS Resolver is used by the CIFS and AFS modules, and would support + SMB2 later. DNS Resolver is supported by the userspace upcall + helper "/sbin/dns.resolver" via /etc/request-key.conf. + + See <file:Documentation/networking/dns_resolver.txt> for further + information. + + To compile this as a module, choose M here: the module will be called + dnsresolver. + + If unsure, say N. diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile new file mode 100644 index 000000000000..c0ef4e71dc49 --- /dev/null +++ b/net/dns_resolver/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Linux DNS Resolver. +# + +obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o + +dns_resolver-objs := dns_key.o dns_query.o diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c new file mode 100644 index 000000000000..739435a6af39 --- /dev/null +++ b/net/dns_resolver/dns_key.c @@ -0,0 +1,293 @@ +/* Key type used to cache DNS lookups made by the kernel + * + * See Documentation/networking/dns_resolver.txt + * + * Copyright (c) 2007 Igor Mammedov + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/keyctl.h> +#include <linux/err.h> +#include <linux/seq_file.h> +#include <keys/dns_resolver-type.h> +#include <keys/user-type.h> +#include "internal.h" + +MODULE_DESCRIPTION("DNS Resolver"); +MODULE_AUTHOR("Wang Lei"); +MODULE_LICENSE("GPL"); + +unsigned dns_resolver_debug; +module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(debug, "DNS Resolver debugging mask"); + +const struct cred *dns_resolver_cache; + +#define DNS_ERRORNO_OPTION "dnserror" + +/* + * Instantiate a user defined key for dns_resolver. + * + * The data must be a NUL-terminated string, with the NUL char accounted in + * datalen. + * + * If the data contains a '#' characters, then we take the clause after each + * one to be an option of the form 'key=value'. The actual data of interest is + * the string leading up to the first '#'. For instance: + * + * "ip1,ip2,...#foo=bar" + */ +static int +dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) +{ + struct user_key_payload *upayload; + unsigned long derrno; + int ret; + size_t result_len = 0; + const char *data = _data, *end, *opt; + + kenter("%%%d,%s,'%s',%zu", + key->serial, key->description, data, datalen); + + if (datalen <= 1 || !data || data[datalen - 1] != '\0') + return -EINVAL; + datalen--; + + /* deal with any options embedded in the data */ + end = data + datalen; + opt = memchr(data, '#', datalen); + if (!opt) { + /* no options: the entire data is the result */ + kdebug("no options"); + result_len = datalen; + } else { + const char *next_opt; + + result_len = opt - data; + opt++; + kdebug("options: '%s'", opt); + do { + const char *eq; + int opt_len, opt_nlen, opt_vlen, tmp; + + next_opt = memchr(opt, '#', end - opt) ?: end; + opt_len = next_opt - opt; + if (!opt_len) { + printk(KERN_WARNING + "Empty option to dns_resolver key %d\n", + key->serial); + return -EINVAL; + } + + eq = memchr(opt, '=', opt_len) ?: end; + opt_nlen = eq - opt; + eq++; + opt_vlen = next_opt - eq; /* will be -1 if no value */ + + tmp = opt_vlen >= 0 ? opt_vlen : 0; + kdebug("option '%*.*s' val '%*.*s'", + opt_nlen, opt_nlen, opt, tmp, tmp, eq); + + /* see if it's an error number representing a DNS error + * that's to be recorded as the result in this key */ + if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 && + memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) { + kdebug("dns error number option"); + if (opt_vlen <= 0) + goto bad_option_value; + + ret = strict_strtoul(eq, 10, &derrno); + if (ret < 0) + goto bad_option_value; + + if (derrno < 1 || derrno > 511) + goto bad_option_value; + + kdebug("dns error no. = %lu", derrno); + key->type_data.x[0] = -derrno; + continue; + } + + bad_option_value: + printk(KERN_WARNING + "Option '%*.*s' to dns_resolver key %d:" + " bad/missing value\n", + opt_nlen, opt_nlen, opt, key->serial); + return -EINVAL; + } while (opt = next_opt + 1, opt < end); + } + + /* don't cache the result if we're caching an error saying there's no + * result */ + if (key->type_data.x[0]) { + kleave(" = 0 [h_error %ld]", key->type_data.x[0]); + return 0; + } + + kdebug("store result"); + ret = key_payload_reserve(key, result_len); + if (ret < 0) + return -EINVAL; + + upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); + if (!upayload) { + kleave(" = -ENOMEM"); + return -ENOMEM; + } + + upayload->datalen = result_len; + memcpy(upayload->data, data, result_len); + upayload->data[result_len] = '\0'; + rcu_assign_pointer(key->payload.data, upayload); + + kleave(" = 0"); + return 0; +} + +/* + * The description is of the form "[<type>:]<domain_name>" + * + * The domain name may be a simple name or an absolute domain name (which + * should end with a period). The domain name is case-independent. + */ +static int +dns_resolver_match(const struct key *key, const void *description) +{ + int slen, dlen, ret = 0; + const char *src = key->description, *dsp = description; + + kenter("%s,%s", src, dsp); + + if (!src || !dsp) + goto no_match; + + if (strcasecmp(src, dsp) == 0) + goto matched; + + slen = strlen(src); + dlen = strlen(dsp); + if (slen <= 0 || dlen <= 0) + goto no_match; + if (src[slen - 1] == '.') + slen--; + if (dsp[dlen - 1] == '.') + dlen--; + if (slen != dlen || strncasecmp(src, dsp, slen) != 0) + goto no_match; + +matched: + ret = 1; +no_match: + kleave(" = %d", ret); + return ret; +} + +/* + * Describe a DNS key + */ +static void dns_resolver_describe(const struct key *key, struct seq_file *m) +{ + int err = key->type_data.x[0]; + + seq_puts(m, key->description); + if (err) + seq_printf(m, ": %d", err); + else + seq_printf(m, ": %u", key->datalen); +} + +struct key_type key_type_dns_resolver = { + .name = "dns_resolver", + .instantiate = dns_resolver_instantiate, + .match = dns_resolver_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = dns_resolver_describe, + .read = user_read, +}; + +static int __init init_dns_resolver(void) +{ + struct cred *cred; + struct key *keyring; + int ret; + + printk(KERN_NOTICE "Registering the %s key type\n", + key_type_dns_resolver.name); + + /* create an override credential set with a special thread keyring in + * which DNS requests are cached + * + * this is used to prevent malicious redirections from being installed + * with add_key(). + */ + cred = prepare_kernel_cred(NULL); + if (!cred) + return -ENOMEM; + + keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + if (IS_ERR(keyring)) { + ret = PTR_ERR(keyring); + goto failed_put_cred; + } + + ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); + if (ret < 0) + goto failed_put_key; + + ret = register_key_type(&key_type_dns_resolver); + if (ret < 0) + goto failed_put_key; + + /* instruct request_key() to use this special keyring as a cache for + * the results it looks up */ + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + dns_resolver_cache = cred; + + kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); + return 0; + +failed_put_key: + key_put(keyring); +failed_put_cred: + put_cred(cred); + return ret; +} + +static void __exit exit_dns_resolver(void) +{ + key_revoke(dns_resolver_cache->thread_keyring); + unregister_key_type(&key_type_dns_resolver); + put_cred(dns_resolver_cache); + printk(KERN_NOTICE "Unregistered %s key type\n", + key_type_dns_resolver.name); +} + +module_init(init_dns_resolver) +module_exit(exit_dns_resolver) +MODULE_LICENSE("GPL"); diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c new file mode 100644 index 000000000000..c32be292c7e3 --- /dev/null +++ b/net/dns_resolver/dns_query.c @@ -0,0 +1,165 @@ +/* Upcall routine, designed to work as a key type and working through + * /sbin/request-key to contact userspace when handling DNS queries. + * + * See Documentation/networking/dns_resolver.txt + * + * Copyright (c) 2007 Igor Mammedov + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) + * + * The upcall wrapper used to make an arbitrary DNS query. + * + * This function requires the appropriate userspace tool dns.upcall to be + * installed and something like the following lines should be added to the + * /etc/request-key.conf file: + * + * create dns_resolver * * /sbin/dns.upcall %k + * + * For example to use this module to query AFSDB RR: + * + * create dns_resolver afsdb:* * /sbin/dns.afsdb %k + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/dns_resolver.h> +#include <linux/err.h> +#include <keys/dns_resolver-type.h> +#include <keys/user-type.h> + +#include "internal.h" + +/** + * dns_query - Query the DNS + * @type: Query type (or NULL for straight host->IP lookup) + * @name: Name to look up + * @namelen: Length of name + * @options: Request options (or NULL if no options) + * @_result: Where to place the returned data. + * @_expiry: Where to store the result expiry time (or NULL) + * + * The data will be returned in the pointer at *result, and the caller is + * responsible for freeing it. + * + * The description should be of the form "[<query_type>:]<domain_name>", and + * the options need to be appropriate for the query type requested. If no + * query_type is given, then the query is a straight hostname to IP address + * lookup. + * + * The DNS resolution lookup is performed by upcalling to userspace by way of + * requesting a key of type dns_resolver. + * + * Returns the size of the result on success, -ve error code otherwise. + */ +int dns_query(const char *type, const char *name, size_t namelen, + const char *options, char **_result, time_t *_expiry) +{ + struct key *rkey; + struct user_key_payload *upayload; + const struct cred *saved_cred; + size_t typelen, desclen; + char *desc, *cp; + int ret, len; + + kenter("%s,%*.*s,%zu,%s", + type, (int)namelen, (int)namelen, name, namelen, options); + + if (!name || namelen == 0 || !_result) + return -EINVAL; + + /* construct the query key description as "[<type>:]<name>" */ + typelen = 0; + desclen = 0; + if (type) { + typelen = strlen(type); + if (typelen < 1) + return -EINVAL; + desclen += typelen + 1; + } + + if (!namelen) + namelen = strlen(name); + if (namelen < 3) + return -EINVAL; + desclen += namelen + 1; + + desc = kmalloc(desclen, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + cp = desc; + if (type) { + memcpy(cp, type, typelen); + cp += typelen; + *cp++ = ':'; + } + memcpy(cp, name, namelen); + cp += namelen; + *cp = '\0'; + + if (!options) + options = ""; + kdebug("call request_key(,%s,%s)", desc, options); + + /* make the upcall, using special credentials to prevent the use of + * add_key() to preinstall malicious redirections + */ + saved_cred = override_creds(dns_resolver_cache); + rkey = request_key(&key_type_dns_resolver, desc, options); + revert_creds(saved_cred); + kfree(desc); + if (IS_ERR(rkey)) { + ret = PTR_ERR(rkey); + goto out; + } + + down_read(&rkey->sem); + rkey->perm |= KEY_USR_VIEW; + + ret = key_validate(rkey); + if (ret < 0) + goto put; + + /* If the DNS server gave an error, return that to the caller */ + ret = rkey->type_data.x[0]; + if (ret) + goto put; + + upayload = rcu_dereference_protected(rkey->payload.data, + lockdep_is_held(&rkey->sem)); + len = upayload->datalen; + + ret = -ENOMEM; + *_result = kmalloc(len + 1, GFP_KERNEL); + if (!*_result) + goto put; + + memcpy(*_result, upayload->data, len + 1); + if (_expiry) + *_expiry = rkey->expiry; + + ret = len; +put: + up_read(&rkey->sem); + key_put(rkey); +out: + kleave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(dns_query); diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h new file mode 100644 index 000000000000..189ca9e9b785 --- /dev/null +++ b/net/dns_resolver/internal.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 Wang Lei + * Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved. + * + * Internal DNS Rsolver stuff + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +/* + * dns_key.c + */ +extern const struct cred *dns_resolver_cache; + +/* + * debug tracing + */ +extern unsigned dns_resolver_debug; + +#define kdebug(FMT, ...) \ +do { \ + if (unlikely(dns_resolver_debug)) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", \ + current->comm, ##__VA_ARGS__); \ +} while (0) + +#define kenter(FMT, ...) kdebug("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) kdebug("<== %s()"FMT"", __func__, ##__VA_ARGS__) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 11201784d29a..87bb5f4de0e8 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -1,7 +1,7 @@ menuconfig NET_DSA bool "Distributed Switch Architecture support" default n - depends on EXPERIMENTAL && NET_ETHERNET && !S390 + depends on EXPERIMENTAL && NETDEVICES && !S390 select PHYLIB ---help--- This allows you to use hardware switch chips that use diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7c3a7d191249..571f8950ed06 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -46,7 +46,7 @@ config IP_ADVANCED_ROUTER rp_filter on use: echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter - and + or echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter Note that some distributions enable it in startup scripts. diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f0550941df7b..721a8a37b45c 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (!inet->inet_saddr) inet->inet_saddr = rt->rt_src; /* Update source address */ - if (!inet->inet_rcv_saddr) + if (!inet->inet_rcv_saddr) { inet->inet_rcv_saddr = rt->rt_src; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } inet->inet_daddr = rt->rt_dst; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a43968918350..7d02a9f999fa 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -246,6 +246,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct fib_result res; int no_addr, rpf, accept_local; + bool dev_match; int ret; struct net *net; @@ -273,12 +274,22 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); + dev_match = false; + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) + for (ret = 0; ret < res.fi->fib_nhs; ret++) { + struct fib_nh *nh = &res.fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } + } #else if (FIB_RES_DEV(res) == dev) + dev_match = true; #endif - { + if (dev_match) { ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; fib_res_put(&res); return ret; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 79d057a939ba..4a8e370862bc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -186,7 +186,9 @@ static inline struct tnode *node_parent_rcu(struct node *node) { struct tnode *ret = node_parent(node); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } /* Same as rcu_assign_pointer @@ -1753,7 +1755,9 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) static struct leaf *trie_firstleaf(struct trie *t) { - struct tnode *n = (struct tnode *) rcu_dereference(t->trie); + struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); if (!n) return NULL; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a1ad0e7180d2..1fdcacd36ce7 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -834,7 +834,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int mark = 0; - if (len == 8) { + if (len == 8 || IGMP_V2_SEEN(in_dev)) { if (ih->code == 0) { /* Alas, old v1 router presents here. */ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 945b20a5ad50..35c93e8b6a46 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -45,7 +45,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -699,7 +699,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *addr6; int addr_type; @@ -774,7 +774,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); @@ -850,7 +850,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((iph->ttl = tiph->ttl) == 0) { if (skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6c40a8c46e79..64b70ad162e3 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1129,6 +1129,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_HDRINCL: val = inet->hdrincl; break; + case IP_NODEFRAG: + val = inet->nodefrag; + break; case IP_MTU_DISCOVER: val = inet->pmtudisc; break; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6bccba31d132..e8f4f9a57f12 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -735,6 +735,7 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -742,6 +743,7 @@ static void get_counters(const struct xt_table_info *t, ++i; } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } @@ -1418,6 +1420,9 @@ static int translate_compat_table(const char *name, if (ret != 0) break; ++i; + if (strcmp(arpt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c439721b165a..d163f2e3b2e9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -909,6 +909,7 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -916,6 +917,7 @@ get_counters(const struct xt_table_info *t, ++i; /* macro does multi eval of i */ } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } @@ -1749,6 +1751,9 @@ translate_compat_table(struct net *net, if (ret != 0) break; ++i; + if (strcmp(ipt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3f56b6e6c6aa..6298f75d5e93 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2738,6 +2738,11 @@ slow_output: } EXPORT_SYMBOL_GPL(__ip_route_output_key); +static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) +{ + return NULL; +} + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -2746,7 +2751,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, - .check = ipv4_dst_check, + .check = ipv4_blackhole_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..95d75d443927 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -386,8 +386,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask = 0; - if (sk->sk_err) - mask = POLLERR; /* * POLLHUP is certainly not done right. But poll() doesn't @@ -451,11 +449,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) mask |= POLLOUT | POLLWRNORM; } - } + } else + mask |= POLLOUT | POLLWRNORM; if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; } + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); + if (sk->sk_err) + mask |= POLLERR; + return mask; } EXPORT_SYMBOL(tcp_poll); @@ -2011,11 +2015,8 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - int orphan_count = percpu_counter_read_positive( - sk->sk_prot->orphan_count); - sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, orphan_count)) { + if (tcp_too_many_orphans(sk, 0)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -3212,7 +3213,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long nr_pages, limit; - int order, i, max_share; + int i, max_share, cnt; unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3261,22 +3262,12 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); } - /* Try to be a bit smarter and adjust defaults depending - * on available memory. - */ - for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); - order++) - ; - if (order >= 4) { - tcp_death_row.sysctl_max_tw_buckets = 180000; - sysctl_tcp_max_orphans = 4096 << (order - 4); - sysctl_max_syn_backlog = 1024; - } else if (order < 3) { - tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); - sysctl_tcp_max_orphans >>= (3 - order); - sysctl_max_syn_backlog = 128; - } + + cnt = tcp_hashinfo.ehash_mask + 1; + + tcp_death_row.sysctl_max_tw_buckets = cnt / 2; + sysctl_tcp_max_orphans = cnt / 2; + sysctl_max_syn_backlog = max(128, cnt / 256); /* Set the pressure threshold to be a fraction of global memory that * is up to 1/2 at 256 MB, decreasing toward zero with the amount of diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 0ec9bd0ae94f..850c737e08e2 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -196,10 +196,10 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) int tcp_set_allowed_congestion_control(char *val) { struct tcp_congestion_ops *ca; - char *clone, *name; + char *saved_clone, *clone, *name; int ret = 0; - clone = kstrdup(val, GFP_USER); + saved_clone = clone = kstrdup(val, GFP_USER); if (!clone) return -ENOMEM; @@ -226,6 +226,7 @@ int tcp_set_allowed_congestion_control(char *val) } out: spin_unlock(&tcp_cong_list_lock); + kfree(saved_clone); return ret; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e663b78a2ef6..149e79ac2891 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4048,6 +4048,8 @@ static void tcp_reset(struct sock *sk) default: sk->sk_err = ECONNRESET; } + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..c35b469e851c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = percpu_counter_read_positive(&tcp_orphan_count); + int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) - orphans <<= 1; + shift++; /* If some dubious ICMP arrived, penalize even more. */ if (sk->sk_err_soft) - orphans <<= 1; + shift++; - if (tcp_too_many_orphans(sk, orphans)) { + if (tcp_too_many_orphans(sk, shift)) { if (net_ratelimit()) printk(KERN_INFO "Out of socket memory\n"); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0a..fb23c2e63b52 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk) } EXPORT_SYMBOL(udp_lib_unhash); +/* + * inet_rcv_saddr was changed, we must rehash secondary hash + */ +void udp_lib_rehash(struct sock *sk, u16 newhash) +{ + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_hslot *hslot, *hslot2, *nhslot2; + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + nhslot2 = udp_hashslot2(udptable, newhash); + udp_sk(sk)->udp_portaddr_hash = newhash; + if (hslot2 != nhslot2) { + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + /* we must lock primary chain too */ + spin_lock_bh(&hslot->lock); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + + spin_unlock_bh(&hslot->lock); + } + } +} +EXPORT_SYMBOL(udp_lib_rehash); + +static void udp_v4_rehash(struct sock *sk) +{ + u16 new_hash = udp4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + udp_lib_rehash(sk, new_hash); +} + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1843,6 +1886,7 @@ struct proto udp_prot = { .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1ef1366a0a03..47947624eccc 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -21,21 +21,25 @@ static int xfrm4_init_flags(struct xfrm_state *x) } static void -__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +{ + sel->daddr.a4 = fl->fl4_dst; + sel->saddr.a4 = fl->fl4_src; + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET; + sel->prefixlen_d = 32; + sel->prefixlen_s = 32; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) { - x->sel.daddr.a4 = fl->fl4_dst; - x->sel.saddr.a4 = fl->fl4_src; - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET; - x->sel.prefixlen_d = 32; - x->sel.prefixlen_s = 32; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; x->id = tmpl->id; if (x->id.daddr.a4 == 0) x->id.daddr.a4 = daddr->a4; @@ -70,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7d929a22cbc2..ef371aa01ac5 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -105,9 +105,12 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } goto out; } @@ -181,6 +184,8 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 5359ef4daac5..8e754be92c24 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -922,6 +922,7 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -929,6 +930,7 @@ get_counters(const struct xt_table_info *t, ++i; } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } @@ -1764,6 +1766,9 @@ translate_compat_table(struct net *net, if (ret != 0) break; ++i; + if (strcmp(ip6t_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 13ef5bc05cf5..578f3c1a16db 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -113,14 +113,6 @@ static void nf_skb_free(struct sk_buff *skb) kfree_skb(NFCT_FRAG6_CB(skb)->orig); } -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf_init_frags.mem); - nf_skb_free(skb); - kfree_skb(skb); -} - /* Destruction primitives. */ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -282,66 +274,22 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. - */ - if (prev) { - int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset; - - if (i > 0) { - offset += i; - if (end <= offset) { - pr_debug("overlap\n"); - goto err; - } - if (!pskb_pull(skb, i)) { - pr_debug("Can't pull\n"); - goto err; - } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } - - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. + /* RFC5722, Section 4: + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments, including those not yet received) MUST be silently + * discarded. */ - while (next && NFCT_FRAG6_CB(next)->offset < end) { - /* overlap is 'i' bytes */ - int i = end - NFCT_FRAG6_CB(next)->offset; - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - pr_debug("Eat head of the overlapped parts.: %d", i); - if (!pskb_pull(next, i)) - goto err; - /* next fragment */ - NFCT_FRAG6_CB(next)->offset += i; - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragmnet is completely overridden with - * new one drop it. - */ - next = next->next; + /* Check for overlap with preceding fragment. */ + if (prev && + (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) + goto discard_fq; - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(free_it); - } - } + /* Look for overlap with succeeding segment. */ + if (next && NFCT_FRAG6_CB(next)->offset < end) + goto discard_fq; NFCT_FRAG6_CB(skb)->offset = offset; @@ -371,6 +319,8 @@ found: write_unlock(&nf_frags.lock); return 0; +discard_fq: + fq_kill(fq); err: return -1; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 545c4141b755..64cfef1b0a4c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -149,13 +149,6 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_match); -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - void ip6_frag_init(struct inet_frag_queue *q, void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); @@ -346,58 +339,22 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. + /* RFC5722, Section 4: + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments, including those not yet received) MUST be silently + * discarded. */ - if (prev) { - int i = (FRAG6_CB(prev)->offset + prev->len) - offset; - if (i > 0) { - offset += i; - if (end <= offset) - goto err; - if (!pskb_pull(skb, i)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } + /* Check for overlap with preceding fragment. */ + if (prev && + (FRAG6_CB(prev)->offset + prev->len) - offset > 0) + goto discard_fq; - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. - */ - while (next && FRAG6_CB(next)->offset < end) { - int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - if (!pskb_pull(next, i)) - goto err; - FRAG6_CB(next)->offset += i; /* next fragment */ - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragment is completely overridden with - * new one drop it. - */ - next = next->next; - - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(fq->q.net, free_it); - } - } + /* Look for overlap with succeeding segment. */ + if (next && FRAG6_CB(next)->offset < end) + goto discard_fq; FRAG6_CB(skb)->offset = offset; @@ -436,6 +393,8 @@ found: write_unlock(&ip6_frags.lock); return -1; +discard_fq: + fq_kill(fq); err: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8f2d0400cf8a..d126365ac046 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2580,7 +2580,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, { .procname = "mtu_expires", @@ -2594,7 +2594,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, { .procname = "gc_min_interval_ms", diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1dd1affdead2..5acb3560ff15 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } +static void udp_v6_rehash(struct sock *sk) +{ + u16 new_hash = udp6_portaddr_hash(sock_net(sk), + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->inet_num); + + udp_lib_rehash(sk, new_hash); +} + static inline int compute_score(struct sock *sk, struct net *net, unsigned short hnum, struct in6_addr *saddr, __be16 sport, @@ -1447,6 +1456,7 @@ struct proto udpv6_prot = { .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index f417b77fa0e1..a67575d472a3 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,23 +20,27 @@ #include <net/addrconf.h> static void -__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) { /* Initialize temporary selector matching only * to current session. */ - ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET6; - x->sel.prefixlen_d = 128; - x->sel.prefixlen_s = 128; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET6; + sel->prefixlen_d = 128; + sel->prefixlen_s = 128; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) +{ x->id = tmpl->id; if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); @@ -168,6 +172,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .eth_proto = htons(ETH_P_IPV6), .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, + .init_temprop = xfrm6_init_temprop, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 79986a674f6e..fd55b5135de5 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -824,8 +824,8 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name); if (err < 0) { - kfree(self->ias_obj->name); - kfree(self->ias_obj); + irias_delete_object(self->ias_obj); + self->ias_obj = NULL; goto out; } diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index a788f9e9427d..6130f9d9dbe1 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1102,7 +1102,7 @@ int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len) memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */ le16_to_cpus(&val_len); n+=2; - if (val_len > 1016) { + if (val_len >= 1016) { IRDA_DEBUG(2, "%s(), parameter length to long\n", __func__ ); return -RSP_INVALID_COMMAND_FORMAT; } diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 9616c32d1076..5bb8353105cc 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -169,6 +169,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, { struct irlan_cb *self = netdev_priv(dev); int ret; + unsigned int len; /* skb headroom large enough to contain all IrDA-headers? */ if ((skb_headroom(skb) < self->max_header_size) || (skb_shared(skb))) { @@ -188,6 +189,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, dev->trans_start = jiffies; + len = skb->len; /* Now queue the packet in the transport layer */ if (self->use_udata) ret = irttp_udata_request(self->tsap_data, skb); @@ -209,7 +211,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, self->stats.tx_dropped++; } else { self->stats.tx_packets++; - self->stats.tx_bytes += skb->len; + self->stats.tx_bytes += len; } return NETDEV_TX_OK; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 58c6c4cda73b..1ae697681bc7 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, printk("\n"); } - if (data_len < ETH_HLEN) + if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) goto error; secpath_reset(skb); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 023ba820236f..582612998211 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1024,7 +1024,8 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - int rc = -EINVAL, opt; + unsigned int opt; + int rc = -EINVAL; lock_sock(sk); if (unlikely(level != SOL_LLC || optlen != sizeof(int))) diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index e4dae0244d76..cf4aea3ba30f 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -689,7 +689,7 @@ static void llc_station_rcv(struct sk_buff *skb) int __init llc_station_init(void) { - u16 rc = -ENOBUFS; + int rc = -ENOBUFS; struct sk_buff *skb; struct llc_station_state_ev *ev; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 6d0bd198af19..be04d46110fe 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -103,6 +103,7 @@ ieee80211_rate_control_ops_get(const char *name) struct rate_control_ops *ops; const char *alg_name; + kparam_block_sysfs_write(ieee80211_default_rc_algo); if (!name) alg_name = ieee80211_default_rc_algo; else @@ -120,6 +121,7 @@ ieee80211_rate_control_ops_get(const char *name) /* try built-in one if specific alg requested but not found */ if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT)) ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT); + kparam_unblock_sysfs_write(ieee80211_default_rc_algo); return ops; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f8ddba48011..4c2f89df5cce 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -924,6 +924,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); skb->ipvs_property = 1; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index f228a17ec649..7e9af5b76d9e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -45,6 +45,7 @@ #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #include <linux/gfp.h> #include <net/protocol.h> @@ -359,7 +360,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) { /* If mangling fails this function will return 0 * which will cause the packet to be dropped. * Mangling can only fail under memory pressure, @@ -409,7 +410,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -496,11 +496,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } - ct = (struct nf_conn *)skb->nfct; - if (ct && ct != &nf_conntrack_untracked) - ip_vs_expect_related(skb, ct, n_cp, - IPPROTO_TCP, &n_cp->dport, 1); - /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 21e1a5e9b9d3..49df6bea6a2d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -349,8 +349,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif -static void -ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) { struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_tuple new_tuple; @@ -365,11 +365,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) * real-server we will see RIP->DIP. */ new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - new_tuple.src.u3 = cp->daddr; + if (outin) + new_tuple.src.u3 = cp->daddr; + else + new_tuple.dst.u3 = cp->vaddr; /* * This will also take care of UDP and other protocols. */ - new_tuple.src.u.tcp.port = cp->dport; + if (outin) + new_tuple.src.u.tcp.port = cp->dport; + else + new_tuple.dst.u.tcp.port = cp->vport; nf_conntrack_alter_reply(ct, &new_tuple); } @@ -428,7 +434,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -506,7 +512,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2cbf380377d5..cd96ed3ccee4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1406,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb; + struct sk_buff *skb, *data_skb; int err; if (flags&MSG_OOB) @@ -1418,59 +1418,35 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; + data_skb = skb; + #ifdef CONFIG_COMPAT_NETLINK_MESSAGES if (unlikely(skb_shinfo(skb)->frag_list)) { - bool need_compat = !!(flags & MSG_CMSG_COMPAT); - /* - * If this skb has a frag_list, then here that means that - * we will have to use the frag_list skb for compat tasks - * and the regular skb for non-compat tasks. + * If this skb has a frag_list, then here that means that we + * will have to use the frag_list skb's data for compat tasks + * and the regular skb's data for normal (non-compat) tasks. * - * The skb might (and likely will) be cloned, so we can't - * just reset frag_list and go on with things -- we need to - * keep that. For the compat case that's easy -- simply get - * a reference to the compat skb and free the regular one - * including the frag. For the non-compat case, we need to - * avoid sending the frag to the user -- so assign NULL but - * restore it below before freeing the skb. + * If we need to send the compat skb, assign it to the + * 'data_skb' variable so that it will be used below for data + * copying. We keep 'skb' for everything else, including + * freeing both later. */ - if (need_compat) { - struct sk_buff *compskb = skb_shinfo(skb)->frag_list; - skb_get(compskb); - kfree_skb(skb); - skb = compskb; - } else { - /* - * Before setting frag_list to NULL, we must get a - * private copy of skb if shared (because of MSG_PEEK) - */ - if (skb_shared(skb)) { - struct sk_buff *nskb; - - nskb = pskb_copy(skb, GFP_KERNEL); - kfree_skb(skb); - skb = nskb; - err = -ENOMEM; - if (!skb) - goto out; - } - kfree_skb(skb_shinfo(skb)->frag_list); - skb_shinfo(skb)->frag_list = NULL; - } + if (flags & MSG_CMSG_COMPAT) + data_skb = skb_shinfo(skb)->frag_list; } #endif msg->msg_namelen = 0; - copied = skb->len; + copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } - skb_reset_transport_header(skb); - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + skb_reset_transport_header(data_skb); + err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied); if (msg->msg_name) { struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; @@ -1490,7 +1466,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } siocb->scm->creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) - copied = skb->len; + copied = data_skb->len; skb_free_datagram(sk, skb); @@ -2126,6 +2102,26 @@ static void __net_exit netlink_net_exit(struct net *net) #endif } +static void __init netlink_add_usersock_entry(void) +{ + unsigned long *listeners; + int groups = 32; + + listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), + GFP_KERNEL); + if (!listeners) + panic("netlink_add_usersock_entry: Cannot allocate listneres\n"); + + netlink_table_grab(); + + nl_table[NETLINK_USERSOCK].groups = groups; + nl_table[NETLINK_USERSOCK].listeners = listeners; + nl_table[NETLINK_USERSOCK].module = THIS_MODULE; + nl_table[NETLINK_USERSOCK].registered = 1; + + netlink_table_ungrab(); +} + static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, @@ -2174,6 +2170,8 @@ static int __init netlink_proto_init(void) hash->rehash_time = jiffies; } + netlink_add_usersock_entry(); + sock_register(&netlink_family_ops); register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. */ diff --git a/net/rds/recv.c b/net/rds/recv.c index 795a00b7f2cb..c93588c2d553 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -297,7 +297,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) { struct rds_notifier *notifier; - struct rds_rdma_notify cmsg; + struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */ unsigned int count = 0, max_messages = ~0U; unsigned long flags; LIST_HEAD(copy); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8e45e76a95f5..d952e7eac188 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -679,7 +679,7 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; - if (addr->srose_ndigis > ROSE_MAX_DIGIS) + if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) { @@ -739,7 +739,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; - if (addr->srose_ndigis > ROSE_MAX_DIGIS) + if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; /* Source + Destination digis should not exceed ROSE_MAX_DIGIS */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7043b294bb67..8e22bd345e71 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -597,12 +597,6 @@ extern unsigned rxrpc_debug; #define dbgprintk(FMT,...) \ printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) -/* make sure we maintain the format strings, even when debugging is disabled */ -static inline __attribute__((format(printf,1,2))) -void _dbprintk(const char *fmt, ...) -{ -} - #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) @@ -655,11 +649,11 @@ do { \ } while (0) #else -#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) -#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) -#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) -#define _proto(FMT,...) _dbprintk("### "FMT ,##__VA_ARGS__) -#define _net(FMT,...) _dbprintk("@@@ "FMT ,##__VA_ARGS__) +#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) +#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) +#define _proto(FMT,...) no_printk("### "FMT ,##__VA_ARGS__) +#define _net(FMT,...) no_printk("@@@ "FMT ,##__VA_ARGS__) #endif /* diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 8406c6654990..c2ed90a4c0b4 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -152,21 +152,24 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); - struct tc_gact opt; struct tcf_gact *gact = a->priv; + struct tc_gact opt = { + .index = gact->tcf_index, + .refcnt = gact->tcf_refcnt - ref, + .bindcnt = gact->tcf_bindcnt - bind, + .action = gact->tcf_action, + }; struct tcf_t t; - opt.index = gact->tcf_index; - opt.refcnt = gact->tcf_refcnt - ref; - opt.bindcnt = gact->tcf_bindcnt - bind; - opt.action = gact->tcf_action; NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB if (gact->tcfg_ptype) { - struct tc_gact_p p_opt; - p_opt.paction = gact->tcfg_paction; - p_opt.pval = gact->tcfg_pval; - p_opt.ptype = gact->tcfg_ptype; + struct tc_gact_p p_opt = { + .paction = gact->tcfg_paction, + .pval = gact->tcfg_pval, + .ptype = gact->tcfg_ptype, + }; + NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 11f195af2da0..0c311be92827 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -219,15 +219,16 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = a->priv; - struct tc_mirred opt; + struct tc_mirred opt = { + .index = m->tcf_index, + .action = m->tcf_action, + .refcnt = m->tcf_refcnt - ref, + .bindcnt = m->tcf_bindcnt - bind, + .eaction = m->tcfm_eaction, + .ifindex = m->tcfm_ifindex, + }; struct tcf_t t; - opt.index = m->tcf_index; - opt.action = m->tcf_action; - opt.refcnt = m->tcf_refcnt - ref; - opt.bindcnt = m->tcf_bindcnt - bind; - opt.eaction = m->tcfm_eaction; - opt.ifindex = m->tcfm_ifindex; NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 509a2d53a99d..186eb837e600 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -272,19 +272,19 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_nat *p = a->priv; - struct tc_nat opt; + struct tc_nat opt = { + .old_addr = p->old_addr, + .new_addr = p->new_addr, + .mask = p->mask, + .flags = p->flags, + + .index = p->tcf_index, + .action = p->tcf_action, + .refcnt = p->tcf_refcnt - ref, + .bindcnt = p->tcf_bindcnt - bind, + }; struct tcf_t t; - opt.old_addr = p->old_addr; - opt.new_addr = p->new_addr; - opt.mask = p->mask; - opt.flags = p->flags; - - opt.index = p->tcf_index; - opt.action = p->tcf_action; - opt.refcnt = p->tcf_refcnt - ref; - opt.bindcnt = p->tcf_bindcnt - bind; - NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 537a48732e9e..7ebf7439b478 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -350,22 +350,19 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = a->priv; - struct tc_police opt; - - opt.index = police->tcf_index; - opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = police->tcfp_burst; - opt.refcnt = police->tcf_refcnt - ref; - opt.bindcnt = police->tcf_bindcnt - bind; + struct tc_police opt = { + .index = police->tcf_index, + .action = police->tcf_action, + .mtu = police->tcfp_mtu, + .burst = police->tcfp_burst, + .refcnt = police->tcf_refcnt - ref, + .bindcnt = police->tcf_bindcnt - bind, + }; + if (police->tcfp_R_tab) opt.rate = police->tcfp_R_tab->rate; - else - memset(&opt.rate, 0, sizeof(opt.rate)); if (police->tcfp_P_tab) opt.peakrate = police->tcfp_P_tab->rate; - else - memset(&opt.peakrate, 0, sizeof(opt.peakrate)); NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); if (police->tcfp_result) NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4a1d640b0cf1..97e84f3ee775 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -164,13 +164,14 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_defact *d = a->priv; - struct tc_defact opt; + struct tc_defact opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; struct tcf_t t; - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata); t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e9607fe55b58..66cbf4eb8855 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -159,13 +159,14 @@ static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_skbedit *d = a->priv; - struct tc_skbedit opt; + struct tc_skbedit opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; struct tcf_t t; - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); if (d->flags & SKBEDIT_F_PRIORITY) NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b9e8c3b7d406..408eea7086aa 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -150,22 +150,34 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; if (qops->peek == NULL) { - if (qops->dequeue == NULL) { + if (qops->dequeue == NULL) qops->peek = noop_qdisc_ops.peek; - } else { - rc = -EINVAL; - goto out; - } + else + goto out_einval; } if (qops->dequeue == NULL) qops->dequeue = noop_qdisc_ops.dequeue; + if (qops->cl_ops) { + const struct Qdisc_class_ops *cops = qops->cl_ops; + + if (!(cops->get && cops->put && cops->walk && cops->leaf)) + goto out_einval; + + if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf)) + goto out_einval; + } + qops->next = NULL; *qp = qops; rc = 0; out: write_unlock(&qdisc_mod_lock); return rc; + +out_einval: + rc = -EINVAL; + goto out; } EXPORT_SYMBOL(register_qdisc); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index e114f23d5eae..6318e1136b83 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -255,10 +255,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -EINVAL; goto err_out; } - if (!list_empty(&flow->list)) { - error = -EEXIST; - goto err_out; - } } else { int i; unsigned long cl; @@ -418,7 +414,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) } ret = qdisc_enqueue(skb, flow->q); - if (ret != 0) { + if (ret != NET_XMIT_SUCCESS) { drop: __maybe_unused if (net_xmit_drop_count(ret)) { sch->qstats.drops++; @@ -442,7 +438,7 @@ drop: __maybe_unused */ if (flow == &p->link) { sch->q.qlen++; - return 0; + return NET_XMIT_SUCCESS; } tasklet_schedule(&p->task); return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index abd904be4287..47496098d35c 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -761,8 +761,8 @@ init_vf(struct hfsc_class *cl, unsigned int len) if (f != cl->cl_f) { cl->cl_f = f; cftree_update(cl); - update_cfmin(cl->cl_parent); } + update_cfmin(cl->cl_parent); } } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 534f33231c17..201cbac2b32c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -334,7 +334,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - return 0; + return NET_XMIT_SUCCESS; } sfq_drop(sch); @@ -508,6 +508,11 @@ nla_put_failure: return -1; } +static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg) +{ + return NULL; +} + static unsigned long sfq_get(struct Qdisc *sch, u32 classid) { return 0; @@ -519,6 +524,10 @@ static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, return 0; } +static void sfq_put(struct Qdisc *q, unsigned long cl) +{ +} + static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl) { struct sfq_sched_data *q = qdisc_priv(sch); @@ -571,9 +580,12 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) } static const struct Qdisc_class_ops sfq_class_ops = { + .leaf = sfq_leaf, .get = sfq_get, + .put = sfq_put, .tcf_chain = sfq_find_tcf, .bind_tcf = sfq_bind, + .unbind_tcf = sfq_put, .dump = sfq_dump_class, .dump_stats = sfq_dump_class_stats, .walk = sfq_walk, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 0991c640cd3e..641a30d64635 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -127,7 +127,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return qdisc_reshape_fail(skb, sch); ret = qdisc_enqueue(skb, q->qdisc); - if (ret != 0) { + if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) sch->qstats.drops++; return ret; @@ -136,7 +136,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) sch->q.qlen++; sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - return 0; + return NET_XMIT_SUCCESS; } static unsigned int tbf_drop(struct Qdisc* sch) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 807643bdcbac..feaabc103ce6 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -85,7 +85,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) __skb_queue_tail(&q->q, skb); sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - return 0; + return NET_XMIT_SUCCESS; } kfree_skb(skb); diff --git a/net/sctp/output.c b/net/sctp/output.c index a646681f5acd..bcc4590ccaf2 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -92,7 +92,6 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - sctp_packet_reset(packet); packet->vtag = vtag; if (ecn_capable && sctp_packet_empty(packet)) { diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24b2cd555637..d344dc481ccc 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1232,6 +1232,18 @@ out: return 0; } +static bool list_has_sctp_addr(const struct list_head *list, + union sctp_addr *ipaddr) +{ + struct sctp_transport *addr; + + list_for_each_entry(addr, list, transports) { + if (sctp_cmp_addr_exact(ipaddr, &addr->ipaddr)) + return true; + } + + return false; +} /* A restart is occurring, check to make sure no new addresses * are being added as we may be under a takeover attack. */ @@ -1240,10 +1252,10 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { - struct sctp_transport *new_addr, *addr; - int found; + struct sctp_transport *new_addr; + int ret = 1; - /* Implementor's Guide - Sectin 5.2.2 + /* Implementor's Guide - Section 5.2.2 * ... * Before responding the endpoint MUST check to see if the * unexpected INIT adds new addresses to the association. If new @@ -1254,31 +1266,19 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, /* Search through all current addresses and make sure * we aren't adding any new ones. */ - new_addr = NULL; - found = 0; - list_for_each_entry(new_addr, &new_asoc->peer.transport_addr_list, - transports) { - found = 0; - list_for_each_entry(addr, &asoc->peer.transport_addr_list, - transports) { - if (sctp_cmp_addr_exact(&new_addr->ipaddr, - &addr->ipaddr)) { - found = 1; - break; - } - } - if (!found) + transports) { + if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, + &new_addr->ipaddr)) { + sctp_sf_send_restart_abort(&new_addr->ipaddr, init, + commands); + ret = 0; break; - } - - /* If a new address was added, ABORT the sender. */ - if (!found && new_addr) { - sctp_sf_send_restart_abort(&new_addr->ipaddr, init, commands); + } } /* Return success if all addresses were found. */ - return found; + return ret; } /* Populate the verification/tie tags based on overlapping INIT diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 443c161eb8bd..3376d7657185 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,10 +18,11 @@ config SUNRPC_XPRT_RDMA If unsure, say N. config RPCSEC_GSS_KRB5 - tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL + tristate + depends on SUNRPC && CRYPTO + prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4) + default y select SUNRPC_GSS - select CRYPTO select CRYPTO_MD5 select CRYPTO_DES select CRYPTO_CBC @@ -34,7 +35,7 @@ config RPCSEC_GSS_KRB5 available from http://linux-nfs.org/. In addition, user-space Kerberos support should be installed. - If unsure, say N. + If unsure, say Y. config RPCSEC_GSS_SPKM3 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 8dc47f1d0001..36cb66022a27 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -19,6 +19,15 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +#define RPC_CREDCACHE_DEFAULT_HASHBITS (4) +struct rpc_cred_cache { + struct hlist_head *hashtable; + unsigned int hashbits; + spinlock_t lock; +}; + +static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS; + static DEFINE_SPINLOCK(rpc_authflavor_lock); static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { &authnull_ops, /* AUTH_NULL */ @@ -29,6 +38,47 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; +#define MAX_HASHTABLE_BITS (10) +static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) +{ + unsigned long num; + unsigned int nbits; + int ret; + + if (!val) + goto out_inval; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL) + goto out_inval; + nbits = fls(num); + if (num > (1U << nbits)) + nbits++; + if (nbits > MAX_HASHTABLE_BITS || nbits < 2) + goto out_inval; + *(unsigned int *)kp->arg = nbits; + return 0; +out_inval: + return -EINVAL; +} + +static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp) +{ + unsigned int nbits; + + nbits = *(unsigned int *)kp->arg; + return sprintf(buffer, "%u", 1U << nbits); +} + +#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int); + +static struct kernel_param_ops param_ops_hashtbl_sz = { + .set = param_set_hashtbl_sz, + .get = param_get_hashtbl_sz, +}; + +module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644); +MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size"); + static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor >= RPC_AUTH_MAXFLAVOR) @@ -145,16 +195,23 @@ int rpcauth_init_credcache(struct rpc_auth *auth) { struct rpc_cred_cache *new; - int i; + unsigned int hashsize; new = kmalloc(sizeof(*new), GFP_KERNEL); if (!new) - return -ENOMEM; - for (i = 0; i < RPC_CREDCACHE_NR; i++) - INIT_HLIST_HEAD(&new->hashtable[i]); + goto out_nocache; + new->hashbits = auth_hashbits; + hashsize = 1U << new->hashbits; + new->hashtable = kcalloc(hashsize, sizeof(new->hashtable[0]), GFP_KERNEL); + if (!new->hashtable) + goto out_nohashtbl; spin_lock_init(&new->lock); auth->au_credcache = new; return 0; +out_nohashtbl: + kfree(new); +out_nocache: + return -ENOMEM; } EXPORT_SYMBOL_GPL(rpcauth_init_credcache); @@ -183,11 +240,12 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache) LIST_HEAD(free); struct hlist_head *head; struct rpc_cred *cred; + unsigned int hashsize = 1U << cache->hashbits; int i; spin_lock(&rpc_credcache_lock); spin_lock(&cache->lock); - for (i = 0; i < RPC_CREDCACHE_NR; i++) { + for (i = 0; i < hashsize; i++) { head = &cache->hashtable[i]; while (!hlist_empty(head)) { cred = hlist_entry(head->first, struct rpc_cred, cr_hash); @@ -216,6 +274,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) if (cache) { auth->au_credcache = NULL; rpcauth_clear_credcache(cache); + kfree(cache->hashtable); kfree(cache); } } @@ -297,7 +356,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, *entry, *new; unsigned int nr; - nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS); + nr = hash_long(acred->uid, cache->hashbits); rcu_read_lock(); hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { @@ -390,16 +449,16 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, } EXPORT_SYMBOL_GPL(rpcauth_init_cred); -void +struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) { - task->tk_msg.rpc_cred = get_rpccred(cred); dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + return get_rpccred(cred); } EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); -static void +static struct rpc_cred * rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; @@ -407,45 +466,43 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) .uid = 0, .gid = 0, }; - struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return auth->au_ops->lookup_cred(auth, &acred, lookupflags); } -static void +static struct rpc_cred * rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; - struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, auth->au_ops->au_name); - ret = rpcauth_lookupcred(auth, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return rpcauth_lookupcred(auth, lookupflags); } -void +static int rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_cred *new; int lookupflags = 0; if (flags & RPC_TASK_ASYNC) lookupflags |= RPCAUTH_LOOKUP_NEW; if (cred != NULL) - cred->cr_ops->crbind(task, cred, lookupflags); + new = cred->cr_ops->crbind(task, cred, lookupflags); else if (flags & RPC_TASK_ROOTCREDS) - rpcauth_bind_root_cred(task, lookupflags); + new = rpcauth_bind_root_cred(task, lookupflags); else - rpcauth_bind_new_cred(task, lookupflags); + new = rpcauth_bind_new_cred(task, lookupflags); + if (IS_ERR(new)) + return PTR_ERR(new); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); + req->rq_cred = new; + return 0; } void @@ -484,22 +541,10 @@ out_nodestroy: } EXPORT_SYMBOL_GPL(put_rpccred); -void -rpcauth_unbindcred(struct rpc_task *task) -{ - struct rpc_cred *cred = task->tk_msg.rpc_cred; - - dprintk("RPC: %5u releasing %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); - - put_rpccred(cred); - task->tk_msg.rpc_cred = NULL; -} - __be32 * rpcauth_marshcred(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u marshaling %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -510,7 +555,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p) __be32 * rpcauth_checkverf(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u validating %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -522,7 +567,7 @@ int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u using %s cred %p to wrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); @@ -536,7 +581,7 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); @@ -550,13 +595,21 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, int rpcauth_refreshcred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; int err; + cred = task->tk_rqstp->rq_cred; + if (cred == NULL) { + err = rpcauth_bindcred(task, task->tk_msg.rpc_cred, task->tk_flags); + if (err < 0) + goto out; + cred = task->tk_rqstp->rq_cred; + }; dprintk("RPC: %5u refreshing %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); +out: if (err < 0) task->tk_status = err; return err; @@ -565,7 +618,7 @@ rpcauth_refreshcred(struct rpc_task *task) void rpcauth_invalcred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u invalidating %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -576,7 +629,7 @@ rpcauth_invalcred(struct rpc_task *task) int rpcauth_uptodatecred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; return cred == NULL || test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; @@ -587,14 +640,27 @@ static struct shrinker rpc_cred_shrinker = { .seeks = DEFAULT_SEEKS, }; -void __init rpcauth_init_module(void) +int __init rpcauth_init_module(void) { - rpc_init_authunix(); - rpc_init_generic_auth(); + int err; + + err = rpc_init_authunix(); + if (err < 0) + goto out1; + err = rpc_init_generic_auth(); + if (err < 0) + goto out2; register_shrinker(&rpc_cred_shrinker); + return 0; +out2: + rpc_destroy_authunix(); +out1: + return err; } void __exit rpcauth_remove_module(void) { + rpc_destroy_authunix(); + rpc_destroy_generic_auth(); unregister_shrinker(&rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 8f623b0f03dd..43162bb3b78f 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -27,7 +27,6 @@ struct generic_cred { }; static struct rpc_auth generic_auth; -static struct rpc_cred_cache generic_cred_cache; static const struct rpc_credops generic_credops; /* @@ -55,18 +54,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void) } EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); -static void -generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) +static struct rpc_cred *generic_bind_cred(struct rpc_task *task, + struct rpc_cred *cred, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - struct rpc_cred *ret; - ret = auth->au_ops->lookup_cred(auth, acred, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return auth->au_ops->lookup_cred(auth, acred, lookupflags); } /* @@ -159,20 +153,16 @@ out_nomatch: return 0; } -void __init rpc_init_generic_auth(void) +int __init rpc_init_generic_auth(void) { - spin_lock_init(&generic_cred_cache.lock); + return rpcauth_init_credcache(&generic_auth); } void __exit rpc_destroy_generic_auth(void) { - rpcauth_clear_credcache(&generic_cred_cache); + rpcauth_destroy_credcache(&generic_auth); } -static struct rpc_cred_cache generic_cred_cache = { - {{ NULL, },}, -}; - static const struct rpc_authops generic_auth_ops = { .owner = THIS_MODULE, .au_name = "Generic", @@ -183,7 +173,6 @@ static const struct rpc_authops generic_auth_ops = { static struct rpc_auth generic_auth = { .au_ops = &generic_auth_ops, .au_count = ATOMIC_INIT(0), - .au_credcache = &generic_cred_cache, }; static const struct rpc_credops generic_credops = { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 8da2a0e68574..dcfc66bab2bb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -373,7 +373,7 @@ gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss static void gss_upcall_callback(struct rpc_task *task) { - struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, + struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; struct inode *inode = &gss_msg->inode->vfs_inode; @@ -502,7 +502,7 @@ static void warn_gssd(void) static inline int gss_refresh_upcall(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, @@ -928,6 +928,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx) { dprintk("RPC: gss_free_ctx\n"); + gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); kfree(ctx); } @@ -942,13 +943,7 @@ gss_free_ctx_callback(struct rcu_head *head) static void gss_free_ctx(struct gss_cl_ctx *ctx) { - struct gss_ctx *gc_gss_ctx; - - gc_gss_ctx = rcu_dereference(ctx->gc_gss_ctx); - rcu_assign_pointer(ctx->gc_gss_ctx, NULL); call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); - if (gc_gss_ctx) - gss_delete_sec_context(&gc_gss_ctx); } static void @@ -1064,12 +1059,12 @@ out: static __be32 * gss_marshal(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_cred *cred = req->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 *cred_len; - struct rpc_rqst *req = task->tk_rqstp; u32 maj_stat = 0; struct xdr_netobj mic; struct kvec iov; @@ -1119,7 +1114,7 @@ out_put_ctx: static int gss_renew_cred(struct rpc_task *task) { - struct rpc_cred *oldcred = task->tk_msg.rpc_cred; + struct rpc_cred *oldcred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(oldcred, struct gss_cred, gc_base); @@ -1133,7 +1128,7 @@ static int gss_renew_cred(struct rpc_task *task) new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); if (IS_ERR(new)) return PTR_ERR(new); - task->tk_msg.rpc_cred = new; + task->tk_rqstp->rq_cred = new; put_rpccred(oldcred); return 0; } @@ -1161,7 +1156,7 @@ static int gss_cred_is_negative_entry(struct rpc_cred *cred) static int gss_refresh(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; int ret = 0; if (gss_cred_is_negative_entry(cred)) @@ -1172,7 +1167,7 @@ gss_refresh(struct rpc_task *task) ret = gss_renew_cred(task); if (ret < 0) goto out; - cred = task->tk_msg.rpc_cred; + cred = task->tk_rqstp->rq_cred; } if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) @@ -1191,7 +1186,7 @@ gss_refresh_null(struct rpc_task *task) static __be32 * gss_validate(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 seq; struct kvec iov; @@ -1400,7 +1395,7 @@ static int gss_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *p, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); @@ -1503,7 +1498,7 @@ static int gss_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *p, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 1db618f56ecb..a5c36c01707b 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -75,7 +75,7 @@ nul_marshal(struct rpc_task *task, __be32 *p) static int nul_refresh(struct rpc_task *task) { - set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); return 0; } diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index aac2f8b4ee21..4cb70dc6e7ad 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -29,7 +29,6 @@ struct unx_cred { #endif static struct rpc_auth unix_auth; -static struct rpc_cred_cache unix_cred_cache; static const struct rpc_credops unix_credops; static struct rpc_auth * @@ -141,7 +140,7 @@ static __be32 * unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; - struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); + struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); __be32 *base, *hold; int i; @@ -174,7 +173,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) static int unx_refresh(struct rpc_task *task) { - set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); return 0; } @@ -197,15 +196,20 @@ unx_validate(struct rpc_task *task, __be32 *p) printk("RPC: giant verf size: %u\n", size); return NULL; } - task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2; + task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; p += (size >> 2); return p; } -void __init rpc_init_authunix(void) +int __init rpc_init_authunix(void) { - spin_lock_init(&unix_cred_cache.lock); + return rpcauth_init_credcache(&unix_auth); +} + +void rpc_destroy_authunix(void) +{ + rpcauth_destroy_credcache(&unix_auth); } const struct rpc_authops authunix_ops = { @@ -219,17 +223,12 @@ const struct rpc_authops authunix_ops = { }; static -struct rpc_cred_cache unix_cred_cache = { -}; - -static struct rpc_auth unix_auth = { .au_cslack = UNX_WRITESLACK, .au_rslack = 2, /* assume AUTH_NULL verf */ .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = ATOMIC_INIT(0), - .au_credcache = &unix_cred_cache, }; static diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 58de76c8540c..2b06410e584e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -34,7 +34,6 @@ #include <linux/sunrpc/cache.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/rpc_pipe_fs.h> -#include <linux/smp_lock.h> #define RPCDBG_FACILITY RPCDBG_CACHE @@ -320,7 +319,7 @@ static struct cache_detail *current_detail; static int current_index; static void do_cache_clean(struct work_struct *work); -static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); +static struct delayed_work cache_cleaner; static void sunrpc_init_cache_detail(struct cache_detail *cd) { @@ -1504,6 +1503,11 @@ static int create_cache_proc_entries(struct cache_detail *cd) } #endif +void __init cache_initialize(void) +{ + INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); +} + int cache_register(struct cache_detail *cd) { int ret; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 756fc324db9e..2388d83b68ff 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -414,6 +414,35 @@ out_no_clnt: EXPORT_SYMBOL_GPL(rpc_clone_client); /* + * Kill all tasks for the given client. + * XXX: kill their descendants as well? + */ +void rpc_killall_tasks(struct rpc_clnt *clnt) +{ + struct rpc_task *rovr; + + + if (list_empty(&clnt->cl_tasks)) + return; + dprintk("RPC: killing all tasks for client %p\n", clnt); + /* + * Spin lock all_tasks to prevent changes... + */ + spin_lock(&clnt->cl_lock); + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { + if (!RPC_IS_ACTIVATED(rovr)) + continue; + if (!(rovr->tk_flags & RPC_TASK_KILLED)) { + rovr->tk_flags |= RPC_TASK_KILLED; + rpc_exit(rovr, -EIO); + rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr); + } + } + spin_unlock(&clnt->cl_lock); +} +EXPORT_SYMBOL_GPL(rpc_killall_tasks); + +/* * Properly shut down an RPC client, terminating all outstanding * requests. */ @@ -538,6 +567,49 @@ out: } EXPORT_SYMBOL_GPL(rpc_bind_new_program); +void rpc_task_release_client(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + + if (clnt != NULL) { + /* Remove from client task list */ + spin_lock(&clnt->cl_lock); + list_del(&task->tk_task); + spin_unlock(&clnt->cl_lock); + task->tk_client = NULL; + + rpc_release_client(clnt); + } +} + +static +void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) +{ + if (clnt != NULL) { + rpc_task_release_client(task); + task->tk_client = clnt; + kref_get(&clnt->cl_kref); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + /* Add to the client's list of all tasks */ + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); + } +} + +static void +rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) +{ + if (msg != NULL) { + task->tk_msg.rpc_proc = msg->rpc_proc; + task->tk_msg.rpc_argp = msg->rpc_argp; + task->tk_msg.rpc_resp = msg->rpc_resp; + if (msg->rpc_cred != NULL) + task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred); + } +} + /* * Default callback for async RPC calls */ @@ -562,6 +634,18 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) if (IS_ERR(task)) goto out; + rpc_task_set_client(task, task_setup_data->rpc_client); + rpc_task_set_rpc_message(task, task_setup_data->rpc_message); + + if (task->tk_status != 0) { + int ret = task->tk_status; + rpc_put_task(task); + return ERR_PTR(ret); + } + + if (task->tk_action == NULL) + rpc_call_start(task); + atomic_inc(&task->tk_count); rpc_execute(task); out: @@ -756,12 +840,13 @@ EXPORT_SYMBOL_GPL(rpc_force_rebind); * Restart an (async) RPC call from the call_prepare state. * Usually called from within the exit handler. */ -void +int rpc_restart_call_prepare(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) - return; + return 0; task->tk_action = rpc_prepare_task; + return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); @@ -769,13 +854,13 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); * Restart an (async) RPC call. Usually called from within the * exit handler. */ -void +int rpc_restart_call(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) - return; - + return 0; task->tk_action = call_start; + return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call); @@ -824,11 +909,6 @@ call_reserve(struct rpc_task *task) { dprint_status(task); - if (!rpcauth_uptodatecred(task)) { - task->tk_action = call_refresh; - return; - } - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); @@ -892,7 +972,7 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack; + unsigned int slack = task->tk_client->cl_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -900,7 +980,7 @@ call_allocate(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_bind; + task->tk_action = call_refresh; if (req->rq_buffer) return; @@ -937,6 +1017,47 @@ call_allocate(struct rpc_task *task) rpc_exit(task, -ERESTARTSYS); } +/* + * 2a. Bind and/or refresh the credentials + */ +static void +call_refresh(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_action = call_refreshresult; + task->tk_status = 0; + task->tk_client->cl_stats->rpcauthrefresh++; + rpcauth_refreshcred(task); +} + +/* + * 2b. Process the results of a credential refresh + */ +static void +call_refreshresult(struct rpc_task *task) +{ + int status = task->tk_status; + + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_bind; + if (status >= 0 && rpcauth_uptodatecred(task)) + return; + switch (status) { + case -EACCES: + rpc_exit(task, -EACCES); + return; + case -ENOMEM: + rpc_exit(task, -ENOMEM); + return; + case -ETIMEDOUT: + rpc_delay(task, 3*HZ); + } + task->tk_action = call_refresh; +} + static inline int rpc_task_need_encode(struct rpc_task *task) { @@ -1472,43 +1593,6 @@ out_retry: } } -/* - * 8. Refresh the credentials if rejected by the server - */ -static void -call_refresh(struct rpc_task *task) -{ - dprint_status(task); - - task->tk_action = call_refreshresult; - task->tk_status = 0; - task->tk_client->cl_stats->rpcauthrefresh++; - rpcauth_refreshcred(task); -} - -/* - * 8a. Process the results of a credential refresh - */ -static void -call_refreshresult(struct rpc_task *task) -{ - int status = task->tk_status; - - dprint_status(task); - - task->tk_status = 0; - task->tk_action = call_reserve; - if (status >= 0 && rpcauth_uptodatecred(task)) - return; - if (status == -EACCES) { - rpc_exit(task, -EACCES); - return; - } - task->tk_action = call_refresh; - if (status != -ETIMEDOUT) - rpc_delay(task, 3*HZ); -} - static __be32 * rpc_encode_header(struct rpc_task *task) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4a843b883b89..cace6049e4a5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -246,17 +246,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { - struct rpc_clnt *clnt; - if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) - return; rpc_task_set_debuginfo(task); - /* Add to global list of all tasks */ - clnt = task->tk_client; - if (clnt != NULL) { - spin_lock(&clnt->cl_lock); - list_add_tail(&task->tk_task, &clnt->cl_tasks); - spin_unlock(&clnt->cl_lock); - } + set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); } /* @@ -319,11 +310,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); - if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) { - printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n"); - return; - } - __rpc_add_wait_queue(q, task); BUG_ON(task->tk_callback != NULL); @@ -334,8 +320,8 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action) { - /* Mark the task as being activated if so needed */ - rpc_set_active(task); + /* We shouldn't ever put an inactive task to sleep */ + BUG_ON(!RPC_IS_ACTIVATED(task)); /* * Protect the queue operations. @@ -406,14 +392,6 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* - * Wake up the specified task - */ -static void rpc_wake_up_task(struct rpc_task *task) -{ - rpc_wake_up_queued_task(task->tk_waitqueue, task); -} - -/* * Wake up the next task on a priority queue. */ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) @@ -600,7 +578,15 @@ void rpc_exit_task(struct rpc_task *task) } } } -EXPORT_SYMBOL_GPL(rpc_exit_task); + +void rpc_exit(struct rpc_task *task, int status) +{ + task->tk_status = status; + task->tk_action = rpc_exit_task; + if (RPC_IS_QUEUED(task)) + rpc_wake_up_queued_task(task->tk_waitqueue, task); +} +EXPORT_SYMBOL_GPL(rpc_exit); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { @@ -690,7 +676,6 @@ static void __rpc_execute(struct rpc_task *task) dprintk("RPC: %5u got signal\n", task->tk_pid); task->tk_flags |= RPC_TASK_KILLED; rpc_exit(task, -ERESTARTSYS); - rpc_wake_up_task(task); } rpc_set_running(task); dprintk("RPC: %5u sync task resuming\n", task->tk_pid); @@ -714,8 +699,9 @@ static void __rpc_execute(struct rpc_task *task) void rpc_execute(struct rpc_task *task) { rpc_set_active(task); - rpc_set_running(task); - __rpc_execute(task); + rpc_make_runnable(task); + if (!RPC_IS_ASYNC(task)) + __rpc_execute(task); } static void rpc_async_schedule(struct work_struct *work) @@ -808,26 +794,9 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize workqueue for async tasks */ task->tk_workqueue = task_setup_data->workqueue; - task->tk_client = task_setup_data->rpc_client; - if (task->tk_client != NULL) { - kref_get(&task->tk_client->cl_kref); - if (task->tk_client->cl_softrtry) - task->tk_flags |= RPC_TASK_SOFT; - } - if (task->tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; - if (task_setup_data->rpc_message != NULL) { - task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc; - task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp; - task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp; - /* Bind the user cred */ - rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags); - if (task->tk_action == NULL) - rpc_call_start(task); - } - /* starting timestamp */ task->tk_start = ktime_get(); @@ -896,11 +865,8 @@ void rpc_put_task(struct rpc_task *task) if (task->tk_rqstp) xprt_release(task); if (task->tk_msg.rpc_cred) - rpcauth_unbindcred(task); - if (task->tk_client) { - rpc_release_client(task->tk_client); - task->tk_client = NULL; - } + put_rpccred(task->tk_msg.rpc_cred); + rpc_task_release_client(task); if (task->tk_workqueue != NULL) { INIT_WORK(&task->u.tk_work, rpc_async_release); queue_work(task->tk_workqueue, &task->u.tk_work); @@ -913,13 +879,6 @@ static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); - if (!list_empty(&task->tk_task)) { - struct rpc_clnt *clnt = task->tk_client; - /* Remove from client task list */ - spin_lock(&clnt->cl_lock); - list_del(&task->tk_task); - spin_unlock(&clnt->cl_lock); - } BUG_ON (RPC_IS_QUEUED(task)); /* Wake up anyone who is waiting for task completion */ @@ -928,35 +887,6 @@ static void rpc_release_task(struct rpc_task *task) rpc_put_task(task); } -/* - * Kill all tasks for the given client. - * XXX: kill their descendants as well? - */ -void rpc_killall_tasks(struct rpc_clnt *clnt) -{ - struct rpc_task *rovr; - - - if (list_empty(&clnt->cl_tasks)) - return; - dprintk("RPC: killing all tasks for client %p\n", clnt); - /* - * Spin lock all_tasks to prevent changes... - */ - spin_lock(&clnt->cl_lock); - list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { - if (! RPC_IS_ACTIVATED(rovr)) - continue; - if (!(rovr->tk_flags & RPC_TASK_KILLED)) { - rovr->tk_flags |= RPC_TASK_KILLED; - rpc_exit(rovr, -EIO); - rpc_wake_up_task(rovr); - } - } - spin_unlock(&clnt->cl_lock); -} -EXPORT_SYMBOL_GPL(rpc_killall_tasks); - int rpciod_up(void) { return try_module_get(THIS_MODULE) ? 0 : -EINVAL; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f438347d817b..c0d085013a2b 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -33,21 +33,27 @@ init_sunrpc(void) if (err) goto out; err = rpc_init_mempool(); - if (err) { - unregister_rpc_pipefs(); - goto out; - } + if (err) + goto out2; + err = rpcauth_init_module(); + if (err) + goto out3; #ifdef RPC_DEBUG rpc_register_sysctl(); #endif #ifdef CONFIG_PROC_FS rpc_proc_init(); #endif + cache_initialize(); cache_register(&ip_map_cache); cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ - rpcauth_init_module(); + return 0; +out3: + rpc_destroy_mempool(); +out2: + unregister_rpc_pipefs(); out: return err; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index dcd0132396ba..970fb00f388c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1032,6 +1032,8 @@ void xprt_release(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); @@ -1129,6 +1131,7 @@ static void xprt_destroy(struct kref *kref) rpc_destroy_wait_queue(&xprt->sending); rpc_destroy_wait_queue(&xprt->resend); rpc_destroy_wait_queue(&xprt->backlog); + cancel_work_sync(&xprt->task_cleanup); /* * Tear down transport state and free the rpc_xprt */ diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e5e28d1946a4..2ac3f6e8adff 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, req->rl_nchunks = nchunks; BUG_ON(nchunks == 0); + BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + && (nchunks > 3)); /* * finish off header. If write, marshal discrim and nchunks. diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 27015c6d8eb5..5f4c7b3bc711 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: - /* Add room for frmr register and invalidate WRs */ - ep->rep_attr.cap.max_send_wr *= 3; - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) - return -EINVAL; + /* Add room for frmr register and invalidate WRs. + * 1. FRMR reg WR for head + * 2. FRMR invalidate WR for head + * 3. FRMR reg WR for pagelist + * 4. FRMR invalidate WR for pagelist + * 5. FRMR reg WR for tail + * 6. FRMR invalidate WR for tail + * 7. The RDMA_SEND WR + */ + ep->rep_attr.cap.max_send_wr *= 7; + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { + cdata->max_requests = devattr.max_qp_wr / 7; + if (!cdata->max_requests) + return -EINVAL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; + } break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: @@ -1490,7 +1502,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, memset(&frmr_wr, 0, sizeof frmr_wr); frmr_wr.opcode = IB_WR_FAST_REG_MR; frmr_wr.send_flags = 0; /* unsignaled */ - frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; + frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; frmr_wr.wr.fast_reg.page_list_len = i; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7ca65c7005ea..b6309db56226 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1305,10 +1305,11 @@ static void xs_tcp_state_change(struct sock *sk) if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); - dprintk("RPC: state %x conn %d dead %d zapped %d\n", + dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n", sk->sk_state, xprt_connected(xprt), sock_flag(sk, SOCK_DEAD), - sock_flag(sk, SOCK_ZAPPED)); + sock_flag(sk, SOCK_ZAPPED), + sk->sk_shutdown); switch (sk->sk_state) { case TCP_ESTABLISHED: @@ -1779,10 +1780,25 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra { unsigned int state = transport->inet->sk_state; - if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) - return; - if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) - return; + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: ESTABLISHED/SYN_SENT " + "sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } xs_abort_connection(xprt, transport); } @@ -2577,7 +2593,8 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_bc_tcp_transport); } -static int param_set_uint_minmax(const char *val, struct kernel_param *kp, +static int param_set_uint_minmax(const char *val, + const struct kernel_param *kp, unsigned int min, unsigned int max) { unsigned long num; @@ -2592,34 +2609,37 @@ static int param_set_uint_minmax(const char *val, struct kernel_param *kp, return 0; } -static int param_set_portnr(const char *val, struct kernel_param *kp) +static int param_set_portnr(const char *val, const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, RPC_MIN_RESVPORT, RPC_MAX_RESVPORT); } -static int param_get_portnr(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} +static struct kernel_param_ops param_ops_portnr = { + .set = param_set_portnr, + .get = param_get_uint, +}; + #define param_check_portnr(name, p) \ __param_check(name, p, unsigned int); module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); -static int param_set_slot_table_size(const char *val, struct kernel_param *kp) +static int param_set_slot_table_size(const char *val, + const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, RPC_MIN_SLOT_TABLE, RPC_MAX_SLOT_TABLE); } -static int param_get_slot_table_size(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} +static struct kernel_param_ops param_ops_slot_table_size = { + .set = param_set_slot_table_size, + .get = param_get_uint, +}; + #define param_check_slot_table_size(name, p) \ __param_check(name, p, unsigned int); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4414a18c63b4..0b39b2451ea5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -692,6 +692,7 @@ static int unix_autobind(struct socket *sock) static u32 ordernum = 1; struct unix_address *addr; int err; + unsigned int retries = 0; mutex_lock(&u->readlock); @@ -717,9 +718,17 @@ retry: if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); - /* Sanity yield. It is unusual case, but yet... */ - if (!(ordernum&0xFF)) - yield(); + /* + * __unix_find_socket_byname() may take long time if many names + * are already in use. + */ + cond_resched(); + /* Give up if all names seems to be in use. */ + if (retries++ == 0xFFFFF) { + err = -ENOSPC; + kfree(addr); + goto out; + } goto retry; } addr->hash ^= sk->sk_type; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a3cca0a94346..64f2ae1fdc15 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -101,7 +101,7 @@ resume: err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set_noref(skb, dst); + skb_dst_set(skb, dst_clone(dst)); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2b3ed7ad4933..cbab6e1a8c9c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1175,9 +1175,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; - family = tmpl->encap_family; - if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(net, &tmp, remote, family); + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); if (error) goto fail; local = &tmp; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5208b12fbfb4..eb96ce52f178 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -656,15 +656,23 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) EXPORT_SYMBOL(xfrm_sad_getinfo); static int -xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, - unsigned short family) +xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, + struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -1; - afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + afinfo->init_tempsel(&x->sel, fl); + + if (family != tmpl->encap_family) { + xfrm_state_put_afinfo(afinfo); + afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + if (!afinfo) + return -1; + } + afinfo->init_temprop(x, tmpl, daddr, saddr); xfrm_state_put_afinfo(afinfo); return 0; } @@ -790,37 +798,38 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int error = 0; struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; + unsigned short encap_family = tmpl->encap_family; to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, &best, &acquire_in_progress, &error); } if (best) goto found; - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, &best, &acquire_in_progress, &error); } @@ -829,7 +838,7 @@ found: if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, - tmpl->id.proto, family)) != NULL) { + tmpl->id.proto, encap_family)) != NULL) { to_put = x0; error = -EEXIST; goto out; @@ -839,9 +848,9 @@ found: error = -ENOMEM; goto out; } - /* Initialize temporary selector matching only + /* Initialize temporary state matching only * to current session. */ - xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); @@ -856,10 +865,10 @@ found: x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); - h = xfrm_src_hash(net, daddr, saddr, family); + h = xfrm_src_hash(net, daddr, saddr, encap_family); hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ba59983aaffe..8bae6b22c846 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1801,7 +1801,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m);; + u32 mark = xfrm_mark_get(attrs, &m); x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family); @@ -2504,7 +2504,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(net, GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; |