diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 1 | ||||
-rw-r--r-- | net/core/datagram.c | 16 | ||||
-rw-r--r-- | net/core/dev.c | 41 | ||||
-rw-r--r-- | net/core/dst.c | 2 | ||||
-rw-r--r-- | net/core/dv.c | 546 | ||||
-rw-r--r-- | net/core/fib_rules.c | 71 | ||||
-rw-r--r-- | net/core/filter.c | 6 | ||||
-rw-r--r-- | net/core/flow.c | 6 | ||||
-rw-r--r-- | net/core/iovec.c | 4 | ||||
-rw-r--r-- | net/core/kmap_skb.h | 19 | ||||
-rw-r--r-- | net/core/link_watch.c | 13 | ||||
-rw-r--r-- | net/core/neighbour.c | 37 | ||||
-rw-r--r-- | net/core/netpoll.c | 348 | ||||
-rw-r--r-- | net/core/pktgen.c | 224 | ||||
-rw-r--r-- | net/core/request_sock.c | 35 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 60 | ||||
-rw-r--r-- | net/core/skbuff.c | 49 | ||||
-rw-r--r-- | net/core/sock.c | 39 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 14 | ||||
-rw-r--r-- | net/core/utils.c | 10 | ||||
-rw-r--r-- | net/core/wireless.c | 4 |
21 files changed, 506 insertions, 1039 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 119568077dab..73272d506e93 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ obj-$(CONFIG_XFRM) += flow.o obj-$(CONFIG_SYSFS) += net-sysfs.o -obj-$(CONFIG_NET_DIVERT) += dv.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o diff --git a/net/core/datagram.c b/net/core/datagram.c index f558c61aecc7..797fdd4352ce 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -321,7 +321,7 @@ fault: static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 __user *to, int len, - unsigned int *csump) + __wsum *csump) { int start = skb_headlen(skb); int pos = 0; @@ -350,7 +350,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; int err = 0; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -386,7 +386,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, end = start + list->len; if ((copy = end - offset) > 0) { - unsigned int csum2 = 0; + __wsum csum2 = 0; if (copy > len) copy = len; if (skb_copy_and_csum_datagram(list, @@ -411,11 +411,11 @@ fault: return -EFAULT; } -unsigned int __skb_checksum_complete(struct sk_buff *skb) +__sum16 __skb_checksum_complete(struct sk_buff *skb) { - unsigned int sum; + __sum16 sum; - sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); + sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); @@ -441,7 +441,7 @@ EXPORT_SYMBOL(__skb_checksum_complete); int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov) { - unsigned int csum; + __wsum csum; int chunk = skb->len - hlen; /* Skip filled elements. @@ -460,7 +460,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, chunk, &csum)) goto fault; - if ((unsigned short)csum_fold(csum)) + if (csum_fold(csum)) goto csum_error; if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); diff --git a/net/core/dev.c b/net/core/dev.c index 81c426adcd1e..e660cb57e42a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,7 +98,6 @@ #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/if_bridge.h> -#include <linux/divert.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -1170,7 +1169,7 @@ EXPORT_SYMBOL(netif_device_attach); */ int skb_checksum_help(struct sk_buff *skb) { - unsigned int csum; + __wsum csum; int ret = 0, offset = skb->h.raw - skb->data; if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -1192,9 +1191,9 @@ int skb_checksum_help(struct sk_buff *skb) offset = skb->tail - skb->h.raw; BUG_ON(offset <= 0); - BUG_ON(skb->csum + 2 > offset); + BUG_ON(skb->csum_offset + 2 > offset); - *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); + *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; @@ -1216,7 +1215,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; - int type = skb->protocol; + __be16 type = skb->protocol; int err; BUG_ON(skb_shinfo(skb)->frag_list); @@ -1767,7 +1766,7 @@ int netif_receive_skb(struct sk_buff *skb) struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; int ret = NET_RX_DROP; - unsigned short type; + __be16 type; /* if we've gotten here through NAPI, check netpoll */ if (skb->dev->poll && netpoll_rx(skb)) @@ -1827,8 +1826,6 @@ int netif_receive_skb(struct sk_buff *skb) ncls: #endif - handle_diverter(skb); - if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; @@ -2898,10 +2895,6 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->ingress_lock); #endif - ret = alloc_divert_blk(dev); - if (ret) - goto out; - dev->iflink = -1; /* Init, if this function is available */ @@ -2910,13 +2903,13 @@ int register_netdevice(struct net_device *dev) if (ret) { if (ret > 0) ret = -EIO; - goto out_err; + goto out; } } if (!dev_valid_name(dev->name)) { ret = -EINVAL; - goto out_err; + goto out; } dev->ifindex = dev_new_index(); @@ -2930,7 +2923,7 @@ int register_netdevice(struct net_device *dev) = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(d->name, dev->name, IFNAMSIZ)) { ret = -EEXIST; - goto out_err; + goto out; } } @@ -2974,7 +2967,7 @@ int register_netdevice(struct net_device *dev) ret = netdev_register_sysfs(dev); if (ret) - goto out_err; + goto out; dev->reg_state = NETREG_REGISTERED; /* @@ -3001,9 +2994,6 @@ int register_netdevice(struct net_device *dev) out: return ret; -out_err: - free_divert_blk(dev); - goto out; } /** @@ -3035,15 +3025,6 @@ int register_netdev(struct net_device *dev) goto out; } - /* - * Back compatibility hook. Kill this one in 2.5 - */ - if (dev->name[0] == 0 || dev->name[0] == ' ') { - err = dev_alloc_name(dev, "eth%d"); - if (err < 0) - goto out; - } - err = register_netdevice(dev); out: rtnl_unlock(); @@ -3329,8 +3310,6 @@ int unregister_netdevice(struct net_device *dev) /* Notifier chain MUST detach us from master device. */ BUG_TRAP(!dev->master); - free_divert_blk(dev); - /* Finish processing unregister after unlock */ net_set_todo(dev); @@ -3361,7 +3340,6 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); -#ifdef CONFIG_HOTPLUG_CPU static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -3405,7 +3383,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_NET_DMA /** diff --git a/net/core/dst.c b/net/core/dst.c index 1a5e49da0e77..836ec6606925 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -125,7 +125,7 @@ void * dst_alloc(struct dst_ops * ops) if (ops->gc()) return NULL; } - dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC); + dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; memset(dst, 0, ops->entry_size); diff --git a/net/core/dv.c b/net/core/dv.c deleted file mode 100644 index 29ee77f15932..000000000000 --- a/net/core/dv.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Generic frame diversion - * - * Authors: - * Benoit LOCHER: initial integration within the kernel with support for ethernet - * Dave Miller: improvement on the code (correctness, performance and source files) - * - */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <net/dst.h> -#include <net/arp.h> -#include <net/sock.h> -#include <net/ipv6.h> -#include <net/ip.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/checksum.h> -#include <linux/divert.h> -#include <linux/sockios.h> - -const char sysctl_divert_version[32]="0.46"; /* Current version */ - -static int __init dv_init(void) -{ - return 0; -} -module_init(dv_init); - -/* - * Allocate a divert_blk for a device. This must be an ethernet nic. - */ -int alloc_divert_blk(struct net_device *dev) -{ - int alloc_size = (sizeof(struct divert_blk) + 3) & ~3; - - dev->divert = NULL; - if (dev->type == ARPHRD_ETHER) { - dev->divert = kzalloc(alloc_size, GFP_KERNEL); - if (dev->divert == NULL) { - printk(KERN_INFO "divert: unable to allocate divert_blk for %s\n", - dev->name); - return -ENOMEM; - } - dev_hold(dev); - } - - return 0; -} - -/* - * Free a divert_blk allocated by the above function, if it was - * allocated on that device. - */ -void free_divert_blk(struct net_device *dev) -{ - if (dev->divert) { - kfree(dev->divert); - dev->divert=NULL; - dev_put(dev); - } -} - -/* - * Adds a tcp/udp (source or dest) port to an array - */ -static int add_port(u16 ports[], u16 port) -{ - int i; - - if (port == 0) - return -EINVAL; - - /* Storing directly in network format for performance, - * thanks Dave :) - */ - port = htons(port); - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == port) - return -EALREADY; - } - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == 0) { - ports[i] = port; - return 0; - } - } - - return -ENOBUFS; -} - -/* - * Removes a port from an array tcp/udp (source or dest) - */ -static int remove_port(u16 ports[], u16 port) -{ - int i; - - if (port == 0) - return -EINVAL; - - /* Storing directly in network format for performance, - * thanks Dave ! - */ - port = htons(port); - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == port) { - ports[i] = 0; - return 0; - } - } - - return -EINVAL; -} - -/* Some basic sanity checks on the arguments passed to divert_ioctl() */ -static int check_args(struct divert_cf *div_cf, struct net_device **dev) -{ - char devname[32]; - int ret; - - if (dev == NULL) - return -EFAULT; - - /* GETVERSION: all other args are unused */ - if (div_cf->cmd == DIVCMD_GETVERSION) - return 0; - - /* Network device index should reasonably be between 0 and 1000 :) */ - if (div_cf->dev_index < 0 || div_cf->dev_index > 1000) - return -EINVAL; - - /* Let's try to find the ifname */ - sprintf(devname, "eth%d", div_cf->dev_index); - *dev = dev_get_by_name(devname); - - /* dev should NOT be null */ - if (*dev == NULL) - return -EINVAL; - - ret = 0; - - /* user issuing the ioctl must be a super one :) */ - if (!capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto out; - } - - /* Device must have a divert_blk member NOT null */ - if ((*dev)->divert == NULL) - ret = -EINVAL; -out: - dev_put(*dev); - return ret; -} - -/* - * control function of the diverter - */ -#if 0 -#define DVDBG(a) \ - printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a)) -#else -#define DVDBG(a) -#endif - -int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg) -{ - struct divert_cf div_cf; - struct divert_blk *div_blk; - struct net_device *dev; - int ret; - - switch (cmd) { - case SIOCGIFDIVERT: - DVDBG("SIOCGIFDIVERT, copy_from_user"); - if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf))) - return -EFAULT; - DVDBG("before check_args"); - ret = check_args(&div_cf, &dev); - if (ret) - return ret; - DVDBG("after checkargs"); - div_blk = dev->divert; - - DVDBG("befre switch()"); - switch (div_cf.cmd) { - case DIVCMD_GETSTATUS: - /* Now, just give the user the raw divert block - * for him to play with :) - */ - if (copy_to_user(div_cf.arg1.ptr, dev->divert, - sizeof(struct divert_blk))) - return -EFAULT; - break; - - case DIVCMD_GETVERSION: - DVDBG("GETVERSION: checking ptr"); - if (div_cf.arg1.ptr == NULL) - return -EINVAL; - DVDBG("GETVERSION: copying data to userland"); - if (copy_to_user(div_cf.arg1.ptr, - sysctl_divert_version, 32)) - return -EFAULT; - DVDBG("GETVERSION: data copied"); - break; - - default: - return -EINVAL; - } - - break; - - case SIOCSIFDIVERT: - if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf))) - return -EFAULT; - - ret = check_args(&div_cf, &dev); - if (ret) - return ret; - - div_blk = dev->divert; - - switch(div_cf.cmd) { - case DIVCMD_RESET: - div_blk->divert = 0; - div_blk->protos = DIVERT_PROTO_NONE; - memset(div_blk->tcp_dst, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->tcp_src, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->udp_dst, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->udp_src, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - return 0; - - case DIVCMD_DIVERT: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->divert) - return -EALREADY; - div_blk->divert = 1; - break; - - case DIVARG1_DISABLE: - if (!div_blk->divert) - return -EALREADY; - div_blk->divert = 0; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_IP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_IP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_IP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_IP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_IP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_TCP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_TCP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_TCP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_TCP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCPDST: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->tcp_dst, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->tcp_dst, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCPSRC: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->tcp_src, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->tcp_src, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_UDP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_UDP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_UDP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_UDP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDPDST: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->udp_dst, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->udp_dst, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDPSRC: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->udp_src, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->udp_src, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_ICMP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_ICMP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_ICMP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_ICMP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_ICMP; - break; - - default: - return -EINVAL; - } - - break; - - default: - return -EINVAL; - } - - break; - - default: - return -EINVAL; - } - - return 0; -} - - -/* - * Check if packet should have its dest mac address set to the box itself - * for diversion - */ - -#define ETH_DIVERT_FRAME(skb) \ - memcpy(eth_hdr(skb), skb->dev->dev_addr, ETH_ALEN); \ - skb->pkt_type=PACKET_HOST - -void divert_frame(struct sk_buff *skb) -{ - struct ethhdr *eth = eth_hdr(skb); - struct iphdr *iph; - struct tcphdr *tcph; - struct udphdr *udph; - struct divert_blk *divert = skb->dev->divert; - int i, src, dst; - unsigned char *skb_data_end = skb->data + skb->len; - - /* Packet is already aimed at us, return */ - if (!compare_ether_addr(eth->h_dest, skb->dev->dev_addr)) - return; - - /* proto is not IP, do nothing */ - if (eth->h_proto != htons(ETH_P_IP)) - return; - - /* Divert all IP frames ? */ - if (divert->protos & DIVERT_PROTO_IP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP frame (thanks Dave) */ - iph = (struct iphdr *) skb->data; - if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) { - printk(KERN_INFO "divert: malformed IP packet !\n"); - return; - } - - switch (iph->protocol) { - /* Divert all ICMP frames ? */ - case IPPROTO_ICMP: - if (divert->protos & DIVERT_PROTO_ICMP) { - ETH_DIVERT_FRAME(skb); - return; - } - break; - - /* Divert all TCP frames ? */ - case IPPROTO_TCP: - if (divert->protos & DIVERT_PROTO_TCP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP - * frame (thanx Dave) - */ - tcph = (struct tcphdr *) - (((unsigned char *)iph) + (iph->ihl<<2)); - if (((unsigned char *)(tcph+1)) >= skb_data_end) { - printk(KERN_INFO "divert: malformed TCP packet !\n"); - return; - } - - /* Divert some tcp dst/src ports only ?*/ - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - dst = divert->tcp_dst[i]; - src = divert->tcp_src[i]; - if ((dst && dst == tcph->dest) || - (src && src == tcph->source)) { - ETH_DIVERT_FRAME(skb); - return; - } - } - break; - - /* Divert all UDP frames ? */ - case IPPROTO_UDP: - if (divert->protos & DIVERT_PROTO_UDP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP - * packet (thanks Dave) - */ - udph = (struct udphdr *) - (((unsigned char *)iph) + (iph->ihl<<2)); - if (((unsigned char *)(udph+1)) >= skb_data_end) { - printk(KERN_INFO - "divert: malformed UDP packet !\n"); - return; - } - - /* Divert some udp dst/src ports only ? */ - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - dst = divert->udp_dst[i]; - src = divert->udp_src[i]; - if ((dst && dst == udph->dest) || - (src && src == udph->source)) { - ETH_DIVERT_FRAME(skb); - return; - } - } - break; - } -} diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 6b0e63cacd93..1df6cd4568d3 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -107,6 +107,22 @@ out: EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, + struct flowi *fl, int flags) +{ + int ret = 0; + + if (rule->ifindex && (rule->ifindex != fl->iif)) + goto out; + + if ((rule->mark ^ fl->mark) & rule->mark_mask) + goto out; + + ret = ops->match(rule, fl, flags); +out: + return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; +} + int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { @@ -116,10 +132,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, rcu_read_lock(); list_for_each_entry_rcu(rule, ops->rules_list, list) { - if (rule->ifindex && (rule->ifindex != fl->iif)) - continue; - - if (!ops->match(rule, fl, flags)) + if (!fib_rule_match(rule, ops, fl, flags)) continue; err = ops->action(rule, fl, flags, arg); @@ -179,6 +192,18 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->ifindex = dev->ifindex; } + if (tb[FRA_FWMARK]) { + rule->mark = nla_get_u32(tb[FRA_FWMARK]); + if (rule->mark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + rule->mark_mask = 0xFFFFFFFF; + } + + if (tb[FRA_FWMASK]) + rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); + rule->action = frh->action; rule->flags = frh->flags; rule->table = frh_get_table(frh, tb); @@ -250,6 +275,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) nla_strcmp(tb[FRA_IFNAME], rule->ifname)) continue; + if (tb[FRA_FWMARK] && + (rule->mark != nla_get_u32(tb[FRA_FWMARK]))) + continue; + + if (tb[FRA_FWMASK] && + (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -273,6 +306,22 @@ errout: return err; } +static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + struct fib_rule *rule) +{ + size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) + + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */ + + nla_total_size(4) /* FRA_PRIORITY */ + + nla_total_size(4) /* FRA_TABLE */ + + nla_total_size(4) /* FRA_FWMARK */ + + nla_total_size(4); /* FRA_FWMASK */ + + if (ops->nlmsg_payload) + payload += ops->nlmsg_payload(rule); + + return payload; +} + static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, u32 pid, u32 seq, int type, int flags, struct fib_rules_ops *ops) @@ -298,6 +347,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->pref) NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); + if (rule->mark) + NLA_PUT_U32(skb, FRA_FWMARK, rule->mark); + + if (rule->mark_mask || rule->mark) + NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask); + if (ops->fill(rule, skb, nlh, frh) < 0) goto nla_put_failure; @@ -345,15 +400,13 @@ static void notify_rule_change(int event, struct fib_rule *rule, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL); if (skb == NULL) goto errout; err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in fib_rule_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); errout: diff --git a/net/core/filter.c b/net/core/filter.c index 6732782a5a40..0df843b667f4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -178,7 +178,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int load_w: ptr = load_pointer(skb, k, 4, &tmp); if (ptr != NULL) { - A = ntohl(get_unaligned((u32 *)ptr)); + A = ntohl(get_unaligned((__be32 *)ptr)); continue; } break; @@ -187,7 +187,7 @@ load_w: load_h: ptr = load_pointer(skb, k, 2, &tmp); if (ptr != NULL) { - A = ntohs(get_unaligned((u16 *)ptr)); + A = ntohs(get_unaligned((__be16 *)ptr)); continue; } break; @@ -261,7 +261,7 @@ load_b: */ switch (k-SKF_AD_OFF) { case SKF_AD_PROTOCOL: - A = htons(skb->protocol); + A = ntohs(skb->protocol); continue; case SKF_AD_PKTTYPE: A = skb->pkt_type; diff --git a/net/core/flow.c b/net/core/flow.c index b16d31ae5e54..d137f971f97d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -44,7 +44,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; #define flow_table(cpu) (per_cpu(flow_tables, cpu)) -static kmem_cache_t *flow_cachep __read_mostly; +static struct kmem_cache *flow_cachep __read_mostly; static int flow_lwm, flow_hwm; @@ -211,7 +211,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, if (flow_count(cpu) > flow_hwm) flow_cache_shrink(cpu); - fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); + fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { fle->next = *head; *head = fle; @@ -340,7 +340,6 @@ static void __devinit flow_cache_cpu_prepare(int cpu) tasklet_init(tasklet, flow_cache_flush_tasklet, 0); } -#ifdef CONFIG_HOTPLUG_CPU static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -349,7 +348,6 @@ static int flow_cache_cpu(struct notifier_block *nfb, __flow_cache_shrink((unsigned long)hcpu, 0); return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ static int __init flow_cache_init(void) { diff --git a/net/core/iovec.c b/net/core/iovec.c index 65e4b56fbc77..04b249c40b5b 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -158,9 +158,9 @@ int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, * call to this function will be unaligned also. */ int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, unsigned int len, int *csump) + int offset, unsigned int len, __wsum *csump) { - int csum = *csump; + __wsum csum = *csump; int partial_cnt = 0, err = 0; /* Skip over the finished iovecs */ diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h new file mode 100644 index 000000000000..283c2b993fb8 --- /dev/null +++ b/net/core/kmap_skb.h @@ -0,0 +1,19 @@ +#include <linux/highmem.h> + +static inline void *kmap_skb_frag(const skb_frag_t *frag) +{ +#ifdef CONFIG_HIGHMEM + BUG_ON(in_irq()); + + local_bh_disable(); +#endif + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); +} + +static inline void kunmap_skb_frag(void *vaddr) +{ + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); +#ifdef CONFIG_HIGHMEM + local_bh_enable(); +#endif +} diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 4b36114744c5..549a2ce951b0 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -34,8 +34,8 @@ enum lw_bits { static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; -static void linkwatch_event(void *dummy); -static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL); +static void linkwatch_event(struct work_struct *dummy); +static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); @@ -127,7 +127,7 @@ void linkwatch_run_queue(void) } -static void linkwatch_event(void *dummy) +static void linkwatch_event(struct work_struct *dummy) { /* Limit the number of linkwatch events to one * per second so that a runaway driver does not @@ -171,10 +171,9 @@ void linkwatch_fire_event(struct net_device *dev) unsigned long delay = linkwatch_nextevent - jiffies; /* If we wrap around we'll delay it by at most HZ. */ - if (!delay || delay > HZ) - schedule_work(&linkwatch_work); - else - schedule_delayed_work(&linkwatch_work, delay); + if (delay > HZ) + delay = 0; + schedule_delayed_work(&linkwatch_work, delay); } } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b4b478353b27..e7300b6b4079 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -251,7 +251,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) goto out_entries; } - n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); + n = kmem_cache_alloc(tbl->kmem_cachep, GFP_ATOMIC); if (!n) goto out_entries; @@ -577,9 +577,10 @@ void neigh_destroy(struct neighbour *neigh) while ((hh = neigh->hh) != NULL) { neigh->hh = hh->hh_next; hh->hh_next = NULL; - write_lock_bh(&hh->hh_lock); + + write_seqlock_bh(&hh->hh_lock); hh->hh_output = neigh_blackhole; - write_unlock_bh(&hh->hh_lock); + write_sequnlock_bh(&hh->hh_lock); if (atomic_dec_and_test(&hh->hh_refcnt)) kfree(hh); } @@ -897,9 +898,9 @@ static void neigh_update_hhs(struct neighbour *neigh) if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { - write_lock_bh(&hh->hh_lock); + write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); - write_unlock_bh(&hh->hh_lock); + write_sequnlock_bh(&hh->hh_lock); } } } @@ -1089,7 +1090,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, break; if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { - rwlock_init(&hh->hh_lock); + seqlock_init(&hh->hh_lock); hh->hh_type = protocol; atomic_set(&hh->hh_refcnt, 0); hh->hh_next = NULL; @@ -1266,10 +1267,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL); + struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { - memcpy(p, &tbl->parms, sizeof(*p)); p->tbl = tbl; atomic_set(&p->refcnt, 1); INIT_RCU_HEAD(&p->rcu_head); @@ -2410,20 +2410,27 @@ static struct file_operations neigh_stat_seq_fops = { #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_ARPD +static inline size_t neigh_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(struct nda_cacheinfo)) + + nla_total_size(4); /* NDA_PROBES */ +} + static void __neigh_notify(struct neighbour *n, int type, int flags) { struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = neigh_fill_info(skb, n, 0, 0, type, flags); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in neigh_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: @@ -2618,14 +2625,14 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, proc_handler *handler, ctl_handler *strategy) { - struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); + struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template, + sizeof(*t), GFP_KERNEL); const char *dev_name_source = NULL; char *dev_name = NULL; int err = 0; if (!t) return -ENOBUFS; - memcpy(t, &neigh_sysctl_template, sizeof(*t)); t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6589adb14cbf..823215d8e90f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -34,18 +34,12 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) -#define MAX_RETRIES 20000 -static DEFINE_SPINLOCK(skb_list_lock); -static int nr_skbs; -static struct sk_buff *skbs; - -static DEFINE_SPINLOCK(queue_lock); -static int queue_depth; -static struct sk_buff *queue_head, *queue_tail; +static struct sk_buff_head skb_pool; static atomic_t trapped; +#define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -56,54 +50,41 @@ static atomic_t trapped; static void zap_completion_queue(void); static void arp_reply(struct sk_buff *skb); -static void queue_process(void *p) +static void queue_process(struct work_struct *work) { - unsigned long flags; + struct netpoll_info *npinfo = + container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; - - while (queue_head) { - spin_lock_irqsave(&queue_lock, flags); - - skb = queue_head; - queue_head = skb->next; - if (skb == queue_tail) - queue_head = NULL; - - queue_depth--; - - spin_unlock_irqrestore(&queue_lock, flags); - - dev_queue_xmit(skb); - } -} - -static DECLARE_WORK(send_queue, queue_process, NULL); - -void netpoll_queue(struct sk_buff *skb) -{ unsigned long flags; - if (queue_depth == MAX_QUEUE_DEPTH) { - __kfree_skb(skb); - return; - } + while ((skb = skb_dequeue(&npinfo->txq))) { + struct net_device *dev = skb->dev; - spin_lock_irqsave(&queue_lock, flags); - if (!queue_head) - queue_head = skb; - else - queue_tail->next = skb; - queue_tail = skb; - queue_depth++; - spin_unlock_irqrestore(&queue_lock, flags); + if (!netif_device_present(dev) || !netif_running(dev)) { + __kfree_skb(skb); + continue; + } - schedule_work(&send_queue); + local_irq_save(flags); + netif_tx_lock(dev); + if (netif_queue_stopped(dev) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + skb_queue_head(&npinfo->txq, skb); + netif_tx_unlock(dev); + local_irq_restore(flags); + + schedule_delayed_work(&npinfo->tx_work, HZ/10); + return; + } + netif_tx_unlock(dev); + local_irq_restore(flags); + } } -static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, u32 saddr, u32 daddr) +static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, + unsigned short ulen, __be32 saddr, __be32 daddr) { - unsigned int psum; + __wsum psum; if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY) return 0; @@ -111,7 +92,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); if (skb->ip_summed == CHECKSUM_COMPLETE && - !(u16)csum_fold(csum_add(psum, skb->csum))) + !csum_fold(csum_add(psum, skb->csum))) return 0; skb->csum = psum; @@ -167,12 +148,11 @@ static void service_arp_queue(struct netpoll_info *npi) arp_reply(skb); skb = skb_dequeue(&npi->arp_tx); } - return; } void netpoll_poll(struct netpoll *np) { - if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) + if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) return; /* Process pending work on NIC */ @@ -190,17 +170,15 @@ static void refill_skbs(void) struct sk_buff *skb; unsigned long flags; - spin_lock_irqsave(&skb_list_lock, flags); - while (nr_skbs < MAX_SKBS) { + spin_lock_irqsave(&skb_pool.lock, flags); + while (skb_pool.qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - skb->next = skbs; - skbs = skb; - nr_skbs++; + __skb_queue_tail(&skb_pool, skb); } - spin_unlock_irqrestore(&skb_list_lock, flags); + spin_unlock_irqrestore(&skb_pool.lock, flags); } static void zap_completion_queue(void) @@ -219,7 +197,7 @@ static void zap_completion_queue(void) while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; - if(skb->destructor) + if (skb->destructor) dev_kfree_skb_any(skb); /* put this one back */ else __kfree_skb(skb); @@ -229,38 +207,25 @@ static void zap_completion_queue(void) put_cpu_var(softnet_data); } -static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve) +static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { - int once = 1, count = 0; - unsigned long flags; - struct sk_buff *skb = NULL; + int count = 0; + struct sk_buff *skb; zap_completion_queue(); + refill_skbs(); repeat: - if (nr_skbs < MAX_SKBS) - refill_skbs(); skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) + skb = skb_dequeue(&skb_pool); if (!skb) { - spin_lock_irqsave(&skb_list_lock, flags); - skb = skbs; - if (skb) { - skbs = skb->next; - skb->next = NULL; - nr_skbs--; - } - spin_unlock_irqrestore(&skb_list_lock, flags); - } - - if(!skb) { - count++; - if (once && (count == 1000000)) { - printk("out of netpoll skbs!\n"); - once = 0; + if (++count < 10) { + netpoll_poll(np); + goto repeat; } - netpoll_poll(np); - goto repeat; + return NULL; } atomic_set(&skb->users, 1); @@ -270,50 +235,46 @@ repeat: static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { - int status; - struct netpoll_info *npinfo; + int status = NETDEV_TX_BUSY; + unsigned long tries; + struct net_device *dev = np->dev; + struct netpoll_info *npinfo = np->dev->npinfo; - if (!np || !np->dev || !netif_running(np->dev)) { - __kfree_skb(skb); - return; - } + if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { + __kfree_skb(skb); + return; + } - npinfo = np->dev->npinfo; + /* don't get messages out of order, and no recursion */ + if (skb_queue_len(&npinfo->txq) == 0 && + npinfo->poll_owner != smp_processor_id()) { + unsigned long flags; - /* avoid recursion */ - if (npinfo->poll_owner == smp_processor_id() || - np->dev->xmit_lock_owner == smp_processor_id()) { - if (np->drop) - np->drop(skb); - else - __kfree_skb(skb); - return; - } - - do { - npinfo->tries--; - netif_tx_lock(np->dev); + local_irq_save(flags); + if (netif_tx_trylock(dev)) { + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { + if (!netif_queue_stopped(dev)) + status = dev->hard_start_xmit(skb, dev); - /* - * network drivers do not expect to be called if the queue is - * stopped. - */ - status = NETDEV_TX_BUSY; - if (!netif_queue_stopped(np->dev)) - status = np->dev->hard_start_xmit(skb, np->dev); + if (status == NETDEV_TX_OK) + break; - netif_tx_unlock(np->dev); + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); - /* success */ - if(!status) { - npinfo->tries = MAX_RETRIES; /* reset */ - return; + udelay(USEC_PER_POLL); + } + netif_tx_unlock(dev); } + local_irq_restore(flags); + } - /* transmit busy */ - netpoll_poll(np); - udelay(50); - } while (npinfo->tries > 0); + if (status != NETDEV_TX_OK) { + skb_queue_tail(&npinfo->txq, skb); + schedule_delayed_work(&npinfo->tx_work,0); + } } void netpoll_send_udp(struct netpoll *np, const char *msg, int len) @@ -345,7 +306,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udp_len, IPPROTO_UDP, csum_partial((unsigned char *)udph, udp_len, 0)); if (udph->check == 0) - udph->check = -1; + udph->check = CSUM_MANGLED_0; skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); @@ -379,7 +340,8 @@ static void arp_reply(struct sk_buff *skb) struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; - u32 sip, tip; + __be32 sip, tip; + unsigned char *sha; struct sk_buff *send_skb; struct netpoll *np = NULL; @@ -406,9 +368,14 @@ static void arp_reply(struct sk_buff *skb) arp->ar_op != htons(ARPOP_REQUEST)) return; - arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len; + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; memcpy(&sip, arp_ptr, 4); - arp_ptr += 4 + skb->dev->addr_len; + arp_ptr += 4; + /* if we actually cared about dst hw addr, it would get copied here */ + arp_ptr += skb->dev->addr_len; memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? */ @@ -431,8 +398,8 @@ static void arp_reply(struct sk_buff *skb) if (np->dev->hard_header && np->dev->hard_header(send_skb, skb->dev, ptype, - np->remote_mac, np->local_mac, - send_skb->len) < 0) { + sha, np->local_mac, + send_skb->len) < 0) { kfree_skb(send_skb); return; } @@ -455,7 +422,7 @@ static void arp_reply(struct sk_buff *skb) arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &tip, 4); arp_ptr += 4; - memcpy(arp_ptr, np->remote_mac, np->dev->addr_len); + memcpy(arp_ptr, sha, np->dev->addr_len); arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &sip, 4); @@ -470,7 +437,6 @@ int __netpoll_rx(struct sk_buff *skb) struct netpoll_info *npi = skb->dev->npinfo; struct netpoll *np = npi->rx_np; - if (!np) goto out; if (skb->dev->type != ARPHRD_ETHER) @@ -543,47 +509,47 @@ int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; - if(*cur != '@') { + if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; - *delim=0; - np->local_port=simple_strtol(cur, NULL, 10); - cur=delim; + *delim = 0; + np->local_port = simple_strtol(cur, NULL, 10); + cur = delim; } cur++; printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); - if(*cur != '/') { + if (*cur != '/') { if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; - *delim=0; - np->local_ip=ntohl(in_aton(cur)); - cur=delim; + *delim = 0; + np->local_ip = ntohl(in_aton(cur)); + cur = delim; printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", np->name, HIPQUAD(np->local_ip)); } cur++; - if ( *cur != ',') { + if (*cur != ',') { /* parse out dev name */ if ((delim = strchr(cur, ',')) == NULL) goto parse_failed; - *delim=0; + *delim = 0; strlcpy(np->dev_name, cur, sizeof(np->dev_name)); - cur=delim; + cur = delim; } cur++; printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); - if ( *cur != '@' ) { + if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; - *delim=0; - np->remote_port=simple_strtol(cur, NULL, 10); - cur=delim; + *delim = 0; + np->remote_port = simple_strtol(cur, NULL, 10); + cur = delim; } cur++; printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); @@ -591,42 +557,41 @@ int netpoll_parse_options(struct netpoll *np, char *opt) /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; - *delim=0; - np->remote_ip=ntohl(in_aton(cur)); - cur=delim+1; + *delim = 0; + np->remote_ip = ntohl(in_aton(cur)); + cur = delim + 1; printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); + np->name, HIPQUAD(np->remote_ip)); - if( *cur != 0 ) - { + if (*cur != 0) { /* MAC address */ if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[0]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[0] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[1]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[1] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[2]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[2] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[3]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[3] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[4]=simple_strtol(cur, NULL, 16); - cur=delim+1; - np->remote_mac[5]=simple_strtol(cur, NULL, 16); + *delim = 0; + np->remote_mac[4] = simple_strtol(cur, NULL, 16); + cur = delim + 1; + np->remote_mac[5] = simple_strtol(cur, NULL, 16); } printk(KERN_INFO "%s: remote ethernet address " @@ -653,34 +618,44 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; struct netpoll_info *npinfo; unsigned long flags; + int err; if (np->dev_name) ndev = dev_get_by_name(np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); - return -1; + return -ENODEV; } np->dev = ndev; if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); - if (!npinfo) + if (!npinfo) { + err = -ENOMEM; goto release; + } npinfo->rx_flags = 0; npinfo->rx_np = NULL; spin_lock_init(&npinfo->poll_lock); npinfo->poll_owner = -1; - npinfo->tries = MAX_RETRIES; + spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); - } else + skb_queue_head_init(&npinfo->txq); + INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); + + atomic_set(&npinfo->refcnt, 1); + } else { npinfo = ndev->npinfo; + atomic_inc(&npinfo->refcnt); + } if (!ndev->poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); + err = -ENOTSUPP; goto release; } @@ -691,13 +666,14 @@ int netpoll_setup(struct netpoll *np) np->name, np->dev_name); rtnl_lock(); - if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) { + err = dev_open(ndev); + rtnl_unlock(); + + if (err) { printk(KERN_ERR "%s: failed to open %s\n", - np->name, np->dev_name); - rtnl_unlock(); + np->name, ndev->name); goto release; } - rtnl_unlock(); atleast = jiffies + HZ/10; atmost = jiffies + 4*HZ; @@ -735,6 +711,7 @@ int netpoll_setup(struct netpoll *np) rcu_read_unlock(); printk(KERN_ERR "%s: no IP address for %s, aborting\n", np->name, np->dev_name); + err = -EDESTADDRREQ; goto release; } @@ -767,9 +744,16 @@ int netpoll_setup(struct netpoll *np) kfree(npinfo); np->dev = NULL; dev_put(ndev); - return -1; + return err; } +static int __init netpoll_init(void) +{ + skb_queue_head_init(&skb_pool); + return 0; +} +core_initcall(netpoll_init); + void netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; @@ -777,12 +761,25 @@ void netpoll_cleanup(struct netpoll *np) if (np->dev) { npinfo = np->dev->npinfo; - if (npinfo && npinfo->rx_np == np) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_np = NULL; - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); + if (npinfo) { + if (npinfo->rx_np == np) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + + np->dev->npinfo = NULL; + if (atomic_dec_and_test(&npinfo->refcnt)) { + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + cancel_rearming_delayed_work(&npinfo->tx_work); + flush_scheduled_work(); + + kfree(npinfo); + } } + dev_put(np->dev); } @@ -809,4 +806,3 @@ EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); EXPORT_SYMBOL(netpoll_send_udp); EXPORT_SYMBOL(netpoll_poll); -EXPORT_SYMBOL(netpoll_queue); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 733d86d0a4fb..04d4b93c68eb 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -148,6 +148,7 @@ #include <linux/seq_file.h> #include <linux/wait.h> #include <linux/etherdevice.h> +#include <linux/kthread.h> #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> @@ -207,7 +208,7 @@ static struct proc_dir_entry *pg_proc_dir = NULL; #define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4) struct flow_state { - __u32 cur_daddr; + __be32 cur_daddr; int count; }; @@ -282,10 +283,10 @@ struct pktgen_dev { /* If we're doing ranges, random or incremental, then this * defines the min/max for those ranges. */ - __u32 saddr_min; /* inclusive, source IP address */ - __u32 saddr_max; /* exclusive, source IP address */ - __u32 daddr_min; /* inclusive, dest IP address */ - __u32 daddr_max; /* exclusive, dest IP address */ + __be32 saddr_min; /* inclusive, source IP address */ + __be32 saddr_max; /* exclusive, source IP address */ + __be32 daddr_min; /* inclusive, dest IP address */ + __be32 daddr_max; /* exclusive, dest IP address */ __u16 udp_src_min; /* inclusive, source UDP port */ __u16 udp_src_max; /* exclusive, source UDP port */ @@ -317,8 +318,8 @@ struct pktgen_dev { __u32 cur_dst_mac_offset; __u32 cur_src_mac_offset; - __u32 cur_saddr; - __u32 cur_daddr; + __be32 cur_saddr; + __be32 cur_daddr; __u16 cur_udp_dst; __u16 cur_udp_src; __u32 cur_pkt_size; @@ -350,18 +351,17 @@ struct pktgen_dev { }; struct pktgen_hdr { - __u32 pgh_magic; - __u32 seq_num; - __u32 tv_sec; - __u32 tv_usec; + __be32 pgh_magic; + __be32 seq_num; + __be32 tv_sec; + __be32 tv_usec; }; struct pktgen_thread { spinlock_t if_lock; struct list_head if_list; /* All device here */ struct list_head th_list; - int removed; - char name[32]; + struct task_struct *tsk; char result[512]; u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ @@ -1689,7 +1689,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->name, t->max_before_softirq); + t->tsk->comm, t->max_before_softirq); seq_printf(seq, "Running: "); @@ -2160,7 +2160,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for(i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | - (pktgen_random() & + ((__force __be32)pktgen_random() & htonl(0x000fffff)); } @@ -2220,29 +2220,25 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; } else { - - if ((imn = ntohl(pkt_dev->daddr_min)) < (imx = - ntohl(pkt_dev-> - daddr_max))) - { + imn = ntohl(pkt_dev->daddr_min); + imx = ntohl(pkt_dev->daddr_max); + if (imn < imx) { __u32 t; + __be32 s; if (pkt_dev->flags & F_IPDST_RND) { - t = ((pktgen_random() % (imx - imn)) + - imn); - t = htonl(t); + t = pktgen_random() % (imx - imn) + imn; + s = htonl(t); - while (LOOPBACK(t) || MULTICAST(t) - || BADCLASS(t) || ZERONET(t) - || LOCAL_MCAST(t)) { - t = ((pktgen_random() % - (imx - imn)) + imn); - t = htonl(t); + while (LOOPBACK(s) || MULTICAST(s) + || BADCLASS(s) || ZERONET(s) + || LOCAL_MCAST(s)) { + t = (pktgen_random() % + (imx - imn)) + imn; + s = htonl(t); } - pkt_dev->cur_daddr = t; - } - - else { + pkt_dev->cur_daddr = s; + } else { t = ntohl(pkt_dev->cur_daddr); t++; if (t > imx) { @@ -2270,7 +2266,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < 4; i++) { pkt_dev->cur_in6_daddr.s6_addr32[i] = - ((pktgen_random() | + (((__force __be32)pktgen_random() | pkt_dev->min_in6_daddr.s6_addr32[i]) & pkt_dev->max_in6_daddr.s6_addr32[i]); } @@ -2377,7 +2373,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); memcpy(eth, pkt_dev->hh, 12); - *(u16 *) & eth[12] = protocol; + *(__be16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - @@ -2497,7 +2493,7 @@ static unsigned int scan_ip6(const char *s, char ip[16]) char suffix[16]; unsigned int prefixlen = 0; unsigned int suffixlen = 0; - __u32 tmp; + __be32 tmp; for (i = 0; i < 16; i++) ip[i] = 0; @@ -2713,7 +2709,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); memcpy(eth, pkt_dev->hh, 12); - *(u16 *) & eth[12] = protocol; + *(__be16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - @@ -2732,11 +2728,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph->len = htons(datalen + sizeof(struct udphdr)); udph->check = 0; /* No checksum */ - *(u32 *) iph = __constant_htonl(0x60000000); /* Version + flow */ + *(__be32 *) iph = __constant_htonl(0x60000000); /* Version + flow */ if (pkt_dev->traffic_class) { /* Version + traffic class + flow (0) */ - *(u32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20)); + *(__be32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20)); } iph->hop_limit = 32; @@ -3116,7 +3112,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t) { /* Remove from the thread list */ - remove_proc_entry(t->name, pg_proc_dir); + remove_proc_entry(t->tsk->comm, pg_proc_dir); mutex_lock(&pktgen_thread_lock); @@ -3264,58 +3260,40 @@ out:; * Main loop of the thread goes here */ -static void pktgen_thread_worker(struct pktgen_thread *t) +static int pktgen_thread_worker(void *arg) { DEFINE_WAIT(wait); + struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - sigset_t tmpsig; u32 max_before_softirq; u32 tx_since_softirq = 0; - daemonize("pktgen/%d", cpu); - - /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */ - - spin_lock_irq(¤t->sighand->siglock); - tmpsig = current->blocked; - siginitsetinv(¤t->blocked, - sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGTERM)); - - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - /* Migrate to the right CPU */ - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - BUG(); + BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); - t->control &= ~(T_TERMINATE); - t->control &= ~(T_RUN); - t->control &= ~(T_STOP); - t->control &= ~(T_REMDEVALL); - t->control &= ~(T_REMDEV); - t->pid = current->pid; PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid)); max_before_softirq = t->max_before_softirq; - __set_current_state(TASK_INTERRUPTIBLE); - mb(); + set_current_state(TASK_INTERRUPTIBLE); - while (1) { - - __set_current_state(TASK_RUNNING); + while (!kthread_should_stop()) { + pkt_dev = next_to_run(t); - /* - * Get next dev to xmit -- if any. - */ + if (!pkt_dev && + (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV)) + == 0) { + prepare_to_wait(&(t->queue), &wait, + TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + finish_wait(&(t->queue), &wait); + } - pkt_dev = next_to_run(t); + __set_current_state(TASK_RUNNING); if (pkt_dev) { @@ -3333,21 +3311,8 @@ static void pktgen_thread_worker(struct pktgen_thread *t) do_softirq(); tx_since_softirq = 0; } - } else { - prepare_to_wait(&(t->queue), &wait, TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); - finish_wait(&(t->queue), &wait); } - /* - * Back from sleep, either due to the timeout or signal. - * We check if we have any "posted" work for us. - */ - - if (t->control & T_TERMINATE || signal_pending(current)) - /* we received a request to terminate ourself */ - break; - if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); @@ -3368,20 +3333,19 @@ static void pktgen_thread_worker(struct pktgen_thread *t) t->control &= ~(T_REMDEV); } - if (need_resched()) - schedule(); + set_current_state(TASK_INTERRUPTIBLE); } - PG_DEBUG(printk("pktgen: %s stopping all device\n", t->name)); + PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm)); pktgen_stop(t); - PG_DEBUG(printk("pktgen: %s removing all device\n", t->name)); + PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm)); pktgen_rem_all_ifs(t); - PG_DEBUG(printk("pktgen: %s removing thread.\n", t->name)); + PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm)); pktgen_rem_thread(t); - t->removed = 1; + return 0; } static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, @@ -3499,37 +3463,11 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); } -static struct pktgen_thread *__init pktgen_find_thread(const char *name) +static int __init pktgen_create_thread(int cpu) { struct pktgen_thread *t; - - mutex_lock(&pktgen_thread_lock); - - list_for_each_entry(t, &pktgen_threads, th_list) - if (strcmp(t->name, name) == 0) { - mutex_unlock(&pktgen_thread_lock); - return t; - } - - mutex_unlock(&pktgen_thread_lock); - return NULL; -} - -static int __init pktgen_create_thread(const char *name, int cpu) -{ - int err; - struct pktgen_thread *t = NULL; struct proc_dir_entry *pe; - - if (strlen(name) > 31) { - printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n"); - return -EINVAL; - } - - if (pktgen_find_thread(name)) { - printk("pktgen: ERROR: thread: %s already exists\n", name); - return -EINVAL; - } + struct task_struct *p; t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { @@ -3537,14 +3475,29 @@ static int __init pktgen_create_thread(const char *name, int cpu) return -ENOMEM; } - strcpy(t->name, name); spin_lock_init(&t->if_lock); t->cpu = cpu; - pe = create_proc_entry(t->name, 0600, pg_proc_dir); + INIT_LIST_HEAD(&t->if_list); + + list_add_tail(&t->th_list, &pktgen_threads); + + p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); + if (IS_ERR(p)) { + printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); + list_del(&t->th_list); + kfree(t); + return PTR_ERR(p); + } + kthread_bind(p, cpu); + t->tsk = p; + + pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir); if (!pe) { printk("pktgen: cannot create %s/%s procfs entry.\n", - PG_PROC_DIR, t->name); + PG_PROC_DIR, t->tsk->comm); + kthread_stop(p); + list_del(&t->th_list); kfree(t); return -EINVAL; } @@ -3552,21 +3505,7 @@ static int __init pktgen_create_thread(const char *name, int cpu) pe->proc_fops = &pktgen_thread_fops; pe->data = t; - INIT_LIST_HEAD(&t->if_list); - - list_add_tail(&t->th_list, &pktgen_threads); - - t->removed = 0; - - err = kernel_thread((void *)pktgen_thread_worker, (void *)t, - CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - if (err < 0) { - printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); - remove_proc_entry(t->name, pg_proc_dir); - list_del(&t->th_list); - kfree(t); - return err; - } + wake_up_process(p); return 0; } @@ -3647,10 +3586,8 @@ static int __init pg_init(void) for_each_online_cpu(cpu) { int err; - char buf[30]; - sprintf(buf, "kpktgend_%i", cpu); - err = pktgen_create_thread(buf, cpu); + err = pktgen_create_thread(cpu); if (err) printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n", cpu, err); @@ -3678,9 +3615,8 @@ static void __exit pg_cleanup(void) list_for_each_safe(q, n, &pktgen_threads) { t = list_entry(q, struct pktgen_thread, th_list); - t->control |= (T_TERMINATE); - - wait_event_interruptible_timeout(queue, (t->removed == 1), HZ); + kthread_stop(t->tsk); + kfree(t); } /* Un-register us from receiving netdevice events */ diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 79ebd75fbe4d..5f0818d815e6 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -15,6 +15,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/vmalloc.h> #include <net/request_sock.h> @@ -29,22 +30,31 @@ * it is absolutely not enough even at 100conn/sec. 256 cures most * of problems. This value is adjusted to 128 for very small machines * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). - * Further increasing requires to change hash table size. + * Note : Dont forget somaxconn that may limit backlog too. */ int sysctl_max_syn_backlog = 256; int reqsk_queue_alloc(struct request_sock_queue *queue, - const int nr_table_entries) + unsigned int nr_table_entries) { - const int lopt_size = sizeof(struct listen_sock) + - nr_table_entries * sizeof(struct request_sock *); - struct listen_sock *lopt = kzalloc(lopt_size, GFP_KERNEL); - + size_t lopt_size = sizeof(struct listen_sock); + struct listen_sock *lopt; + + nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); + nr_table_entries = max_t(u32, nr_table_entries, 8); + nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); + lopt_size += nr_table_entries * sizeof(struct request_sock *); + if (lopt_size > PAGE_SIZE) + lopt = __vmalloc(lopt_size, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL); + else + lopt = kzalloc(lopt_size, GFP_KERNEL); if (lopt == NULL) return -ENOMEM; - for (lopt->max_qlen_log = 6; - (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog; + for (lopt->max_qlen_log = 3; + (1 << lopt->max_qlen_log) < nr_table_entries; lopt->max_qlen_log++); get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); @@ -65,9 +75,11 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) { /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); + size_t lopt_size = sizeof(struct listen_sock) + + lopt->nr_table_entries * sizeof(struct request_sock *); if (lopt->qlen != 0) { - int i; + unsigned int i; for (i = 0; i < lopt->nr_table_entries; i++) { struct request_sock *req; @@ -81,7 +93,10 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } BUG_TRAP(lopt->qlen == 0); - kfree(lopt); + if (lopt_size > PAGE_SIZE) + vfree(lopt); + else + kfree(lopt); } EXPORT_SYMBOL(reqsk_queue_destroy); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02f3c7947898..e76539a5eb5e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -108,7 +108,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), }; @@ -213,6 +212,26 @@ nla_put_failure: return nla_nest_cancel(skb, mx); } +int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, + u32 ts, u32 tsage, long expires, u32 error) +{ + struct rta_cacheinfo ci = { + .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), + .rta_used = dst->__use, + .rta_clntref = atomic_read(&(dst->__refcnt)), + .rta_error = error, + .rta_id = id, + .rta_ts = ts, + .rta_tsage = tsage, + }; + + if (expires) + ci.rta_expires = jiffies_to_clock_t(expires); + + return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); +} + +EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); static void set_operstate(struct net_device *dev, unsigned char transition) { @@ -273,6 +292,25 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; +static inline size_t if_nlmsg_size(int iwbuflen) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + + nla_total_size(sizeof(struct rtnl_link_ifmap)) + + nla_total_size(sizeof(struct rtnl_link_stats)) + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + + nla_total_size(4) /* IFLA_TXQLEN */ + + nla_total_size(4) /* IFLA_WEIGHT */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(1) /* IFLA_LINKMODE */ + + nla_total_size(iwbuflen); +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, void *iwbuf, int iwbuflen, int type, u32 pid, u32 seq, u32 change, unsigned int flags) @@ -558,7 +596,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct sk_buff *nskb; char *iw_buf = NULL, *iw = NULL; int iw_buf_len = 0; - int err, payload; + int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) @@ -587,9 +625,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) + - nla_total_size(iw_buf_len)); - nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; @@ -597,10 +633,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); - if (err <= 0) { - kfree_skb(nskb); - goto errout; - } + /* failure impilies BUG in if_nlmsg_size or wireless_rtnetlink_get */ + BUG_ON(err < 0); err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); errout: @@ -639,15 +673,13 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL); if (skb == NULL) goto errout; err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in if_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8b106358040..de7801d589e7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -56,7 +56,6 @@ #include <linux/cache.h> #include <linux/rtnetlink.h> #include <linux/init.h> -#include <linux/highmem.h> #include <net/protocol.h> #include <net/dst.h> @@ -67,8 +66,10 @@ #include <asm/uaccess.h> #include <asm/system.h> -static kmem_cache_t *skbuff_head_cache __read_mostly; -static kmem_cache_t *skbuff_fclone_cache __read_mostly; +#include "kmap_skb.h" + +static struct kmem_cache *skbuff_head_cache __read_mostly; +static struct kmem_cache *skbuff_fclone_cache __read_mostly; /* * Keep out-of-line to prevent kernel bloat. @@ -131,6 +132,7 @@ EXPORT_SYMBOL(skb_truesize_bug); * @gfp_mask: allocation mask * @fclone: allocate from fclone cache instead of head cache * and allocate a cloned (child) skb + * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. @@ -140,9 +142,9 @@ EXPORT_SYMBOL(skb_truesize_bug); * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone) + int fclone, int node) { - kmem_cache_t *cache; + struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; @@ -150,14 +152,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; /* Get the HEAD */ - skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA); + skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); if (!skb) goto out; /* Get the DATA. Size must match skb_add_mtu(). */ size = SKB_DATA_ALIGN(size); - data = kmalloc_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask); + data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), + gfp_mask, node); if (!data) goto nodata; @@ -209,7 +211,7 @@ nodata: * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, +struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp, unsigned int size, gfp_t gfp_mask) { @@ -266,9 +268,10 @@ nodata: struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask) { + int node = dev->class_dev.dev ? dev_to_node(dev->class_dev.dev) : -1; struct sk_buff *skb; - skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; @@ -473,8 +476,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) #endif C(protocol); n->destructor = NULL; + C(mark); #ifdef CONFIG_NETFILTER - C(nfmark); C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); @@ -534,8 +537,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->pkt_type = old->pkt_type; new->tstamp = old->tstamp; new->destructor = NULL; + new->mark = old->mark; #ifdef CONFIG_NETFILTER - new->nfmark = old->nfmark; new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; @@ -1240,8 +1243,8 @@ EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ -unsigned int skb_checksum(const struct sk_buff *skb, int offset, - int len, unsigned int csum) +__wsum skb_checksum(const struct sk_buff *skb, int offset, + int len, __wsum csum) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1265,7 +1268,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -1294,7 +1297,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, end = start + list->len; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; if (copy > len) copy = len; csum2 = skb_checksum(list, offset - start, @@ -1315,8 +1318,8 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, /* Both of above in one bottle. */ -unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, - u8 *to, int len, unsigned int csum) +__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, + u8 *to, int len, __wsum csum) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1342,7 +1345,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -1368,7 +1371,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, struct sk_buff *list = skb_shinfo(skb)->frag_list; for (; list; list = list->next) { - unsigned int csum2; + __wsum csum2; int end; BUG_TRAP(start <= offset + len); @@ -1396,7 +1399,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { - unsigned int csum; + __wsum csum; long csstart; if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -1414,9 +1417,9 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) skb->len - csstart, 0); if (skb->ip_summed == CHECKSUM_PARTIAL) { - long csstuff = csstart + skb->csum; + long csstuff = csstart + skb->csum_offset; - *((unsigned short *)(to + csstuff)) = csum_fold(csum); + *((__sum16 *)(to + csstuff)) = csum_fold(csum); } } diff --git a/net/core/sock.c b/net/core/sock.c index ee6cd2541d35..0ed5b4f0bc40 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -111,6 +111,7 @@ #include <linux/poll.h> #include <linux/tcp.h> #include <linux/init.h> +#include <linux/highmem.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -270,7 +271,7 @@ out: } EXPORT_SYMBOL(sock_queue_rcv_skb); -int sk_receive_skb(struct sock *sk, struct sk_buff *skb) +int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { int rc = NET_RX_SUCCESS; @@ -279,7 +280,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; - bh_lock_sock(sk); + if (nested) + bh_lock_sock_nested(sk); + else + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { /* * trylock + unlock semantics: @@ -806,24 +810,11 @@ lenout: */ static void inline sock_lock_init(struct sock *sk) { - spin_lock_init(&sk->sk_lock.slock); - sk->sk_lock.owner = NULL; - init_waitqueue_head(&sk->sk_lock.wq); - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock)); - - /* - * Mark both the sk_lock and the sk_lock.slock as a - * per-address-family lock class: - */ - lockdep_set_class_and_name(&sk->sk_lock.slock, - af_family_slock_keys + sk->sk_family, - af_family_slock_key_strings[sk->sk_family]); - lockdep_init_map(&sk->sk_lock.dep_map, - af_family_key_strings[sk->sk_family], - af_family_keys + sk->sk_family, 0); + sock_lock_init_class_and_name(sk, + af_family_slock_key_strings[sk->sk_family], + af_family_slock_keys + sk->sk_family, + af_family_key_strings[sk->sk_family], + af_family_keys + sk->sk_family); } /** @@ -837,7 +828,7 @@ struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; - kmem_cache_t *slab = prot->slab; + struct kmem_cache *slab = prot->slab; if (slab != NULL) sk = kmem_cache_alloc(slab, priority); @@ -1527,7 +1518,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) atomic_set(&sk->sk_refcnt, 1); } -void fastcall lock_sock(struct sock *sk) +void fastcall lock_sock_nested(struct sock *sk, int subclass) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); @@ -1538,11 +1529,11 @@ void fastcall lock_sock(struct sock *sk) /* * The sk_lock has mutex_lock() semantics here: */ - mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); } -EXPORT_SYMBOL(lock_sock); +EXPORT_SYMBOL(lock_sock_nested); void fastcall release_sock(struct sock *sk) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 02534131d88e..1e75b1585460 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -21,10 +21,6 @@ extern __u32 sysctl_rmem_max; extern int sysctl_core_destroy_delay; -#ifdef CONFIG_NET_DIVERT -extern char sysctl_divert_version[]; -#endif /* CONFIG_NET_DIVERT */ - #ifdef CONFIG_XFRM extern u32 sysctl_xfrm_aevent_etime; extern u32 sysctl_xfrm_aevent_rseqth; @@ -105,16 +101,6 @@ ctl_table core_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, -#ifdef CONFIG_NET_DIVERT - { - .ctl_name = NET_CORE_DIVERT_VERSION, - .procname = "divert_version", - .data = (void *)sysctl_divert_version, - .maxlen = 32, - .mode = 0444, - .proc_handler = &proc_dostring - }, -#endif /* CONFIG_NET_DIVERT */ #ifdef CONFIG_XFRM { .ctl_name = NET_CORE_AEVENT_ETIME, diff --git a/net/core/utils.c b/net/core/utils.c index d93fe64f6693..61556065f07e 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -88,7 +88,7 @@ EXPORT_SYMBOL(in_aton); #define IN6PTON_NULL 0x20000000 /* first/tail */ #define IN6PTON_UNKNOWN 0x40000000 -static inline int digit2bin(char c, char delim) +static inline int digit2bin(char c, int delim) { if (c == delim || c == '\0') return IN6PTON_DELIM; @@ -99,7 +99,7 @@ static inline int digit2bin(char c, char delim) return IN6PTON_UNKNOWN; } -static inline int xdigit2bin(char c, char delim) +static inline int xdigit2bin(char c, int delim) { if (c == delim || c == '\0') return IN6PTON_DELIM; @@ -113,12 +113,14 @@ static inline int xdigit2bin(char c, char delim) return (IN6PTON_XDIGIT | (c - 'a' + 10)); if (c >= 'A' && c <= 'F') return (IN6PTON_XDIGIT | (c - 'A' + 10)); + if (delim == -1) + return IN6PTON_DELIM; return IN6PTON_UNKNOWN; } int in4_pton(const char *src, int srclen, u8 *dst, - char delim, const char **end) + int delim, const char **end) { const char *s; u8 *d; @@ -173,7 +175,7 @@ EXPORT_SYMBOL(in4_pton); int in6_pton(const char *src, int srclen, u8 *dst, - char delim, const char **end) + int delim, const char **end) { const char *s, *tok = NULL; u8 *d, *dc = NULL; diff --git a/net/core/wireless.c b/net/core/wireless.c index cb1b8728d7ee..f69ab7b4408e 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -2130,7 +2130,7 @@ int iw_handler_set_spy(struct net_device * dev, * The rtnl_lock() make sure we don't race with the other iw_handlers. * This make sure wireless_spy_update() "see" that the spy list * is temporarily disabled. */ - wmb(); + smp_wmb(); /* Are there are addresses to copy? */ if(wrqu->data.length > 0) { @@ -2159,7 +2159,7 @@ int iw_handler_set_spy(struct net_device * dev, } /* Make sure above is updated before re-enabling */ - wmb(); + smp_wmb(); /* Enable addresses */ spydata->spy_number = wrqu->data.length; |