diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 485 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 24 | ||||
-rw-r--r-- | net/core/ethtool.c | 37 | ||||
-rw-r--r-- | net/core/fib_rules.c | 6 | ||||
-rw-r--r-- | net/core/filter.c | 1 | ||||
-rw-r--r-- | net/core/flow.c | 2 | ||||
-rw-r--r-- | net/core/iovec.c | 2 | ||||
-rw-r--r-- | net/core/link_watch.c | 11 | ||||
-rw-r--r-- | net/core/neighbour.c | 17 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 26 | ||||
-rw-r--r-- | net/core/net_namespace.c | 3 | ||||
-rw-r--r-- | net/core/netpoll.c | 24 | ||||
-rw-r--r-- | net/core/pktgen.c | 71 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 29 | ||||
-rw-r--r-- | net/core/skbuff.c | 162 | ||||
-rw-r--r-- | net/core/sock.c | 8 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 39 | ||||
-rw-r--r-- | net/core/user_dma.c | 2 |
18 files changed, 630 insertions, 319 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 582963077877..6bf217da9d8f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -90,6 +90,7 @@ #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> #include <linux/notifier.h> #include <linux/skbuff.h> #include <net/net_namespace.h> @@ -119,6 +120,12 @@ #include <linux/err.h> #include <linux/ctype.h> #include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/jhash.h> +#include <linux/random.h> #include "net-sysfs.h" @@ -254,9 +261,9 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU(struct softnet_data, softnet_data); -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#ifdef CONFIG_LOCKDEP /* - * register_netdevice() inits dev->_xmit_lock and sets lockdep class + * register_netdevice() inits txq->_xmit_lock and sets lockdep class * according to dev->type */ static const unsigned short netdev_lock_type[] = @@ -294,6 +301,7 @@ static const char *netdev_lock_name[] = "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; +static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; static inline unsigned short netdev_lock_pos(unsigned short dev_type) { @@ -306,8 +314,8 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type) return ARRAY_SIZE(netdev_lock_type) - 1; } -static inline void netdev_set_lockdep_class(spinlock_t *lock, - unsigned short dev_type) +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) { int i; @@ -315,9 +323,22 @@ static inline void netdev_set_lockdep_class(spinlock_t *lock, lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], netdev_lock_name[i]); } + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ + int i; + + i = netdev_lock_pos(dev->type); + lockdep_set_class_and_name(&dev->addr_list_lock, + &netdev_addr_lock_key[i], + netdev_lock_name[i]); +} #else -static inline void netdev_set_lockdep_class(spinlock_t *lock, - unsigned short dev_type) +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) { } #endif @@ -453,7 +474,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { memset(s[i].name, 0, sizeof(s[i].name)); - strcpy(s[i].name, name); + strlcpy(s[i].name, name, IFNAMSIZ); memcpy(&s[i].map, map, sizeof(s[i].map)); break; } @@ -478,7 +499,7 @@ int netdev_boot_setup_check(struct net_device *dev) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strncmp(dev->name, s[i].name, strlen(s[i].name))) { + !strcmp(dev->name, s[i].name)) { dev->irq = s[i].map.irq; dev->base_addr = s[i].map.base_addr; dev->mem_start = s[i].map.mem_start; @@ -960,6 +981,12 @@ void netdev_state_change(struct net_device *dev) } } +void netdev_bonding_change(struct net_device *dev) +{ + call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); +} +EXPORT_SYMBOL(netdev_bonding_change); + /** * dev_load - load a network module * @net: the applicable net namespace @@ -1116,6 +1143,29 @@ int dev_close(struct net_device *dev) } +/** + * dev_disable_lro - disable Large Receive Offload on a device + * @dev: device + * + * Disable Large Receive Offload (LRO) on a net device. Must be + * called under RTNL. This is needed if received packets may be + * forwarded to another interface. + */ +void dev_disable_lro(struct net_device *dev) +{ + if (dev->ethtool_ops && dev->ethtool_ops->get_flags && + dev->ethtool_ops->set_flags) { + u32 flags = dev->ethtool_ops->get_flags(dev); + if (flags & ETH_FLAG_LRO) { + flags &= ~ETH_FLAG_LRO; + dev->ethtool_ops->set_flags(dev, flags); + } + } + WARN_ON(dev->features & NETIF_F_LRO); +} +EXPORT_SYMBOL(dev_disable_lro); + + static int dev_boot_phase = 1; /* @@ -1289,16 +1339,19 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) } -void __netif_schedule(struct net_device *dev) +void __netif_schedule(struct Qdisc *q) { - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; + if (WARN_ON_ONCE(q == &noop_qdisc)) + return; + + if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) { struct softnet_data *sd; + unsigned long flags; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; + q->next_sched = sd->output_queue; + sd->output_queue = q; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } @@ -1362,6 +1415,29 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_IP_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_IPV6_CSUM) && + protocol == htons(ETH_P_IPV6))); +} + +static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) +{ + if (can_checksum_protocol(dev->features, skb->protocol)) + return true; + + if (skb->protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + if (can_checksum_protocol(dev->features & dev->vlan_features, + veh->h_vlan_encapsulated_proto)) + return true; + } + + return false; +} /* * Invalidate hardware checksum when packet is to be mangled, and @@ -1542,7 +1618,8 @@ static int dev_gso_segment(struct sk_buff *skb) return 0; } -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) { if (likely(!skb->next)) { if (!list_empty(&ptype_all)) @@ -1571,9 +1648,7 @@ gso: skb->next = nskb; return rc; } - if (unlikely((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb)) && - skb->next)) + if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1584,6 +1659,73 @@ out_kfree_skb: return 0; } +static u32 simple_tx_hashrnd; +static int simple_tx_hashrnd_initialized = 0; + +static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +{ + u32 addr1, addr2, ports; + u32 hash, ihl; + u8 ip_proto; + + if (unlikely(!simple_tx_hashrnd_initialized)) { + get_random_bytes(&simple_tx_hashrnd, 4); + simple_tx_hashrnd_initialized = 1; + } + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + ip_proto = ip_hdr(skb)->protocol; + addr1 = ip_hdr(skb)->saddr; + addr2 = ip_hdr(skb)->daddr; + ihl = ip_hdr(skb)->ihl; + break; + case __constant_htons(ETH_P_IPV6): + ip_proto = ipv6_hdr(skb)->nexthdr; + addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; + addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; + ihl = (40 >> 2); + break; + default: + return 0; + } + + + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); + break; + + default: + ports = 0; + break; + } + + hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + + return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); +} + +static struct netdev_queue *dev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + u16 queue_index = 0; + + if (dev->select_queue) + queue_index = dev->select_queue(dev, skb); + else if (dev->real_num_tx_queues > 1) + queue_index = simple_tx_hash(dev, skb); + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1609,10 +1751,10 @@ out_kfree_skb: * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ - int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; struct Qdisc *q; int rc = -ENOMEM; @@ -1640,55 +1782,34 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - - if (!(dev->features & NETIF_F_GEN_CSUM) && - !((dev->features & NETIF_F_IP_CSUM) && - skb->protocol == htons(ETH_P_IP)) && - !((dev->features & NETIF_F_IPV6_CSUM) && - skb->protocol == htons(ETH_P_IPV6))) - if (skb_checksum_help(skb)) - goto out_kfree_skb; + if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) + goto out_kfree_skb; } gso: - spin_lock_prefetch(&dev->queue_lock); - /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); - /* Updates of qdisc are serialized by queue_lock. - * The struct Qdisc which is pointed to by qdisc is now a - * rcu structure - it may be accessed without acquiring - * a lock (but the structure may be stale.) The freeing of the - * qdisc will be deferred until it's known that there are no - * more references to it. - * - * If the qdisc has an enqueue function, we still need to - * hold the queue_lock before calling it, since queue_lock - * also serializes access to the device queue. - */ + txq = dev_pick_tx(dev, skb); + q = rcu_dereference(txq->qdisc); - q = rcu_dereference(dev->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { - /* Grab device queue */ - spin_lock(&dev->queue_lock); - q = dev->qdisc; - if (q->enqueue) { - /* reset queue_mapping to zero */ - skb_set_queue_mapping(skb, 0); - rc = q->enqueue(skb, q); - qdisc_run(dev); - spin_unlock(&dev->queue_lock); - - rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; - goto out; - } - spin_unlock(&dev->queue_lock); + spinlock_t *root_lock = qdisc_root_lock(q); + + spin_lock(root_lock); + + rc = qdisc_enqueue_root(skb, q); + qdisc_run(q); + + spin_unlock(root_lock); + + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; + goto out; } /* The device has no queue. Common case for software devices: @@ -1706,19 +1827,18 @@ gso: if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (dev->xmit_lock_owner != cpu) { + if (txq->xmit_lock_owner != cpu) { - HARD_TX_LOCK(dev, cpu); + HARD_TX_LOCK(dev, txq, cpu); - if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb)) { + if (!netif_tx_queue_stopped(txq)) { rc = 0; - if (!dev_hard_start_xmit(skb, dev)) { - HARD_TX_UNLOCK(dev); + if (!dev_hard_start_xmit(skb, dev, txq)) { + HARD_TX_UNLOCK(dev, txq); goto out; } } - HARD_TX_UNLOCK(dev); + HARD_TX_UNLOCK(dev, txq); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev->name); @@ -1862,7 +1982,7 @@ static void net_tx_action(struct softirq_action *h) } if (sd->output_queue) { - struct net_device *head; + struct Qdisc *head; local_irq_disable(); head = sd->output_queue; @@ -1870,17 +1990,20 @@ static void net_tx_action(struct softirq_action *h) local_irq_enable(); while (head) { - struct net_device *dev = head; + struct Qdisc *q = head; + spinlock_t *root_lock; + head = head->next_sched; smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_SCHED, &dev->state); + clear_bit(__QDISC_STATE_SCHED, &q->state); - if (spin_trylock(&dev->queue_lock)) { - qdisc_run(dev); - spin_unlock(&dev->queue_lock); + root_lock = qdisc_root_lock(q); + if (spin_trylock(root_lock)) { + qdisc_run(q); + spin_unlock(root_lock); } else { - netif_schedule(dev); + __netif_schedule(q); } } } @@ -1961,10 +2084,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, */ static int ing_filter(struct sk_buff *skb) { - struct Qdisc *q; struct net_device *dev = skb->dev; - int result = TC_ACT_OK; u32 ttl = G_TC_RTTL(skb->tc_verd); + struct netdev_queue *rxq; + int result = TC_ACT_OK; + struct Qdisc *q; if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING @@ -1976,10 +2100,14 @@ static int ing_filter(struct sk_buff *skb) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) - result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + rxq = &dev->rx_queue; + + q = rxq->qdisc; + if (q) { + spin_lock(qdisc_lock(q)); + result = qdisc_enqueue_root(skb, q); + spin_unlock(qdisc_lock(q)); + } return result; } @@ -1988,7 +2116,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - if (!skb->dev->qdisc_ingress) + if (!skb->dev->rx_queue.qdisc) goto out; if (*pt_prev) { @@ -2012,6 +2140,33 @@ out: } #endif +/* + * netif_nit_deliver - deliver received packets to network taps + * @skb: buffer + * + * This function is used to deliver incoming packets to network + * taps. It should be used when the normal netif_receive_skb path + * is bypassed, for example because of VLAN acceleration. + */ +void netif_nit_deliver(struct sk_buff *skb) +{ + struct packet_type *ptype; + + if (list_empty(&ptype_all)) + return; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->mac_len = skb->network_header - skb->mac_header; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_all, list) { + if (!ptype->dev || ptype->dev == skb->dev) + deliver_skb(skb, ptype, skb->dev); + } + rcu_read_unlock(); +} + /** * netif_receive_skb - process receive buffer from network * @skb: buffer to process @@ -2059,6 +2214,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); + /* Don't receive packets in an exiting network namespace */ + if (!net_alive(dev_net(skb->dev))) + goto out; + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -2747,16 +2906,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -static void __dev_set_promiscuity(struct net_device *dev, int inc) +static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); - if ((dev->promiscuity += inc) == 0) - dev->flags &= ~IFF_PROMISC; - else - dev->flags |= IFF_PROMISC; + dev->flags |= IFF_PROMISC; + dev->promiscuity += inc; + if (dev->promiscuity == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch promisc and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_PROMISC; + else { + dev->promiscuity -= inc; + printk(KERN_WARNING "%s: promiscuity touches roof, " + "set promiscuity failed, promiscuity feature " + "of device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags != old_flags) { printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : @@ -2774,6 +2946,7 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) if (dev->change_rx_flags) dev->change_rx_flags(dev, IFF_PROMISC); } + return 0; } /** @@ -2785,14 +2958,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) * remains above zero the interface remains promiscuous. Once it hits zero * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_promiscuity(struct net_device *dev, int inc) +int dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + int err; - __dev_set_promiscuity(dev, inc); + err = __dev_set_promiscuity(dev, inc); + if (err < 0) + return err; if (dev->flags != old_flags) dev_set_rx_mode(dev); + return err; } /** @@ -2805,22 +2983,38 @@ void dev_set_promiscuity(struct net_device *dev, int inc) * to all interfaces. Once it hits zero the device reverts back to normal * filtering operation. A negative @inc value is used to drop the counter * when releasing a resource needing all multicasts. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_allmulti(struct net_device *dev, int inc) +int dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); dev->flags |= IFF_ALLMULTI; - if ((dev->allmulti += inc) == 0) - dev->flags &= ~IFF_ALLMULTI; + dev->allmulti += inc; + if (dev->allmulti == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch allmulti and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_ALLMULTI; + else { + dev->allmulti -= inc; + printk(KERN_WARNING "%s: allmulti touches roof, " + "set allmulti failed, allmulti feature of " + "device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags ^ old_flags) { if (dev->change_rx_flags) dev->change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } + return 0; } /* @@ -2859,9 +3053,9 @@ void __dev_set_rx_mode(struct net_device *dev) void dev_set_rx_mode(struct net_device *dev) { - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); } int __dev_addr_delete(struct dev_addr_list **list, int *count, @@ -2939,11 +3133,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -2951,7 +3145,7 @@ EXPORT_SYMBOL(dev_unicast_delete); /** * dev_unicast_add - add a secondary unicast address * @dev: device - * @addr: address to delete + * @addr: address to add * @alen: length of @addr * * Add a secondary unicast address to the device or increase @@ -2965,11 +3159,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3036,12 +3230,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_tx_lock_bh(to); + netif_addr_lock_bh(to); err = __dev_addr_sync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); if (!err) __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); + netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3057,15 +3251,15 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(from); - netif_tx_lock_bh(to); + netif_addr_lock_bh(from); + netif_addr_lock(to); __dev_addr_unsync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); - netif_tx_unlock_bh(from); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_unicast_unsync); @@ -3085,7 +3279,7 @@ static void __dev_addr_discard(struct dev_addr_list **list) static void dev_addr_discard(struct net_device *dev) { - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); __dev_addr_discard(&dev->uc_list); dev->uc_count = 0; @@ -3093,7 +3287,7 @@ static void dev_addr_discard(struct net_device *dev) __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); } unsigned dev_get_flags(const struct net_device *dev) @@ -3666,6 +3860,21 @@ static void rollback_registered(struct net_device *dev) dev_put(dev); } +static void __netdev_init_queue_locks_one(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + spin_lock_init(&dev_queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); + dev_queue->xmit_lock_owner = -1; +} + +static void netdev_init_queue_locks(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); + __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); +} + /** * register_netdevice - register a network device * @dev: device to register @@ -3700,11 +3909,9 @@ int register_netdevice(struct net_device *dev) BUG_ON(!dev_net(dev)); net = dev_net(dev); - spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->_xmit_lock); - netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); - dev->xmit_lock_owner = -1; - spin_lock_init(&dev->ingress_lock); + spin_lock_init(&dev->addr_list_lock); + netdev_set_addr_lockdep_class(dev); + netdev_init_queue_locks(dev); dev->iflink = -1; @@ -3985,6 +4192,19 @@ static struct net_device_stats *internal_stats(struct net_device *dev) return &dev->stats; } +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue, + void *_unused) +{ + queue->dev = dev; +} + +static void netdev_init_queues(struct net_device *dev) +{ + netdev_init_one_queue(dev, &dev->rx_queue, NULL); + netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); +} + /** * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -3999,14 +4219,14 @@ static struct net_device_stats *internal_stats(struct net_device *dev) struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { - void *p; + struct netdev_queue *tx; struct net_device *dev; - int alloc_size; + size_t alloc_size; + void *p; BUG_ON(strlen(name) >= sizeof(dev->name)); - alloc_size = sizeof(struct net_device) + - sizeof(struct net_device_subqueue) * (queue_count - 1); + alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; @@ -4021,22 +4241,33 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, return NULL; } + tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); + if (!tx) { + printk(KERN_ERR "alloc_netdev: Unable to allocate " + "tx qdiscs.\n"); + kfree(p); + return NULL; + } + dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; dev_net_set(dev, &init_net); + dev->_tx = tx; + dev->num_tx_queues = queue_count; + dev->real_num_tx_queues = queue_count; + if (sizeof_priv) { dev->priv = ((char *)dev + - ((sizeof(struct net_device) + - (sizeof(struct net_device_subqueue) * - (queue_count - 1)) + NETDEV_ALIGN_CONST) + ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST)); } - dev->egress_subqueue_count = queue_count; dev->gso_max_size = GSO_MAX_SIZE; + netdev_init_queues(dev); + dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); @@ -4057,6 +4288,8 @@ void free_netdev(struct net_device *dev) { release_net(dev_net(dev)); + kfree(dev->_tx); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4238,7 +4471,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, void *ocpu) { struct sk_buff **list_skb; - struct net_device **list_net; + struct Qdisc **list_net; struct sk_buff *skb; unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; @@ -4467,6 +4700,26 @@ err_name: return -ENOMEM; } +char *netdev_drivername(struct net_device *dev, char *buffer, int len) +{ + struct device_driver *driver; + struct device *parent; + + if (len <= 0 || !buffer) + return buffer; + buffer[0] = 0; + + parent = dev->dev.parent; + + if (!parent) + return buffer; + + driver = parent->driver; + if (driver && driver->name) + strlcpy(buffer, driver->name, len); + return buffer; +} + static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); @@ -4563,8 +4816,8 @@ static int __init net_dev_init(void) dev_boot_phase = 0; - open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); - open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); + open_softirq(NET_RX_SOFTIRQ, net_rx_action); hotcpu_notifier(dev_cpu_callback, 0); dst_init(); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index f8a3455f4493..5402b3b38e0d 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -72,7 +72,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { int err; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) { @@ -83,7 +83,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) __dev_set_rx_mode(dev); } - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } @@ -95,11 +95,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { int err; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } @@ -119,12 +119,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_tx_lock_bh(to); + netif_addr_lock_bh(to); err = __dev_addr_sync(&to->mc_list, &to->mc_count, &from->mc_list, &from->mc_count); if (!err) __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); + netif_addr_unlock_bh(to); return err; } @@ -143,15 +143,15 @@ EXPORT_SYMBOL(dev_mc_sync); */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(from); - netif_tx_lock_bh(to); + netif_addr_lock_bh(from); + netif_addr_lock(to); __dev_addr_unsync(&to->mc_list, &to->mc_count, &from->mc_list, &from->mc_count); __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); - netif_tx_unlock_bh(from); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_mc_unsync); @@ -164,7 +164,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) return 0; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); for (m = dev->mc_list; m; m = m->next) { int i; @@ -176,7 +176,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); } - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return 0; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0133b5ebd545..14ada537f895 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } +static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc cmd; + + if (!dev->ethtool_ops->set_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + return dev->ethtool_ops->set_rxhash(dev, &cmd); +} + +static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc info; + + if (!dev->ethtool_ops->get_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + dev->ethtool_ops->get_rxhash(dev, &info); + + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; +} + static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; @@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GGSO: case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: + case ETHTOOL_GRXFH: break; default: if (!capable(CAP_NET_ADMIN)) @@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_set_value(dev, useraddr, dev->ethtool_ops->set_priv_flags); break; + case ETHTOOL_GRXFH: + rc = ethtool_get_rxhash(dev, useraddr); + break; + case ETHTOOL_SRXFH: + rc = ethtool_set_rxhash(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e3e9ab0f74e3..79de3b14a8d1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops) static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) - ops->flush_cache(); + ops->flush_cache(ops); } int fib_rules_register(struct fib_rules_ops *ops) @@ -226,7 +226,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } @@ -365,7 +365,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } diff --git a/net/core/filter.c b/net/core/filter.c index 4f8369729a4e..df3744355839 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -68,7 +68,6 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter - * @needlock: set to 1 if the sock is not locked by caller. * * Run the filter code and then cut skb->data to correct size returned by * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller diff --git a/net/core/flow.c b/net/core/flow.c index 19991175fdeb..5cf81052d044 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -298,7 +298,7 @@ void flow_cache_flush(void) init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0); + smp_call_function(flow_cache_flush_per_cpu, &info, 0); flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); diff --git a/net/core/iovec.c b/net/core/iovec.c index 755c37fdaee7..4c9c0121c9da 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -36,7 +36,7 @@ * in any case. */ -int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) +int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { int size, err, ct; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a5e372b9ec4d..bf8f7af699d7 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -77,10 +77,10 @@ static void rfc2863_policy(struct net_device *dev) } -static int linkwatch_urgent_event(struct net_device *dev) +static bool linkwatch_urgent_event(struct net_device *dev) { return netif_running(dev) && netif_carrier_ok(dev) && - dev->qdisc != dev->qdisc_sleeping; + qdisc_tx_changing(dev); } @@ -180,10 +180,9 @@ static void __linkwatch_run_queue(int urgent_only) rfc2863_policy(dev); if (dev->flags & IFF_UP) { - if (netif_carrier_ok(dev)) { - WARN_ON(dev->qdisc_sleeping == &noop_qdisc); + if (netif_carrier_ok(dev)) dev_activate(dev); - } else + else dev_deactivate(dev); netdev_state_change(dev); @@ -214,7 +213,7 @@ static void linkwatch_event(struct work_struct *dummy) void linkwatch_fire_event(struct net_device *dev) { - int urgent = linkwatch_urgent_event(dev); + bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5d9d7130bd6e..f62c8af85d38 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -930,6 +930,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) buff = neigh->arp_queue.next; __skb_unlink(buff, &neigh->arp_queue); kfree_skb(buff); + NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } __skb_queue_tail(&neigh->arp_queue, skb); } @@ -1714,7 +1715,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) return nla_nest_end(skb, nest); nla_put_failure: - return nla_nest_cancel(skb, nest); + nla_nest_cancel(skb, nest); + return -EMSGSIZE; } static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, @@ -2057,9 +2059,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, goto nla_put_failure; } - ci.ndm_used = now - neigh->used; - ci.ndm_confirmed = now - neigh->confirmed; - ci.ndm_updated = now - neigh->updated; + ci.ndm_used = jiffies_to_clock_t(now - neigh->used); + ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); + ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); @@ -2461,12 +2463,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n"); + seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n"); return 0; } seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx\n", + "%08lx %08lx %08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, @@ -2482,7 +2484,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) st->rcv_probes_ucast, st->periodic_gc_runs, - st->forced_gc_runs + st->forced_gc_runs, + st->unres_discards ); return 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 90e2177af081..c1f4e0d428c0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -242,11 +242,11 @@ static ssize_t netstat_show(const struct device *d, offset % sizeof(unsigned long) != 0); read_lock(&dev_base_lock); - if (dev_isalive(dev) && dev->get_stats && - (stats = (*dev->get_stats)(dev))) + if (dev_isalive(dev)) { + stats = dev->get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); - + } read_unlock(&dev_base_lock); return ret; } @@ -318,7 +318,7 @@ static struct attribute_group netstat_group = { .attrs = netstat_attrs, }; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT_SYSFS /* helper function that does all the locking etc for wireless stats */ static ssize_t wireless_show(struct device *d, char *buf, ssize_t (*format)(const struct iw_statistics *, @@ -457,10 +457,9 @@ int netdev_register_kobject(struct net_device *net) strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); #ifdef CONFIG_SYSFS - if (net->get_stats) - *groups++ = &netstat_group; + *groups++ = &netstat_group; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT_SYSFS if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif @@ -469,6 +468,19 @@ int netdev_register_kobject(struct net_device *net) return device_add(dev); } +int netdev_class_create_file(struct class_attribute *class_attr) +{ + return class_create_file(&net_class, class_attr); +} + +void netdev_class_remove_file(struct class_attribute *class_attr) +{ + class_remove_file(&net_class, class_attr); +} + +EXPORT_SYMBOL(netdev_class_create_file); +EXPORT_SYMBOL(netdev_class_remove_file); + void netdev_initialize_kobject(struct net_device *net) { struct device *device = &(net->dev); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72b4c184dd84..7c52fe277b62 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work) struct pernet_operations *ops; struct net *net; + /* Be very certain incoming network packets will not find us */ + rcu_barrier(); + net = container_of(work, struct net, work); mutex_lock(&net_mutex); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8fb134da0346..c12720895ecf 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,25 +58,27 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { __kfree_skb(skb); continue; } + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + local_irq_save(flags); - netif_tx_lock(dev); - if ((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb)) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + __netif_tx_lock(txq, smp_processor_id()); + if (netif_tx_queue_stopped(txq) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - netif_tx_unlock(dev); + __netif_tx_unlock(txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } - netif_tx_unlock(dev); + __netif_tx_unlock(txq); local_irq_restore(flags); } } @@ -278,17 +280,19 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { + struct netdev_queue *txq; unsigned long flags; + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + local_irq_save(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (netif_tx_trylock(dev)) { - if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb)) + if (__netif_tx_trylock(txq)) { + if (!netif_tx_queue_stopped(txq)) status = dev->hard_start_xmit(skb, dev); - netif_tx_unlock(dev); + __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) break; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fdf537707e51..c7d484f7e1c4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1875,7 +1875,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -2123,6 +2123,24 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) } } #endif +static void set_cur_queue_map(struct pktgen_dev *pkt_dev) +{ + if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { + t = random32() % + (pkt_dev->queue_map_max - + pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; + } else { + t = pkt_dev->cur_queue_map + 1; + if (t > pkt_dev->queue_map_max) + t = pkt_dev->queue_map_min; + } + pkt_dev->cur_queue_map = t; + } +} + /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2325,19 +2343,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_pkt_size = t; } - if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { - __u16 t; - if (pkt_dev->flags & F_QUEUE_MAP_RND) { - t = random32() % - (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) - + pkt_dev->queue_map_min; - } else { - t = pkt_dev->cur_queue_map + 1; - if (t > pkt_dev->queue_map_max) - t = pkt_dev->queue_map_min; - } - pkt_dev->cur_queue_map = t; - } + set_cur_queue_map(pkt_dev); pkt_dev->flows[flow].count++; } @@ -2458,7 +2464,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ - + u16 queue_map; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2469,6 +2475,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Update any of the values, used when we're incrementing various * fields. */ + queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); datalen = (odev->hard_header_len + 16) & ~0xf; @@ -2507,7 +2514,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); - skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); + skb_set_queue_mapping(skb, queue_map); iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2797,6 +2804,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ + u16 queue_map; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2807,6 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, /* Update any of the values, used when we're incrementing various * fields. */ + queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + @@ -2844,7 +2853,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); - skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); + skb_set_queue_mapping(skb, queue_map); iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3263,7 +3272,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = NULL; + struct netdev_queue *txq; __u64 idle_start = 0; + u16 queue_map; int ret; odev = pkt_dev->odev; @@ -3285,9 +3296,15 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if ((netif_queue_stopped(odev) || - (pkt_dev->skb && - netif_subqueue_stopped(odev, pkt_dev->skb))) || + if (!pkt_dev->skb) { + set_cur_queue_map(pkt_dev); + queue_map = pkt_dev->cur_queue_map; + } else { + queue_map = skb_get_queue_mapping(pkt_dev->skb); + } + + txq = netdev_get_tx_queue(odev, queue_map); + if (netif_tx_queue_stopped(txq) || need_resched()) { idle_start = getCurUs(); @@ -3303,8 +3320,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev) || - netif_subqueue_stopped(odev, pkt_dev->skb)) { + if (netif_tx_queue_stopped(txq)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3331,9 +3347,12 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev) && - !netif_subqueue_stopped(odev, pkt_dev->skb)) { + /* fill_packet() might have changed the queue */ + queue_map = skb_get_queue_mapping(pkt_dev->skb); + txq = netdev_get_tx_queue(odev, queue_map); + + __netif_tx_lock_bh(txq); + if (!netif_tx_queue_stopped(txq)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3377,7 +3396,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->next_tx_ns = 0; } - netif_tx_unlock_bh(odev); + __netif_tx_unlock_bh(txq); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf857c4dc7b1..71edb8b36341 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) return nla_nest_end(skb, mx); nla_put_failure: - return nla_nest_cancel(skb, mx); + nla_nest_cancel(skb, mx); + return -EMSGSIZE; } int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, @@ -604,8 +605,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) { + struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; + struct net_device_stats *stats; + struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -632,8 +636,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->master) NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - if (dev->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); + txq = netdev_get_tx_queue(dev, 0); + if (txq->qdisc_sleeping) + NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); if (1) { struct rtnl_link_ifmap map = { @@ -652,19 +657,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); } - if (dev->get_stats) { - struct net_device_stats *stats = dev->get_stats(dev); - if (stats) { - struct nlattr *attr; - - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); - if (attr == NULL) - goto nla_put_failure; + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); + if (attr == NULL) + goto nla_put_failure; - copy_rtnl_link_stats(nla_data(attr), stats); - } - } + stats = dev->get_stats(dev); + copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5c459f2b7985..e4115672b6cf 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,8 +4,6 @@ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ - * * Fixes: * Alan Cox : Fixed the worst of the load * balancer bugs. @@ -461,6 +459,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->tc_verd = old->tc_verd; #endif #endif + new->vlan_tci = old->vlan_tci; + skb_copy_secmark(new, old); } @@ -1282,107 +1282,83 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, return 0; } -/* - * Map linear and fragment data from the skb to spd. Returns number of - * pages mapped. - */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *total_len, - struct splice_pipe_desc *spd) +static inline void __segment_seek(struct page **page, unsigned int *poff, + unsigned int *plen, unsigned int off) { - unsigned int nr_pages = spd->nr_pages; - unsigned int poff, plen, len, toff, tlen; - int headlen, seg; + *poff += off; + *page += *poff / PAGE_SIZE; + *poff = *poff % PAGE_SIZE; + *plen -= off; +} - toff = *offset; - tlen = *total_len; - if (!tlen) - goto err; +static inline int __splice_segment(struct page *page, unsigned int poff, + unsigned int plen, unsigned int *off, + unsigned int *len, struct sk_buff *skb, + struct splice_pipe_desc *spd) +{ + if (!*len) + return 1; - /* - * if the offset is greater than the linear part, go directly to - * the fragments. - */ - headlen = skb_headlen(skb); - if (toff >= headlen) { - toff -= headlen; - goto map_frag; + /* skip this segment if already processed */ + if (*off >= plen) { + *off -= plen; + return 0; } - /* - * first map the linear region into the pages/partial map, skipping - * any potential initial offset. - */ - len = 0; - while (len < headlen) { - void *p = skb->data + len; - - poff = (unsigned long) p & (PAGE_SIZE - 1); - plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff); - len += plen; - - if (toff) { - if (plen <= toff) { - toff -= plen; - continue; - } - plen -= toff; - poff += toff; - toff = 0; - } + /* ignore any bits we already processed */ + if (*off) { + __segment_seek(&page, &poff, &plen, *off); + *off = 0; + } - plen = min(plen, tlen); - if (!plen) - break; + do { + unsigned int flen = min(*len, plen); - /* - * just jump directly to update and return, no point - * in going over fragments when the output is full. - */ - if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb)) - goto done; + /* the linear region may spread across several pages */ + flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - tlen -= plen; - } + if (spd_fill_page(spd, page, flen, poff, skb)) + return 1; + + __segment_seek(&page, &poff, &plen, flen); + *len -= flen; + + } while (*len && plen); + + return 0; +} + +/* + * Map linear and fragment data from the skb to spd. It reports failure if the + * pipe is full or if we already spliced the requested length. + */ +static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, + unsigned int *len, + struct splice_pipe_desc *spd) +{ + int seg; + + /* + * map the linear part + */ + if (__splice_segment(virt_to_page(skb->data), + (unsigned long) skb->data & (PAGE_SIZE - 1), + skb_headlen(skb), + offset, len, skb, spd)) + return 1; /* * then map the fragments */ -map_frag: for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - plen = f->size; - poff = f->page_offset; - - if (toff) { - if (plen <= toff) { - toff -= plen; - continue; - } - plen -= toff; - poff += toff; - toff = 0; - } - - plen = min(plen, tlen); - if (!plen) - break; - - if (spd_fill_page(spd, f->page, plen, poff, skb)) - break; - - tlen -= plen; + if (__splice_segment(f->page, f->page_offset, f->size, + offset, len, skb, spd)) + return 1; } -done: - if (spd->nr_pages - nr_pages) { - *offset = 0; - *total_len = tlen; - return 0; - } -err: - return 1; + return 0; } /* @@ -1445,6 +1421,7 @@ done: if (spd.nr_pages) { int ret; + struct sock *sk = __skb->sk; /* * Drop the socket lock, otherwise we have reverse @@ -1455,9 +1432,9 @@ done: * we call into ->sendpage() with the i_mutex lock held * and networking will grab the socket lock. */ - release_sock(__skb->sk); + release_sock(sk); ret = splice_to_pipe(pipe, &spd); - lock_sock(__skb->sk); + lock_sock(sk); return ret; } @@ -2280,6 +2257,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) skb_copy_queue_mapping(nskb, skb); nskb->priority = skb->priority; nskb->protocol = skb->protocol; + nskb->vlan_tci = skb->vlan_tci; nskb->dst = dst_clone(skb->dst); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); nskb->pkt_type = skb->pkt_type; @@ -2584,6 +2562,13 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) return true; } +void __skb_warn_lro_forwarding(const struct sk_buff *skb) +{ + if (net_ratelimit()) + pr_warning("%s: received packets cannot be forwarded" + " while LRO is enabled\n", skb->dev->name); +} + EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); @@ -2617,6 +2602,7 @@ EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); EXPORT_SYMBOL(skb_append_datato_frags); +EXPORT_SYMBOL(__skb_warn_lro_forwarding); EXPORT_SYMBOL_GPL(skb_to_sgvec); EXPORT_SYMBOL_GPL(skb_cow_data); diff --git a/net/core/sock.c b/net/core/sock.c index 88094cb09c06..10a64d57078c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -7,8 +7,6 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> @@ -1068,7 +1066,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) * to be taken into account in all callers. -acme */ sk_refcnt_debug_inc(newsk); - newsk->sk_socket = NULL; + sk_set_socket(newsk, NULL); newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) @@ -1444,7 +1442,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) /* Under pressure. */ if (allocated > prot->sysctl_mem[1]) if (prot->enter_memory_pressure) - prot->enter_memory_pressure(); + prot->enter_memory_pressure(sk); /* Over hard limit. */ if (allocated > prot->sysctl_mem[2]) @@ -1704,7 +1702,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; sk->sk_state = TCP_CLOSE; - sk->sk_socket = sock; + sk_set_socket(sk, sock); sock_set_flag(sk, SOCK_ZAPPED); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5fc801057244..a570e2af22cb 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -125,14 +125,6 @@ static struct ctl_table net_core_table[] = { #endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_SOMAXCONN, - .procname = "somaxconn", - .data = &init_net.core.sysctl_somaxconn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, @@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = { { .ctl_name = 0 } }; +static struct ctl_table netns_core_table[] = { + { + .ctl_name = NET_CORE_SOMAXCONN, + .procname = "somaxconn", + .data = &init_net.core.sysctl_somaxconn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .ctl_name = 0 } +}; + static __net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, @@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = { static __net_init int sysctl_core_net_init(struct net *net) { - struct ctl_table *tbl, *tmp; + struct ctl_table *tbl; net->core.sysctl_somaxconn = SOMAXCONN; - tbl = net_core_table; + tbl = netns_core_table; if (net != &init_net) { - tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL); + tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; - for (tmp = tbl; tmp->procname; tmp++) { - if (tmp->data >= (void *)&init_net && - tmp->data < (void *)(&init_net + 1)) - tmp->data += (char *)net - (char *)&init_net; - else - tmp->mode &= ~0222; - } + tbl[0].data = &net->core.sysctl_somaxconn; } net->core.sysctl_hdr = register_net_sysctl_table(net, @@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net) return 0; err_reg: - if (tbl != net_core_table) + if (tbl != netns_core_table) kfree(tbl); err_dup: return -ENOMEM; @@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net) tbl = net->core.sysctl_hdr->ctl_table_arg; unregister_net_sysctl_table(net->core.sysctl_hdr); - BUG_ON(tbl == net_core_table); + BUG_ON(tbl == netns_core_table); kfree(tbl); } @@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + register_net_sysctl_rotable(net_core_path, net_core_table); return register_pernet_subsys(&sysctl_core_ops); } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index de760504f6fe..8c6b706963ff 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -76,7 +76,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, end = start + skb_shinfo(skb)->frags[i].size; copy = end - offset; - if ((copy = end - offset) > 0) { + if (copy > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = frag->page; |