Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c           | 433
-rw-r--r-- | net/core/dev_mcast.c     | 200
-rw-r--r-- | net/core/ethtool.c       |  12
-rw-r--r-- | net/core/gen_estimator.c |  82
-rw-r--r-- | net/core/netevent.c      |   1
-rw-r--r-- | net/core/netpoll.c       |  36
-rw-r--r-- | net/core/pktgen.c        | 251
-rw-r--r-- | net/core/rtnetlink.c     | 470
-rw-r--r-- | net/core/scm.c           |   3
-rw-r--r-- | net/core/skbuff.c        |  21
-rw-r--r-- | net/core/sock.c          |  42
11 files changed, 1204 insertions(+), 347 deletions(-)
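The diff below retires dev_mc_upload()/dev_mc_discard() in favour of a generic address-list API (__dev_addr_add(), __dev_addr_delete(), dev_set_rx_mode()) and adds dev_mc_sync()/dev_mc_unsync() so layered software devices (VLAN, macvlan) can mirror their multicast list onto a lower device. A minimal consumer sketch follows; the layered_* names and the lowerdev field are hypothetical — only the dev_mc_sync()/dev_mc_unsync() calls and their locking rules come from this diff:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

struct layered_priv {
	struct net_device *lowerdev;	/* hypothetical lower device */
};

/* dev->set_multicast_list runs under netif_tx_lock_bh(dev), which is
 * exactly the locking dev_mc_sync() requires for the source device. */
static void layered_set_multicast_list(struct net_device *dev)
{
	struct layered_priv *priv = netdev_priv(dev);

	/* Push newly added addresses down; drop those with no users left. */
	dev_mc_sync(priv->lowerdev, dev);
}

static int layered_stop(struct net_device *dev)
{
	struct layered_priv *priv = netdev_priv(dev);

	/* Release every address dev_mc_sync() installed on the lower device. */
	dev_mc_unsync(priv->lowerdev, dev);
	netif_stop_queue(dev);
	return 0;
}

static void layered_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->set_multicast_list = layered_set_multicast_list;
	dev->stop = layered_stop;
}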
diff --git a/net/core/dev.c b/net/core/dev.c index 26090621ea6b..6357f54c8ff7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,6 +98,7 @@ #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/if_bridge.h> +#include <linux/if_macvlan.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -151,9 +152,22 @@ static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */ static struct list_head ptype_all __read_mostly; /* Taps */ #ifdef CONFIG_NET_DMA -static struct dma_client *net_dma_client; -static unsigned int net_dma_count; -static spinlock_t net_dma_event_lock; +struct net_dma { + struct dma_client client; + spinlock_t lock; + cpumask_t channel_mask; + struct dma_chan *channels[NR_CPUS]; +}; + +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state); + +static struct net_dma net_dma = { + .client = { + .event_callback = netdev_dma_event, + }, +}; #endif /* @@ -942,7 +956,7 @@ int dev_open(struct net_device *dev) /* * Initialize multicasting status */ - dev_mc_upload(dev); + dev_set_rx_mode(dev); /* * Wakeup transmit queue engine @@ -1429,7 +1443,9 @@ gso: skb->next = nskb; return rc; } - if (unlikely(netif_queue_stopped(dev) && skb->next)) + if (unlikely((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) && + skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1510,8 +1526,10 @@ int dev_queue_xmit(struct sk_buff *skb) skb_headroom(skb)); if (!(dev->features & NETIF_F_GEN_CSUM) && - (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP))) + !((dev->features & NETIF_F_IP_CSUM) && + skb->protocol == htons(ETH_P_IP)) && + !((dev->features & NETIF_F_IPV6_CSUM) && + skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } @@ -1545,6 +1563,8 @@ gso: spin_lock(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { + /* reset queue_mapping to zero */ + skb->queue_mapping = 0; rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1574,7 +1594,8 @@ gso: HARD_TX_LOCK(dev, cpu); - if (!netif_queue_stopped(dev)) { + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -1793,6 +1814,28 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) #endif +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; +EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); + +static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, + struct net_device *orig_dev) +{ + if (skb->dev->macvlan_port == NULL) + return skb; + + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + return macvlan_handle_frame_hook(skb); +} +#else +#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) +#endif + #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? 
otherwise some useless instructions @@ -1900,6 +1943,9 @@ ncls: skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; + skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto out; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { @@ -2009,20 +2055,21 @@ static void net_rx_action(struct softirq_action *h) } } out: + local_irq_enable(); #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (net_dma_client) { - struct dma_chan *chan; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) - dma_async_memcpy_issue_pending(chan); - rcu_read_unlock(); + if (!cpus_empty(net_dma.channel_mask)) { + int chan_idx; + for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + struct dma_chan *chan = net_dma.channels[chan_idx]; + if (chan) + dma_async_memcpy_issue_pending(chan); + } } #endif - local_irq_enable(); return; softnet_break: @@ -2496,26 +2543,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - */ -void dev_set_promiscuity(struct net_device *dev, int inc) +static void __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; else dev->flags |= IFF_PROMISC; if (dev->flags != old_flags) { - dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); @@ -2525,10 +2563,32 @@ void dev_set_promiscuity(struct net_device *dev, int inc) dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current->audit_context)); + + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_PROMISC); } } /** + * dev_set_promiscuity - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + */ +void dev_set_promiscuity(struct net_device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + __dev_set_promiscuity(dev, inc); + if (dev->flags != old_flags) + dev_set_rx_mode(dev); +} + +/** * dev_set_allmulti - update allmulti count on a device * @dev: device * @inc: modifier @@ -2544,11 +2604,190 @@ void dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + dev->flags |= IFF_ALLMULTI; if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; - if (dev->flags ^ old_flags) - dev_mc_upload(dev); + if (dev->flags ^ old_flags) { + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_set_rx_mode(dev); + } +} + +/* + * Upload unicast and multicast address lists to device and + * configure RX filtering. When the device doesn't support unicast + * filtering it is put in promiscous mode while unicast addresses + * are present. 
+ */ +void __dev_set_rx_mode(struct net_device *dev) +{ + /* dev_open will call this function so the list will stay sane. */ + if (!(dev->flags&IFF_UP)) + return; + + if (!netif_device_present(dev)) + return; + + if (dev->set_rx_mode) + dev->set_rx_mode(dev); + else { + /* Unicast addresses changes may only happen under the rtnl, + * therefore calling __dev_set_promiscuity here is safe. + */ + if (dev->uc_count > 0 && !dev->uc_promisc) { + __dev_set_promiscuity(dev, 1); + dev->uc_promisc = 1; + } else if (dev->uc_count == 0 && dev->uc_promisc) { + __dev_set_promiscuity(dev, -1); + dev->uc_promisc = 0; + } + + if (dev->set_multicast_list) + dev->set_multicast_list(dev); + } +} + +void dev_set_rx_mode(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); +} + +int __dev_addr_delete(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (; (da = *list) != NULL; list = &da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + alen == da->da_addrlen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 0; + if (old_glbl == 0) + break; + } + if (--da->da_users) + return 0; + + *list = da->next; + kfree(da); + (*count)--; + return 0; + } + } + return -ENOENT; +} + +int __dev_addr_add(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (da = *list; da != NULL; da = da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + da->da_addrlen == alen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 1; + if (old_glbl) + return 0; + } + da->da_users++; + return 0; + } + } + + da = kmalloc(sizeof(*da), GFP_ATOMIC); + if (da == NULL) + return -ENOMEM; + memcpy(da->da_addr, addr, alen); + da->da_addrlen = alen; + da->da_users = 1; + da->da_gusers = glbl ? 1 : 0; + da->next = *list; + *list = da; + (*count)++; + return 0; +} + +/** + * dev_unicast_delete - Release secondary unicast address. + * @dev: device + * + * Release reference to a secondary unicast address and remove it + * from the device if the reference count drop to zero. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_delete(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_delete); + +/** + * dev_unicast_add - add a secondary unicast address + * @dev: device + * + * Add a secondary unicast address to the device or increase + * the reference count if it already exists. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_add(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_add); + +static void __dev_addr_discard(struct dev_addr_list **list) +{ + struct dev_addr_list *tmp; + + while (*list != NULL) { + tmp = *list; + *list = tmp->next; + if (tmp->da_users > tmp->da_gusers) + printk("__dev_addr_discard: address leakage! 
" + "da_users=%d\n", tmp->da_users); + kfree(tmp); + } +} + +static void dev_addr_discard(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + + __dev_addr_discard(&dev->uc_list); + dev->uc_count = 0; + + __dev_addr_discard(&dev->mc_list); + dev->mc_count = 0; + + netif_tx_unlock_bh(dev); } unsigned dev_get_flags(const struct net_device *dev) @@ -2580,6 +2819,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) int ret, changes; int old_flags = dev->flags; + ASSERT_RTNL(); + /* * Set the flags on our device. */ @@ -2594,7 +2835,10 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - dev_mc_upload(dev); + if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + dev->change_rx_flags(dev, IFF_MULTICAST); + + dev_set_rx_mode(dev); /* * Have we downed the interface. We handle IFF_UP ourselves @@ -2607,7 +2851,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); if (!ret) - dev_mc_upload(dev); + dev_set_rx_mode(dev); } if (dev->flags & IFF_UP && @@ -3107,6 +3351,22 @@ int register_netdevice(struct net_device *dev) } } + /* Fix illegal checksum combinations */ + if ((dev->features & NETIF_F_HW_CSUM) && + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((dev->features & NETIF_F_NO_CSUM) && + (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + + /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && !(dev->features & NETIF_F_ALL_CSUM)) { @@ -3343,16 +3603,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev) } /** - * alloc_netdev - allocate network device + * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device + * @queue_count: the number of subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. + * and performs basic initialization. Also allocates subquue structs + * for each queue on the device at the end of the netdevice. 
*/ -struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)) +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), unsigned int queue_count) { void *p; struct net_device *dev; @@ -3361,7 +3623,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); /* ensure 32-byte alignment of both the device and private area */ - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + + (sizeof(struct net_device_subqueue) * queue_count)) & + ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); @@ -3374,15 +3638,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - if (sizeof_priv) - dev->priv = netdev_priv(dev); + if (sizeof_priv) { + dev->priv = ((char *)dev + + ((sizeof(struct net_device) + + (sizeof(struct net_device_subqueue) * + queue_count) + NETDEV_ALIGN_CONST) + & ~NETDEV_ALIGN_CONST)); + } + + dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; setup(dev); strcpy(dev->name, name); return dev; } -EXPORT_SYMBOL(alloc_netdev); +EXPORT_SYMBOL(alloc_netdev_mq); /** * free_netdev - free network device @@ -3471,9 +3742,9 @@ void unregister_netdevice(struct net_device *dev) raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); /* - * Flush the multicast chain + * Flush the unicast and multicast chains */ - dev_mc_discard(dev); + dev_addr_discard(dev); if (dev->uninit) dev->uninit(dev); @@ -3563,12 +3834,13 @@ static int dev_cpu_callback(struct notifier_block *nfb, * This is called when the number of channels allocated to the net_dma_client * changes. The net_dma_client tries to have one DMA channel per CPU. */ -static void net_dma_rebalance(void) + +static void net_dma_rebalance(struct net_dma *net_dma) { - unsigned int cpu, i, n; + unsigned int cpu, i, n, chan_idx; struct dma_chan *chan; - if (net_dma_count == 0) { + if (cpus_empty(net_dma->channel_mask)) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; @@ -3577,10 +3849,12 @@ static void net_dma_rebalance(void) i = 0; cpu = first_cpu(cpu_online_map); - rcu_read_lock(); - list_for_each_entry(chan, &net_dma_client->channels, client_node) { - n = ((num_online_cpus() / net_dma_count) - + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + chan = net_dma->channels[chan_idx]; + + n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) + + (i < (num_online_cpus() % + cpus_weight(net_dma->channel_mask)) ? 
1 : 0)); while(n) { per_cpu(softnet_data, cpu).net_dma = chan; @@ -3589,7 +3863,6 @@ static void net_dma_rebalance(void) } i++; } - rcu_read_unlock(); } /** @@ -3598,23 +3871,53 @@ static void net_dma_rebalance(void) * @chan: DMA channel for the event * @event: event type */ -static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_event event) -{ - spin_lock(&net_dma_event_lock); - switch (event) { - case DMA_RESOURCE_ADDED: - net_dma_count++; - net_dma_rebalance(); +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) +{ + int i, found = 0, pos = -1; + struct net_dma *net_dma = + container_of(client, struct net_dma, client); + enum dma_state_client ack = DMA_DUP; /* default: take no action */ + + spin_lock(&net_dma->lock); + switch (state) { + case DMA_RESOURCE_AVAILABLE: + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + break; + } else if (net_dma->channels[i] == NULL && pos < 0) + pos = i; + + if (!found && pos >= 0) { + ack = DMA_ACK; + net_dma->channels[pos] = chan; + cpu_set(pos, net_dma->channel_mask); + net_dma_rebalance(net_dma); + } break; case DMA_RESOURCE_REMOVED: - net_dma_count--; - net_dma_rebalance(); + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + pos = i; + break; + } + + if (found) { + ack = DMA_ACK; + cpu_clear(pos, net_dma->channel_mask); + net_dma->channels[i] = NULL; + net_dma_rebalance(net_dma); + } break; default: break; } - spin_unlock(&net_dma_event_lock); + spin_unlock(&net_dma->lock); + + return ack; } /** @@ -3622,12 +3925,10 @@ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, */ static int __init netdev_dma_register(void) { - spin_lock_init(&net_dma_event_lock); - net_dma_client = dma_async_client_register(netdev_dma_event); - if (net_dma_client == NULL) - return -ENOMEM; - - dma_async_client_chan_request(net_dma_client, num_online_cpus()); + spin_lock_init(&net_dma.lock); + dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); + dma_async_client_register(&net_dma.client); + dma_async_client_chan_request(&net_dma.client); return 0; } diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 5a54053386c8..99aece1aeccf 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -64,85 +64,24 @@ */ /* - * Update the multicast list into the physical NIC controller. - */ - -static void __dev_mc_upload(struct net_device *dev) -{ - /* Don't do anything till we up the interface - * [dev_open will call this function so the list will - * stay sane] - */ - - if (!(dev->flags&IFF_UP)) - return; - - /* - * Devices with no set multicast or which have been - * detached don't get set. - */ - - if (dev->set_multicast_list == NULL || - !netif_device_present(dev)) - return; - - dev->set_multicast_list(dev); -} - -void dev_mc_upload(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - __dev_mc_upload(dev); - netif_tx_unlock_bh(dev); -} - -/* * Delete a device level multicast */ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, **dmip; + int err; netif_tx_lock_bh(dev); - - for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { + err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, + addr, alen, glbl); + if (!err) { /* - * Find the entry we want to delete. The device could - * have variable length entries so check these too. 
+ * We have altered the list, so the card + * loaded filter is now wrong. Fix it */ - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - alen == dmi->dmi_addrlen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 0; - if (old_glbl == 0) - break; - } - if (--dmi->dmi_users) - goto done; - - /* - * Last user. So delete the entry. - */ - *dmip = dmi->next; - dev->mc_count--; - - kfree(dmi); - - /* - * We have altered the list, so the card - * loaded filter is now wrong. Fix it - */ - __dev_mc_upload(dev); - - netif_tx_unlock_bh(dev); - return 0; - } + + __dev_set_rx_mode(dev); } - err = -ENOENT; -done: netif_tx_unlock_bh(dev); return err; } @@ -153,68 +92,90 @@ done: int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, *dmi1; - - dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); + int err; netif_tx_lock_bh(dev); - for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - dmi->dmi_addrlen == alen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 1; - if (old_glbl) - goto done; - } - dmi->dmi_users++; - goto done; - } - } - - if ((dmi = dmi1) == NULL) { - netif_tx_unlock_bh(dev); - return -ENOMEM; - } - memcpy(dmi->dmi_addr, addr, alen); - dmi->dmi_addrlen = alen; - dmi->next = dev->mc_list; - dmi->dmi_users = 1; - dmi->dmi_gusers = glbl ? 1 : 0; - dev->mc_list = dmi; - dev->mc_count++; + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} - __dev_mc_upload(dev); +/** + * dev_mc_sync - Synchronize device's multicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_multicast_list + * function of layered software devices. + */ +int dev_mc_sync(struct net_device *to, struct net_device *from) +{ + struct dev_addr_list *da; + int err = 0; - netif_tx_unlock_bh(dev); - return 0; + netif_tx_lock_bh(to); + for (da = from->mc_list; da != NULL; da = da->next) { + if (!da->da_synced) { + err = __dev_addr_add(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + if (err < 0) + break; + da->da_synced = 1; + da->da_users++; + } else if (da->da_users == 1) { + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } + } + if (!err) + __dev_set_rx_mode(to); + netif_tx_unlock_bh(to); -done: - netif_tx_unlock_bh(dev); - kfree(dmi1); return err; } +EXPORT_SYMBOL(dev_mc_sync); -/* - * Discard multicast list when a device is downed - */ -void dev_mc_discard(struct net_device *dev) +/** + * dev_mc_unsync - Remove synchronized addresses from the destination + * device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_mc_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. 
+ */ +void dev_mc_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(dev); - - while (dev->mc_list != NULL) { - struct dev_mc_list *tmp = dev->mc_list; - dev->mc_list = tmp->next; - if (tmp->dmi_users > tmp->dmi_gusers) - printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); - kfree(tmp); + struct dev_addr_list *da; + + netif_tx_lock_bh(from); + netif_tx_lock_bh(to); + + for (da = from->mc_list; da != NULL; da = da->next) { + if (!da->da_synced) + continue; + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + da->da_synced = 0; + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); } - dev->mc_count = 0; + __dev_set_rx_mode(to); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(to); + netif_tx_unlock_bh(from); } +EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) @@ -244,7 +205,7 @@ static void dev_mc_seq_stop(struct seq_file *seq, void *v) static int dev_mc_seq_show(struct seq_file *seq, void *v) { - struct dev_mc_list *m; + struct dev_addr_list *m; struct net_device *dev = v; netif_tx_lock_bh(dev); @@ -292,4 +253,3 @@ void __init dev_mcast_init(void) EXPORT_SYMBOL(dev_mc_add); EXPORT_SYMBOL(dev_mc_delete); -EXPORT_SYMBOL(dev_mc_upload); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8d5e5a09b576..0b531e98ec33 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -52,6 +52,17 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) return 0; } + +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + else + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + + return 0; +} + u32 ethtool_op_get_sg(struct net_device *dev) { return (dev->features & NETIF_F_SG) != 0; @@ -980,5 +991,6 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 17daf4c9f793..590a767b029c 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -79,27 +79,27 @@ struct gen_estimator { - struct gen_estimator *next; + struct list_head list; struct gnet_stats_basic *bstats; struct gnet_stats_rate_est *rate_est; spinlock_t *stats_lock; - unsigned interval; int ewma_log; u64 last_bytes; u32 last_packets; u32 avpps; u32 avbps; + struct rcu_head e_rcu; }; struct gen_estimator_head { struct timer_list timer; - struct gen_estimator *list; + struct list_head list; }; static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; -/* Estimator array lock */ +/* Protects against NULL dereference */ static DEFINE_RWLOCK(est_lock); static void est_timer(unsigned long arg) @@ -107,13 +107,17 @@ static void est_timer(unsigned long arg) int idx = (int)arg; struct gen_estimator *e; - read_lock(&est_lock); - for (e = elist[idx].list; e; e = e->next) { + rcu_read_lock(); + list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; u32 npackets; u32 rate; spin_lock(e->stats_lock); + read_lock(&est_lock); + if (e->bstats == NULL) + goto skip; + nbytes = e->bstats->bytes; npackets = e->bstats->packets; rate = (nbytes - e->last_bytes)<<(7 - idx); @@ -125,11 +129,14 @@ static void est_timer(unsigned long arg) e->last_packets = npackets; e->avpps += 
((long)rate - (long)e->avpps) >> e->ewma_log; e->rate_est->pps = (e->avpps+0x1FF)>>10; +skip: + read_unlock(&est_lock); spin_unlock(e->stats_lock); } - mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); - read_unlock(&est_lock); + if (!list_empty(&elist[idx].list)) + mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); + rcu_read_unlock(); } /** @@ -146,12 +153,17 @@ static void est_timer(unsigned long arg) * &rate_est with the statistics lock grabed during this period. * * Returns 0 on success or a negative error code. + * + * NOTE: Called under rtnl_mutex */ int gen_new_estimator(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt) + struct gnet_stats_rate_est *rate_est, + spinlock_t *stats_lock, + struct rtattr *opt) { struct gen_estimator *est; struct gnet_estimator *parm = RTA_DATA(opt); + int idx; if (RTA_PAYLOAD(opt) < sizeof(*parm)) return -EINVAL; @@ -163,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, if (est == NULL) return -ENOBUFS; - est->interval = parm->interval + 2; + idx = parm->interval + 2; est->bstats = bstats; est->rate_est = rate_est; est->stats_lock = stats_lock; @@ -173,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; - est->next = elist[est->interval].list; - if (est->next == NULL) { - init_timer(&elist[est->interval].timer); - elist[est->interval].timer.data = est->interval; - elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); - elist[est->interval].timer.function = est_timer; - add_timer(&elist[est->interval].timer); + if (!elist[idx].timer.function) { + INIT_LIST_HEAD(&elist[idx].list); + setup_timer(&elist[idx].timer, est_timer, idx); } - write_lock_bh(&est_lock); - elist[est->interval].list = est; - write_unlock_bh(&est_lock); + + if (list_empty(&elist[idx].list)) + mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); + + list_add_rcu(&est->list, &elist[idx].list); return 0; } +static void __gen_kill_estimator(struct rcu_head *head) +{ + struct gen_estimator *e = container_of(head, + struct gen_estimator, e_rcu); + kfree(e); +} + /** * gen_kill_estimator - remove a rate estimator * @bstats: basic statistics @@ -194,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, * * Removes the rate estimator specified by &bstats and &rate_est * and deletes the timer. 
+ * + * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est) { int idx; - struct gen_estimator *est, **pest; + struct gen_estimator *e, *n; for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - int killed = 0; - pest = &elist[idx].list; - while ((est=*pest) != NULL) { - if (est->rate_est != rate_est || est->bstats != bstats) { - pest = &est->next; + + /* Skip non initialized indexes */ + if (!elist[idx].timer.function) + continue; + + list_for_each_entry_safe(e, n, &elist[idx].list, list) { + if (e->rate_est != rate_est || e->bstats != bstats) continue; - } write_lock_bh(&est_lock); - *pest = est->next; + e->bstats = NULL; write_unlock_bh(&est_lock); - kfree(est); - killed++; + list_del_rcu(&e->list); + call_rcu(&e->e_rcu, __gen_kill_estimator); } - if (killed && elist[idx].list == NULL) - del_timer(&elist[idx].timer); } } diff --git a/net/core/netevent.c b/net/core/netevent.c index 35d02c38554e..95f81de87502 100644 --- a/net/core/netevent.c +++ b/net/core/netevent.c @@ -15,6 +15,7 @@ #include <linux/rtnetlink.h> #include <linux/notifier.h> +#include <net/netevent.h> static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 758dafe284c0..de1b26aa5720 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -66,8 +66,9 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); netif_tx_lock(dev); - if (netif_queue_stopped(dev) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + if ((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); netif_tx_unlock(dev); local_irq_restore(flags); @@ -250,22 +251,24 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) unsigned long flags; local_irq_save(flags); - if (netif_tx_trylock(dev)) { - /* try until next clock tick */ - for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; - tries > 0; --tries) { - if (!netif_queue_stopped(dev)) + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { + if (netif_tx_trylock(dev)) { + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) status = dev->hard_start_xmit(skb, dev); + netif_tx_unlock(dev); if (status == NETDEV_TX_OK) break; - /* tickle device maybe there is some cleanup */ - netpoll_poll(np); - - udelay(USEC_PER_POLL); } - netif_tx_unlock(dev); + + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); + + udelay(USEC_PER_POLL); } local_irq_restore(flags); } @@ -780,14 +783,19 @@ void netpoll_cleanup(struct netpoll *np) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } - np->dev->npinfo = NULL; if (atomic_dec_and_test(&npinfo->refcnt)) { skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); cancel_rearming_delayed_work(&npinfo->tx_work); - flush_scheduled_work(); + /* clean after last, unfinished work */ + if (!skb_queue_empty(&npinfo->txq)) { + struct sk_buff *skb; + skb = __skb_dequeue(&npinfo->txq); + kfree_skb(skb); + } kfree(npinfo); + np->dev->npinfo = NULL; } } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9cd3a1cb60ef..bca787fdbc51 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -152,6 +152,9 @@ #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> +#ifdef CONFIG_XFRM +#include <net/xfrm.h> +#endif #include <asm/byteorder.h> #include <linux/rcupdate.h> #include <asm/bitops.h> @@ 
-181,6 +184,8 @@ #define F_MPLS_RND (1<<8) /* Random MPLS labels */ #define F_VID_RND (1<<9) /* Random VLAN ID */ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ +#define F_FLOW_SEQ (1<<11) /* Sequential flows */ +#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -207,8 +212,15 @@ static struct proc_dir_entry *pg_proc_dir = NULL; struct flow_state { __be32 cur_daddr; int count; +#ifdef CONFIG_XFRM + struct xfrm_state *x; +#endif + __u32 flags; }; +/* flow flag bits */ +#define F_INIT (1<<0) /* flow has been initialized */ + struct pktgen_dev { /* * Try to keep frequent/infrequent used vars. separated. @@ -228,6 +240,7 @@ struct pktgen_dev { int min_pkt_size; /* = ETH_ZLEN; */ int max_pkt_size; /* = ETH_ZLEN; */ + int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; __u32 delay_us; /* Default delay */ __u32 delay_ns; @@ -341,7 +354,11 @@ struct pktgen_dev { unsigned cflows; /* Concurrent flows (config) */ unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ - + unsigned curfl; /* current sequenced flow (state)*/ +#ifdef CONFIG_XFRM + __u8 ipsmode; /* IPSEC mode (config) */ + __u8 ipsproto; /* IPSEC type (config) */ +#endif char result[512]; }; @@ -690,6 +707,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_MPLS_RND) seq_printf(seq, "MPLS_RND "); + if (pkt_dev->cflows) { + if (pkt_dev->flags & F_FLOW_SEQ) + seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ + else + seq_printf(seq, "FLOW_RND "); + } + +#ifdef CONFIG_XFRM + if (pkt_dev->flags & F_IPSEC_ON) + seq_printf(seq, "IPSEC "); +#endif + if (pkt_dev->flags & F_MACSRC_RND) seq_printf(seq, "MACSRC_RND "); @@ -1181,6 +1210,14 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!SVID_RND") == 0) pkt_dev->flags &= ~F_SVID_RND; + else if (strcmp(f, "FLOW_SEQ") == 0) + pkt_dev->flags |= F_FLOW_SEQ; + +#ifdef CONFIG_XFRM + else if (strcmp(f, "IPSEC") == 0) + pkt_dev->flags |= F_IPSEC_ON; +#endif + else if (strcmp(f, "!IPV6") == 0) pkt_dev->flags &= ~F_IPV6; @@ -1189,7 +1226,7 @@ static ssize_t pktgen_if_write(struct file *file, "Flag -:%s:- unknown\nAvailable flags, (prepend ! 
to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -2075,6 +2112,70 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) pkt_dev->idle_acc += now - start; } +static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) +{ + pkt_dev->pkt_overhead = 0; + pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); + pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); +} + +static inline int f_seen(struct pktgen_dev *pkt_dev, int flow) +{ + + if (pkt_dev->flows[flow].flags & F_INIT) + return 1; + else + return 0; +} + +static inline int f_pick(struct pktgen_dev *pkt_dev) +{ + int flow = pkt_dev->curfl; + + if (pkt_dev->flags & F_FLOW_SEQ) { + if (pkt_dev->flows[flow].count >= pkt_dev->lflow) { + /* reset time */ + pkt_dev->flows[flow].count = 0; + pkt_dev->curfl += 1; + if (pkt_dev->curfl >= pkt_dev->cflows) + pkt_dev->curfl = 0; /*reset */ + } + } else { + flow = random32() % pkt_dev->cflows; + + if (pkt_dev->flows[flow].count > pkt_dev->lflow) + pkt_dev->flows[flow].count = 0; + } + + return pkt_dev->curfl; +} + + +#ifdef CONFIG_XFRM +/* If there was already an IPSEC SA, we keep it as is, else + * we go look for it ... +*/ +inline +void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) +{ + struct xfrm_state *x = pkt_dev->flows[flow].x; + if (!x) { + /*slow path: we dont already have xfrm_state*/ + x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, + (xfrm_address_t *)&pkt_dev->cur_saddr, + AF_INET, + pkt_dev->ipsmode, + pkt_dev->ipsproto, 0); + if (x) { + pkt_dev->flows[flow].x = x; + set_pkt_overhead(pkt_dev); + pkt_dev->pkt_overhead+=x->props.header_len; + } + + } +} +#endif /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2084,12 +2185,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 imx; int flow = 0; - if (pkt_dev->cflows) { - flow = random32() % pkt_dev->cflows; - - if (pkt_dev->flows[flow].count > pkt_dev->lflow) - pkt_dev->flows[flow].count = 0; - } + if (pkt_dev->cflows) + flow = f_pick(pkt_dev); /* Deal with source MAC */ if (pkt_dev->src_mac_count > 1) { @@ -2205,7 +2302,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_saddr = htonl(t); } - if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { + if (pkt_dev->cflows && f_seen(pkt_dev, flow)) { pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; } else { imn = ntohl(pkt_dev->daddr_min); @@ -2235,8 +2332,13 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } } if (pkt_dev->cflows) { + pkt_dev->flows[flow].flags |= F_INIT; pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; +#ifdef CONFIG_XFRM + if (pkt_dev->flags & F_IPSEC_ON) + get_ipsec_sa(pkt_dev, flow); +#endif pkt_dev->nflows++; } } @@ -2277,6 +2379,91 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].count++; } + +#ifdef CONFIG_XFRM +static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) +{ + struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; + int err = 0; + struct iphdr *iph; + + if (!x) + return 0; + /* XXX: we dont support tunnel mode for now until + * we resolve the dst issue */ + if (x->props.mode != XFRM_MODE_TRANSPORT) + return 0; 
+ + spin_lock(&x->lock); + iph = ip_hdr(skb); + + err = x->mode->output(x, skb); + if (err) + goto error; + err = x->type->output(x, skb); + if (err) + goto error; + + x->curlft.bytes +=skb->len; + x->curlft.packets++; + spin_unlock(&x->lock); + +error: + spin_unlock(&x->lock); + return err; +} + +static inline void free_SAs(struct pktgen_dev *pkt_dev) +{ + if (pkt_dev->cflows) { + /* let go of the SAs if we have them */ + int i = 0; + for (; i < pkt_dev->nflows; i++){ + struct xfrm_state *x = pkt_dev->flows[i].x; + if (x) { + xfrm_state_put(x); + pkt_dev->flows[i].x = NULL; + } + } + } +} + +static inline int process_ipsec(struct pktgen_dev *pkt_dev, + struct sk_buff *skb, __be16 protocol) +{ + if (pkt_dev->flags & F_IPSEC_ON) { + struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; + int nhead = 0; + if (x) { + int ret; + __u8 *eth; + nhead = x->props.header_len - skb_headroom(skb); + if (nhead >0) { + ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + if (ret < 0) { + printk("Error expanding ipsec packet %d\n",ret); + return 0; + } + } + + /* ipsec is not expecting ll header */ + skb_pull(skb, ETH_HLEN); + ret = pktgen_output_ipsec(skb, pkt_dev); + if (ret) { + printk("Error creating ipsec packet %d\n",ret); + kfree_skb(skb); + return 0; + } + /* restore ll */ + eth = (__u8 *) skb_push(skb, ETH_HLEN); + memcpy(eth, pkt_dev->hh, 12); + *(u16 *) & eth[12] = protocol; + } + } + return 1; +} +#endif + static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) { unsigned i; @@ -2323,9 +2510,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen + - pkt_dev->nr_labels*sizeof(u32) + - VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), - GFP_ATOMIC); + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2368,7 +2553,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - - pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead; if (datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); @@ -2391,8 +2576,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->nr_labels * sizeof(u32) - - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev)); + pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; @@ -2463,6 +2647,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pgh->tv_usec = htonl(timestamp.tv_usec); } +#ifdef CONFIG_XFRM + if (!process_ipsec(pkt_dev, skb, protocol)) + return NULL; +#endif + return skb; } @@ -2662,9 +2851,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + - pkt_dev->nr_labels*sizeof(u32) + - VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), - GFP_ATOMIC); + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2708,7 +2895,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - sizeof(struct ipv6hdr) - sizeof(struct udphdr) - - pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead; if 
(datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); @@ -2738,8 +2925,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->nr_labels * sizeof(u32) - - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev)); + pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; @@ -2857,6 +3043,7 @@ static void pktgen_run(struct pktgen_thread *t) pkt_dev->started_at = getCurUs(); pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ pkt_dev->next_tx_ns = 0; + set_pkt_overhead(pkt_dev); strcpy(pkt_dev->result, "Starting"); started++; @@ -3139,7 +3326,9 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if (netif_queue_stopped(odev) || need_resched()) { + if ((netif_queue_stopped(odev) || + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) || + need_resched()) { idle_start = getCurUs(); if (!netif_running(odev)) { @@ -3154,7 +3343,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev)) { + if (netif_queue_stopped(odev) || + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3181,7 +3371,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev)) { + if (!netif_queue_stopped(odev) && + !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3274,6 +3465,8 @@ static int pktgen_thread_worker(void *arg) set_current_state(TASK_INTERRUPTIBLE); + set_freezable(); + while (!kthread_should_stop()) { pkt_dev = next_to_run(t); @@ -3446,11 +3639,18 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) } pkt_dev->entry->proc_fops = &pktgen_if_fops; pkt_dev->entry->data = pkt_dev; +#ifdef CONFIG_XFRM + pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; + pkt_dev->ipsproto = IPPROTO_ESP; +#endif return add_dev_to_thread(t, pkt_dev); out2: dev_put(pkt_dev->odev); out1: +#ifdef CONFIG_XFRM + free_SAs(pkt_dev); +#endif if (pkt_dev->flows) vfree(pkt_dev->flows); kfree(pkt_dev); @@ -3545,6 +3745,9 @@ static int pktgen_remove_device(struct pktgen_thread *t, if (pkt_dev->entry) remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); +#ifdef CONFIG_XFRM + free_SAs(pkt_dev); +#endif if (pkt_dev->flows) vfree(pkt_dev->flows); kfree(pkt_dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02e8bf084277..864cbdf31ed7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -97,6 +97,19 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } +int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len) +{ + if (RTA_PAYLOAD(rta) < len) + return -1; + if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { + rta = RTA_DATA(rta) + RTA_ALIGN(len); + return rtattr_parse_nested(tb, maxattr, rta); + } + memset(tb, 0, sizeof(struct rtattr *) * maxattr); + return 0; +} + static struct rtnl_link *rtnl_msg_handlers[NPROTO]; static inline int rtm_msgindex(int msgtype) @@ -243,6 +256,150 @@ void rtnl_unregister_all(int protocol) EXPORT_SYMBOL_GPL(rtnl_unregister_all); +static LIST_HEAD(link_ops); + +/** + * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. 
+ * @ops: struct rtnl_link_ops * to register + * + * The caller must hold the rtnl_mutex. This function should be used + * by drivers that create devices during module initialization. It + * must be called before registering the devices. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_link_register(struct rtnl_link_ops *ops) +{ + if (!ops->dellink) + ops->dellink = unregister_netdevice; + + list_add_tail(&ops->list, &link_ops); + return 0; +} + +EXPORT_SYMBOL_GPL(__rtnl_link_register); + +/** + * rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_link_register(struct rtnl_link_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_link_register(ops); + rtnl_unlock(); + return err; +} + +EXPORT_SYMBOL_GPL(rtnl_link_register); + +/** + * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + struct net_device *dev, *n; + + for_each_netdev_safe(dev, n) { + if (dev->rtnl_link_ops == ops) + ops->dellink(dev); + } + list_del(&ops->list); +} + +EXPORT_SYMBOL_GPL(__rtnl_link_unregister); + +/** + * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + */ +void rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + rtnl_lock(); + __rtnl_link_unregister(ops); + rtnl_unlock(); +} + +EXPORT_SYMBOL_GPL(rtnl_link_unregister); + +static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +{ + const struct rtnl_link_ops *ops; + + list_for_each_entry(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) + return ops; + } + return NULL; +} + +static size_t rtnl_link_get_size(const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + size_t size; + + if (!ops) + return 0; + + size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ + nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ + + if (ops->get_size) + /* IFLA_INFO_DATA + nested data */ + size += nlmsg_total_size(sizeof(struct nlattr)) + + ops->get_size(dev); + + if (ops->get_xstats_size) + size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ + + return size; +} + +static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + struct nlattr *linkinfo, *data; + int err = -EMSGSIZE; + + linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + if (linkinfo == NULL) + goto out; + + if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) + goto err_cancel_link; + if (ops->fill_xstats) { + err = ops->fill_xstats(skb, dev); + if (err < 0) + goto err_cancel_link; + } + if (ops->fill_info) { + data = nla_nest_start(skb, IFLA_INFO_DATA); + if (data == NULL) + goto err_cancel_link; + err = ops->fill_info(skb, dev); + if (err < 0) + goto err_cancel_data; + nla_nest_end(skb, data); + } + + nla_nest_end(skb, linkinfo); + return 0; + +err_cancel_data: + nla_nest_cancel(skb, data); +err_cancel_link: + nla_nest_cancel(skb, linkinfo); +out: + return err; +} + static const int rtm_min[RTM_NR_FAMILIES] = { [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), @@ -437,7 +594,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; -static inline size_t if_nlmsg_size(void) +static inline size_t 
if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -452,7 +609,8 @@ static inline size_t if_nlmsg_size(void) + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1); /* IFLA_LINKMODE */ + + nla_total_size(1) /* IFLA_LINKMODE */ + + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, @@ -522,6 +680,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } } + if (dev->rtnl_link_ops) { + if (rtnl_link_fill(skb, dev) < 0) + goto nla_put_failure; + } + return nlmsg_end(skb, nlh); nla_put_failure: @@ -553,6 +716,8 @@ cont: static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, + [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, + [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, @@ -561,44 +726,16 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, }; -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct ifinfomsg *ifm; - struct net_device *dev; - int err, send_addr_notify = 0, modified = 0; - struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - goto errout; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - - err = -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); - else - goto errout; - - if (dev == NULL) { - err = -ENODEV; - goto errout; - } - - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - goto errout_dev; +static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { + [IFLA_INFO_KIND] = { .type = NLA_STRING }, + [IFLA_INFO_DATA] = { .type = NLA_NESTED }, +}; - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - goto errout_dev; +static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, + struct nlattr **tb, char *ifname, int modified) +{ + int send_addr_notify = 0; + int err; if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; @@ -606,12 +743,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev->set_config) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } u_map = nla_data(tb[IFLA_MAP]); @@ -624,7 +761,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = dev->set_config(dev, &k_map); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -635,19 +772,19 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev->set_mac_address) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; - goto errout_dev; + goto errout; } sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), @@ -655,7 +792,7 @@ static int 
rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = dev->set_mac_address(dev, sa); kfree(sa); if (err) - goto errout_dev; + goto errout; send_addr_notify = 1; modified = 1; } @@ -663,7 +800,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (tb[IFLA_MTU]) { err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -675,7 +812,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (ifm->ifi_index > 0 && ifname[0]) { err = dev_change_name(dev, ifname); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -684,7 +821,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) send_addr_notify = 1; } - if (ifm->ifi_flags || ifm->ifi_change) { unsigned int flags = ifm->ifi_flags; @@ -712,7 +848,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = 0; -errout_dev: +errout: if (err < 0 && modified && net_ratelimit()) printk(KERN_WARNING "A link change request failed with " "some changes comitted already. Interface %s may " @@ -721,12 +857,239 @@ errout_dev: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} + +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct ifinfomsg *ifm; + struct net_device *dev; + int err; + struct nlattr *tb[IFLA_MAX+1]; + char ifname[IFNAMSIZ]; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + goto errout; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + err = -EINVAL; + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = dev_get_by_name(ifname); + else + goto errout; + + if (dev == NULL) { + err = -ENODEV; + goto errout; + } + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + goto errout_dev; + + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + goto errout_dev; + + err = do_setlink(dev, ifm, tb, ifname, 0); +errout_dev: dev_put(dev); errout: return err; } +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + int err; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = __dev_get_by_name(ifname); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + ops = dev->rtnl_link_ops; + if (!ops) + return -EOPNOTSUPP; + + ops->dellink(dev); + return 0; +} + +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char kind[MODULE_NAME_LEN]; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + struct nlattr *linkinfo[IFLA_INFO_MAX+1]; + int err; + +replay: + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = 
__dev_get_by_index(ifm->ifi_index); + else if (ifname[0]) + dev = __dev_get_by_name(ifname); + else + dev = NULL; + + if (tb[IFLA_LINKINFO]) { + err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], ifla_info_policy); + if (err < 0) + return err; + } else + memset(linkinfo, 0, sizeof(linkinfo)); + + if (linkinfo[IFLA_INFO_KIND]) { + nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind); + } else { + kind[0] = '\0'; + ops = NULL; + } + + if (1) { + struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; + + if (ops) { + if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { + err = nla_parse_nested(attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy); + if (err < 0) + return err; + data = attr; + } + if (ops->validate) { + err = ops->validate(tb, data); + if (err < 0) + return err; + } + } + + if (dev) { + int modified = 0; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (linkinfo[IFLA_INFO_DATA]) { + if (!ops || ops != dev->rtnl_link_ops || + !ops->changelink) + return -EOPNOTSUPP; + + err = ops->changelink(dev, tb, data); + if (err < 0) + return err; + modified = 1; + } + + return do_setlink(dev, ifm, tb, ifname, modified); + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENODEV; + + if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) + return -EOPNOTSUPP; + if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) + return -EOPNOTSUPP; + + if (!ops) { +#ifdef CONFIG_KMOD + if (kind[0]) { + __rtnl_unlock(); + request_module("rtnl-link-%s", kind); + rtnl_lock(); + ops = rtnl_link_ops_get(kind); + if (ops) + goto replay; + } +#endif + return -EOPNOTSUPP; + } + + if (!ifname[0]) + snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + return -ENOMEM; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_ADDRESS]) + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); + if (tb[IFLA_BROADCAST]) + memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), + nla_len(tb[IFLA_BROADCAST])); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_WEIGHT]) + dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + if (ops->newlink) + err = ops->newlink(dev, tb, data); + else + err = register_netdevice(dev); +err_free: + if (err < 0) + free_netdev(dev); + return err; + } +} + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct ifinfomsg *ifm; @@ -747,7 +1110,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else return -EINVAL; - nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; @@ -797,7 +1160,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (skb == NULL) goto errout; @@ -952,6 +1315,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); 
diff --git a/net/core/scm.c b/net/core/scm.c
index 292ad8d5ad76..44c4ec2c8769 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -228,7 +228,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		err = security_file_receive(fp[i]);
 		if (err)
 			break;
-		err = get_unused_fd();
+		err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags
+					  ? O_CLOEXEC : 0);
 		if (err < 0)
 			break;
 		new_fd = err;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7c6a34e21eee..0583e8498f13 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -415,9 +415,11 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	C(csum);
 	C(local_df);
 	n->cloned = 1;
+	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 	n->nohdr = 0;
 	C(pkt_type);
 	C(ip_summed);
+	skb_copy_queue_mapping(n, skb);
 	C(priority);
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	C(ipvs_property);
@@ -426,6 +428,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	n->destructor = NULL;
 	C(mark);
 	__nf_copy(n, skb);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+	C(nf_trace);
+#endif
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
@@ -434,8 +440,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
 	C(iif);
 #endif
-	skb_copy_secmark(n, skb);
 #endif
+	skb_copy_secmark(n, skb);
 	C(truesize);
 	atomic_set(&n->users, 1);
 	C(head);
@@ -459,6 +465,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 	new->sk = NULL;
 	new->dev = old->dev;
+	skb_copy_queue_mapping(new, old);
 	new->priority = old->priority;
 	new->protocol = old->protocol;
 	new->dst = dst_clone(old->dst);
@@ -482,6 +489,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->destructor = NULL;
 	new->mark = old->mark;
 	__nf_copy(new, old);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+	new->nf_trace = old->nf_trace;
+#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property = old->ipvs_property;
 #endif
@@ -676,6 +687,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->network_header += off;
 	skb->mac_header += off;
 	skb->cloned = 0;
+	skb->hdr_len = 0;
 	skb->nohdr = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
 	return 0;
@@ -1706,6 +1718,11 @@ next_skb:
 		st->stepped_offset += frag->size;
 	}
 
+	if (st->frag_data) {
+		kunmap_skb_frag(st->frag_data);
+		st->frag_data = NULL;
+	}
+
 	if (st->cur_skb->next) {
 		st->cur_skb = st->cur_skb->next;
 		st->frag_idx = 0;
@@ -1925,6 +1942,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		tail = nskb;
 
 		nskb->dev = skb->dev;
+		skb_copy_queue_mapping(nskb, skb);
 		nskb->priority = skb->priority;
 		nskb->protocol = skb->protocol;
 		nskb->dst = dst_clone(skb->dst);
@@ -2206,7 +2224,6 @@ EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
 EXPORT_SYMBOL(skb_clone);
-EXPORT_SYMBOL(skb_clone_fraglist);
 EXPORT_SYMBOL(skb_copy);
 EXPORT_SYMBOL(skb_copy_and_csum_bits);
 EXPORT_SYMBOL(skb_copy_and_csum_dev);
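Among the skbuff.c changes above, the skb_seq_read() hunk fixes a leak: the last mapped fragment of one skb is now unmapped before the iterator steps onto skb->next in the frag list. For context, a consumer of that iterator follows the pattern below; scan_payload() and its byte-sum are made up, while the three skb_*_seq_read() calls are the real API:

#include <linux/skbuff.h>

/* Hypothetical consumer of the skb_seq_state iterator fixed above. */
static unsigned int scan_payload(struct sk_buff *skb)
{
	struct skb_seq_state st;
	const u8 *data;
	unsigned int len, i, consumed = 0, sum = 0;

	skb_prepare_seq_read(skb, 0, skb->len, &st);
	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		for (i = 0; i < len; i++)	/* toy work: sum the bytes */
			sum += data[i];
		consumed += len;
	}
	/* a zero return has already torn the state down; on an early
	 * break, skb_abort_seq_read(&st) must be called instead */
	return sum;
}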
diff --git a/net/core/sock.c b/net/core/sock.c
index c14ce0198d25..091032a250c7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -210,7 +210,8 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 		return -EDOM;
 
 	if (tv.tv_sec < 0) {
-		static int warned = 0;
+		static int warned __read_mostly;
+
 		*timeo_p = 0;
 		if (warned < 10 && net_ratelimit())
 			warned++;
@@ -1851,46 +1852,15 @@ void proto_unregister(struct proto *prot)
 EXPORT_SYMBOL(proto_unregister);
 
 #ifdef CONFIG_PROC_FS
-static inline struct proto *__proto_head(void)
-{
-	return list_entry(proto_list.next, struct proto, node);
-}
-
-static inline struct proto *proto_head(void)
-{
-	return list_empty(&proto_list) ? NULL : __proto_head();
-}
-
-static inline struct proto *proto_next(struct proto *proto)
-{
-	return proto->node.next == &proto_list ? NULL :
-		list_entry(proto->node.next, struct proto, node);
-}
-
-static inline struct proto *proto_get_idx(loff_t pos)
-{
-	struct proto *proto;
-	loff_t i = 0;
-
-	list_for_each_entry(proto, &proto_list, node)
-		if (i++ == pos)
-			goto out;
-
-	proto = NULL;
-out:
-	return proto;
-}
-
 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	read_lock(&proto_list_lock);
-	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
+	return seq_list_start_head(&proto_list, *pos);
 }
 
 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	++*pos;
-	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
+	return seq_list_next(v, &proto_list, pos);
 }
 
 static void proto_seq_stop(struct seq_file *seq, void *v)
@@ -1938,7 +1908,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 
 static int proto_seq_show(struct seq_file *seq, void *v)
 {
-	if (v == SEQ_START_TOKEN)
+	if (v == &proto_list)
 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
 			   "protocol",
 			   "size",
@@ -1950,7 +1920,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
 			   "module",
 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
 	else
-		proto_seq_printf(seq, v);
+		proto_seq_printf(seq, list_entry(v, struct proto, node));
 	return 0;
 }
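The proto_seq_*() conversion above swaps the open-coded cursor helpers for the generic seq_list API; seq_list_start_head() hands back the list head itself in place of SEQ_START_TOKEN, which is why proto_seq_show() now tests v == &proto_list. The same pattern in miniature — struct foo, foo_list and foo_lock are stand-in names, not kernel code:

#include <linux/list.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>

struct foo {
	struct list_head node;
	int value;
};

static LIST_HEAD(foo_list);
static DEFINE_SPINLOCK(foo_lock);

static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
{
	spin_lock(&foo_lock);
	/* at *pos == 0 this yields &foo_list itself: the header token */
	return seq_list_start_head(&foo_list, *pos);
}

static void *foo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &foo_list, pos);
}

static void foo_seq_stop(struct seq_file *seq, void *v)
{
	spin_unlock(&foo_lock);
}

static int foo_seq_show(struct seq_file *seq, void *v)
{
	if (v == &foo_list)		/* header line, like v == &proto_list */
		seq_puts(seq, "value\n");
	else
		seq_printf(seq, "%d\n",
			   list_entry(v, struct foo, node)->value);
	return 0;
}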