diff options
author | Patrick McHardy <kaber@trash.net> | 2011-01-14 16:12:37 +0300 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2011-01-14 16:12:37 +0300 |
commit | 0134e89c7bcc9fde1da962c82a120691e185619f (patch) | |
tree | 3e03335cf001019a2687d161e956de4f73379984 /net | |
parent | c7066f70d9610df0b9406cc635fc09e86136e714 (diff) | |
parent | 6faee60a4e82075853a437831768cc9e2e563e4e (diff) | |
download | linux-0134e89c7bcc9fde1da962c82a120691e185619f.tar.xz |
Merge branch 'master' of git://1984.lsi.us.es/net-next-2.6
Conflicts:
net/ipv4/route.c
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net')
364 files changed, 20913 insertions, 7487 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 52077ca22072..6e64f7c6a2e9 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -272,13 +272,11 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } - new_dev = alloc_netdev_mq(sizeof(struct vlan_dev_info), name, - vlan_setup, real_dev->num_tx_queues); + new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup); if (new_dev == NULL) return -ENOBUFS; - netif_copy_real_num_queues(new_dev, real_dev); dev_net_set(new_dev, net); /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. @@ -334,12 +332,15 @@ static void vlan_transfer_features(struct net_device *dev, vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; vlandev->gso_max_size = dev->gso_max_size; + + if (dev->features & NETIF_F_HW_VLAN_TX) + vlandev->hard_header_len = dev->hard_header_len; + else + vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; + #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif - vlandev->real_num_tx_queues = dev->real_num_tx_queues; - BUG_ON(vlandev->real_num_tx_queues > vlandev->num_tx_queues); - if (old_features != vlandev->features) netdev_features_change(vlandev); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index db01b3181fdc..5687c9b95f33 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -19,19 +19,25 @@ struct vlan_priority_tci_mapping { /** - * struct vlan_rx_stats - VLAN percpu rx stats + * struct vlan_pcpu_stats - VLAN percpu rx/tx stats * @rx_packets: number of received packets * @rx_bytes: number of received bytes * @rx_multicast: number of received multicast packets + * @tx_packets: number of transmitted packets + * @tx_bytes: number of transmitted bytes * @syncp: synchronization point for 64bit counters - * @rx_errors: number of errors + * @rx_errors: number of rx errors + * @tx_dropped: number of tx drops */ -struct vlan_rx_stats { +struct vlan_pcpu_stats { u64 rx_packets; u64 rx_bytes; u64 rx_multicast; + u64 tx_packets; + u64 tx_bytes; struct u64_stats_sync syncp; - unsigned long rx_errors; + u32 rx_errors; + u32 tx_dropped; }; /** @@ -45,9 +51,7 @@ struct vlan_rx_stats { * @real_dev: underlying netdevice * @real_dev_addr: address of underlying netdevice * @dent: proc dir entry - * @cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX - * @cnt_encap_on_xmit: statistic - number of skb encapsulations on TX - * @vlan_rx_stats: ptr to percpu rx stats + * @vlan_pcpu_stats: ptr to percpu rx stats */ struct vlan_dev_info { unsigned int nr_ingress_mappings; @@ -62,9 +66,7 @@ struct vlan_dev_info { unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; - unsigned long cnt_inc_headroom_on_tx; - unsigned long cnt_encap_on_xmit; - struct vlan_rx_stats __percpu *vlan_rx_stats; + struct vlan_pcpu_stats __percpu *vlan_pcpu_stats; }; static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 69b2f79800a5..ce8e3ab3e7a5 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp) struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; struct net_device *vlan_dev; - struct vlan_rx_stats *rx_stats; + struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_id); if (!vlan_dev) { @@ -26,7 +26,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp) skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; - rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats); + rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 14e3d1fa07a0..be737539f34d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -141,7 +141,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct vlan_hdr *vhdr; - struct vlan_rx_stats *rx_stats; + struct vlan_pcpu_stats *rx_stats; struct net_device *vlan_dev; u16 vlan_id; u16 vlan_tci; @@ -177,7 +177,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, } else { skb->dev = vlan_dev; - rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats); + rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; @@ -274,9 +274,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, u16 vlan_tci = 0; int rc; - if (WARN_ON(skb_headroom(skb) < dev->hard_header_len)) - return -ENOSPC; - if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) { vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); @@ -313,8 +310,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { - int i = skb_get_queue_mapping(skb); - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); unsigned int len; int ret; @@ -326,71 +321,31 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, */ if (veth->h_vlan_proto != htons(ETH_P_8021Q) || vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) { - unsigned int orig_headroom = skb_headroom(skb); u16 vlan_tci; - - vlan_dev_info(dev)->cnt_encap_on_xmit++; - vlan_tci = vlan_dev_info(dev)->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_put_tag(skb, vlan_tci); - if (!skb) { - txq->tx_dropped++; - return NETDEV_TX_OK; - } - - if (orig_headroom < VLAN_HLEN) - vlan_dev_info(dev)->cnt_inc_headroom_on_tx++; + skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb_set_dev(skb, vlan_dev_info(dev)->real_dev); len = skb->len; ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - txq->tx_packets++; - txq->tx_bytes += len; - } else - txq->tx_dropped++; + struct vlan_pcpu_stats *stats; - return ret; -} - -static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - int i = skb_get_queue_mapping(skb); - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - u16 vlan_tci; - unsigned int len; - int ret; - - vlan_tci = vlan_dev_info(dev)->vlan_id; - vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_hwaccel_put_tag(skb, vlan_tci); - - skb->dev = vlan_dev_info(dev)->real_dev; - len = skb->len; - ret = dev_queue_xmit(skb); - - if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - txq->tx_packets++; - txq->tx_bytes += len; - } else - txq->tx_dropped++; + stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += len; + u64_stats_update_begin(&stats->syncp); + } else { + this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped); + } return ret; } -static u16 vlan_dev_select_queue(struct net_device *dev, struct sk_buff *skb) -{ - struct net_device *rdev = vlan_dev_info(dev)->real_dev; - const struct net_device_ops *ops = rdev->netdev_ops; - - return ops->ndo_select_queue(rdev, skb); -} - static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) { /* TODO: gotta make sure the underlying layer can handle it, @@ -719,8 +674,7 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; -static const struct net_device_ops vlan_netdev_ops, vlan_netdev_accel_ops, - vlan_netdev_ops_sq, vlan_netdev_accel_ops_sq; +static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { @@ -738,6 +692,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_PRESENT); dev->features |= real_dev->features & real_dev->vlan_features; + dev->features |= NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; /* ipv6 shared card related stuff */ @@ -755,26 +710,20 @@ static int vlan_dev_init(struct net_device *dev) if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; - if (real_dev->netdev_ops->ndo_select_queue) - dev->netdev_ops = &vlan_netdev_accel_ops_sq; - else - dev->netdev_ops = &vlan_netdev_accel_ops; } else { dev->header_ops = &vlan_header_ops; dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; - if (real_dev->netdev_ops->ndo_select_queue) - dev->netdev_ops = &vlan_netdev_ops_sq; - else - dev->netdev_ops = &vlan_netdev_ops; } + dev->netdev_ops = &vlan_netdev_ops; + if (is_vlan_dev(real_dev)) subclass = 1; vlan_dev_set_lockdep_class(dev, subclass); - vlan_dev_info(dev)->vlan_rx_stats = alloc_percpu(struct vlan_rx_stats); - if (!vlan_dev_info(dev)->vlan_rx_stats) + vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats); + if (!vlan_dev_info(dev)->vlan_pcpu_stats) return -ENOMEM; return 0; @@ -786,8 +735,8 @@ static void vlan_dev_uninit(struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); int i; - free_percpu(vlan->vlan_rx_stats); - vlan->vlan_rx_stats = NULL; + free_percpu(vlan->vlan_pcpu_stats); + vlan->vlan_pcpu_stats = NULL; for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { while ((pm = vlan->egress_priority_map[i]) != NULL) { vlan->egress_priority_map[i] = pm->next; @@ -825,33 +774,37 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev) static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - dev_txq_stats_fold(dev, stats); - if (vlan_dev_info(dev)->vlan_rx_stats) { - struct vlan_rx_stats *p, accum = {0}; + if (vlan_dev_info(dev)->vlan_pcpu_stats) { + struct vlan_pcpu_stats *p; + u32 rx_errors = 0, tx_dropped = 0; int i; for_each_possible_cpu(i) { - u64 rxpackets, rxbytes, rxmulticast; + u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; unsigned int start; - p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i); + p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i); do { start = u64_stats_fetch_begin_bh(&p->syncp); rxpackets = p->rx_packets; rxbytes = p->rx_bytes; rxmulticast = p->rx_multicast; + txpackets = p->tx_packets; + txbytes = p->tx_bytes; } while (u64_stats_fetch_retry_bh(&p->syncp, start)); - accum.rx_packets += rxpackets; - accum.rx_bytes += rxbytes; - accum.rx_multicast += rxmulticast; - /* rx_errors is ulong, not protected by syncp */ - accum.rx_errors += p->rx_errors; + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->multicast += rxmulticast; + stats->tx_packets += txpackets; + stats->tx_bytes += txbytes; + /* rx_errors & tx_dropped are u32 */ + rx_errors += p->rx_errors; + tx_dropped += p->tx_dropped; } - stats->rx_packets = accum.rx_packets; - stats->rx_bytes = accum.rx_bytes; - stats->rx_errors = accum.rx_errors; - stats->multicast = accum.rx_multicast; + stats->rx_errors = rx_errors; + stats->tx_dropped = tx_dropped; } return stats; } @@ -908,80 +861,6 @@ static const struct net_device_ops vlan_netdev_ops = { #endif }; -static const struct net_device_ops vlan_netdev_accel_ops = { - .ndo_change_mtu = vlan_dev_change_mtu, - .ndo_init = vlan_dev_init, - .ndo_uninit = vlan_dev_uninit, - .ndo_open = vlan_dev_open, - .ndo_stop = vlan_dev_stop, - .ndo_start_xmit = vlan_dev_hwaccel_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = vlan_dev_set_mac_address, - .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, - .ndo_change_rx_flags = vlan_dev_change_rx_flags, - .ndo_do_ioctl = vlan_dev_ioctl, - .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats64 = vlan_dev_get_stats64, -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) - .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, - .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, - .ndo_fcoe_enable = vlan_dev_fcoe_enable, - .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, -#endif -}; - -static const struct net_device_ops vlan_netdev_ops_sq = { - .ndo_select_queue = vlan_dev_select_queue, - .ndo_change_mtu = vlan_dev_change_mtu, - .ndo_init = vlan_dev_init, - .ndo_uninit = vlan_dev_uninit, - .ndo_open = vlan_dev_open, - .ndo_stop = vlan_dev_stop, - .ndo_start_xmit = vlan_dev_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = vlan_dev_set_mac_address, - .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, - .ndo_change_rx_flags = vlan_dev_change_rx_flags, - .ndo_do_ioctl = vlan_dev_ioctl, - .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats64 = vlan_dev_get_stats64, -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) - .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, - .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, - .ndo_fcoe_enable = vlan_dev_fcoe_enable, - .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, -#endif -}; - -static const struct net_device_ops vlan_netdev_accel_ops_sq = { - .ndo_select_queue = vlan_dev_select_queue, - .ndo_change_mtu = vlan_dev_change_mtu, - .ndo_init = vlan_dev_init, - .ndo_uninit = vlan_dev_uninit, - .ndo_open = vlan_dev_open, - .ndo_stop = vlan_dev_stop, - .ndo_start_xmit = vlan_dev_hwaccel_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = vlan_dev_set_mac_address, - .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, - .ndo_change_rx_flags = vlan_dev_change_rx_flags, - .ndo_do_ioctl = vlan_dev_ioctl, - .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats64 = vlan_dev_get_stats64, -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) - .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, - .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, - .ndo_fcoe_enable = vlan_dev_fcoe_enable, - .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, -#endif -}; - void vlan_setup(struct net_device *dev) { ether_setup(dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index ddc105734af7..be9a5c19a775 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -101,25 +101,6 @@ static int vlan_changelink(struct net_device *dev, return 0; } -static int vlan_get_tx_queues(struct net *net, - struct nlattr *tb[], - unsigned int *num_tx_queues, - unsigned int *real_num_tx_queues) -{ - struct net_device *real_dev; - - if (!tb[IFLA_LINK]) - return -EINVAL; - - real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - - *num_tx_queues = real_dev->num_tx_queues; - *real_num_tx_queues = real_dev->real_num_tx_queues; - return 0; -} - static int vlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { @@ -237,7 +218,6 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = { .maxtype = IFLA_VLAN_MAX, .policy = vlan_policy, .priv_size = sizeof(struct vlan_dev_info), - .get_tx_queues = vlan_get_tx_queues, .setup = vlan_setup, .validate = vlan_validate, .newlink = vlan_newlink, diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 80e280f56686..d1314cf18adf 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -280,7 +280,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; - static const char fmt[] = "%30s %12lu\n"; static const char fmt64[] = "%30s %12llu\n"; int i; @@ -299,10 +298,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) seq_puts(seq, "\n"); seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets); seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes); - seq_printf(seq, fmt, "total headroom inc", - dev_info->cnt_inc_headroom_on_tx); - seq_printf(seq, fmt, "total encap on xmit", - dev_info->cnt_encap_on_xmit); seq_printf(seq, "Device: %s", dev_info->real_dev->name); /* now show all PRIORITY mappings relating to this VLAN */ seq_printf(seq, "\nINGRESS priority mappings: " diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 45c15f491401..798beac7f100 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -27,31 +27,16 @@ #include <linux/module.h> #include <linux/errno.h> +#include <linux/kernel.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/stddef.h> #include <linux/types.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "protocol.h" -#ifndef MIN -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef MAX -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) -#endif - -#ifndef offset_of -#define offset_of(type, memb) \ - ((unsigned long)(&((type *)0)->memb)) -#endif -#ifndef container_of -#define container_of(obj, type, memb) \ - ((type *)(((char *)obj) - offset_of(type, memb))) -#endif - static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); @@ -104,7 +89,7 @@ EXPORT_SYMBOL(p9stat_free); static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { - size_t len = MIN(pdu->size - pdu->offset, size); + size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); pdu->offset += len; return size - len; @@ -112,7 +97,7 @@ static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) { - size_t len = MIN(pdu->capacity - pdu->size, size); + size_t len = min(pdu->capacity - pdu->size, size); memcpy(&pdu->sdata[pdu->size], data, len); pdu->size += len; return size - len; @@ -121,7 +106,7 @@ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) static size_t pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) { - size_t len = MIN(pdu->capacity - pdu->size, size); + size_t len = min(pdu->capacity - pdu->size, size); if (copy_from_user(&pdu->sdata[pdu->size], udata, len)) len = 0; @@ -201,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (errcode) break; - size = MAX(len, 0); + size = max_t(int16_t, len, 0); *sptr = kmalloc(size + 1, GFP_KERNEL); if (*sptr == NULL) { @@ -256,8 +241,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = - MIN(*count, - pdu->size - pdu->offset); + min_t(int32_t, *count, + pdu->size - pdu->offset); *data = &pdu->sdata[pdu->offset]; } } @@ -421,7 +406,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, const char *sptr = va_arg(ap, const char *); int16_t len = 0; if (sptr) - len = MIN(strlen(sptr), USHRT_MAX); + len = min_t(int16_t, strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, "w", len); diff --git a/net/Kconfig b/net/Kconfig index 55fd82e9ffd9..ad0aafe903f8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -214,12 +214,18 @@ source "net/ieee802154/Kconfig" source "net/sched/Kconfig" source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" +source "net/batman-adv/Kconfig" config RPS boolean depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config XPS + boolean + depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + default y + menu "Network testing" config NET_PKTGEN diff --git a/net/Makefile b/net/Makefile index 6b7bfd7f1416..a3330ebe2c53 100644 --- a/net/Makefile +++ b/net/Makefile @@ -69,3 +69,4 @@ endif obj-$(CONFIG_WIMAX) += wimax/ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ obj-$(CONFIG_CEPH_LIB) += ceph/ +obj-$(CONFIG_BATMAN_ADV) += batman-adv/ diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 799c631f0fed..f7fa67c78766 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -143,12 +143,13 @@ static struct class atm_class = { .dev_uevent = atm_uevent, }; -int atm_register_sysfs(struct atm_dev *adev) +int atm_register_sysfs(struct atm_dev *adev, struct device *parent) { struct device *cdev = &adev->class_dev; int i, j, err; cdev->class = &atm_class; + cdev->parent = parent; dev_set_drvdata(cdev, adev); dev_set_name(cdev, "%s%d", adev->type, adev->number); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index ad2b232a2055..fce2eae8d476 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -97,7 +97,7 @@ static LIST_HEAD(br2684_devs); static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev) { - return (struct br2684_dev *)netdev_priv(net_dev); + return netdev_priv(net_dev); } static inline struct net_device *list_entry_brdev(const struct list_head *le) diff --git a/net/atm/clip.c b/net/atm/clip.c index ff956d1115bc..d257da50fcfb 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -502,7 +502,8 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) struct atmarp_entry *entry; int error; struct clip_vcc *clip_vcc; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = 1 }; struct rtable *rt; if (vcc->push != clip_push) { diff --git a/net/atm/lec.c b/net/atm/lec.c index 181d70c73d70..38754fdb88ba 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -816,8 +816,7 @@ static int lec_mcast_attach(struct atm_vcc *vcc, int arg) if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; - return lec_mcast_make((struct lec_priv *)netdev_priv(dev_lec[arg]), - vcc); + return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); } /* Initialize device. */ @@ -1608,7 +1607,7 @@ static void lec_arp_destroy(struct lec_priv *priv) struct lec_arp_table *entry; int i; - cancel_rearming_delayed_work(&priv->lec_arp_work); + cancel_delayed_work_sync(&priv->lec_arp_work); /* * Remove all entries diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 74bcc662c3dd..644cdf071642 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -64,8 +64,6 @@ do { if (0) printk(KERN_CONT format, ##args); } while (0) #endif -#define MPOA_TAG_LEN 4 - /* mpc_daemon -> kernel */ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc); static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc); diff --git a/net/atm/resources.c b/net/atm/resources.c index d29e58261511..23f45ce6f351 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -74,8 +74,9 @@ struct atm_dev *atm_dev_lookup(int number) } EXPORT_SYMBOL(atm_dev_lookup); -struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, - int number, unsigned long *flags) +struct atm_dev *atm_dev_register(const char *type, struct device *parent, + const struct atmdev_ops *ops, int number, + unsigned long *flags) { struct atm_dev *dev, *inuse; @@ -115,7 +116,7 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, goto out_fail; } - if (atm_register_sysfs(dev) < 0) { + if (atm_register_sysfs(dev, parent) < 0) { pr_err("atm_register_sysfs failed for dev %s\n", type); atm_proc_dev_deregister(dev); goto out_fail; diff --git a/net/atm/resources.h b/net/atm/resources.h index 126fb1840dfb..521431e30507 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -42,6 +42,6 @@ static inline void atm_proc_dev_deregister(struct atm_dev *dev) #endif /* CONFIG_PROC_FS */ -int atm_register_sysfs(struct atm_dev *adev); +int atm_register_sysfs(struct atm_dev *adev, struct device *parent); void atm_unregister_sysfs(struct atm_dev *adev); #endif diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 26eaebf4aaa9..bb86d2932394 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1392,6 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, ax25_cb *ax25; int err = 0; + memset(fsa, 0, sizeof(fsa)); lock_sock(sk); ax25 = ax25_sk(sk); @@ -1403,7 +1404,6 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, fsa->fsa_ax25.sax25_family = AF_AX25; fsa->fsa_ax25.sax25_call = ax25->dest_addr; - fsa->fsa_ax25.sax25_ndigis = 0; if (ax25->digipeat != NULL) { ndigi = ax25->digipeat->ndigi; diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig new file mode 100644 index 000000000000..6c051ad833eb --- /dev/null +++ b/net/batman-adv/Kconfig @@ -0,0 +1,25 @@ +# +# B.A.T.M.A.N meshing protocol +# + +config BATMAN_ADV + tristate "B.A.T.M.A.N. Advanced Meshing Protocol" + depends on NET + default n + ---help--- + + B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is + a routing protocol for multi-hop ad-hoc mesh networks. The + networks may be wired or wireless. See + http://www.open-mesh.org/ for more information and user space + tools. + +config BATMAN_ADV_DEBUG + bool "B.A.T.M.A.N. debugging" + depends on BATMAN_ADV != n + ---help--- + + This is an option for use by developers; most people should + say N here. This enables compilation of support for + outputting debugging information to the kernel log. The + output is controlled via the module parameter debug. diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile new file mode 100644 index 000000000000..d936aeccd194 --- /dev/null +++ b/net/batman-adv/Makefile @@ -0,0 +1,39 @@ +# +# Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: +# +# Marek Lindner, Simon Wunderlich +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of version 2 of the GNU General Public +# License as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA +# + +obj-$(CONFIG_BATMAN_ADV) += batman-adv.o +batman-adv-y += aggregation.o +batman-adv-y += bat_debugfs.o +batman-adv-y += bat_sysfs.o +batman-adv-y += bitarray.o +batman-adv-y += gateway_client.o +batman-adv-y += gateway_common.o +batman-adv-y += hard-interface.o +batman-adv-y += hash.o +batman-adv-y += icmp_socket.o +batman-adv-y += main.o +batman-adv-y += originator.o +batman-adv-y += ring_buffer.o +batman-adv-y += routing.o +batman-adv-y += send.o +batman-adv-y += soft-interface.o +batman-adv-y += translation-table.o +batman-adv-y += unicast.o +batman-adv-y += vis.o diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c new file mode 100644 index 000000000000..3850a3ecf947 --- /dev/null +++ b/net/batman-adv/aggregation.c @@ -0,0 +1,273 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "aggregation.h" +#include "send.h" +#include "routing.h" + +/* calculate the size of the hna information for a given packet */ +static int hna_len(struct batman_packet *batman_packet) +{ + return batman_packet->num_hna * ETH_ALEN; +} + +/* return true if new_packet can be aggregated with forw_packet */ +static bool can_aggregate_with(struct batman_packet *new_batman_packet, + int packet_len, + unsigned long send_time, + bool directlink, + struct batman_if *if_incoming, + struct forw_packet *forw_packet) +{ + struct batman_packet *batman_packet = + (struct batman_packet *)forw_packet->skb->data; + int aggregated_bytes = forw_packet->packet_len + packet_len; + + /** + * we can aggregate the current packet to this aggregated packet + * if: + * + * - the send time is within our MAX_AGGREGATION_MS time + * - the resulting packet wont be bigger than + * MAX_AGGREGATION_BYTES + */ + + if (time_before(send_time, forw_packet->send_time) && + time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS), + forw_packet->send_time) && + (aggregated_bytes <= MAX_AGGREGATION_BYTES)) { + + /** + * check aggregation compatibility + * -> direct link packets are broadcasted on + * their interface only + * -> aggregate packet if the current packet is + * a "global" packet as well as the base + * packet + */ + + /* packets without direct link flag and high TTL + * are flooded through the net */ + if ((!directlink) && + (!(batman_packet->flags & DIRECTLINK)) && + (batman_packet->ttl != 1) && + + /* own packets originating non-primary + * interfaces leave only that interface */ + ((!forw_packet->own) || + (forw_packet->if_incoming->if_num == 0))) + return true; + + /* if the incoming packet is sent via this one + * interface only - we still can aggregate */ + if ((directlink) && + (new_batman_packet->ttl == 1) && + (forw_packet->if_incoming == if_incoming) && + + /* packets from direct neighbors or + * own secondary interface packets + * (= secondary interface packets in general) */ + (batman_packet->flags & DIRECTLINK || + (forw_packet->own && + forw_packet->if_incoming->if_num != 0))) + return true; + } + + return false; +} + +#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) +/* create a new aggregated packet and add this packet to it */ +static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, + unsigned long send_time, bool direct_link, + struct batman_if *if_incoming, + int own_packet) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct forw_packet *forw_packet_aggr; + unsigned char *skb_buff; + + /* own packet should always be scheduled */ + if (!own_packet) { + if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) { + bat_dbg(DBG_BATMAN, bat_priv, + "batman packet queue full\n"); + return; + } + } + + forw_packet_aggr = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC); + if (!forw_packet_aggr) { + if (!own_packet) + atomic_inc(&bat_priv->batman_queue_left); + return; + } + + if ((atomic_read(&bat_priv->aggregated_ogms)) && + (packet_len < MAX_AGGREGATION_BYTES)) + forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES + + sizeof(struct ethhdr)); + else + forw_packet_aggr->skb = dev_alloc_skb(packet_len + + sizeof(struct ethhdr)); + + if (!forw_packet_aggr->skb) { + if (!own_packet) + atomic_inc(&bat_priv->batman_queue_left); + kfree(forw_packet_aggr); + return; + } + skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr)); + + INIT_HLIST_NODE(&forw_packet_aggr->list); + + skb_buff = skb_put(forw_packet_aggr->skb, packet_len); + forw_packet_aggr->packet_len = packet_len; + memcpy(skb_buff, packet_buff, packet_len); + + forw_packet_aggr->own = own_packet; + forw_packet_aggr->if_incoming = if_incoming; + forw_packet_aggr->num_packets = 0; + forw_packet_aggr->direct_link_flags = 0; + forw_packet_aggr->send_time = send_time; + + /* save packet direct link flag status */ + if (direct_link) + forw_packet_aggr->direct_link_flags |= 1; + + /* add new packet to packet list */ + spin_lock_bh(&bat_priv->forw_bat_list_lock); + hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + /* start timer for this packet */ + INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, + send_outstanding_bat_packet); + queue_delayed_work(bat_event_workqueue, + &forw_packet_aggr->delayed_work, + send_time - jiffies); +} + +/* aggregate a new packet into the existing aggregation */ +static void aggregate(struct forw_packet *forw_packet_aggr, + unsigned char *packet_buff, + int packet_len, + bool direct_link) +{ + unsigned char *skb_buff; + + skb_buff = skb_put(forw_packet_aggr->skb, packet_len); + memcpy(skb_buff, packet_buff, packet_len); + forw_packet_aggr->packet_len += packet_len; + forw_packet_aggr->num_packets++; + + /* save packet direct link flag status */ + if (direct_link) + forw_packet_aggr->direct_link_flags |= + (1 << forw_packet_aggr->num_packets); +} + +void add_bat_packet_to_list(struct bat_priv *bat_priv, + unsigned char *packet_buff, int packet_len, + struct batman_if *if_incoming, char own_packet, + unsigned long send_time) +{ + /** + * _aggr -> pointer to the packet we want to aggregate with + * _pos -> pointer to the position in the queue + */ + struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL; + struct hlist_node *tmp_node; + struct batman_packet *batman_packet = + (struct batman_packet *)packet_buff; + bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0; + + /* find position for the packet in the forward queue */ + spin_lock_bh(&bat_priv->forw_bat_list_lock); + /* own packets are not to be aggregated */ + if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { + hlist_for_each_entry(forw_packet_pos, tmp_node, + &bat_priv->forw_bat_list, list) { + if (can_aggregate_with(batman_packet, + packet_len, + send_time, + direct_link, + if_incoming, + forw_packet_pos)) { + forw_packet_aggr = forw_packet_pos; + break; + } + } + } + + /* nothing to aggregate with - either aggregation disabled or no + * suitable aggregation packet found */ + if (!forw_packet_aggr) { + /* the following section can run without the lock */ + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + /** + * if we could not aggregate this packet with one of the others + * we hold it back for a while, so that it might be aggregated + * later on + */ + if ((!own_packet) && + (atomic_read(&bat_priv->aggregated_ogms))) + send_time += msecs_to_jiffies(MAX_AGGREGATION_MS); + + new_aggregated_packet(packet_buff, packet_len, + send_time, direct_link, + if_incoming, own_packet); + } else { + aggregate(forw_packet_aggr, + packet_buff, packet_len, + direct_link); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + } +} + +/* unpack the aggregated packets and process them one by one */ +void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, + int packet_len, struct batman_if *if_incoming) +{ + struct batman_packet *batman_packet; + int buff_pos = 0; + unsigned char *hna_buff; + + batman_packet = (struct batman_packet *)packet_buff; + + do { + /* network to host order for our 32bit seqno, and the + orig_interval. */ + batman_packet->seqno = ntohl(batman_packet->seqno); + + hna_buff = packet_buff + buff_pos + BAT_PACKET_LEN; + receive_bat_packet(ethhdr, batman_packet, + hna_buff, hna_len(batman_packet), + if_incoming); + + buff_pos += BAT_PACKET_LEN + hna_len(batman_packet); + batman_packet = (struct batman_packet *) + (packet_buff + buff_pos); + } while (aggregated_packet(buff_pos, packet_len, + batman_packet->num_hna)); +} diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h new file mode 100644 index 000000000000..71a91b3da913 --- /dev/null +++ b/net/batman-adv/aggregation.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_AGGREGATION_H_ +#define _NET_BATMAN_ADV_AGGREGATION_H_ + +#include "main.h" + +/* is there another aggregated packet here? */ +static inline int aggregated_packet(int buff_pos, int packet_len, int num_hna) +{ + int next_buff_pos = buff_pos + BAT_PACKET_LEN + (num_hna * ETH_ALEN); + + return (next_buff_pos <= packet_len) && + (next_buff_pos <= MAX_AGGREGATION_BYTES); +} + +void add_bat_packet_to_list(struct bat_priv *bat_priv, + unsigned char *packet_buff, int packet_len, + struct batman_if *if_incoming, char own_packet, + unsigned long send_time); +void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, + int packet_len, struct batman_if *if_incoming); + +#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c new file mode 100644 index 000000000000..0ae81d07f102 --- /dev/null +++ b/net/batman-adv/bat_debugfs.c @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" + +#include <linux/debugfs.h> + +#include "bat_debugfs.h" +#include "translation-table.h" +#include "originator.h" +#include "hard-interface.h" +#include "gateway_common.h" +#include "gateway_client.h" +#include "soft-interface.h" +#include "vis.h" +#include "icmp_socket.h" + +static struct dentry *bat_debugfs; + +#ifdef CONFIG_BATMAN_ADV_DEBUG +#define LOG_BUFF_MASK (log_buff_len-1) +#define LOG_BUFF(idx) (debug_log->log_buff[(idx) & LOG_BUFF_MASK]) + +static int log_buff_len = LOG_BUF_LEN; + +static void emit_log_char(struct debug_log *debug_log, char c) +{ + LOG_BUFF(debug_log->log_end) = c; + debug_log->log_end++; + + if (debug_log->log_end - debug_log->log_start > log_buff_len) + debug_log->log_start = debug_log->log_end - log_buff_len; +} + +static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) +{ + int printed_len; + va_list args; + static char debug_log_buf[256]; + char *p; + + if (!debug_log) + return 0; + + spin_lock_bh(&debug_log->lock); + va_start(args, fmt); + printed_len = vscnprintf(debug_log_buf, sizeof(debug_log_buf), + fmt, args); + va_end(args); + + for (p = debug_log_buf; *p != 0; p++) + emit_log_char(debug_log, *p); + + spin_unlock_bh(&debug_log->lock); + + wake_up(&debug_log->queue_wait); + + return 0; +} + +int debug_log(struct bat_priv *bat_priv, char *fmt, ...) +{ + va_list args; + char tmp_log_buf[256]; + + va_start(args, fmt); + vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args); + fdebug_log(bat_priv->debug_log, "[%10u] %s", + (jiffies / HZ), tmp_log_buf); + va_end(args); + + return 0; +} + +static int log_open(struct inode *inode, struct file *file) +{ + nonseekable_open(inode, file); + file->private_data = inode->i_private; + inc_module_count(); + return 0; +} + +static int log_release(struct inode *inode, struct file *file) +{ + dec_module_count(); + return 0; +} + +static ssize_t log_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct bat_priv *bat_priv = file->private_data; + struct debug_log *debug_log = bat_priv->debug_log; + int error, i = 0; + char c; + + if ((file->f_flags & O_NONBLOCK) && + !(debug_log->log_end - debug_log->log_start)) + return -EAGAIN; + + if ((!buf) || (count < 0)) + return -EINVAL; + + if (count == 0) + return 0; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + error = wait_event_interruptible(debug_log->queue_wait, + (debug_log->log_start - debug_log->log_end)); + + if (error) + return error; + + spin_lock_bh(&debug_log->lock); + + while ((!error) && (i < count) && + (debug_log->log_start != debug_log->log_end)) { + c = LOG_BUFF(debug_log->log_start); + + debug_log->log_start++; + + spin_unlock_bh(&debug_log->lock); + + error = __put_user(c, buf); + + spin_lock_bh(&debug_log->lock); + + buf++; + i++; + + } + + spin_unlock_bh(&debug_log->lock); + + if (!error) + return i; + + return error; +} + +static unsigned int log_poll(struct file *file, poll_table *wait) +{ + struct bat_priv *bat_priv = file->private_data; + struct debug_log *debug_log = bat_priv->debug_log; + + poll_wait(file, &debug_log->queue_wait, wait); + + if (debug_log->log_end - debug_log->log_start) + return POLLIN | POLLRDNORM; + + return 0; +} + +static const struct file_operations log_fops = { + .open = log_open, + .release = log_release, + .read = log_read, + .poll = log_poll, + .llseek = no_llseek, +}; + +static int debug_log_setup(struct bat_priv *bat_priv) +{ + struct dentry *d; + + if (!bat_priv->debug_dir) + goto err; + + bat_priv->debug_log = kzalloc(sizeof(struct debug_log), GFP_ATOMIC); + if (!bat_priv->debug_log) + goto err; + + spin_lock_init(&bat_priv->debug_log->lock); + init_waitqueue_head(&bat_priv->debug_log->queue_wait); + + d = debugfs_create_file("log", S_IFREG | S_IRUSR, + bat_priv->debug_dir, bat_priv, &log_fops); + if (d) + goto err; + + return 0; + +err: + return 1; +} + +static void debug_log_cleanup(struct bat_priv *bat_priv) +{ + kfree(bat_priv->debug_log); + bat_priv->debug_log = NULL; +} +#else /* CONFIG_BATMAN_ADV_DEBUG */ +static int debug_log_setup(struct bat_priv *bat_priv) +{ + bat_priv->debug_log = NULL; + return 0; +} + +static void debug_log_cleanup(struct bat_priv *bat_priv) +{ + return; +} +#endif + +static int originators_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, orig_seq_print_text, net_dev); +} + +static int gateways_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, gw_client_seq_print_text, net_dev); +} + +static int softif_neigh_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, softif_neigh_seq_print_text, net_dev); +} + +static int transtable_global_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, hna_global_seq_print_text, net_dev); +} + +static int transtable_local_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, hna_local_seq_print_text, net_dev); +} + +static int vis_data_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, vis_seq_print_text, net_dev); +} + +struct bat_debuginfo { + struct attribute attr; + const struct file_operations fops; +}; + +#define BAT_DEBUGINFO(_name, _mode, _open) \ +struct bat_debuginfo bat_debuginfo_##_name = { \ + .attr = { .name = __stringify(_name), \ + .mode = _mode, }, \ + .fops = { .owner = THIS_MODULE, \ + .open = _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + } \ +}; + +static BAT_DEBUGINFO(originators, S_IRUGO, originators_open); +static BAT_DEBUGINFO(gateways, S_IRUGO, gateways_open); +static BAT_DEBUGINFO(softif_neigh, S_IRUGO, softif_neigh_open); +static BAT_DEBUGINFO(transtable_global, S_IRUGO, transtable_global_open); +static BAT_DEBUGINFO(transtable_local, S_IRUGO, transtable_local_open); +static BAT_DEBUGINFO(vis_data, S_IRUGO, vis_data_open); + +static struct bat_debuginfo *mesh_debuginfos[] = { + &bat_debuginfo_originators, + &bat_debuginfo_gateways, + &bat_debuginfo_softif_neigh, + &bat_debuginfo_transtable_global, + &bat_debuginfo_transtable_local, + &bat_debuginfo_vis_data, + NULL, +}; + +void debugfs_init(void) +{ + bat_debugfs = debugfs_create_dir(DEBUGFS_BAT_SUBDIR, NULL); + if (bat_debugfs == ERR_PTR(-ENODEV)) + bat_debugfs = NULL; +} + +void debugfs_destroy(void) +{ + if (bat_debugfs) { + debugfs_remove_recursive(bat_debugfs); + bat_debugfs = NULL; + } +} + +int debugfs_add_meshif(struct net_device *dev) +{ + struct bat_priv *bat_priv = netdev_priv(dev); + struct bat_debuginfo **bat_debug; + struct dentry *file; + + if (!bat_debugfs) + goto out; + + bat_priv->debug_dir = debugfs_create_dir(dev->name, bat_debugfs); + if (!bat_priv->debug_dir) + goto out; + + bat_socket_setup(bat_priv); + debug_log_setup(bat_priv); + + for (bat_debug = mesh_debuginfos; *bat_debug; ++bat_debug) { + file = debugfs_create_file(((*bat_debug)->attr).name, + S_IFREG | ((*bat_debug)->attr).mode, + bat_priv->debug_dir, + dev, &(*bat_debug)->fops); + if (!file) { + bat_err(dev, "Can't add debugfs file: %s/%s\n", + dev->name, ((*bat_debug)->attr).name); + goto rem_attr; + } + } + + return 0; +rem_attr: + debugfs_remove_recursive(bat_priv->debug_dir); + bat_priv->debug_dir = NULL; +out: +#ifdef CONFIG_DEBUG_FS + return -ENOMEM; +#else + return 0; +#endif /* CONFIG_DEBUG_FS */ +} + +void debugfs_del_meshif(struct net_device *dev) +{ + struct bat_priv *bat_priv = netdev_priv(dev); + + debug_log_cleanup(bat_priv); + + if (bat_debugfs) { + debugfs_remove_recursive(bat_priv->debug_dir); + bat_priv->debug_dir = NULL; + } +} diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h new file mode 100644 index 000000000000..72df532b7d5f --- /dev/null +++ b/net/batman-adv/bat_debugfs.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + + +#ifndef _NET_BATMAN_ADV_DEBUGFS_H_ +#define _NET_BATMAN_ADV_DEBUGFS_H_ + +#define DEBUGFS_BAT_SUBDIR "batman_adv" + +void debugfs_init(void); +void debugfs_destroy(void); +int debugfs_add_meshif(struct net_device *dev); +void debugfs_del_meshif(struct net_device *dev); + +#endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */ diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c new file mode 100644 index 000000000000..cd7bb51825f1 --- /dev/null +++ b/net/batman-adv/bat_sysfs.c @@ -0,0 +1,593 @@ +/* + * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "bat_sysfs.h" +#include "translation-table.h" +#include "originator.h" +#include "hard-interface.h" +#include "gateway_common.h" +#include "gateway_client.h" +#include "vis.h" + +#define to_dev(obj) container_of(obj, struct device, kobj) +#define kobj_to_netdev(obj) to_net_dev(to_dev(obj->parent)) +#define kobj_to_batpriv(obj) netdev_priv(kobj_to_netdev(obj)) + +/* Use this, if you have customized show and store functions */ +#define BAT_ATTR(_name, _mode, _show, _store) \ +struct bat_attribute bat_attr_##_name = { \ + .attr = {.name = __stringify(_name), \ + .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +#define BAT_ATTR_STORE_BOOL(_name, _post_func) \ +ssize_t store_##_name(struct kobject *kobj, struct attribute *attr, \ + char *buff, size_t count) \ +{ \ + struct net_device *net_dev = kobj_to_netdev(kobj); \ + struct bat_priv *bat_priv = netdev_priv(net_dev); \ + return __store_bool_attr(buff, count, _post_func, attr, \ + &bat_priv->_name, net_dev); \ +} + +#define BAT_ATTR_SHOW_BOOL(_name) \ +ssize_t show_##_name(struct kobject *kobj, struct attribute *attr, \ + char *buff) \ +{ \ + struct bat_priv *bat_priv = kobj_to_batpriv(kobj); \ + return sprintf(buff, "%s\n", \ + atomic_read(&bat_priv->_name) == 0 ? \ + "disabled" : "enabled"); \ +} \ + +/* Use this, if you are going to turn a [name] in bat_priv on or off */ +#define BAT_ATTR_BOOL(_name, _mode, _post_func) \ + static BAT_ATTR_STORE_BOOL(_name, _post_func) \ + static BAT_ATTR_SHOW_BOOL(_name) \ + static BAT_ATTR(_name, _mode, show_##_name, store_##_name) + + +#define BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func) \ +ssize_t store_##_name(struct kobject *kobj, struct attribute *attr, \ + char *buff, size_t count) \ +{ \ + struct net_device *net_dev = kobj_to_netdev(kobj); \ + struct bat_priv *bat_priv = netdev_priv(net_dev); \ + return __store_uint_attr(buff, count, _min, _max, _post_func, \ + attr, &bat_priv->_name, net_dev); \ +} + +#define BAT_ATTR_SHOW_UINT(_name) \ +ssize_t show_##_name(struct kobject *kobj, struct attribute *attr, \ + char *buff) \ +{ \ + struct bat_priv *bat_priv = kobj_to_batpriv(kobj); \ + return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name)); \ +} \ + +/* Use this, if you are going to set [name] in bat_priv to unsigned integer + * values only */ +#define BAT_ATTR_UINT(_name, _mode, _min, _max, _post_func) \ + static BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func) \ + static BAT_ATTR_SHOW_UINT(_name) \ + static BAT_ATTR(_name, _mode, show_##_name, store_##_name) + + +static int store_bool_attr(char *buff, size_t count, + struct net_device *net_dev, + char *attr_name, atomic_t *attr) +{ + int enabled = -1; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if ((strncmp(buff, "1", 2) == 0) || + (strncmp(buff, "enable", 7) == 0) || + (strncmp(buff, "enabled", 8) == 0)) + enabled = 1; + + if ((strncmp(buff, "0", 2) == 0) || + (strncmp(buff, "disable", 8) == 0) || + (strncmp(buff, "disabled", 9) == 0)) + enabled = 0; + + if (enabled < 0) { + bat_info(net_dev, + "%s: Invalid parameter received: %s\n", + attr_name, buff); + return -EINVAL; + } + + if (atomic_read(attr) == enabled) + return count; + + bat_info(net_dev, "%s: Changing from: %s to: %s\n", attr_name, + atomic_read(attr) == 1 ? "enabled" : "disabled", + enabled == 1 ? "enabled" : "disabled"); + + atomic_set(attr, (unsigned)enabled); + return count; +} + +static inline ssize_t __store_bool_attr(char *buff, size_t count, + void (*post_func)(struct net_device *), + struct attribute *attr, + atomic_t *attr_store, struct net_device *net_dev) +{ + int ret; + + ret = store_bool_attr(buff, count, net_dev, (char *)attr->name, + attr_store); + if (post_func && ret) + post_func(net_dev); + + return ret; +} + +static int store_uint_attr(char *buff, size_t count, + struct net_device *net_dev, char *attr_name, + unsigned int min, unsigned int max, atomic_t *attr) +{ + unsigned long uint_val; + int ret; + + ret = strict_strtoul(buff, 10, &uint_val); + if (ret) { + bat_info(net_dev, + "%s: Invalid parameter received: %s\n", + attr_name, buff); + return -EINVAL; + } + + if (uint_val < min) { + bat_info(net_dev, "%s: Value is too small: %lu min: %u\n", + attr_name, uint_val, min); + return -EINVAL; + } + + if (uint_val > max) { + bat_info(net_dev, "%s: Value is too big: %lu max: %u\n", + attr_name, uint_val, max); + return -EINVAL; + } + + if (atomic_read(attr) == uint_val) + return count; + + bat_info(net_dev, "%s: Changing from: %i to: %lu\n", + attr_name, atomic_read(attr), uint_val); + + atomic_set(attr, uint_val); + return count; +} + +static inline ssize_t __store_uint_attr(char *buff, size_t count, + int min, int max, + void (*post_func)(struct net_device *), + struct attribute *attr, + atomic_t *attr_store, struct net_device *net_dev) +{ + int ret; + + ret = store_uint_attr(buff, count, net_dev, (char *)attr->name, + min, max, attr_store); + if (post_func && ret) + post_func(net_dev); + + return ret; +} + +static ssize_t show_vis_mode(struct kobject *kobj, struct attribute *attr, + char *buff) +{ + struct bat_priv *bat_priv = kobj_to_batpriv(kobj); + int vis_mode = atomic_read(&bat_priv->vis_mode); + + return sprintf(buff, "%s\n", + vis_mode == VIS_TYPE_CLIENT_UPDATE ? + "client" : "server"); +} + +static ssize_t store_vis_mode(struct kobject *kobj, struct attribute *attr, + char *buff, size_t count) +{ + struct net_device *net_dev = kobj_to_netdev(kobj); + struct bat_priv *bat_priv = netdev_priv(net_dev); + unsigned long val; + int ret, vis_mode_tmp = -1; + + ret = strict_strtoul(buff, 10, &val); + + if (((count == 2) && (!ret) && (val == VIS_TYPE_CLIENT_UPDATE)) || + (strncmp(buff, "client", 6) == 0) || + (strncmp(buff, "off", 3) == 0)) + vis_mode_tmp = VIS_TYPE_CLIENT_UPDATE; + + if (((count == 2) && (!ret) && (val == VIS_TYPE_SERVER_SYNC)) || + (strncmp(buff, "server", 6) == 0)) + vis_mode_tmp = VIS_TYPE_SERVER_SYNC; + + if (vis_mode_tmp < 0) { + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + bat_info(net_dev, + "Invalid parameter for 'vis mode' setting received: " + "%s\n", buff); + return -EINVAL; + } + + if (atomic_read(&bat_priv->vis_mode) == vis_mode_tmp) + return count; + + bat_info(net_dev, "Changing vis mode from: %s to: %s\n", + atomic_read(&bat_priv->vis_mode) == VIS_TYPE_CLIENT_UPDATE ? + "client" : "server", vis_mode_tmp == VIS_TYPE_CLIENT_UPDATE ? + "client" : "server"); + + atomic_set(&bat_priv->vis_mode, (unsigned)vis_mode_tmp); + return count; +} + +static void post_gw_deselect(struct net_device *net_dev) +{ + struct bat_priv *bat_priv = netdev_priv(net_dev); + gw_deselect(bat_priv); +} + +static ssize_t show_gw_mode(struct kobject *kobj, struct attribute *attr, + char *buff) +{ + struct bat_priv *bat_priv = kobj_to_batpriv(kobj); + int bytes_written; + + switch (atomic_read(&bat_priv->gw_mode)) { + case GW_MODE_CLIENT: + bytes_written = sprintf(buff, "%s\n", GW_MODE_CLIENT_NAME); + break; + case GW_MODE_SERVER: + bytes_written = sprintf(buff, "%s\n", GW_MODE_SERVER_NAME); + break; + default: + bytes_written = sprintf(buff, "%s\n", GW_MODE_OFF_NAME); + break; + } + + return bytes_written; +} + +static ssize_t store_gw_mode(struct kobject *kobj, struct attribute *attr, + char *buff, size_t count) +{ + struct net_device *net_dev = kobj_to_netdev(kobj); + struct bat_priv *bat_priv = netdev_priv(net_dev); + char *curr_gw_mode_str; + int gw_mode_tmp = -1; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (strncmp(buff, GW_MODE_OFF_NAME, strlen(GW_MODE_OFF_NAME)) == 0) + gw_mode_tmp = GW_MODE_OFF; + + if (strncmp(buff, GW_MODE_CLIENT_NAME, + strlen(GW_MODE_CLIENT_NAME)) == 0) + gw_mode_tmp = GW_MODE_CLIENT; + + if (strncmp(buff, GW_MODE_SERVER_NAME, + strlen(GW_MODE_SERVER_NAME)) == 0) + gw_mode_tmp = GW_MODE_SERVER; + + if (gw_mode_tmp < 0) { + bat_info(net_dev, + "Invalid parameter for 'gw mode' setting received: " + "%s\n", buff); + return -EINVAL; + } + + if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp) + return count; + + switch (atomic_read(&bat_priv->gw_mode)) { + case GW_MODE_CLIENT: + curr_gw_mode_str = GW_MODE_CLIENT_NAME; + break; + case GW_MODE_SERVER: + curr_gw_mode_str = GW_MODE_SERVER_NAME; + break; + default: + curr_gw_mode_str = GW_MODE_OFF_NAME; + break; + } + + bat_info(net_dev, "Changing gw mode from: %s to: %s\n", + curr_gw_mode_str, buff); + + gw_deselect(bat_priv); + atomic_set(&bat_priv->gw_mode, (unsigned)gw_mode_tmp); + return count; +} + +static ssize_t show_gw_bwidth(struct kobject *kobj, struct attribute *attr, + char *buff) +{ + struct bat_priv *bat_priv = kobj_to_batpriv(kobj); + int down, up; + int gw_bandwidth = atomic_read(&bat_priv->gw_bandwidth); + + gw_bandwidth_to_kbit(gw_bandwidth, &down, &up); + return sprintf(buff, "%i%s/%i%s\n", + (down > 2048 ? down / 1024 : down), + (down > 2048 ? "MBit" : "KBit"), + (up > 2048 ? up / 1024 : up), + (up > 2048 ? "MBit" : "KBit")); +} + +static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr, + char *buff, size_t count) +{ + struct net_device *net_dev = kobj_to_netdev(kobj); + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + return gw_bandwidth_set(net_dev, buff, count); +} + +BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); +BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); +BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu); +static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode); +static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode); +BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL); +BAT_ATTR_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, TQ_MAX_VALUE, NULL); +BAT_ATTR_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, TQ_MAX_VALUE, + post_gw_deselect); +static BAT_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, show_gw_bwidth, + store_gw_bwidth); +#ifdef CONFIG_BATMAN_ADV_DEBUG +BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 3, NULL); +#endif + +static struct bat_attribute *mesh_attrs[] = { + &bat_attr_aggregated_ogms, + &bat_attr_bonding, + &bat_attr_fragmentation, + &bat_attr_vis_mode, + &bat_attr_gw_mode, + &bat_attr_orig_interval, + &bat_attr_hop_penalty, + &bat_attr_gw_sel_class, + &bat_attr_gw_bandwidth, +#ifdef CONFIG_BATMAN_ADV_DEBUG + &bat_attr_log_level, +#endif + NULL, +}; + +int sysfs_add_meshif(struct net_device *dev) +{ + struct kobject *batif_kobject = &dev->dev.kobj; + struct bat_priv *bat_priv = netdev_priv(dev); + struct bat_attribute **bat_attr; + int err; + + bat_priv->mesh_obj = kobject_create_and_add(SYSFS_IF_MESH_SUBDIR, + batif_kobject); + if (!bat_priv->mesh_obj) { + bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name, + SYSFS_IF_MESH_SUBDIR); + goto out; + } + + for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr) { + err = sysfs_create_file(bat_priv->mesh_obj, + &((*bat_attr)->attr)); + if (err) { + bat_err(dev, "Can't add sysfs file: %s/%s/%s\n", + dev->name, SYSFS_IF_MESH_SUBDIR, + ((*bat_attr)->attr).name); + goto rem_attr; + } + } + + return 0; + +rem_attr: + for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr) + sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + + kobject_put(bat_priv->mesh_obj); + bat_priv->mesh_obj = NULL; +out: + return -ENOMEM; +} + +void sysfs_del_meshif(struct net_device *dev) +{ + struct bat_priv *bat_priv = netdev_priv(dev); + struct bat_attribute **bat_attr; + + for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr) + sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + + kobject_put(bat_priv->mesh_obj); + bat_priv->mesh_obj = NULL; +} + +static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr, + char *buff) +{ + struct net_device *net_dev = kobj_to_netdev(kobj); + struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + ssize_t length; + + if (!batman_if) + return 0; + + length = sprintf(buff, "%s\n", batman_if->if_status == IF_NOT_IN_USE ? + "none" : batman_if->soft_iface->name); + + kref_put(&batman_if->refcount, hardif_free_ref); + + return length; +} + +static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, + char *buff, size_t count) +{ + struct net_device *net_dev = kobj_to_netdev(kobj); + struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + int status_tmp = -1; + int ret; + + if (!batman_if) + return count; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (strlen(buff) >= IFNAMSIZ) { + pr_err("Invalid parameter for 'mesh_iface' setting received: " + "interface name too long '%s'\n", buff); + kref_put(&batman_if->refcount, hardif_free_ref); + return -EINVAL; + } + + if (strncmp(buff, "none", 4) == 0) + status_tmp = IF_NOT_IN_USE; + else + status_tmp = IF_I_WANT_YOU; + + if ((batman_if->if_status == status_tmp) || ((batman_if->soft_iface) && + (strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) { + kref_put(&batman_if->refcount, hardif_free_ref); + return count; + } + + if (status_tmp == IF_NOT_IN_USE) { + rtnl_lock(); + hardif_disable_interface(batman_if); + rtnl_unlock(); + kref_put(&batman_if->refcount, hardif_free_ref); + return count; + } + + /* if the interface already is in use */ + if (batman_if->if_status != IF_NOT_IN_USE) { + rtnl_lock(); + hardif_disable_interface(batman_if); + rtnl_unlock(); + } + + ret = hardif_enable_interface(batman_if, buff); + kref_put(&batman_if->refcount, hardif_free_ref); + + return ret; +} + +static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr, + char *buff) +{ + struct net_device *net_dev = kobj_to_netdev(kobj); + struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + ssize_t length; + + if (!batman_if) + return 0; + + switch (batman_if->if_status) { + case IF_TO_BE_REMOVED: + length = sprintf(buff, "disabling\n"); + break; + case IF_INACTIVE: + length = sprintf(buff, "inactive\n"); + break; + case IF_ACTIVE: + length = sprintf(buff, "active\n"); + break; + case IF_TO_BE_ACTIVATED: + length = sprintf(buff, "enabling\n"); + break; + case IF_NOT_IN_USE: + default: + length = sprintf(buff, "not in use\n"); + break; + } + + kref_put(&batman_if->refcount, hardif_free_ref); + + return length; +} + +static BAT_ATTR(mesh_iface, S_IRUGO | S_IWUSR, + show_mesh_iface, store_mesh_iface); +static BAT_ATTR(iface_status, S_IRUGO, show_iface_status, NULL); + +static struct bat_attribute *batman_attrs[] = { + &bat_attr_mesh_iface, + &bat_attr_iface_status, + NULL, +}; + +int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev) +{ + struct kobject *hardif_kobject = &dev->dev.kobj; + struct bat_attribute **bat_attr; + int err; + + *hardif_obj = kobject_create_and_add(SYSFS_IF_BAT_SUBDIR, + hardif_kobject); + + if (!*hardif_obj) { + bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name, + SYSFS_IF_BAT_SUBDIR); + goto out; + } + + for (bat_attr = batman_attrs; *bat_attr; ++bat_attr) { + err = sysfs_create_file(*hardif_obj, &((*bat_attr)->attr)); + if (err) { + bat_err(dev, "Can't add sysfs file: %s/%s/%s\n", + dev->name, SYSFS_IF_BAT_SUBDIR, + ((*bat_attr)->attr).name); + goto rem_attr; + } + } + + return 0; + +rem_attr: + for (bat_attr = batman_attrs; *bat_attr; ++bat_attr) + sysfs_remove_file(*hardif_obj, &((*bat_attr)->attr)); +out: + return -ENOMEM; +} + +void sysfs_del_hardif(struct kobject **hardif_obj) +{ + kobject_put(*hardif_obj); + *hardif_obj = NULL; +} diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h new file mode 100644 index 000000000000..7f186c007b4f --- /dev/null +++ b/net/batman-adv/bat_sysfs.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + + +#ifndef _NET_BATMAN_ADV_SYSFS_H_ +#define _NET_BATMAN_ADV_SYSFS_H_ + +#define SYSFS_IF_MESH_SUBDIR "mesh" +#define SYSFS_IF_BAT_SUBDIR "batman_adv" + +struct bat_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct attribute *attr, + char *buf); + ssize_t (*store)(struct kobject *kobj, struct attribute *attr, + char *buf, size_t count); +}; + +int sysfs_add_meshif(struct net_device *dev); +void sysfs_del_meshif(struct net_device *dev); +int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev); +void sysfs_del_hardif(struct kobject **hardif_obj); + +#endif /* _NET_BATMAN_ADV_SYSFS_H_ */ diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c new file mode 100644 index 000000000000..bbcd8f744cdd --- /dev/null +++ b/net/batman-adv/bitarray.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * + * Simon Wunderlich, Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "bitarray.h" + +#include <linux/bitops.h> + +/* returns true if the corresponding bit in the given seq_bits indicates true + * and curr_seqno is within range of last_seqno */ +uint8_t get_bit_status(unsigned long *seq_bits, uint32_t last_seqno, + uint32_t curr_seqno) +{ + int32_t diff, word_offset, word_num; + + diff = last_seqno - curr_seqno; + if (diff < 0 || diff >= TQ_LOCAL_WINDOW_SIZE) { + return 0; + } else { + /* which word */ + word_num = (last_seqno - curr_seqno) / WORD_BIT_SIZE; + /* which position in the selected word */ + word_offset = (last_seqno - curr_seqno) % WORD_BIT_SIZE; + + if (test_bit(word_offset, &seq_bits[word_num])) + return 1; + else + return 0; + } +} + +/* turn corresponding bit on, so we can remember that we got the packet */ +void bit_mark(unsigned long *seq_bits, int32_t n) +{ + int32_t word_offset, word_num; + + /* if too old, just drop it */ + if (n < 0 || n >= TQ_LOCAL_WINDOW_SIZE) + return; + + /* which word */ + word_num = n / WORD_BIT_SIZE; + /* which position in the selected word */ + word_offset = n % WORD_BIT_SIZE; + + set_bit(word_offset, &seq_bits[word_num]); /* turn the position on */ +} + +/* shift the packet array by n places. */ +static void bit_shift(unsigned long *seq_bits, int32_t n) +{ + int32_t word_offset, word_num; + int32_t i; + + if (n <= 0 || n >= TQ_LOCAL_WINDOW_SIZE) + return; + + word_offset = n % WORD_BIT_SIZE;/* shift how much inside each word */ + word_num = n / WORD_BIT_SIZE; /* shift over how much (full) words */ + + for (i = NUM_WORDS - 1; i > word_num; i--) { + /* going from old to new, so we don't overwrite the data we copy + * from. + * + * left is high, right is low: FEDC BA98 7654 3210 + * ^^ ^^ + * vvvv + * ^^^^ = from, vvvvv =to, we'd have word_num==1 and + * word_offset==WORD_BIT_SIZE/2 ????? in this example. + * (=24 bits) + * + * our desired output would be: 9876 5432 1000 0000 + * */ + + seq_bits[i] = + (seq_bits[i - word_num] << word_offset) + + /* take the lower port from the left half, shift it left + * to its final position */ + (seq_bits[i - word_num - 1] >> + (WORD_BIT_SIZE-word_offset)); + /* and the upper part of the right half and shift it left to + * it's position */ + /* for our example that would be: word[0] = 9800 + 0076 = + * 9876 */ + } + /* now for our last word, i==word_num, we only have the it's "left" + * half. that's the 1000 word in our example.*/ + + seq_bits[i] = (seq_bits[i - word_num] << word_offset); + + /* pad the rest with 0, if there is anything */ + i--; + + for (; i >= 0; i--) + seq_bits[i] = 0; +} + +static void bit_reset_window(unsigned long *seq_bits) +{ + int i; + for (i = 0; i < NUM_WORDS; i++) + seq_bits[i] = 0; +} + + +/* receive and process one packet within the sequence number window. + * + * returns: + * 1 if the window was moved (either new or very old) + * 0 if the window was not moved/shifted. + */ +char bit_get_packet(void *priv, unsigned long *seq_bits, + int32_t seq_num_diff, int8_t set_mark) +{ + struct bat_priv *bat_priv = (struct bat_priv *)priv; + + /* sequence number is slightly older. We already got a sequence number + * higher than this one, so we just mark it. */ + + if ((seq_num_diff <= 0) && (seq_num_diff > -TQ_LOCAL_WINDOW_SIZE)) { + if (set_mark) + bit_mark(seq_bits, -seq_num_diff); + return 0; + } + + /* sequence number is slightly newer, so we shift the window and + * set the mark if required */ + + if ((seq_num_diff > 0) && (seq_num_diff < TQ_LOCAL_WINDOW_SIZE)) { + bit_shift(seq_bits, seq_num_diff); + + if (set_mark) + bit_mark(seq_bits, 0); + return 1; + } + + /* sequence number is much newer, probably missed a lot of packets */ + + if ((seq_num_diff >= TQ_LOCAL_WINDOW_SIZE) + || (seq_num_diff < EXPECTED_SEQNO_RANGE)) { + bat_dbg(DBG_BATMAN, bat_priv, + "We missed a lot of packets (%i) !\n", + seq_num_diff - 1); + bit_reset_window(seq_bits); + if (set_mark) + bit_mark(seq_bits, 0); + return 1; + } + + /* received a much older packet. The other host either restarted + * or the old packet got delayed somewhere in the network. The + * packet should be dropped without calling this function if the + * seqno window is protected. */ + + if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE) + || (seq_num_diff >= EXPECTED_SEQNO_RANGE)) { + + bat_dbg(DBG_BATMAN, bat_priv, + "Other host probably restarted!\n"); + + bit_reset_window(seq_bits); + if (set_mark) + bit_mark(seq_bits, 0); + + return 1; + } + + /* never reached */ + return 0; +} + +/* count the hamming weight, how many good packets did we receive? just count + * the 1's. + */ +int bit_packet_count(unsigned long *seq_bits) +{ + int i, hamming = 0; + + for (i = 0; i < NUM_WORDS; i++) + hamming += hweight_long(seq_bits[i]); + + return hamming; +} diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h new file mode 100644 index 000000000000..ac54017601b1 --- /dev/null +++ b/net/batman-adv/bitarray.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * + * Simon Wunderlich, Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_BITARRAY_H_ +#define _NET_BATMAN_ADV_BITARRAY_H_ + +#define WORD_BIT_SIZE (sizeof(unsigned long) * 8) + +/* returns true if the corresponding bit in the given seq_bits indicates true + * and curr_seqno is within range of last_seqno */ +uint8_t get_bit_status(unsigned long *seq_bits, uint32_t last_seqno, + uint32_t curr_seqno); + +/* turn corresponding bit on, so we can remember that we got the packet */ +void bit_mark(unsigned long *seq_bits, int32_t n); + + +/* receive and process one packet, returns 1 if received seq_num is considered + * new, 0 if old */ +char bit_get_packet(void *priv, unsigned long *seq_bits, + int32_t seq_num_diff, int8_t set_mark); + +/* count the hamming weight, how many good packets did we receive? */ +int bit_packet_count(unsigned long *seq_bits); + +#endif /* _NET_BATMAN_ADV_BITARRAY_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c new file mode 100644 index 000000000000..0065ffb8d96d --- /dev/null +++ b/net/batman-adv/gateway_client.c @@ -0,0 +1,477 @@ +/* + * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "gateway_client.h" +#include "gateway_common.h" +#include "hard-interface.h" +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <linux/if_vlan.h> + +static void gw_node_free_ref(struct kref *refcount) +{ + struct gw_node *gw_node; + + gw_node = container_of(refcount, struct gw_node, refcount); + kfree(gw_node); +} + +static void gw_node_free_rcu(struct rcu_head *rcu) +{ + struct gw_node *gw_node; + + gw_node = container_of(rcu, struct gw_node, rcu); + kref_put(&gw_node->refcount, gw_node_free_ref); +} + +void *gw_get_selected(struct bat_priv *bat_priv) +{ + struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; + + if (!curr_gateway_tmp) + return NULL; + + return curr_gateway_tmp->orig_node; +} + +void gw_deselect(struct bat_priv *bat_priv) +{ + struct gw_node *gw_node = bat_priv->curr_gw; + + bat_priv->curr_gw = NULL; + + if (gw_node) + kref_put(&gw_node->refcount, gw_node_free_ref); +} + +static struct gw_node *gw_select(struct bat_priv *bat_priv, + struct gw_node *new_gw_node) +{ + struct gw_node *curr_gw_node = bat_priv->curr_gw; + + if (new_gw_node) + kref_get(&new_gw_node->refcount); + + bat_priv->curr_gw = new_gw_node; + return curr_gw_node; +} + +void gw_election(struct bat_priv *bat_priv) +{ + struct hlist_node *node; + struct gw_node *gw_node, *curr_gw_tmp = NULL, *old_gw_node = NULL; + uint8_t max_tq = 0; + uint32_t max_gw_factor = 0, tmp_gw_factor = 0; + int down, up; + + /** + * The batman daemon checks here if we already passed a full originator + * cycle in order to make sure we don't choose the first gateway we + * hear about. This check is based on the daemon's uptime which we + * don't have. + **/ + if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) + return; + + if (bat_priv->curr_gw) + return; + + rcu_read_lock(); + if (hlist_empty(&bat_priv->gw_list)) { + rcu_read_unlock(); + + if (bat_priv->curr_gw) { + bat_dbg(DBG_BATMAN, bat_priv, + "Removing selected gateway - " + "no gateway in range\n"); + gw_deselect(bat_priv); + } + + return; + } + + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + if (!gw_node->orig_node->router) + continue; + + if (gw_node->deleted) + continue; + + switch (atomic_read(&bat_priv->gw_sel_class)) { + case 1: /* fast connection */ + gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, + &down, &up); + + tmp_gw_factor = (gw_node->orig_node->router->tq_avg * + gw_node->orig_node->router->tq_avg * + down * 100 * 100) / + (TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE * 64); + + if ((tmp_gw_factor > max_gw_factor) || + ((tmp_gw_factor == max_gw_factor) && + (gw_node->orig_node->router->tq_avg > max_tq))) + curr_gw_tmp = gw_node; + break; + + default: /** + * 2: stable connection (use best statistic) + * 3: fast-switch (use best statistic but change as + * soon as a better gateway appears) + * XX: late-switch (use best statistic but change as + * soon as a better gateway appears which has + * $routing_class more tq points) + **/ + if (gw_node->orig_node->router->tq_avg > max_tq) + curr_gw_tmp = gw_node; + break; + } + + if (gw_node->orig_node->router->tq_avg > max_tq) + max_tq = gw_node->orig_node->router->tq_avg; + + if (tmp_gw_factor > max_gw_factor) + max_gw_factor = tmp_gw_factor; + } + + if (bat_priv->curr_gw != curr_gw_tmp) { + if ((bat_priv->curr_gw) && (!curr_gw_tmp)) + bat_dbg(DBG_BATMAN, bat_priv, + "Removing selected gateway - " + "no gateway in range\n"); + else if ((!bat_priv->curr_gw) && (curr_gw_tmp)) + bat_dbg(DBG_BATMAN, bat_priv, + "Adding route to gateway %pM " + "(gw_flags: %i, tq: %i)\n", + curr_gw_tmp->orig_node->orig, + curr_gw_tmp->orig_node->gw_flags, + curr_gw_tmp->orig_node->router->tq_avg); + else + bat_dbg(DBG_BATMAN, bat_priv, + "Changing route to gateway %pM " + "(gw_flags: %i, tq: %i)\n", + curr_gw_tmp->orig_node->orig, + curr_gw_tmp->orig_node->gw_flags, + curr_gw_tmp->orig_node->router->tq_avg); + + old_gw_node = gw_select(bat_priv, curr_gw_tmp); + } + + rcu_read_unlock(); + + /* the kfree() has to be outside of the rcu lock */ + if (old_gw_node) + kref_put(&old_gw_node->refcount, gw_node_free_ref); +} + +void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) +{ + struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; + uint8_t gw_tq_avg, orig_tq_avg; + + if (!curr_gateway_tmp) + return; + + if (!curr_gateway_tmp->orig_node) + goto deselect; + + if (!curr_gateway_tmp->orig_node->router) + goto deselect; + + /* this node already is the gateway */ + if (curr_gateway_tmp->orig_node == orig_node) + return; + + if (!orig_node->router) + return; + + gw_tq_avg = curr_gateway_tmp->orig_node->router->tq_avg; + orig_tq_avg = orig_node->router->tq_avg; + + /* the TQ value has to be better */ + if (orig_tq_avg < gw_tq_avg) + return; + + /** + * if the routing class is greater than 3 the value tells us how much + * greater the TQ value of the new gateway must be + **/ + if ((atomic_read(&bat_priv->gw_sel_class) > 3) && + (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class))) + return; + + bat_dbg(DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (tq curr: " + "%i, tq new: %i)\n", + gw_tq_avg, orig_tq_avg); + +deselect: + gw_deselect(bat_priv); +} + +static void gw_node_add(struct bat_priv *bat_priv, + struct orig_node *orig_node, uint8_t new_gwflags) +{ + struct gw_node *gw_node; + int down, up; + + gw_node = kmalloc(sizeof(struct gw_node), GFP_ATOMIC); + if (!gw_node) + return; + + memset(gw_node, 0, sizeof(struct gw_node)); + INIT_HLIST_NODE(&gw_node->list); + gw_node->orig_node = orig_node; + kref_init(&gw_node->refcount); + + spin_lock_bh(&bat_priv->gw_list_lock); + hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); + spin_unlock_bh(&bat_priv->gw_list_lock); + + gw_bandwidth_to_kbit(new_gwflags, &down, &up); + bat_dbg(DBG_BATMAN, bat_priv, + "Found new gateway %pM -> gw_class: %i - %i%s/%i%s\n", + orig_node->orig, new_gwflags, + (down > 2048 ? down / 1024 : down), + (down > 2048 ? "MBit" : "KBit"), + (up > 2048 ? up / 1024 : up), + (up > 2048 ? "MBit" : "KBit")); +} + +void gw_node_update(struct bat_priv *bat_priv, + struct orig_node *orig_node, uint8_t new_gwflags) +{ + struct hlist_node *node; + struct gw_node *gw_node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + if (gw_node->orig_node != orig_node) + continue; + + bat_dbg(DBG_BATMAN, bat_priv, + "Gateway class of originator %pM changed from " + "%i to %i\n", + orig_node->orig, gw_node->orig_node->gw_flags, + new_gwflags); + + gw_node->deleted = 0; + + if (new_gwflags == 0) { + gw_node->deleted = jiffies; + bat_dbg(DBG_BATMAN, bat_priv, + "Gateway %pM removed from gateway list\n", + orig_node->orig); + + if (gw_node == bat_priv->curr_gw) { + rcu_read_unlock(); + gw_deselect(bat_priv); + return; + } + } + + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + if (new_gwflags == 0) + return; + + gw_node_add(bat_priv, orig_node, new_gwflags); +} + +void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node) +{ + return gw_node_update(bat_priv, orig_node, 0); +} + +void gw_node_purge(struct bat_priv *bat_priv) +{ + struct gw_node *gw_node; + struct hlist_node *node, *node_tmp; + unsigned long timeout = 2 * PURGE_TIMEOUT * HZ; + + spin_lock_bh(&bat_priv->gw_list_lock); + + hlist_for_each_entry_safe(gw_node, node, node_tmp, + &bat_priv->gw_list, list) { + if (((!gw_node->deleted) || + (time_before(jiffies, gw_node->deleted + timeout))) && + atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) + continue; + + if (bat_priv->curr_gw == gw_node) + gw_deselect(bat_priv); + + hlist_del_rcu(&gw_node->list); + call_rcu(&gw_node->rcu, gw_node_free_rcu); + } + + + spin_unlock_bh(&bat_priv->gw_list_lock); +} + +static int _write_buffer_text(struct bat_priv *bat_priv, + struct seq_file *seq, struct gw_node *gw_node) +{ + int down, up; + + gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up); + + return seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n", + (bat_priv->curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + gw_node->orig_node->router->tq_avg, + gw_node->orig_node->router->addr, + gw_node->orig_node->router->if_incoming->net_dev->name, + gw_node->orig_node->gw_flags, + (down > 2048 ? down / 1024 : down), + (down > 2048 ? "MBit" : "KBit"), + (up > 2048 ? up / 1024 : up), + (up > 2048 ? "MBit" : "KBit")); +} + +int gw_client_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct bat_priv *bat_priv = netdev_priv(net_dev); + struct gw_node *gw_node; + struct hlist_node *node; + int gw_count = 0; + + if (!bat_priv->primary_if) { + + return seq_printf(seq, "BATMAN mesh %s disabled - please " + "specify interfaces to enable it\n", + net_dev->name); + } + + if (bat_priv->primary_if->if_status != IF_ACTIVE) { + + return seq_printf(seq, "BATMAN mesh %s disabled - " + "primary interface not active\n", + net_dev->name); + } + + seq_printf(seq, " %-12s (%s/%i) %17s [%10s]: gw_class ... " + "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n", + "Gateway", "#", TQ_MAX_VALUE, "Nexthop", + "outgoingIF", SOURCE_VERSION, REVISION_VERSION_STR, + bat_priv->primary_if->net_dev->name, + bat_priv->primary_if->net_dev->dev_addr, net_dev->name); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + if (gw_node->deleted) + continue; + + if (!gw_node->orig_node->router) + continue; + + _write_buffer_text(bat_priv, seq, gw_node); + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_printf(seq, "No gateways in range ...\n"); + + return 0; +} + +int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) +{ + struct ethhdr *ethhdr; + struct iphdr *iphdr; + struct ipv6hdr *ipv6hdr; + struct udphdr *udphdr; + unsigned int header_len = 0; + + if (atomic_read(&bat_priv->gw_mode) == GW_MODE_OFF) + return 0; + + /* check for ethernet header */ + if (!pskb_may_pull(skb, header_len + ETH_HLEN)) + return 0; + ethhdr = (struct ethhdr *)skb->data; + header_len += ETH_HLEN; + + /* check for initial vlan header */ + if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { + if (!pskb_may_pull(skb, header_len + VLAN_HLEN)) + return 0; + ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN); + header_len += VLAN_HLEN; + } + + /* check for ip header */ + switch (ntohs(ethhdr->h_proto)) { + case ETH_P_IP: + if (!pskb_may_pull(skb, header_len + sizeof(struct iphdr))) + return 0; + iphdr = (struct iphdr *)(skb->data + header_len); + header_len += iphdr->ihl * 4; + + /* check for udp header */ + if (iphdr->protocol != IPPROTO_UDP) + return 0; + + break; + case ETH_P_IPV6: + if (!pskb_may_pull(skb, header_len + sizeof(struct ipv6hdr))) + return 0; + ipv6hdr = (struct ipv6hdr *)(skb->data + header_len); + header_len += sizeof(struct ipv6hdr); + + /* check for udp header */ + if (ipv6hdr->nexthdr != IPPROTO_UDP) + return 0; + + break; + default: + return 0; + } + + if (!pskb_may_pull(skb, header_len + sizeof(struct udphdr))) + return 0; + udphdr = (struct udphdr *)(skb->data + header_len); + header_len += sizeof(struct udphdr); + + /* check for bootp port */ + if ((ntohs(ethhdr->h_proto) == ETH_P_IP) && + (ntohs(udphdr->dest) != 67)) + return 0; + + if ((ntohs(ethhdr->h_proto) == ETH_P_IPV6) && + (ntohs(udphdr->dest) != 547)) + return 0; + + if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER) + return -1; + + if (!bat_priv->curr_gw) + return 0; + + return 1; +} diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h new file mode 100644 index 000000000000..4585e6549844 --- /dev/null +++ b/net/batman-adv/gateway_client.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ +#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ + +void gw_deselect(struct bat_priv *bat_priv); +void gw_election(struct bat_priv *bat_priv); +void *gw_get_selected(struct bat_priv *bat_priv); +void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node); +void gw_node_update(struct bat_priv *bat_priv, + struct orig_node *orig_node, uint8_t new_gwflags); +void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node); +void gw_node_purge(struct bat_priv *bat_priv); +int gw_client_seq_print_text(struct seq_file *seq, void *offset); +int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb); + +#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c new file mode 100644 index 000000000000..b962982f017e --- /dev/null +++ b/net/batman-adv/gateway_common.c @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "gateway_common.h" +#include "gateway_client.h" + +/* calculates the gateway class from kbit */ +static void kbit_to_gw_bandwidth(int down, int up, long *gw_srv_class) +{ + int mdown = 0, tdown, tup, difference; + uint8_t sbit, part; + + *gw_srv_class = 0; + difference = 0x0FFFFFFF; + + /* test all downspeeds */ + for (sbit = 0; sbit < 2; sbit++) { + for (part = 0; part < 16; part++) { + tdown = 32 * (sbit + 2) * (1 << part); + + if (abs(tdown - down) < difference) { + *gw_srv_class = (sbit << 7) + (part << 3); + difference = abs(tdown - down); + mdown = tdown; + } + } + } + + /* test all upspeeds */ + difference = 0x0FFFFFFF; + + for (part = 0; part < 8; part++) { + tup = ((part + 1) * (mdown)) / 8; + + if (abs(tup - up) < difference) { + *gw_srv_class = (*gw_srv_class & 0xF8) | part; + difference = abs(tup - up); + } + } +} + +/* returns the up and downspeeds in kbit, calculated from the class */ +void gw_bandwidth_to_kbit(uint8_t gw_srv_class, int *down, int *up) +{ + char sbit = (gw_srv_class & 0x80) >> 7; + char dpart = (gw_srv_class & 0x78) >> 3; + char upart = (gw_srv_class & 0x07); + + if (!gw_srv_class) { + *down = 0; + *up = 0; + return; + } + + *down = 32 * (sbit + 2) * (1 << dpart); + *up = ((upart + 1) * (*down)) / 8; +} + +static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, + long *up, long *down) +{ + int ret, multi = 1; + char *slash_ptr, *tmp_ptr; + + slash_ptr = strchr(buff, '/'); + if (slash_ptr) + *slash_ptr = 0; + + if (strlen(buff) > 4) { + tmp_ptr = buff + strlen(buff) - 4; + + if (strnicmp(tmp_ptr, "mbit", 4) == 0) + multi = 1024; + + if ((strnicmp(tmp_ptr, "kbit", 4) == 0) || + (multi > 1)) + *tmp_ptr = '\0'; + } + + ret = strict_strtoul(buff, 10, down); + if (ret) { + bat_err(net_dev, + "Download speed of gateway mode invalid: %s\n", + buff); + return false; + } + + *down *= multi; + + /* we also got some upload info */ + if (slash_ptr) { + multi = 1; + + if (strlen(slash_ptr + 1) > 4) { + tmp_ptr = slash_ptr + 1 - 4 + strlen(slash_ptr + 1); + + if (strnicmp(tmp_ptr, "mbit", 4) == 0) + multi = 1024; + + if ((strnicmp(tmp_ptr, "kbit", 4) == 0) || + (multi > 1)) + *tmp_ptr = '\0'; + } + + ret = strict_strtoul(slash_ptr + 1, 10, up); + if (ret) { + bat_err(net_dev, + "Upload speed of gateway mode invalid: " + "%s\n", slash_ptr + 1); + return false; + } + + *up *= multi; + } + + return true; +} + +ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) +{ + struct bat_priv *bat_priv = netdev_priv(net_dev); + long gw_bandwidth_tmp = 0, up = 0, down = 0; + bool ret; + + ret = parse_gw_bandwidth(net_dev, buff, &up, &down); + if (!ret) + goto end; + + if ((!down) || (down < 256)) + down = 2000; + + if (!up) + up = down / 5; + + kbit_to_gw_bandwidth(down, up, &gw_bandwidth_tmp); + + /** + * the gw bandwidth we guessed above might not match the given + * speeds, hence we need to calculate it back to show the number + * that is going to be propagated + **/ + gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp, + (int *)&down, (int *)&up); + + gw_deselect(bat_priv); + bat_info(net_dev, "Changing gateway bandwidth from: '%i' to: '%ld' " + "(propagating: %ld%s/%ld%s)\n", + atomic_read(&bat_priv->gw_bandwidth), gw_bandwidth_tmp, + (down > 2048 ? down / 1024 : down), + (down > 2048 ? "MBit" : "KBit"), + (up > 2048 ? up / 1024 : up), + (up > 2048 ? "MBit" : "KBit")); + + atomic_set(&bat_priv->gw_bandwidth, gw_bandwidth_tmp); + +end: + return count; +} diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h new file mode 100644 index 000000000000..5e728d0b7959 --- /dev/null +++ b/net/batman-adv/gateway_common.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_GATEWAY_COMMON_H_ +#define _NET_BATMAN_ADV_GATEWAY_COMMON_H_ + +enum gw_modes { + GW_MODE_OFF, + GW_MODE_CLIENT, + GW_MODE_SERVER, +}; + +#define GW_MODE_OFF_NAME "off" +#define GW_MODE_CLIENT_NAME "client" +#define GW_MODE_SERVER_NAME "server" + +void gw_bandwidth_to_kbit(uint8_t gw_class, int *down, int *up); +ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count); + +#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c new file mode 100644 index 000000000000..4f95777ce080 --- /dev/null +++ b/net/batman-adv/hard-interface.c @@ -0,0 +1,651 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "hard-interface.h" +#include "soft-interface.h" +#include "send.h" +#include "translation-table.h" +#include "routing.h" +#include "bat_sysfs.h" +#include "originator.h" +#include "hash.h" + +#include <linux/if_arp.h> + +/* protect update critical side of if_list - but not the content */ +static DEFINE_SPINLOCK(if_list_lock); + +static void hardif_free_rcu(struct rcu_head *rcu) +{ + struct batman_if *batman_if; + + batman_if = container_of(rcu, struct batman_if, rcu); + dev_put(batman_if->net_dev); + kref_put(&batman_if->refcount, hardif_free_ref); +} + +struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) +{ + struct batman_if *batman_if; + + rcu_read_lock(); + list_for_each_entry_rcu(batman_if, &if_list, list) { + if (batman_if->net_dev == net_dev) + goto out; + } + + batman_if = NULL; + +out: + if (batman_if) + kref_get(&batman_if->refcount); + + rcu_read_unlock(); + return batman_if; +} + +static int is_valid_iface(struct net_device *net_dev) +{ + if (net_dev->flags & IFF_LOOPBACK) + return 0; + + if (net_dev->type != ARPHRD_ETHER) + return 0; + + if (net_dev->addr_len != ETH_ALEN) + return 0; + + /* no batman over batman */ +#ifdef HAVE_NET_DEVICE_OPS + if (net_dev->netdev_ops->ndo_start_xmit == interface_tx) + return 0; +#else + if (net_dev->hard_start_xmit == interface_tx) + return 0; +#endif + + /* Device is being bridged */ + /* if (net_dev->priv_flags & IFF_BRIDGE_PORT) + return 0; */ + + return 1; +} + +static struct batman_if *get_active_batman_if(struct net_device *soft_iface) +{ + struct batman_if *batman_if; + + rcu_read_lock(); + list_for_each_entry_rcu(batman_if, &if_list, list) { + if (batman_if->soft_iface != soft_iface) + continue; + + if (batman_if->if_status == IF_ACTIVE) + goto out; + } + + batman_if = NULL; + +out: + if (batman_if) + kref_get(&batman_if->refcount); + + rcu_read_unlock(); + return batman_if; +} + +static void update_primary_addr(struct bat_priv *bat_priv) +{ + struct vis_packet *vis_packet; + + vis_packet = (struct vis_packet *) + bat_priv->my_vis_info->skb_packet->data; + memcpy(vis_packet->vis_orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + memcpy(vis_packet->sender_orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); +} + +static void set_primary_if(struct bat_priv *bat_priv, + struct batman_if *batman_if) +{ + struct batman_packet *batman_packet; + struct batman_if *old_if; + + if (batman_if) + kref_get(&batman_if->refcount); + + old_if = bat_priv->primary_if; + bat_priv->primary_if = batman_if; + + if (old_if) + kref_put(&old_if->refcount, hardif_free_ref); + + if (!bat_priv->primary_if) + return; + + batman_packet = (struct batman_packet *)(batman_if->packet_buff); + batman_packet->flags = PRIMARIES_FIRST_HOP; + batman_packet->ttl = TTL; + + update_primary_addr(bat_priv); + + /*** + * hacky trick to make sure that we send the HNA information via + * our new primary interface + */ + atomic_set(&bat_priv->hna_local_changed, 1); +} + +static bool hardif_is_iface_up(struct batman_if *batman_if) +{ + if (batman_if->net_dev->flags & IFF_UP) + return true; + + return false; +} + +static void update_mac_addresses(struct batman_if *batman_if) +{ + memcpy(((struct batman_packet *)(batman_if->packet_buff))->orig, + batman_if->net_dev->dev_addr, ETH_ALEN); + memcpy(((struct batman_packet *)(batman_if->packet_buff))->prev_sender, + batman_if->net_dev->dev_addr, ETH_ALEN); +} + +static void check_known_mac_addr(struct net_device *net_dev) +{ + struct batman_if *batman_if; + + rcu_read_lock(); + list_for_each_entry_rcu(batman_if, &if_list, list) { + if ((batman_if->if_status != IF_ACTIVE) && + (batman_if->if_status != IF_TO_BE_ACTIVATED)) + continue; + + if (batman_if->net_dev == net_dev) + continue; + + if (!compare_orig(batman_if->net_dev->dev_addr, + net_dev->dev_addr)) + continue; + + pr_warning("The newly added mac address (%pM) already exists " + "on: %s\n", net_dev->dev_addr, + batman_if->net_dev->name); + pr_warning("It is strongly recommended to keep mac addresses " + "unique to avoid problems!\n"); + } + rcu_read_unlock(); +} + +int hardif_min_mtu(struct net_device *soft_iface) +{ + struct bat_priv *bat_priv = netdev_priv(soft_iface); + struct batman_if *batman_if; + /* allow big frames if all devices are capable to do so + * (have MTU > 1500 + BAT_HEADER_LEN) */ + int min_mtu = ETH_DATA_LEN; + + if (atomic_read(&bat_priv->fragmentation)) + goto out; + + rcu_read_lock(); + list_for_each_entry_rcu(batman_if, &if_list, list) { + if ((batman_if->if_status != IF_ACTIVE) && + (batman_if->if_status != IF_TO_BE_ACTIVATED)) + continue; + + if (batman_if->soft_iface != soft_iface) + continue; + + min_mtu = min_t(int, batman_if->net_dev->mtu - BAT_HEADER_LEN, + min_mtu); + } + rcu_read_unlock(); +out: + return min_mtu; +} + +/* adjusts the MTU if a new interface with a smaller MTU appeared. */ +void update_min_mtu(struct net_device *soft_iface) +{ + int min_mtu; + + min_mtu = hardif_min_mtu(soft_iface); + if (soft_iface->mtu != min_mtu) + soft_iface->mtu = min_mtu; +} + +static void hardif_activate_interface(struct batman_if *batman_if) +{ + struct bat_priv *bat_priv; + + if (batman_if->if_status != IF_INACTIVE) + return; + + bat_priv = netdev_priv(batman_if->soft_iface); + + update_mac_addresses(batman_if); + batman_if->if_status = IF_TO_BE_ACTIVATED; + + /** + * the first active interface becomes our primary interface or + * the next active interface after the old primay interface was removed + */ + if (!bat_priv->primary_if) + set_primary_if(bat_priv, batman_if); + + bat_info(batman_if->soft_iface, "Interface activated: %s\n", + batman_if->net_dev->name); + + update_min_mtu(batman_if->soft_iface); + return; +} + +static void hardif_deactivate_interface(struct batman_if *batman_if) +{ + if ((batman_if->if_status != IF_ACTIVE) && + (batman_if->if_status != IF_TO_BE_ACTIVATED)) + return; + + batman_if->if_status = IF_INACTIVE; + + bat_info(batman_if->soft_iface, "Interface deactivated: %s\n", + batman_if->net_dev->name); + + update_min_mtu(batman_if->soft_iface); +} + +int hardif_enable_interface(struct batman_if *batman_if, char *iface_name) +{ + struct bat_priv *bat_priv; + struct batman_packet *batman_packet; + + if (batman_if->if_status != IF_NOT_IN_USE) + goto out; + + batman_if->soft_iface = dev_get_by_name(&init_net, iface_name); + + if (!batman_if->soft_iface) { + batman_if->soft_iface = softif_create(iface_name); + + if (!batman_if->soft_iface) + goto err; + + /* dev_get_by_name() increases the reference counter for us */ + dev_hold(batman_if->soft_iface); + } + + bat_priv = netdev_priv(batman_if->soft_iface); + batman_if->packet_len = BAT_PACKET_LEN; + batman_if->packet_buff = kmalloc(batman_if->packet_len, GFP_ATOMIC); + + if (!batman_if->packet_buff) { + bat_err(batman_if->soft_iface, "Can't add interface packet " + "(%s): out of memory\n", batman_if->net_dev->name); + goto err; + } + + batman_packet = (struct batman_packet *)(batman_if->packet_buff); + batman_packet->packet_type = BAT_PACKET; + batman_packet->version = COMPAT_VERSION; + batman_packet->flags = 0; + batman_packet->ttl = 2; + batman_packet->tq = TQ_MAX_VALUE; + batman_packet->num_hna = 0; + + batman_if->if_num = bat_priv->num_ifaces; + bat_priv->num_ifaces++; + batman_if->if_status = IF_INACTIVE; + orig_hash_add_if(batman_if, bat_priv->num_ifaces); + + batman_if->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN); + batman_if->batman_adv_ptype.func = batman_skb_recv; + batman_if->batman_adv_ptype.dev = batman_if->net_dev; + kref_get(&batman_if->refcount); + dev_add_pack(&batman_if->batman_adv_ptype); + + atomic_set(&batman_if->seqno, 1); + atomic_set(&batman_if->frag_seqno, 1); + bat_info(batman_if->soft_iface, "Adding interface: %s\n", + batman_if->net_dev->name); + + if (atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < + ETH_DATA_LEN + BAT_HEADER_LEN) + bat_info(batman_if->soft_iface, + "The MTU of interface %s is too small (%i) to handle " + "the transport of batman-adv packets. Packets going " + "over this interface will be fragmented on layer2 " + "which could impact the performance. Setting the MTU " + "to %zi would solve the problem.\n", + batman_if->net_dev->name, batman_if->net_dev->mtu, + ETH_DATA_LEN + BAT_HEADER_LEN); + + if (!atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < + ETH_DATA_LEN + BAT_HEADER_LEN) + bat_info(batman_if->soft_iface, + "The MTU of interface %s is too small (%i) to handle " + "the transport of batman-adv packets. If you experience" + " problems getting traffic through try increasing the " + "MTU to %zi.\n", + batman_if->net_dev->name, batman_if->net_dev->mtu, + ETH_DATA_LEN + BAT_HEADER_LEN); + + if (hardif_is_iface_up(batman_if)) + hardif_activate_interface(batman_if); + else + bat_err(batman_if->soft_iface, "Not using interface %s " + "(retrying later): interface not active\n", + batman_if->net_dev->name); + + /* begin scheduling originator messages on that interface */ + schedule_own_packet(batman_if); + +out: + return 0; + +err: + return -ENOMEM; +} + +void hardif_disable_interface(struct batman_if *batman_if) +{ + struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + + if (batman_if->if_status == IF_ACTIVE) + hardif_deactivate_interface(batman_if); + + if (batman_if->if_status != IF_INACTIVE) + return; + + bat_info(batman_if->soft_iface, "Removing interface: %s\n", + batman_if->net_dev->name); + dev_remove_pack(&batman_if->batman_adv_ptype); + kref_put(&batman_if->refcount, hardif_free_ref); + + bat_priv->num_ifaces--; + orig_hash_del_if(batman_if, bat_priv->num_ifaces); + + if (batman_if == bat_priv->primary_if) { + struct batman_if *new_if; + + new_if = get_active_batman_if(batman_if->soft_iface); + set_primary_if(bat_priv, new_if); + + if (new_if) + kref_put(&new_if->refcount, hardif_free_ref); + } + + kfree(batman_if->packet_buff); + batman_if->packet_buff = NULL; + batman_if->if_status = IF_NOT_IN_USE; + + /* delete all references to this batman_if */ + purge_orig_ref(bat_priv); + purge_outstanding_packets(bat_priv, batman_if); + dev_put(batman_if->soft_iface); + + /* nobody uses this interface anymore */ + if (!bat_priv->num_ifaces) + softif_destroy(batman_if->soft_iface); + + batman_if->soft_iface = NULL; +} + +static struct batman_if *hardif_add_interface(struct net_device *net_dev) +{ + struct batman_if *batman_if; + int ret; + + ret = is_valid_iface(net_dev); + if (ret != 1) + goto out; + + dev_hold(net_dev); + + batman_if = kmalloc(sizeof(struct batman_if), GFP_ATOMIC); + if (!batman_if) { + pr_err("Can't add interface (%s): out of memory\n", + net_dev->name); + goto release_dev; + } + + ret = sysfs_add_hardif(&batman_if->hardif_obj, net_dev); + if (ret) + goto free_if; + + batman_if->if_num = -1; + batman_if->net_dev = net_dev; + batman_if->soft_iface = NULL; + batman_if->if_status = IF_NOT_IN_USE; + INIT_LIST_HEAD(&batman_if->list); + kref_init(&batman_if->refcount); + + check_known_mac_addr(batman_if->net_dev); + + spin_lock(&if_list_lock); + list_add_tail_rcu(&batman_if->list, &if_list); + spin_unlock(&if_list_lock); + + /* extra reference for return */ + kref_get(&batman_if->refcount); + return batman_if; + +free_if: + kfree(batman_if); +release_dev: + dev_put(net_dev); +out: + return NULL; +} + +static void hardif_remove_interface(struct batman_if *batman_if) +{ + /* first deactivate interface */ + if (batman_if->if_status != IF_NOT_IN_USE) + hardif_disable_interface(batman_if); + + if (batman_if->if_status != IF_NOT_IN_USE) + return; + + batman_if->if_status = IF_TO_BE_REMOVED; + sysfs_del_hardif(&batman_if->hardif_obj); + call_rcu(&batman_if->rcu, hardif_free_rcu); +} + +void hardif_remove_interfaces(void) +{ + struct batman_if *batman_if, *batman_if_tmp; + struct list_head if_queue; + + INIT_LIST_HEAD(&if_queue); + + spin_lock(&if_list_lock); + list_for_each_entry_safe(batman_if, batman_if_tmp, &if_list, list) { + list_del_rcu(&batman_if->list); + list_add_tail(&batman_if->list, &if_queue); + } + spin_unlock(&if_list_lock); + + rtnl_lock(); + list_for_each_entry_safe(batman_if, batman_if_tmp, &if_queue, list) { + hardif_remove_interface(batman_if); + } + rtnl_unlock(); +} + +static int hard_if_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *net_dev = (struct net_device *)ptr; + struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct bat_priv *bat_priv; + + if (!batman_if && event == NETDEV_REGISTER) + batman_if = hardif_add_interface(net_dev); + + if (!batman_if) + goto out; + + switch (event) { + case NETDEV_UP: + hardif_activate_interface(batman_if); + break; + case NETDEV_GOING_DOWN: + case NETDEV_DOWN: + hardif_deactivate_interface(batman_if); + break; + case NETDEV_UNREGISTER: + spin_lock(&if_list_lock); + list_del_rcu(&batman_if->list); + spin_unlock(&if_list_lock); + + hardif_remove_interface(batman_if); + break; + case NETDEV_CHANGEMTU: + if (batman_if->soft_iface) + update_min_mtu(batman_if->soft_iface); + break; + case NETDEV_CHANGEADDR: + if (batman_if->if_status == IF_NOT_IN_USE) + goto hardif_put; + + check_known_mac_addr(batman_if->net_dev); + update_mac_addresses(batman_if); + + bat_priv = netdev_priv(batman_if->soft_iface); + if (batman_if == bat_priv->primary_if) + update_primary_addr(bat_priv); + break; + default: + break; + }; + +hardif_put: + kref_put(&batman_if->refcount, hardif_free_ref); +out: + return NOTIFY_DONE; +} + +/* receive a packet with the batman ethertype coming on a hard + * interface */ +int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *ptype, struct net_device *orig_dev) +{ + struct bat_priv *bat_priv; + struct batman_packet *batman_packet; + struct batman_if *batman_if; + int ret; + + batman_if = container_of(ptype, struct batman_if, batman_adv_ptype); + skb = skb_share_check(skb, GFP_ATOMIC); + + /* skb was released by skb_share_check() */ + if (!skb) + goto err_out; + + /* packet should hold at least type and version */ + if (unlikely(!pskb_may_pull(skb, 2))) + goto err_free; + + /* expect a valid ethernet header here. */ + if (unlikely(skb->mac_len != sizeof(struct ethhdr) + || !skb_mac_header(skb))) + goto err_free; + + if (!batman_if->soft_iface) + goto err_free; + + bat_priv = netdev_priv(batman_if->soft_iface); + + if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) + goto err_free; + + /* discard frames on not active interfaces */ + if (batman_if->if_status != IF_ACTIVE) + goto err_free; + + batman_packet = (struct batman_packet *)skb->data; + + if (batman_packet->version != COMPAT_VERSION) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: incompatible batman version (%i)\n", + batman_packet->version); + goto err_free; + } + + /* all receive handlers return whether they received or reused + * the supplied skb. if not, we have to free the skb. */ + + switch (batman_packet->packet_type) { + /* batman originator packet */ + case BAT_PACKET: + ret = recv_bat_packet(skb, batman_if); + break; + + /* batman icmp packet */ + case BAT_ICMP: + ret = recv_icmp_packet(skb, batman_if); + break; + + /* unicast packet */ + case BAT_UNICAST: + ret = recv_unicast_packet(skb, batman_if); + break; + + /* fragmented unicast packet */ + case BAT_UNICAST_FRAG: + ret = recv_ucast_frag_packet(skb, batman_if); + break; + + /* broadcast packet */ + case BAT_BCAST: + ret = recv_bcast_packet(skb, batman_if); + break; + + /* vis packet */ + case BAT_VIS: + ret = recv_vis_packet(skb, batman_if); + break; + default: + ret = NET_RX_DROP; + } + + if (ret == NET_RX_DROP) + kfree_skb(skb); + + /* return NET_RX_SUCCESS in any case as we + * most probably dropped the packet for + * routing-logical reasons. */ + + return NET_RX_SUCCESS; + +err_free: + kfree_skb(skb); +err_out: + return NET_RX_DROP; +} + +struct notifier_block hard_if_notifier = { + .notifier_call = hard_if_event, +}; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h new file mode 100644 index 000000000000..30ec3b8db459 --- /dev/null +++ b/net/batman-adv/hard-interface.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_ +#define _NET_BATMAN_ADV_HARD_INTERFACE_H_ + +#define IF_NOT_IN_USE 0 +#define IF_TO_BE_REMOVED 1 +#define IF_INACTIVE 2 +#define IF_ACTIVE 3 +#define IF_TO_BE_ACTIVATED 4 +#define IF_I_WANT_YOU 5 + +extern struct notifier_block hard_if_notifier; + +struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev); +int hardif_enable_interface(struct batman_if *batman_if, char *iface_name); +void hardif_disable_interface(struct batman_if *batman_if); +void hardif_remove_interfaces(void); +int batman_skb_recv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *ptype, + struct net_device *orig_dev); +int hardif_min_mtu(struct net_device *soft_iface); +void update_min_mtu(struct net_device *soft_iface); + +static inline void hardif_free_ref(struct kref *refcount) +{ + struct batman_if *batman_if; + + batman_if = container_of(refcount, struct batman_if, refcount); + kfree(batman_if); +} + +#endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */ diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c new file mode 100644 index 000000000000..26e623eb9def --- /dev/null +++ b/net/batman-adv/hash.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * + * Simon Wunderlich, Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "hash.h" + +/* clears the hash */ +static void hash_init(struct hashtable_t *hash) +{ + int i; + + for (i = 0 ; i < hash->size; i++) + INIT_HLIST_HEAD(&hash->table[i]); +} + +/* free only the hashtable and the hash itself. */ +void hash_destroy(struct hashtable_t *hash) +{ + kfree(hash->table); + kfree(hash); +} + +/* allocates and clears the hash */ +struct hashtable_t *hash_new(int size) +{ + struct hashtable_t *hash; + + hash = kmalloc(sizeof(struct hashtable_t) , GFP_ATOMIC); + + if (!hash) + return NULL; + + hash->size = size; + hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC); + + if (!hash->table) { + kfree(hash); + return NULL; + } + + hash_init(hash); + + return hash; +} diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h new file mode 100644 index 000000000000..09216ade16f1 --- /dev/null +++ b/net/batman-adv/hash.h @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * + * Simon Wunderlich, Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_HASH_H_ +#define _NET_BATMAN_ADV_HASH_H_ + +#include <linux/list.h> + +/* callback to a compare function. should + * compare 2 element datas for their keys, + * return 0 if same and not 0 if not + * same */ +typedef int (*hashdata_compare_cb)(void *, void *); + +/* the hashfunction, should return an index + * based on the key in the data of the first + * argument and the size the second */ +typedef int (*hashdata_choose_cb)(void *, int); +typedef void (*hashdata_free_cb)(void *, void *); + +struct element_t { + void *data; /* pointer to the data */ + struct hlist_node hlist; /* bucket list pointer */ +}; + +struct hashtable_t { + struct hlist_head *table; /* the hashtable itself, with the buckets */ + int size; /* size of hashtable */ +}; + +/* allocates and clears the hash */ +struct hashtable_t *hash_new(int size); + +/* remove element if you already found the element you want to delete and don't + * need the overhead to find it again with hash_remove(). But usually, you + * don't want to use this function, as it fiddles with hash-internals. */ +void *hash_remove_element(struct hashtable_t *hash, struct element_t *elem); + +/* free only the hashtable and the hash itself. */ +void hash_destroy(struct hashtable_t *hash); + +/* remove the hash structure. if hashdata_free_cb != NULL, this function will be + * called to remove the elements inside of the hash. if you don't remove the + * elements, memory might be leaked. */ +static inline void hash_delete(struct hashtable_t *hash, + hashdata_free_cb free_cb, void *arg) +{ + struct hlist_head *head; + struct hlist_node *walk, *safe; + struct element_t *bucket; + int i; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_safe(walk, safe, head) { + bucket = hlist_entry(walk, struct element_t, hlist); + if (free_cb) + free_cb(bucket->data, arg); + + hlist_del(walk); + kfree(bucket); + } + } + + hash_destroy(hash); +} + +/* adds data to the hashtable. returns 0 on success, -1 on error */ +static inline int hash_add(struct hashtable_t *hash, + hashdata_compare_cb compare, + hashdata_choose_cb choose, void *data) +{ + int index; + struct hlist_head *head; + struct hlist_node *walk, *safe; + struct element_t *bucket; + + if (!hash) + return -1; + + index = choose(data, hash->size); + head = &hash->table[index]; + + hlist_for_each_safe(walk, safe, head) { + bucket = hlist_entry(walk, struct element_t, hlist); + if (compare(bucket->data, data)) + return -1; + } + + /* no duplicate found in list, add new element */ + bucket = kmalloc(sizeof(struct element_t), GFP_ATOMIC); + + if (!bucket) + return -1; + + bucket->data = data; + hlist_add_head(&bucket->hlist, head); + + return 0; +} + +/* removes data from hash, if found. returns pointer do data on success, so you + * can remove the used structure yourself, or NULL on error . data could be the + * structure you use with just the key filled, we just need the key for + * comparing. */ +static inline void *hash_remove(struct hashtable_t *hash, + hashdata_compare_cb compare, + hashdata_choose_cb choose, void *data) +{ + size_t index; + struct hlist_node *walk; + struct element_t *bucket; + struct hlist_head *head; + void *data_save; + + index = choose(data, hash->size); + head = &hash->table[index]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + if (compare(bucket->data, data)) { + data_save = bucket->data; + hlist_del(walk); + kfree(bucket); + return data_save; + } + } + + return NULL; +} + +/* finds data, based on the key in keydata. returns the found data on success, + * or NULL on error */ +static inline void *hash_find(struct hashtable_t *hash, + hashdata_compare_cb compare, + hashdata_choose_cb choose, void *keydata) +{ + int index; + struct hlist_head *head; + struct hlist_node *walk; + struct element_t *bucket; + + if (!hash) + return NULL; + + index = choose(keydata , hash->size); + head = &hash->table[index]; + + hlist_for_each(walk, head) { + bucket = hlist_entry(walk, struct element_t, hlist); + if (compare(bucket->data, keydata)) + return bucket->data; + } + + return NULL; +} + +#endif /* _NET_BATMAN_ADV_HASH_H_ */ diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c new file mode 100644 index 000000000000..ecf6d7ffab2e --- /dev/null +++ b/net/batman-adv/icmp_socket.c @@ -0,0 +1,356 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include <linux/debugfs.h> +#include <linux/slab.h> +#include "icmp_socket.h" +#include "send.h" +#include "types.h" +#include "hash.h" +#include "originator.h" +#include "hard-interface.h" + +static struct socket_client *socket_client_hash[256]; + +static void bat_socket_add_packet(struct socket_client *socket_client, + struct icmp_packet_rr *icmp_packet, + size_t icmp_len); + +void bat_socket_init(void) +{ + memset(socket_client_hash, 0, sizeof(socket_client_hash)); +} + +static int bat_socket_open(struct inode *inode, struct file *file) +{ + unsigned int i; + struct socket_client *socket_client; + + nonseekable_open(inode, file); + + socket_client = kmalloc(sizeof(struct socket_client), GFP_KERNEL); + + if (!socket_client) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(socket_client_hash); i++) { + if (!socket_client_hash[i]) { + socket_client_hash[i] = socket_client; + break; + } + } + + if (i == ARRAY_SIZE(socket_client_hash)) { + pr_err("Error - can't add another packet client: " + "maximum number of clients reached\n"); + kfree(socket_client); + return -EXFULL; + } + + INIT_LIST_HEAD(&socket_client->queue_list); + socket_client->queue_len = 0; + socket_client->index = i; + socket_client->bat_priv = inode->i_private; + spin_lock_init(&socket_client->lock); + init_waitqueue_head(&socket_client->queue_wait); + + file->private_data = socket_client; + + inc_module_count(); + return 0; +} + +static int bat_socket_release(struct inode *inode, struct file *file) +{ + struct socket_client *socket_client = file->private_data; + struct socket_packet *socket_packet; + struct list_head *list_pos, *list_pos_tmp; + + spin_lock_bh(&socket_client->lock); + + /* for all packets in the queue ... */ + list_for_each_safe(list_pos, list_pos_tmp, &socket_client->queue_list) { + socket_packet = list_entry(list_pos, + struct socket_packet, list); + + list_del(list_pos); + kfree(socket_packet); + } + + socket_client_hash[socket_client->index] = NULL; + spin_unlock_bh(&socket_client->lock); + + kfree(socket_client); + dec_module_count(); + + return 0; +} + +static ssize_t bat_socket_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct socket_client *socket_client = file->private_data; + struct socket_packet *socket_packet; + size_t packet_len; + int error; + + if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0)) + return -EAGAIN; + + if ((!buf) || (count < sizeof(struct icmp_packet))) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + error = wait_event_interruptible(socket_client->queue_wait, + socket_client->queue_len); + + if (error) + return error; + + spin_lock_bh(&socket_client->lock); + + socket_packet = list_first_entry(&socket_client->queue_list, + struct socket_packet, list); + list_del(&socket_packet->list); + socket_client->queue_len--; + + spin_unlock_bh(&socket_client->lock); + + error = __copy_to_user(buf, &socket_packet->icmp_packet, + socket_packet->icmp_len); + + packet_len = socket_packet->icmp_len; + kfree(socket_packet); + + if (error) + return -EFAULT; + + return packet_len; +} + +static ssize_t bat_socket_write(struct file *file, const char __user *buff, + size_t len, loff_t *off) +{ + struct socket_client *socket_client = file->private_data; + struct bat_priv *bat_priv = socket_client->bat_priv; + struct sk_buff *skb; + struct icmp_packet_rr *icmp_packet; + + struct orig_node *orig_node; + struct batman_if *batman_if; + size_t packet_len = sizeof(struct icmp_packet); + uint8_t dstaddr[ETH_ALEN]; + + if (len < sizeof(struct icmp_packet)) { + bat_dbg(DBG_BATMAN, bat_priv, + "Error - can't send packet from char device: " + "invalid packet size\n"); + return -EINVAL; + } + + if (!bat_priv->primary_if) + return -EFAULT; + + if (len >= sizeof(struct icmp_packet_rr)) + packet_len = sizeof(struct icmp_packet_rr); + + skb = dev_alloc_skb(packet_len + sizeof(struct ethhdr)); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, sizeof(struct ethhdr)); + icmp_packet = (struct icmp_packet_rr *)skb_put(skb, packet_len); + + if (!access_ok(VERIFY_READ, buff, packet_len)) { + len = -EFAULT; + goto free_skb; + } + + if (__copy_from_user(icmp_packet, buff, packet_len)) { + len = -EFAULT; + goto free_skb; + } + + if (icmp_packet->packet_type != BAT_ICMP) { + bat_dbg(DBG_BATMAN, bat_priv, + "Error - can't send packet from char device: " + "got bogus packet type (expected: BAT_ICMP)\n"); + len = -EINVAL; + goto free_skb; + } + + if (icmp_packet->msg_type != ECHO_REQUEST) { + bat_dbg(DBG_BATMAN, bat_priv, + "Error - can't send packet from char device: " + "got bogus message type (expected: ECHO_REQUEST)\n"); + len = -EINVAL; + goto free_skb; + } + + icmp_packet->uid = socket_client->index; + + if (icmp_packet->version != COMPAT_VERSION) { + icmp_packet->msg_type = PARAMETER_PROBLEM; + icmp_packet->ttl = COMPAT_VERSION; + bat_socket_add_packet(socket_client, icmp_packet, packet_len); + goto free_skb; + } + + if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) + goto dst_unreach; + + spin_lock_bh(&bat_priv->orig_hash_lock); + orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, + compare_orig, choose_orig, + icmp_packet->dst)); + + if (!orig_node) + goto unlock; + + if (!orig_node->router) + goto unlock; + + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + + spin_unlock_bh(&bat_priv->orig_hash_lock); + + if (!batman_if) + goto dst_unreach; + + if (batman_if->if_status != IF_ACTIVE) + goto dst_unreach; + + memcpy(icmp_packet->orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + + if (packet_len == sizeof(struct icmp_packet_rr)) + memcpy(icmp_packet->rr, batman_if->net_dev->dev_addr, ETH_ALEN); + + + send_skb_packet(skb, batman_if, dstaddr); + + goto out; + +unlock: + spin_unlock_bh(&bat_priv->orig_hash_lock); +dst_unreach: + icmp_packet->msg_type = DESTINATION_UNREACHABLE; + bat_socket_add_packet(socket_client, icmp_packet, packet_len); +free_skb: + kfree_skb(skb); +out: + return len; +} + +static unsigned int bat_socket_poll(struct file *file, poll_table *wait) +{ + struct socket_client *socket_client = file->private_data; + + poll_wait(file, &socket_client->queue_wait, wait); + + if (socket_client->queue_len > 0) + return POLLIN | POLLRDNORM; + + return 0; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = bat_socket_open, + .release = bat_socket_release, + .read = bat_socket_read, + .write = bat_socket_write, + .poll = bat_socket_poll, + .llseek = no_llseek, +}; + +int bat_socket_setup(struct bat_priv *bat_priv) +{ + struct dentry *d; + + if (!bat_priv->debug_dir) + goto err; + + d = debugfs_create_file(ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR, + bat_priv->debug_dir, bat_priv, &fops); + if (d) + goto err; + + return 0; + +err: + return 1; +} + +static void bat_socket_add_packet(struct socket_client *socket_client, + struct icmp_packet_rr *icmp_packet, + size_t icmp_len) +{ + struct socket_packet *socket_packet; + + socket_packet = kmalloc(sizeof(struct socket_packet), GFP_ATOMIC); + + if (!socket_packet) + return; + + INIT_LIST_HEAD(&socket_packet->list); + memcpy(&socket_packet->icmp_packet, icmp_packet, icmp_len); + socket_packet->icmp_len = icmp_len; + + spin_lock_bh(&socket_client->lock); + + /* while waiting for the lock the socket_client could have been + * deleted */ + if (!socket_client_hash[icmp_packet->uid]) { + spin_unlock_bh(&socket_client->lock); + kfree(socket_packet); + return; + } + + list_add_tail(&socket_packet->list, &socket_client->queue_list); + socket_client->queue_len++; + + if (socket_client->queue_len > 100) { + socket_packet = list_first_entry(&socket_client->queue_list, + struct socket_packet, list); + + list_del(&socket_packet->list); + kfree(socket_packet); + socket_client->queue_len--; + } + + spin_unlock_bh(&socket_client->lock); + + wake_up(&socket_client->queue_wait); +} + +void bat_socket_receive_packet(struct icmp_packet_rr *icmp_packet, + size_t icmp_len) +{ + struct socket_client *hash = socket_client_hash[icmp_packet->uid]; + + if (hash) + bat_socket_add_packet(hash, icmp_packet, icmp_len); +} diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h new file mode 100644 index 000000000000..bf9b348cde27 --- /dev/null +++ b/net/batman-adv/icmp_socket.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ +#define _NET_BATMAN_ADV_ICMP_SOCKET_H_ + +#include "types.h" + +#define ICMP_SOCKET "socket" + +void bat_socket_init(void); +int bat_socket_setup(struct bat_priv *bat_priv); +void bat_socket_receive_packet(struct icmp_packet_rr *icmp_packet, + size_t icmp_len); + +#endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c new file mode 100644 index 000000000000..b827f6a158cb --- /dev/null +++ b/net/batman-adv/main.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "bat_sysfs.h" +#include "bat_debugfs.h" +#include "routing.h" +#include "send.h" +#include "originator.h" +#include "soft-interface.h" +#include "icmp_socket.h" +#include "translation-table.h" +#include "hard-interface.h" +#include "gateway_client.h" +#include "types.h" +#include "vis.h" +#include "hash.h" + +struct list_head if_list; + +unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +struct workqueue_struct *bat_event_workqueue; + +static int __init batman_init(void) +{ + INIT_LIST_HEAD(&if_list); + + /* the name should not be longer than 10 chars - see + * http://lwn.net/Articles/23634/ */ + bat_event_workqueue = create_singlethread_workqueue("bat_events"); + + if (!bat_event_workqueue) + return -ENOMEM; + + bat_socket_init(); + debugfs_init(); + + register_netdevice_notifier(&hard_if_notifier); + + pr_info("B.A.T.M.A.N. advanced %s%s (compatibility version %i) " + "loaded\n", SOURCE_VERSION, REVISION_VERSION_STR, + COMPAT_VERSION); + + return 0; +} + +static void __exit batman_exit(void) +{ + debugfs_destroy(); + unregister_netdevice_notifier(&hard_if_notifier); + hardif_remove_interfaces(); + + flush_workqueue(bat_event_workqueue); + destroy_workqueue(bat_event_workqueue); + bat_event_workqueue = NULL; + + rcu_barrier(); +} + +int mesh_init(struct net_device *soft_iface) +{ + struct bat_priv *bat_priv = netdev_priv(soft_iface); + + spin_lock_init(&bat_priv->orig_hash_lock); + spin_lock_init(&bat_priv->forw_bat_list_lock); + spin_lock_init(&bat_priv->forw_bcast_list_lock); + spin_lock_init(&bat_priv->hna_lhash_lock); + spin_lock_init(&bat_priv->hna_ghash_lock); + spin_lock_init(&bat_priv->gw_list_lock); + spin_lock_init(&bat_priv->vis_hash_lock); + spin_lock_init(&bat_priv->vis_list_lock); + spin_lock_init(&bat_priv->softif_neigh_lock); + + INIT_HLIST_HEAD(&bat_priv->forw_bat_list); + INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); + INIT_HLIST_HEAD(&bat_priv->gw_list); + INIT_HLIST_HEAD(&bat_priv->softif_neigh_list); + + if (originator_init(bat_priv) < 1) + goto err; + + if (hna_local_init(bat_priv) < 1) + goto err; + + if (hna_global_init(bat_priv) < 1) + goto err; + + hna_local_add(soft_iface, soft_iface->dev_addr); + + if (vis_init(bat_priv) < 1) + goto err; + + atomic_set(&bat_priv->mesh_state, MESH_ACTIVE); + goto end; + +err: + pr_err("Unable to allocate memory for mesh information structures: " + "out of mem ?\n"); + mesh_free(soft_iface); + return -1; + +end: + return 0; +} + +void mesh_free(struct net_device *soft_iface) +{ + struct bat_priv *bat_priv = netdev_priv(soft_iface); + + atomic_set(&bat_priv->mesh_state, MESH_DEACTIVATING); + + purge_outstanding_packets(bat_priv, NULL); + + vis_quit(bat_priv); + + gw_node_purge(bat_priv); + originator_free(bat_priv); + + hna_local_free(bat_priv); + hna_global_free(bat_priv); + + softif_neigh_purge(bat_priv); + + atomic_set(&bat_priv->mesh_state, MESH_INACTIVE); +} + +void inc_module_count(void) +{ + try_module_get(THIS_MODULE); +} + +void dec_module_count(void) +{ + module_put(THIS_MODULE); +} + +int is_my_mac(uint8_t *addr) +{ + struct batman_if *batman_if; + + rcu_read_lock(); + list_for_each_entry_rcu(batman_if, &if_list, list) { + if (batman_if->if_status != IF_ACTIVE) + continue; + + if (compare_orig(batman_if->net_dev->dev_addr, addr)) { + rcu_read_unlock(); + return 1; + } + } + rcu_read_unlock(); + return 0; + +} + +module_init(batman_init); +module_exit(batman_exit); + +MODULE_LICENSE("GPL"); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_SUPPORTED_DEVICE(DRIVER_DEVICE); +#ifdef REVISION_VERSION +MODULE_VERSION(SOURCE_VERSION "-" REVISION_VERSION); +#else +MODULE_VERSION(SOURCE_VERSION); +#endif diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h new file mode 100644 index 000000000000..d4d9926c2201 --- /dev/null +++ b/net/batman-adv/main.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_MAIN_H_ +#define _NET_BATMAN_ADV_MAIN_H_ + +/* Kernel Programming */ +#define LINUX + +#define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \ + "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>" +#define DRIVER_DESC "B.A.T.M.A.N. advanced" +#define DRIVER_DEVICE "batman-adv" + +#define SOURCE_VERSION "next" + + +/* B.A.T.M.A.N. parameters */ + +#define TQ_MAX_VALUE 255 +#define JITTER 20 +#define TTL 50 /* Time To Live of broadcast messages */ + +#define PURGE_TIMEOUT 200 /* purge originators after time in seconds if no + * valid packet comes in -> TODO: check + * influence on TQ_LOCAL_WINDOW_SIZE */ +#define LOCAL_HNA_TIMEOUT 3600 /* in seconds */ + +#define TQ_LOCAL_WINDOW_SIZE 64 /* sliding packet range of received originator + * messages in squence numbers (should be a + * multiple of our word size) */ +#define TQ_GLOBAL_WINDOW_SIZE 5 +#define TQ_LOCAL_BIDRECT_SEND_MINIMUM 1 +#define TQ_LOCAL_BIDRECT_RECV_MINIMUM 1 +#define TQ_TOTAL_BIDRECT_LIMIT 1 + +#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) + +#define PACKBUFF_SIZE 2000 +#define LOG_BUF_LEN 8192 /* has to be a power of 2 */ + +#define VIS_INTERVAL 5000 /* 5 seconds */ + +/* how much worse secondary interfaces may be to + * to be considered as bonding candidates */ + +#define BONDING_TQ_THRESHOLD 50 + +#define MAX_AGGREGATION_BYTES 512 /* should not be bigger than 512 bytes or + * change the size of + * forw_packet->direct_link_flags */ +#define MAX_AGGREGATION_MS 100 + +#define SOFTIF_NEIGH_TIMEOUT 180000 /* 3 minutes */ + +#define RESET_PROTECTION_MS 30000 +#define EXPECTED_SEQNO_RANGE 65536 +/* don't reset again within 30 seconds */ + +#define MESH_INACTIVE 0 +#define MESH_ACTIVE 1 +#define MESH_DEACTIVATING 2 + +#define BCAST_QUEUE_LEN 256 +#define BATMAN_QUEUE_LEN 256 + +/* + * Debug Messages + */ +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* Append 'batman-adv: ' before + * kernel messages */ + +#define DBG_BATMAN 1 /* all messages related to routing / flooding / + * broadcasting / etc */ +#define DBG_ROUTES 2 /* route or hna added / changed / deleted */ +#define DBG_ALL 3 + +#define LOG_BUF_LEN 8192 /* has to be a power of 2 */ + + +/* + * Vis + */ + +/* #define VIS_SUBCLUSTERS_DISABLED */ + +/* + * Kernel headers + */ + +#include <linux/mutex.h> /* mutex */ +#include <linux/module.h> /* needed by all modules */ +#include <linux/netdevice.h> /* netdevice */ +#include <linux/etherdevice.h> /* ethernet address classifaction */ +#include <linux/if_ether.h> /* ethernet header */ +#include <linux/poll.h> /* poll_table */ +#include <linux/kthread.h> /* kernel threads */ +#include <linux/pkt_sched.h> /* schedule types */ +#include <linux/workqueue.h> /* workqueue */ +#include <linux/slab.h> +#include <net/sock.h> /* struct sock */ +#include <linux/jiffies.h> +#include <linux/seq_file.h> +#include "types.h" + +#ifndef REVISION_VERSION +#define REVISION_VERSION_STR "" +#else +#define REVISION_VERSION_STR " "REVISION_VERSION +#endif + +extern struct list_head if_list; + +extern unsigned char broadcast_addr[]; +extern struct workqueue_struct *bat_event_workqueue; + +int mesh_init(struct net_device *soft_iface); +void mesh_free(struct net_device *soft_iface); +void inc_module_count(void); +void dec_module_count(void); +int is_my_mac(uint8_t *addr); + +#ifdef CONFIG_BATMAN_ADV_DEBUG +int debug_log(struct bat_priv *bat_priv, char *fmt, ...); + +#define bat_dbg(type, bat_priv, fmt, arg...) \ + do { \ + if (atomic_read(&bat_priv->log_level) & type) \ + debug_log(bat_priv, fmt, ## arg); \ + } \ + while (0) +#else /* !CONFIG_BATMAN_ADV_DEBUG */ +static inline void bat_dbg(char type __attribute__((unused)), + struct bat_priv *bat_priv __attribute__((unused)), + char *fmt __attribute__((unused)), ...) +{ +} +#endif + +#define bat_warning(net_dev, fmt, arg...) \ + do { \ + struct net_device *_netdev = (net_dev); \ + struct bat_priv *_batpriv = netdev_priv(_netdev); \ + bat_dbg(DBG_ALL, _batpriv, fmt, ## arg); \ + pr_warning("%s: " fmt, _netdev->name, ## arg); \ + } while (0) +#define bat_info(net_dev, fmt, arg...) \ + do { \ + struct net_device *_netdev = (net_dev); \ + struct bat_priv *_batpriv = netdev_priv(_netdev); \ + bat_dbg(DBG_ALL, _batpriv, fmt, ## arg); \ + pr_info("%s: " fmt, _netdev->name, ## arg); \ + } while (0) +#define bat_err(net_dev, fmt, arg...) \ + do { \ + struct net_device *_netdev = (net_dev); \ + struct bat_priv *_batpriv = netdev_priv(_netdev); \ + bat_dbg(DBG_ALL, _batpriv, fmt, ## arg); \ + pr_err("%s: " fmt, _netdev->name, ## arg); \ + } while (0) + +#endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c new file mode 100644 index 000000000000..6b7fb6b7e6f9 --- /dev/null +++ b/net/batman-adv/originator.c @@ -0,0 +1,564 @@ +/* + * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +/* increase the reference counter for this originator */ + +#include "main.h" +#include "originator.h" +#include "hash.h" +#include "translation-table.h" +#include "routing.h" +#include "gateway_client.h" +#include "hard-interface.h" +#include "unicast.h" +#include "soft-interface.h" + +static void purge_orig(struct work_struct *work); + +static void start_purge_timer(struct bat_priv *bat_priv) +{ + INIT_DELAYED_WORK(&bat_priv->orig_work, purge_orig); + queue_delayed_work(bat_event_workqueue, &bat_priv->orig_work, 1 * HZ); +} + +int originator_init(struct bat_priv *bat_priv) +{ + if (bat_priv->orig_hash) + return 1; + + spin_lock_bh(&bat_priv->orig_hash_lock); + bat_priv->orig_hash = hash_new(1024); + + if (!bat_priv->orig_hash) + goto err; + + spin_unlock_bh(&bat_priv->orig_hash_lock); + start_purge_timer(bat_priv); + return 1; + +err: + spin_unlock_bh(&bat_priv->orig_hash_lock); + return 0; +} + +struct neigh_node * +create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, + uint8_t *neigh, struct batman_if *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct neigh_node *neigh_node; + + bat_dbg(DBG_BATMAN, bat_priv, + "Creating new last-hop neighbor of originator\n"); + + neigh_node = kzalloc(sizeof(struct neigh_node), GFP_ATOMIC); + if (!neigh_node) + return NULL; + + INIT_LIST_HEAD(&neigh_node->list); + + memcpy(neigh_node->addr, neigh, ETH_ALEN); + neigh_node->orig_node = orig_neigh_node; + neigh_node->if_incoming = if_incoming; + + list_add_tail(&neigh_node->list, &orig_node->neigh_list); + return neigh_node; +} + +static void free_orig_node(void *data, void *arg) +{ + struct list_head *list_pos, *list_pos_tmp; + struct neigh_node *neigh_node; + struct orig_node *orig_node = (struct orig_node *)data; + struct bat_priv *bat_priv = (struct bat_priv *)arg; + + /* for all neighbors towards this originator ... */ + list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { + neigh_node = list_entry(list_pos, struct neigh_node, list); + + list_del(list_pos); + kfree(neigh_node); + } + + frag_list_free(&orig_node->frag_list); + hna_global_del_orig(bat_priv, orig_node, "originator timed out"); + + kfree(orig_node->bcast_own); + kfree(orig_node->bcast_own_sum); + kfree(orig_node); +} + +void originator_free(struct bat_priv *bat_priv) +{ + if (!bat_priv->orig_hash) + return; + + cancel_delayed_work_sync(&bat_priv->orig_work); + + spin_lock_bh(&bat_priv->orig_hash_lock); + hash_delete(bat_priv->orig_hash, free_orig_node, bat_priv); + bat_priv->orig_hash = NULL; + spin_unlock_bh(&bat_priv->orig_hash_lock); +} + +/* this function finds or creates an originator entry for the given + * address if it does not exits */ +struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) +{ + struct orig_node *orig_node; + int size; + int hash_added; + + orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, + compare_orig, choose_orig, + addr)); + + if (orig_node) + return orig_node; + + bat_dbg(DBG_BATMAN, bat_priv, + "Creating new originator: %pM\n", addr); + + orig_node = kzalloc(sizeof(struct orig_node), GFP_ATOMIC); + if (!orig_node) + return NULL; + + INIT_LIST_HEAD(&orig_node->neigh_list); + + memcpy(orig_node->orig, addr, ETH_ALEN); + orig_node->router = NULL; + orig_node->hna_buff = NULL; + orig_node->bcast_seqno_reset = jiffies - 1 + - msecs_to_jiffies(RESET_PROTECTION_MS); + orig_node->batman_seqno_reset = jiffies - 1 + - msecs_to_jiffies(RESET_PROTECTION_MS); + + size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS; + + orig_node->bcast_own = kzalloc(size, GFP_ATOMIC); + if (!orig_node->bcast_own) + goto free_orig_node; + + size = bat_priv->num_ifaces * sizeof(uint8_t); + orig_node->bcast_own_sum = kzalloc(size, GFP_ATOMIC); + + INIT_LIST_HEAD(&orig_node->frag_list); + orig_node->last_frag_packet = 0; + + if (!orig_node->bcast_own_sum) + goto free_bcast_own; + + hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig, + orig_node); + if (hash_added < 0) + goto free_bcast_own_sum; + + return orig_node; +free_bcast_own_sum: + kfree(orig_node->bcast_own_sum); +free_bcast_own: + kfree(orig_node->bcast_own); +free_orig_node: + kfree(orig_node); + return NULL; +} + +static bool purge_orig_neighbors(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct neigh_node **best_neigh_node) +{ + struct list_head *list_pos, *list_pos_tmp; + struct neigh_node *neigh_node; + bool neigh_purged = false; + + *best_neigh_node = NULL; + + /* for all neighbors towards this originator ... */ + list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { + neigh_node = list_entry(list_pos, struct neigh_node, list); + + if ((time_after(jiffies, + neigh_node->last_valid + PURGE_TIMEOUT * HZ)) || + (neigh_node->if_incoming->if_status == IF_INACTIVE) || + (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) { + + if (neigh_node->if_incoming->if_status == + IF_TO_BE_REMOVED) + bat_dbg(DBG_BATMAN, bat_priv, + "neighbor purge: originator %pM, " + "neighbor: %pM, iface: %s\n", + orig_node->orig, neigh_node->addr, + neigh_node->if_incoming->net_dev->name); + else + bat_dbg(DBG_BATMAN, bat_priv, + "neighbor timeout: originator %pM, " + "neighbor: %pM, last_valid: %lu\n", + orig_node->orig, neigh_node->addr, + (neigh_node->last_valid / HZ)); + + neigh_purged = true; + list_del(list_pos); + kfree(neigh_node); + } else { + if ((!*best_neigh_node) || + (neigh_node->tq_avg > (*best_neigh_node)->tq_avg)) + *best_neigh_node = neigh_node; + } + } + return neigh_purged; +} + +static bool purge_orig_node(struct bat_priv *bat_priv, + struct orig_node *orig_node) +{ + struct neigh_node *best_neigh_node; + + if (time_after(jiffies, + orig_node->last_valid + 2 * PURGE_TIMEOUT * HZ)) { + + bat_dbg(DBG_BATMAN, bat_priv, + "Originator timeout: originator %pM, last_valid %lu\n", + orig_node->orig, (orig_node->last_valid / HZ)); + return true; + } else { + if (purge_orig_neighbors(bat_priv, orig_node, + &best_neigh_node)) { + update_routes(bat_priv, orig_node, + best_neigh_node, + orig_node->hna_buff, + orig_node->hna_buff_len); + /* update bonding candidates, we could have lost + * some candidates. */ + update_bonding_candidates(bat_priv, orig_node); + } + } + + return false; +} + +static void _purge_orig(struct bat_priv *bat_priv) +{ + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk, *safe; + struct hlist_head *head; + struct element_t *bucket; + struct orig_node *orig_node; + int i; + + if (!hash) + return; + + spin_lock_bh(&bat_priv->orig_hash_lock); + + /* for all origins... */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { + orig_node = bucket->data; + + if (purge_orig_node(bat_priv, orig_node)) { + if (orig_node->gw_flags) + gw_node_delete(bat_priv, orig_node); + hlist_del(walk); + kfree(bucket); + free_orig_node(orig_node, bat_priv); + } + + if (time_after(jiffies, orig_node->last_frag_packet + + msecs_to_jiffies(FRAG_TIMEOUT))) + frag_list_free(&orig_node->frag_list); + } + } + + spin_unlock_bh(&bat_priv->orig_hash_lock); + + gw_node_purge(bat_priv); + gw_election(bat_priv); + + softif_neigh_purge(bat_priv); +} + +static void purge_orig(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct bat_priv *bat_priv = + container_of(delayed_work, struct bat_priv, orig_work); + + _purge_orig(bat_priv); + start_purge_timer(bat_priv); +} + +void purge_orig_ref(struct bat_priv *bat_priv) +{ + _purge_orig(bat_priv); +} + +int orig_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct bat_priv *bat_priv = netdev_priv(net_dev); + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + struct orig_node *orig_node; + struct neigh_node *neigh_node; + int batman_count = 0; + int last_seen_secs; + int last_seen_msecs; + int i; + + if ((!bat_priv->primary_if) || + (bat_priv->primary_if->if_status != IF_ACTIVE)) { + if (!bat_priv->primary_if) + return seq_printf(seq, "BATMAN mesh %s disabled - " + "please specify interfaces to enable it\n", + net_dev->name); + + return seq_printf(seq, "BATMAN mesh %s " + "disabled - primary interface not active\n", + net_dev->name); + } + + seq_printf(seq, "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n", + SOURCE_VERSION, REVISION_VERSION_STR, + bat_priv->primary_if->net_dev->name, + bat_priv->primary_if->net_dev->dev_addr, net_dev->name); + seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", + "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop", + "outgoingIF", "Potential nexthops"); + + spin_lock_bh(&bat_priv->orig_hash_lock); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + orig_node = bucket->data; + + if (!orig_node->router) + continue; + + if (orig_node->router->tq_avg == 0) + continue; + + last_seen_secs = jiffies_to_msecs(jiffies - + orig_node->last_valid) / 1000; + last_seen_msecs = jiffies_to_msecs(jiffies - + orig_node->last_valid) % 1000; + + neigh_node = orig_node->router; + seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:", + orig_node->orig, last_seen_secs, + last_seen_msecs, neigh_node->tq_avg, + neigh_node->addr, + neigh_node->if_incoming->net_dev->name); + + list_for_each_entry(neigh_node, &orig_node->neigh_list, + list) { + seq_printf(seq, " %pM (%3i)", neigh_node->addr, + neigh_node->tq_avg); + } + + seq_printf(seq, "\n"); + batman_count++; + } + } + + spin_unlock_bh(&bat_priv->orig_hash_lock); + + if ((batman_count == 0)) + seq_printf(seq, "No batman nodes in range ...\n"); + + return 0; +} + +static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) +{ + void *data_ptr; + + data_ptr = kmalloc(max_if_num * sizeof(unsigned long) * NUM_WORDS, + GFP_ATOMIC); + if (!data_ptr) { + pr_err("Can't resize orig: out of memory\n"); + return -1; + } + + memcpy(data_ptr, orig_node->bcast_own, + (max_if_num - 1) * sizeof(unsigned long) * NUM_WORDS); + kfree(orig_node->bcast_own); + orig_node->bcast_own = data_ptr; + + data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); + if (!data_ptr) { + pr_err("Can't resize orig: out of memory\n"); + return -1; + } + + memcpy(data_ptr, orig_node->bcast_own_sum, + (max_if_num - 1) * sizeof(uint8_t)); + kfree(orig_node->bcast_own_sum); + orig_node->bcast_own_sum = data_ptr; + + return 0; +} + +int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) +{ + struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + struct orig_node *orig_node; + int i; + + /* resize all orig nodes because orig_node->bcast_own(_sum) depend on + * if_num */ + spin_lock_bh(&bat_priv->orig_hash_lock); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + orig_node = bucket->data; + + if (orig_node_add_if(orig_node, max_if_num) == -1) + goto err; + } + } + + spin_unlock_bh(&bat_priv->orig_hash_lock); + return 0; + +err: + spin_unlock_bh(&bat_priv->orig_hash_lock); + return -ENOMEM; +} + +static int orig_node_del_if(struct orig_node *orig_node, + int max_if_num, int del_if_num) +{ + void *data_ptr = NULL; + int chunk_size; + + /* last interface was removed */ + if (max_if_num == 0) + goto free_bcast_own; + + chunk_size = sizeof(unsigned long) * NUM_WORDS; + data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC); + if (!data_ptr) { + pr_err("Can't resize orig: out of memory\n"); + return -1; + } + + /* copy first part */ + memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size); + + /* copy second part */ + memcpy(data_ptr + del_if_num * chunk_size, + orig_node->bcast_own + ((del_if_num + 1) * chunk_size), + (max_if_num - del_if_num) * chunk_size); + +free_bcast_own: + kfree(orig_node->bcast_own); + orig_node->bcast_own = data_ptr; + + if (max_if_num == 0) + goto free_own_sum; + + data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); + if (!data_ptr) { + pr_err("Can't resize orig: out of memory\n"); + return -1; + } + + memcpy(data_ptr, orig_node->bcast_own_sum, + del_if_num * sizeof(uint8_t)); + + memcpy(data_ptr + del_if_num * sizeof(uint8_t), + orig_node->bcast_own_sum + ((del_if_num + 1) * sizeof(uint8_t)), + (max_if_num - del_if_num) * sizeof(uint8_t)); + +free_own_sum: + kfree(orig_node->bcast_own_sum); + orig_node->bcast_own_sum = data_ptr; + + return 0; +} + +int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) +{ + struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + struct batman_if *batman_if_tmp; + struct orig_node *orig_node; + int i, ret; + + /* resize all orig nodes because orig_node->bcast_own(_sum) depend on + * if_num */ + spin_lock_bh(&bat_priv->orig_hash_lock); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + orig_node = bucket->data; + + ret = orig_node_del_if(orig_node, max_if_num, + batman_if->if_num); + + if (ret == -1) + goto err; + } + } + + /* renumber remaining batman interfaces _inside_ of orig_hash_lock */ + rcu_read_lock(); + list_for_each_entry_rcu(batman_if_tmp, &if_list, list) { + if (batman_if_tmp->if_status == IF_NOT_IN_USE) + continue; + + if (batman_if == batman_if_tmp) + continue; + + if (batman_if->soft_iface != batman_if_tmp->soft_iface) + continue; + + if (batman_if_tmp->if_num > batman_if->if_num) + batman_if_tmp->if_num--; + } + rcu_read_unlock(); + + batman_if->if_num = -1; + spin_unlock_bh(&bat_priv->orig_hash_lock); + return 0; + +err: + spin_unlock_bh(&bat_priv->orig_hash_lock); + return -ENOMEM; +} diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h new file mode 100644 index 000000000000..d474ceb2a4eb --- /dev/null +++ b/net/batman-adv/originator.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ +#define _NET_BATMAN_ADV_ORIGINATOR_H_ + +int originator_init(struct bat_priv *bat_priv); +void originator_free(struct bat_priv *bat_priv); +void purge_orig_ref(struct bat_priv *bat_priv); +struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); +struct neigh_node * +create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, + uint8_t *neigh, struct batman_if *if_incoming); +int orig_seq_print_text(struct seq_file *seq, void *offset); +int orig_hash_add_if(struct batman_if *batman_if, int max_if_num); +int orig_hash_del_if(struct batman_if *batman_if, int max_if_num); + + +/* returns 1 if they are the same originator */ +static inline int compare_orig(void *data1, void *data2) +{ + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); +} + +/* hashfunction to choose an entry in a hash table of given size */ +/* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ +static inline int choose_orig(void *data, int32_t size) +{ + unsigned char *key = data; + uint32_t hash = 0; + size_t i; + + for (i = 0; i < 6; i++) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash % size; +} + +#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h new file mode 100644 index 000000000000..b49fdf70a6d5 --- /dev/null +++ b/net/batman-adv/packet.h @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_PACKET_H_ +#define _NET_BATMAN_ADV_PACKET_H_ + +#define ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ + +#define BAT_PACKET 0x01 +#define BAT_ICMP 0x02 +#define BAT_UNICAST 0x03 +#define BAT_BCAST 0x04 +#define BAT_VIS 0x05 +#define BAT_UNICAST_FRAG 0x06 + +/* this file is included by batctl which needs these defines */ +#define COMPAT_VERSION 12 +#define DIRECTLINK 0x40 +#define VIS_SERVER 0x20 +#define PRIMARIES_FIRST_HOP 0x10 + +/* ICMP message types */ +#define ECHO_REPLY 0 +#define DESTINATION_UNREACHABLE 3 +#define ECHO_REQUEST 8 +#define TTL_EXCEEDED 11 +#define PARAMETER_PROBLEM 12 + +/* vis defines */ +#define VIS_TYPE_SERVER_SYNC 0 +#define VIS_TYPE_CLIENT_UPDATE 1 + +/* fragmentation defines */ +#define UNI_FRAG_HEAD 0x01 + +struct batman_packet { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t flags; /* 0x40: DIRECTLINK flag, 0x20 VIS_SERVER flag... */ + uint8_t tq; + uint32_t seqno; + uint8_t orig[6]; + uint8_t prev_sender[6]; + uint8_t ttl; + uint8_t num_hna; + uint8_t gw_flags; /* flags related to gateway class */ + uint8_t align; +} __attribute__((packed)); + +#define BAT_PACKET_LEN sizeof(struct batman_packet) + +struct icmp_packet { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t msg_type; /* see ICMP message types above */ + uint8_t ttl; + uint8_t dst[6]; + uint8_t orig[6]; + uint16_t seqno; + uint8_t uid; +} __attribute__((packed)); + +#define BAT_RR_LEN 16 + +/* icmp_packet_rr must start with all fields from imcp_packet + * as this is assumed by code that handles ICMP packets */ +struct icmp_packet_rr { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t msg_type; /* see ICMP message types above */ + uint8_t ttl; + uint8_t dst[6]; + uint8_t orig[6]; + uint16_t seqno; + uint8_t uid; + uint8_t rr_cur; + uint8_t rr[BAT_RR_LEN][ETH_ALEN]; +} __attribute__((packed)); + +struct unicast_packet { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t dest[6]; + uint8_t ttl; +} __attribute__((packed)); + +struct unicast_frag_packet { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t dest[6]; + uint8_t ttl; + uint8_t flags; + uint8_t orig[6]; + uint16_t seqno; +} __attribute__((packed)); + +struct bcast_packet { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t orig[6]; + uint8_t ttl; + uint32_t seqno; +} __attribute__((packed)); + +struct vis_packet { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t vis_type; /* which type of vis-participant sent this? */ + uint8_t entries; /* number of entries behind this struct */ + uint32_t seqno; /* sequence number */ + uint8_t ttl; /* TTL */ + uint8_t vis_orig[6]; /* originator that informs about its + * neighbors */ + uint8_t target_orig[6]; /* who should receive this packet */ + uint8_t sender_orig[6]; /* who sent or rebroadcasted this packet */ +} __attribute__((packed)); + +#endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c new file mode 100644 index 000000000000..defd37c9be1f --- /dev/null +++ b/net/batman-adv/ring_buffer.c @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "ring_buffer.h" + +void ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, uint8_t value) +{ + lq_recv[*lq_index] = value; + *lq_index = (*lq_index + 1) % TQ_GLOBAL_WINDOW_SIZE; +} + +uint8_t ring_buffer_avg(uint8_t lq_recv[]) +{ + uint8_t *ptr; + uint16_t count = 0, i = 0, sum = 0; + + ptr = lq_recv; + + while (i < TQ_GLOBAL_WINDOW_SIZE) { + if (*ptr != 0) { + count++; + sum += *ptr; + } + + i++; + ptr++; + } + + if (count == 0) + return 0; + + return (uint8_t)(sum / count); +} diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h new file mode 100644 index 000000000000..6b0cb9aaeba5 --- /dev/null +++ b/net/batman-adv/ring_buffer.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_ +#define _NET_BATMAN_ADV_RING_BUFFER_H_ + +void ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, uint8_t value); +uint8_t ring_buffer_avg(uint8_t lq_recv[]); + +#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c new file mode 100644 index 000000000000..8828eddd3f72 --- /dev/null +++ b/net/batman-adv/routing.c @@ -0,0 +1,1397 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "routing.h" +#include "send.h" +#include "hash.h" +#include "soft-interface.h" +#include "hard-interface.h" +#include "icmp_socket.h" +#include "translation-table.h" +#include "originator.h" +#include "types.h" +#include "ring_buffer.h" +#include "vis.h" +#include "aggregation.h" +#include "gateway_common.h" +#include "gateway_client.h" +#include "unicast.h" + +void slide_own_bcast_window(struct batman_if *batman_if) +{ + struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + struct orig_node *orig_node; + unsigned long *word; + int i; + size_t word_index; + + spin_lock_bh(&bat_priv->orig_hash_lock); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + orig_node = bucket->data; + word_index = batman_if->if_num * NUM_WORDS; + word = &(orig_node->bcast_own[word_index]); + + bit_get_packet(bat_priv, word, 1, 0); + orig_node->bcast_own_sum[batman_if->if_num] = + bit_packet_count(word); + } + } + + spin_unlock_bh(&bat_priv->orig_hash_lock); +} + +static void update_HNA(struct bat_priv *bat_priv, struct orig_node *orig_node, + unsigned char *hna_buff, int hna_buff_len) +{ + if ((hna_buff_len != orig_node->hna_buff_len) || + ((hna_buff_len > 0) && + (orig_node->hna_buff_len > 0) && + (memcmp(orig_node->hna_buff, hna_buff, hna_buff_len) != 0))) { + + if (orig_node->hna_buff_len > 0) + hna_global_del_orig(bat_priv, orig_node, + "originator changed hna"); + + if ((hna_buff_len > 0) && (hna_buff)) + hna_global_add_orig(bat_priv, orig_node, + hna_buff, hna_buff_len); + } +} + +static void update_route(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct neigh_node *neigh_node, + unsigned char *hna_buff, int hna_buff_len) +{ + /* route deleted */ + if ((orig_node->router) && (!neigh_node)) { + + bat_dbg(DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n", + orig_node->orig); + hna_global_del_orig(bat_priv, orig_node, + "originator timed out"); + + /* route added */ + } else if ((!orig_node->router) && (neigh_node)) { + + bat_dbg(DBG_ROUTES, bat_priv, + "Adding route towards: %pM (via %pM)\n", + orig_node->orig, neigh_node->addr); + hna_global_add_orig(bat_priv, orig_node, + hna_buff, hna_buff_len); + + /* route changed */ + } else { + bat_dbg(DBG_ROUTES, bat_priv, + "Changing route towards: %pM " + "(now via %pM - was via %pM)\n", + orig_node->orig, neigh_node->addr, + orig_node->router->addr); + } + + orig_node->router = neigh_node; +} + + +void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, + struct neigh_node *neigh_node, unsigned char *hna_buff, + int hna_buff_len) +{ + + if (!orig_node) + return; + + if (orig_node->router != neigh_node) + update_route(bat_priv, orig_node, neigh_node, + hna_buff, hna_buff_len); + /* may be just HNA changed */ + else + update_HNA(bat_priv, orig_node, hna_buff, hna_buff_len); +} + +static int is_bidirectional_neigh(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + struct batman_packet *batman_packet, + struct batman_if *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + unsigned char total_count; + + if (orig_node == orig_neigh_node) { + list_for_each_entry(tmp_neigh_node, + &orig_node->neigh_list, + list) { + + if (compare_orig(tmp_neigh_node->addr, + orig_neigh_node->orig) && + (tmp_neigh_node->if_incoming == if_incoming)) + neigh_node = tmp_neigh_node; + } + + if (!neigh_node) + neigh_node = create_neighbor(orig_node, + orig_neigh_node, + orig_neigh_node->orig, + if_incoming); + /* create_neighbor failed, return 0 */ + if (!neigh_node) + return 0; + + neigh_node->last_valid = jiffies; + } else { + /* find packet count of corresponding one hop neighbor */ + list_for_each_entry(tmp_neigh_node, + &orig_neigh_node->neigh_list, list) { + + if (compare_orig(tmp_neigh_node->addr, + orig_neigh_node->orig) && + (tmp_neigh_node->if_incoming == if_incoming)) + neigh_node = tmp_neigh_node; + } + + if (!neigh_node) + neigh_node = create_neighbor(orig_neigh_node, + orig_neigh_node, + orig_neigh_node->orig, + if_incoming); + /* create_neighbor failed, return 0 */ + if (!neigh_node) + return 0; + } + + orig_node->last_valid = jiffies; + + /* pay attention to not get a value bigger than 100 % */ + total_count = (orig_neigh_node->bcast_own_sum[if_incoming->if_num] > + neigh_node->real_packet_count ? + neigh_node->real_packet_count : + orig_neigh_node->bcast_own_sum[if_incoming->if_num]); + + /* if we have too few packets (too less data) we set tq_own to zero */ + /* if we receive too few packets it is not considered bidirectional */ + if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || + (neigh_node->real_packet_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) + orig_neigh_node->tq_own = 0; + else + /* neigh_node->real_packet_count is never zero as we + * only purge old information when getting new + * information */ + orig_neigh_node->tq_own = (TQ_MAX_VALUE * total_count) / + neigh_node->real_packet_count; + + /* + * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does + * affect the nearly-symmetric links only a little, but + * punishes asymmetric links more. This will give a value + * between 0 and TQ_MAX_VALUE + */ + orig_neigh_node->tq_asym_penalty = + TQ_MAX_VALUE - + (TQ_MAX_VALUE * + (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count)) / + (TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE); + + batman_packet->tq = ((batman_packet->tq * + orig_neigh_node->tq_own * + orig_neigh_node->tq_asym_penalty) / + (TQ_MAX_VALUE * TQ_MAX_VALUE)); + + bat_dbg(DBG_BATMAN, bat_priv, + "bidirectional: " + "orig = %-15pM neigh = %-15pM => own_bcast = %2i, " + "real recv = %2i, local tq: %3i, asym_penalty: %3i, " + "total tq: %3i\n", + orig_node->orig, orig_neigh_node->orig, total_count, + neigh_node->real_packet_count, orig_neigh_node->tq_own, + orig_neigh_node->tq_asym_penalty, batman_packet->tq); + + /* if link has the minimum required transmission quality + * consider it bidirectional */ + if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) + return 1; + + return 0; +} + +static void update_orig(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct ethhdr *ethhdr, + struct batman_packet *batman_packet, + struct batman_if *if_incoming, + unsigned char *hna_buff, int hna_buff_len, + char is_duplicate) +{ + struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + int tmp_hna_buff_len; + + bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " + "Searching and updating originator entry of received packet\n"); + + list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && + (tmp_neigh_node->if_incoming == if_incoming)) { + neigh_node = tmp_neigh_node; + continue; + } + + if (is_duplicate) + continue; + + ring_buffer_set(tmp_neigh_node->tq_recv, + &tmp_neigh_node->tq_index, 0); + tmp_neigh_node->tq_avg = + ring_buffer_avg(tmp_neigh_node->tq_recv); + } + + if (!neigh_node) { + struct orig_node *orig_tmp; + + orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); + if (!orig_tmp) + return; + + neigh_node = create_neighbor(orig_node, orig_tmp, + ethhdr->h_source, if_incoming); + if (!neigh_node) + return; + } else + bat_dbg(DBG_BATMAN, bat_priv, + "Updating existing last-hop neighbor of originator\n"); + + orig_node->flags = batman_packet->flags; + neigh_node->last_valid = jiffies; + + ring_buffer_set(neigh_node->tq_recv, + &neigh_node->tq_index, + batman_packet->tq); + neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv); + + if (!is_duplicate) { + orig_node->last_ttl = batman_packet->ttl; + neigh_node->last_ttl = batman_packet->ttl; + } + + tmp_hna_buff_len = (hna_buff_len > batman_packet->num_hna * ETH_ALEN ? + batman_packet->num_hna * ETH_ALEN : hna_buff_len); + + /* if this neighbor already is our next hop there is nothing + * to change */ + if (orig_node->router == neigh_node) + goto update_hna; + + /* if this neighbor does not offer a better TQ we won't consider it */ + if ((orig_node->router) && + (orig_node->router->tq_avg > neigh_node->tq_avg)) + goto update_hna; + + /* if the TQ is the same and the link not more symetric we + * won't consider it either */ + if ((orig_node->router) && + ((neigh_node->tq_avg == orig_node->router->tq_avg) && + (orig_node->router->orig_node->bcast_own_sum[if_incoming->if_num] + >= neigh_node->orig_node->bcast_own_sum[if_incoming->if_num]))) + goto update_hna; + + update_routes(bat_priv, orig_node, neigh_node, + hna_buff, tmp_hna_buff_len); + goto update_gw; + +update_hna: + update_routes(bat_priv, orig_node, orig_node->router, + hna_buff, tmp_hna_buff_len); + +update_gw: + if (orig_node->gw_flags != batman_packet->gw_flags) + gw_node_update(bat_priv, orig_node, batman_packet->gw_flags); + + orig_node->gw_flags = batman_packet->gw_flags; + + /* restart gateway selection if fast or late switching was enabled */ + if ((orig_node->gw_flags) && + (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && + (atomic_read(&bat_priv->gw_sel_class) > 2)) + gw_check_election(bat_priv, orig_node); +} + +/* checks whether the host restarted and is in the protection time. + * returns: + * 0 if the packet is to be accepted + * 1 if the packet is to be ignored. + */ +static int window_protected(struct bat_priv *bat_priv, + int32_t seq_num_diff, + unsigned long *last_reset) +{ + if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE) + || (seq_num_diff >= EXPECTED_SEQNO_RANGE)) { + if (time_after(jiffies, *last_reset + + msecs_to_jiffies(RESET_PROTECTION_MS))) { + + *last_reset = jiffies; + bat_dbg(DBG_BATMAN, bat_priv, + "old packet received, start protection\n"); + + return 0; + } else + return 1; + } + return 0; +} + +/* processes a batman packet for all interfaces, adjusts the sequence number and + * finds out whether it is a duplicate. + * returns: + * 1 the packet is a duplicate + * 0 the packet has not yet been received + * -1 the packet is old and has been received while the seqno window + * was protected. Caller should drop it. + */ +static char count_real_packets(struct ethhdr *ethhdr, + struct batman_packet *batman_packet, + struct batman_if *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct orig_node *orig_node; + struct neigh_node *tmp_neigh_node; + char is_duplicate = 0; + int32_t seq_diff; + int need_update = 0; + int set_mark; + + orig_node = get_orig_node(bat_priv, batman_packet->orig); + if (!orig_node) + return 0; + + seq_diff = batman_packet->seqno - orig_node->last_real_seqno; + + /* signalize caller that the packet is to be dropped. */ + if (window_protected(bat_priv, seq_diff, + &orig_node->batman_seqno_reset)) + return -1; + + list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + + is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, + orig_node->last_real_seqno, + batman_packet->seqno); + + if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && + (tmp_neigh_node->if_incoming == if_incoming)) + set_mark = 1; + else + set_mark = 0; + + /* if the window moved, set the update flag. */ + need_update |= bit_get_packet(bat_priv, + tmp_neigh_node->real_bits, + seq_diff, set_mark); + + tmp_neigh_node->real_packet_count = + bit_packet_count(tmp_neigh_node->real_bits); + } + + if (need_update) { + bat_dbg(DBG_BATMAN, bat_priv, + "updating last_seqno: old %d, new %d\n", + orig_node->last_real_seqno, batman_packet->seqno); + orig_node->last_real_seqno = batman_packet->seqno; + } + + return is_duplicate; +} + +/* copy primary address for bonding */ +static void mark_bonding_address(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + struct batman_packet *batman_packet) + +{ + if (batman_packet->flags & PRIMARIES_FIRST_HOP) + memcpy(orig_neigh_node->primary_addr, + orig_node->orig, ETH_ALEN); + + return; +} + +/* mark possible bond.candidates in the neighbor list */ +void update_bonding_candidates(struct bat_priv *bat_priv, + struct orig_node *orig_node) +{ + int candidates; + int interference_candidate; + int best_tq; + struct neigh_node *tmp_neigh_node, *tmp_neigh_node2; + struct neigh_node *first_candidate, *last_candidate; + + /* update the candidates for this originator */ + if (!orig_node->router) { + orig_node->bond.candidates = 0; + return; + } + + best_tq = orig_node->router->tq_avg; + + /* update bond.candidates */ + + candidates = 0; + + /* mark other nodes which also received "PRIMARIES FIRST HOP" packets + * as "bonding partner" */ + + /* first, zero the list */ + list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + tmp_neigh_node->next_bond_candidate = NULL; + } + + first_candidate = NULL; + last_candidate = NULL; + list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + + /* only consider if it has the same primary address ... */ + if (memcmp(orig_node->orig, + tmp_neigh_node->orig_node->primary_addr, + ETH_ALEN) != 0) + continue; + + /* ... and is good enough to be considered */ + if (tmp_neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD) + continue; + + /* check if we have another candidate with the same + * mac address or interface. If we do, we won't + * select this candidate because of possible interference. */ + + interference_candidate = 0; + list_for_each_entry(tmp_neigh_node2, + &orig_node->neigh_list, list) { + + if (tmp_neigh_node2 == tmp_neigh_node) + continue; + + /* we only care if the other candidate is even + * considered as candidate. */ + if (!tmp_neigh_node2->next_bond_candidate) + continue; + + + if ((tmp_neigh_node->if_incoming == + tmp_neigh_node2->if_incoming) + || (memcmp(tmp_neigh_node->addr, + tmp_neigh_node2->addr, ETH_ALEN) == 0)) { + + interference_candidate = 1; + break; + } + } + /* don't care further if it is an interference candidate */ + if (interference_candidate) + continue; + + if (!first_candidate) { + first_candidate = tmp_neigh_node; + tmp_neigh_node->next_bond_candidate = first_candidate; + } else + tmp_neigh_node->next_bond_candidate = last_candidate; + + last_candidate = tmp_neigh_node; + + candidates++; + } + + if (candidates > 0) { + first_candidate->next_bond_candidate = last_candidate; + orig_node->bond.selected = first_candidate; + } + + orig_node->bond.candidates = candidates; +} + +void receive_bat_packet(struct ethhdr *ethhdr, + struct batman_packet *batman_packet, + unsigned char *hna_buff, int hna_buff_len, + struct batman_if *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batman_if *batman_if; + struct orig_node *orig_neigh_node, *orig_node; + char has_directlink_flag; + char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; + char is_broadcast = 0, is_bidirectional, is_single_hop_neigh; + char is_duplicate; + uint32_t if_incoming_seqno; + + /* Silently drop when the batman packet is actually not a + * correct packet. + * + * This might happen if a packet is padded (e.g. Ethernet has a + * minimum frame length of 64 byte) and the aggregation interprets + * it as an additional length. + * + * TODO: A more sane solution would be to have a bit in the + * batman_packet to detect whether the packet is the last + * packet in an aggregation. Here we expect that the padding + * is always zero (or not 0x01) + */ + if (batman_packet->packet_type != BAT_PACKET) + return; + + /* could be changed by schedule_own_packet() */ + if_incoming_seqno = atomic_read(&if_incoming->seqno); + + has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0); + + is_single_hop_neigh = (compare_orig(ethhdr->h_source, + batman_packet->orig) ? 1 : 0); + + bat_dbg(DBG_BATMAN, bat_priv, + "Received BATMAN packet via NB: %pM, IF: %s [%pM] " + "(from OG: %pM, via prev OG: %pM, seqno %d, tq %d, " + "TTL %d, V %d, IDF %d)\n", + ethhdr->h_source, if_incoming->net_dev->name, + if_incoming->net_dev->dev_addr, batman_packet->orig, + batman_packet->prev_sender, batman_packet->seqno, + batman_packet->tq, batman_packet->ttl, batman_packet->version, + has_directlink_flag); + + rcu_read_lock(); + list_for_each_entry_rcu(batman_if, &if_list, list) { + if (batman_if->if_status != IF_ACTIVE) + continue; + + if (batman_if->soft_iface != if_incoming->soft_iface) + continue; + + if (compare_orig(ethhdr->h_source, + batman_if->net_dev->dev_addr)) + is_my_addr = 1; + + if (compare_orig(batman_packet->orig, + batman_if->net_dev->dev_addr)) + is_my_orig = 1; + + if (compare_orig(batman_packet->prev_sender, + batman_if->net_dev->dev_addr)) + is_my_oldorig = 1; + + if (compare_orig(ethhdr->h_source, broadcast_addr)) + is_broadcast = 1; + } + rcu_read_unlock(); + + if (batman_packet->version != COMPAT_VERSION) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: incompatible batman version (%i)\n", + batman_packet->version); + return; + } + + if (is_my_addr) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: received my own broadcast (sender: %pM" + ")\n", + ethhdr->h_source); + return; + } + + if (is_broadcast) { + bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " + "ignoring all packets with broadcast source addr (sender: %pM" + ")\n", ethhdr->h_source); + return; + } + + if (is_my_orig) { + unsigned long *word; + int offset; + + orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); + + if (!orig_neigh_node) + return; + + /* neighbor has to indicate direct link and it has to + * come via the corresponding interface */ + /* if received seqno equals last send seqno save new + * seqno for bidirectional check */ + if (has_directlink_flag && + compare_orig(if_incoming->net_dev->dev_addr, + batman_packet->orig) && + (batman_packet->seqno - if_incoming_seqno + 2 == 0)) { + offset = if_incoming->if_num * NUM_WORDS; + word = &(orig_neigh_node->bcast_own[offset]); + bit_mark(word, 0); + orig_neigh_node->bcast_own_sum[if_incoming->if_num] = + bit_packet_count(word); + } + + bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " + "originator packet from myself (via neighbor)\n"); + return; + } + + if (is_my_oldorig) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: ignoring all rebroadcast echos (sender: " + "%pM)\n", ethhdr->h_source); + return; + } + + orig_node = get_orig_node(bat_priv, batman_packet->orig); + if (!orig_node) + return; + + is_duplicate = count_real_packets(ethhdr, batman_packet, if_incoming); + + if (is_duplicate == -1) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: packet within seqno protection time " + "(sender: %pM)\n", ethhdr->h_source); + return; + } + + if (batman_packet->tq == 0) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: originator packet with tq equal 0\n"); + return; + } + + /* avoid temporary routing loops */ + if ((orig_node->router) && + (orig_node->router->orig_node->router) && + (compare_orig(orig_node->router->addr, + batman_packet->prev_sender)) && + !(compare_orig(batman_packet->orig, batman_packet->prev_sender)) && + (compare_orig(orig_node->router->addr, + orig_node->router->orig_node->router->addr))) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: ignoring all rebroadcast packets that " + "may make me loop (sender: %pM)\n", ethhdr->h_source); + return; + } + + /* if sender is a direct neighbor the sender mac equals + * originator mac */ + orig_neigh_node = (is_single_hop_neigh ? + orig_node : + get_orig_node(bat_priv, ethhdr->h_source)); + if (!orig_neigh_node) + return; + + /* drop packet if sender is not a direct neighbor and if we + * don't route towards it */ + if (!is_single_hop_neigh && (!orig_neigh_node->router)) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: OGM via unknown neighbor!\n"); + return; + } + + is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node, + batman_packet, if_incoming); + + /* update ranking if it is not a duplicate or has the same + * seqno and similar ttl as the non-duplicate */ + if (is_bidirectional && + (!is_duplicate || + ((orig_node->last_real_seqno == batman_packet->seqno) && + (orig_node->last_ttl - 3 <= batman_packet->ttl)))) + update_orig(bat_priv, orig_node, ethhdr, batman_packet, + if_incoming, hna_buff, hna_buff_len, is_duplicate); + + mark_bonding_address(bat_priv, orig_node, + orig_neigh_node, batman_packet); + update_bonding_candidates(bat_priv, orig_node); + + /* is single hop (direct) neighbor */ + if (is_single_hop_neigh) { + + /* mark direct link on incoming interface */ + schedule_forward_packet(orig_node, ethhdr, batman_packet, + 1, hna_buff_len, if_incoming); + + bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " + "rebroadcast neighbor packet with direct link flag\n"); + return; + } + + /* multihop originator */ + if (!is_bidirectional) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: not received via bidirectional link\n"); + return; + } + + if (is_duplicate) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: duplicate packet received\n"); + return; + } + + bat_dbg(DBG_BATMAN, bat_priv, + "Forwarding packet: rebroadcast originator packet\n"); + schedule_forward_packet(orig_node, ethhdr, batman_packet, + 0, hna_buff_len, if_incoming); +} + +int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) +{ + struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct ethhdr *ethhdr; + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, sizeof(struct batman_packet)))) + return NET_RX_DROP; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with broadcast indication but unicast recipient */ + if (!is_broadcast_ether_addr(ethhdr->h_dest)) + return NET_RX_DROP; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + return NET_RX_DROP; + + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, 0) < 0) + return NET_RX_DROP; + + /* keep skb linear */ + if (skb_linearize(skb) < 0) + return NET_RX_DROP; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + spin_lock_bh(&bat_priv->orig_hash_lock); + receive_aggr_bat_packet(ethhdr, + skb->data, + skb_headlen(skb), + batman_if); + spin_unlock_bh(&bat_priv->orig_hash_lock); + + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +static int recv_my_icmp_packet(struct bat_priv *bat_priv, + struct sk_buff *skb, size_t icmp_len) +{ + struct orig_node *orig_node; + struct icmp_packet_rr *icmp_packet; + struct ethhdr *ethhdr; + struct batman_if *batman_if; + int ret; + uint8_t dstaddr[ETH_ALEN]; + + icmp_packet = (struct icmp_packet_rr *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* add data to device queue */ + if (icmp_packet->msg_type != ECHO_REQUEST) { + bat_socket_receive_packet(icmp_packet, icmp_len); + return NET_RX_DROP; + } + + if (!bat_priv->primary_if) + return NET_RX_DROP; + + /* answer echo request (ping) */ + /* get routing information */ + spin_lock_bh(&bat_priv->orig_hash_lock); + orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, + compare_orig, choose_orig, + icmp_packet->orig)); + ret = NET_RX_DROP; + + if ((orig_node) && (orig_node->router)) { + + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); + + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + return NET_RX_DROP; + + icmp_packet = (struct icmp_packet_rr *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + icmp_packet->msg_type = ECHO_REPLY; + icmp_packet->ttl = TTL; + + send_skb_packet(skb, batman_if, dstaddr); + ret = NET_RX_SUCCESS; + + } else + spin_unlock_bh(&bat_priv->orig_hash_lock); + + return ret; +} + +static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, + struct sk_buff *skb, size_t icmp_len) +{ + struct orig_node *orig_node; + struct icmp_packet *icmp_packet; + struct ethhdr *ethhdr; + struct batman_if *batman_if; + int ret; + uint8_t dstaddr[ETH_ALEN]; + + icmp_packet = (struct icmp_packet *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* send TTL exceeded if packet is an echo request (traceroute) */ + if (icmp_packet->msg_type != ECHO_REQUEST) { + pr_debug("Warning - can't forward icmp packet from %pM to " + "%pM: ttl exceeded\n", icmp_packet->orig, + icmp_packet->dst); + return NET_RX_DROP; + } + + if (!bat_priv->primary_if) + return NET_RX_DROP; + + /* get routing information */ + spin_lock_bh(&bat_priv->orig_hash_lock); + orig_node = ((struct orig_node *) + hash_find(bat_priv->orig_hash, compare_orig, choose_orig, + icmp_packet->orig)); + ret = NET_RX_DROP; + + if ((orig_node) && (orig_node->router)) { + + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); + + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + return NET_RX_DROP; + + icmp_packet = (struct icmp_packet *) skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + icmp_packet->msg_type = TTL_EXCEEDED; + icmp_packet->ttl = TTL; + + send_skb_packet(skb, batman_if, dstaddr); + ret = NET_RX_SUCCESS; + + } else + spin_unlock_bh(&bat_priv->orig_hash_lock); + + return ret; +} + + +int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) +{ + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct icmp_packet_rr *icmp_packet; + struct ethhdr *ethhdr; + struct orig_node *orig_node; + struct batman_if *batman_if; + int hdr_size = sizeof(struct icmp_packet); + int ret; + uint8_t dstaddr[ETH_ALEN]; + + /** + * we truncate all incoming icmp packets if they don't match our size + */ + if (skb->len >= sizeof(struct icmp_packet_rr)) + hdr_size = sizeof(struct icmp_packet_rr); + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return NET_RX_DROP; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with unicast indication but broadcast recipient */ + if (is_broadcast_ether_addr(ethhdr->h_dest)) + return NET_RX_DROP; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + return NET_RX_DROP; + + /* not for me */ + if (!is_my_mac(ethhdr->h_dest)) + return NET_RX_DROP; + + icmp_packet = (struct icmp_packet_rr *)skb->data; + + /* add record route information if not full */ + if ((hdr_size == sizeof(struct icmp_packet_rr)) && + (icmp_packet->rr_cur < BAT_RR_LEN)) { + memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]), + ethhdr->h_dest, ETH_ALEN); + icmp_packet->rr_cur++; + } + + /* packet for me */ + if (is_my_mac(icmp_packet->dst)) + return recv_my_icmp_packet(bat_priv, skb, hdr_size); + + /* TTL exceeded */ + if (icmp_packet->ttl < 2) + return recv_icmp_ttl_exceeded(bat_priv, skb, hdr_size); + + ret = NET_RX_DROP; + + /* get routing information */ + spin_lock_bh(&bat_priv->orig_hash_lock); + orig_node = ((struct orig_node *) + hash_find(bat_priv->orig_hash, compare_orig, choose_orig, + icmp_packet->dst)); + + if ((orig_node) && (orig_node->router)) { + + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); + + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + return NET_RX_DROP; + + icmp_packet = (struct icmp_packet_rr *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* decrement ttl */ + icmp_packet->ttl--; + + /* route it */ + send_skb_packet(skb, batman_if, dstaddr); + ret = NET_RX_SUCCESS; + + } else + spin_unlock_bh(&bat_priv->orig_hash_lock); + + return ret; +} + +/* find a suitable router for this originator, and use + * bonding if possible. */ +struct neigh_node *find_router(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct batman_if *recv_if) +{ + struct orig_node *primary_orig_node; + struct orig_node *router_orig; + struct neigh_node *router, *first_candidate, *best_router; + static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; + int bonding_enabled; + + if (!orig_node) + return NULL; + + if (!orig_node->router) + return NULL; + + /* without bonding, the first node should + * always choose the default router. */ + + bonding_enabled = atomic_read(&bat_priv->bonding); + + if ((!recv_if) && (!bonding_enabled)) + return orig_node->router; + + router_orig = orig_node->router->orig_node; + + /* if we have something in the primary_addr, we can search + * for a potential bonding candidate. */ + if (memcmp(router_orig->primary_addr, zero_mac, ETH_ALEN) == 0) + return orig_node->router; + + /* find the orig_node which has the primary interface. might + * even be the same as our router_orig in many cases */ + + if (memcmp(router_orig->primary_addr, + router_orig->orig, ETH_ALEN) == 0) { + primary_orig_node = router_orig; + } else { + primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig, + choose_orig, + router_orig->primary_addr); + + if (!primary_orig_node) + return orig_node->router; + } + + /* with less than 2 candidates, we can't do any + * bonding and prefer the original router. */ + + if (primary_orig_node->bond.candidates < 2) + return orig_node->router; + + + /* all nodes between should choose a candidate which + * is is not on the interface where the packet came + * in. */ + first_candidate = primary_orig_node->bond.selected; + router = first_candidate; + + if (bonding_enabled) { + /* in the bonding case, send the packets in a round + * robin fashion over the remaining interfaces. */ + do { + /* recv_if == NULL on the first node. */ + if (router->if_incoming != recv_if) + break; + + router = router->next_bond_candidate; + } while (router != first_candidate); + + primary_orig_node->bond.selected = router->next_bond_candidate; + + } else { + /* if bonding is disabled, use the best of the + * remaining candidates which are not using + * this interface. */ + best_router = first_candidate; + + do { + /* recv_if == NULL on the first node. */ + if ((router->if_incoming != recv_if) && + (router->tq_avg > best_router->tq_avg)) + best_router = router; + + router = router->next_bond_candidate; + } while (router != first_candidate); + + router = best_router; + } + + return router; +} + +static int check_unicast_packet(struct sk_buff *skb, int hdr_size) +{ + struct ethhdr *ethhdr; + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return -1; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with unicast indication but broadcast recipient */ + if (is_broadcast_ether_addr(ethhdr->h_dest)) + return -1; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + return -1; + + /* not for me */ + if (!is_my_mac(ethhdr->h_dest)) + return -1; + + return 0; +} + +int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, + int hdr_size) +{ + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct orig_node *orig_node; + struct neigh_node *router; + struct batman_if *batman_if; + uint8_t dstaddr[ETH_ALEN]; + struct unicast_packet *unicast_packet; + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + int ret; + struct sk_buff *new_skb; + + unicast_packet = (struct unicast_packet *)skb->data; + + /* TTL exceeded */ + if (unicast_packet->ttl < 2) { + pr_debug("Warning - can't forward unicast packet from %pM to " + "%pM: ttl exceeded\n", ethhdr->h_source, + unicast_packet->dest); + return NET_RX_DROP; + } + + /* get routing information */ + spin_lock_bh(&bat_priv->orig_hash_lock); + orig_node = ((struct orig_node *) + hash_find(bat_priv->orig_hash, compare_orig, choose_orig, + unicast_packet->dest)); + + router = find_router(bat_priv, orig_node, recv_if); + + if (!router) { + spin_unlock_bh(&bat_priv->orig_hash_lock); + return NET_RX_DROP; + } + + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + + batman_if = router->if_incoming; + memcpy(dstaddr, router->addr, ETH_ALEN); + + spin_unlock_bh(&bat_priv->orig_hash_lock); + + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + return NET_RX_DROP; + + unicast_packet = (struct unicast_packet *)skb->data; + + if (unicast_packet->packet_type == BAT_UNICAST && + atomic_read(&bat_priv->fragmentation) && + skb->len > batman_if->net_dev->mtu) + return frag_send_skb(skb, bat_priv, batman_if, + dstaddr); + + if (unicast_packet->packet_type == BAT_UNICAST_FRAG && + 2 * skb->len - hdr_size <= batman_if->net_dev->mtu) { + + ret = frag_reassemble_skb(skb, bat_priv, &new_skb); + + if (ret == NET_RX_DROP) + return NET_RX_DROP; + + /* packet was buffered for late merge */ + if (!new_skb) + return NET_RX_SUCCESS; + + skb = new_skb; + unicast_packet = (struct unicast_packet *)skb->data; + } + + /* decrement ttl */ + unicast_packet->ttl--; + + /* route it */ + send_skb_packet(skb, batman_if, dstaddr); + + return NET_RX_SUCCESS; +} + +int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if) +{ + struct unicast_packet *unicast_packet; + int hdr_size = sizeof(struct unicast_packet); + + if (check_unicast_packet(skb, hdr_size) < 0) + return NET_RX_DROP; + + unicast_packet = (struct unicast_packet *)skb->data; + + /* packet for me */ + if (is_my_mac(unicast_packet->dest)) { + interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); + return NET_RX_SUCCESS; + } + + return route_unicast_packet(skb, recv_if, hdr_size); +} + +int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if) +{ + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct unicast_frag_packet *unicast_packet; + int hdr_size = sizeof(struct unicast_frag_packet); + struct sk_buff *new_skb = NULL; + int ret; + + if (check_unicast_packet(skb, hdr_size) < 0) + return NET_RX_DROP; + + unicast_packet = (struct unicast_frag_packet *)skb->data; + + /* packet for me */ + if (is_my_mac(unicast_packet->dest)) { + + ret = frag_reassemble_skb(skb, bat_priv, &new_skb); + + if (ret == NET_RX_DROP) + return NET_RX_DROP; + + /* packet was buffered for late merge */ + if (!new_skb) + return NET_RX_SUCCESS; + + interface_rx(recv_if->soft_iface, new_skb, recv_if, + sizeof(struct unicast_packet)); + return NET_RX_SUCCESS; + } + + return route_unicast_packet(skb, recv_if, hdr_size); +} + + +int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) +{ + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct orig_node *orig_node; + struct bcast_packet *bcast_packet; + struct ethhdr *ethhdr; + int hdr_size = sizeof(struct bcast_packet); + int32_t seq_diff; + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return NET_RX_DROP; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with broadcast indication but unicast recipient */ + if (!is_broadcast_ether_addr(ethhdr->h_dest)) + return NET_RX_DROP; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + return NET_RX_DROP; + + /* ignore broadcasts sent by myself */ + if (is_my_mac(ethhdr->h_source)) + return NET_RX_DROP; + + bcast_packet = (struct bcast_packet *)skb->data; + + /* ignore broadcasts originated by myself */ + if (is_my_mac(bcast_packet->orig)) + return NET_RX_DROP; + + if (bcast_packet->ttl < 2) + return NET_RX_DROP; + + spin_lock_bh(&bat_priv->orig_hash_lock); + orig_node = ((struct orig_node *) + hash_find(bat_priv->orig_hash, compare_orig, choose_orig, + bcast_packet->orig)); + + if (!orig_node) { + spin_unlock_bh(&bat_priv->orig_hash_lock); + return NET_RX_DROP; + } + + /* check whether the packet is a duplicate */ + if (get_bit_status(orig_node->bcast_bits, + orig_node->last_bcast_seqno, + ntohl(bcast_packet->seqno))) { + spin_unlock_bh(&bat_priv->orig_hash_lock); + return NET_RX_DROP; + } + + seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno; + + /* check whether the packet is old and the host just restarted. */ + if (window_protected(bat_priv, seq_diff, + &orig_node->bcast_seqno_reset)) { + spin_unlock_bh(&bat_priv->orig_hash_lock); + return NET_RX_DROP; + } + + /* mark broadcast in flood history, update window position + * if required. */ + if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1)) + orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno); + + spin_unlock_bh(&bat_priv->orig_hash_lock); + /* rebroadcast packet */ + add_bcast_packet_to_list(bat_priv, skb); + + /* broadcast for me */ + interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); + + return NET_RX_SUCCESS; +} + +int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if) +{ + struct vis_packet *vis_packet; + struct ethhdr *ethhdr; + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + int hdr_size = sizeof(struct vis_packet); + + /* keep skb linear */ + if (skb_linearize(skb) < 0) + return NET_RX_DROP; + + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return NET_RX_DROP; + + vis_packet = (struct vis_packet *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* not for me */ + if (!is_my_mac(ethhdr->h_dest)) + return NET_RX_DROP; + + /* ignore own packets */ + if (is_my_mac(vis_packet->vis_orig)) + return NET_RX_DROP; + + if (is_my_mac(vis_packet->sender_orig)) + return NET_RX_DROP; + + switch (vis_packet->vis_type) { + case VIS_TYPE_SERVER_SYNC: + receive_server_sync_packet(bat_priv, vis_packet, + skb_headlen(skb)); + break; + + case VIS_TYPE_CLIENT_UPDATE: + receive_client_update_packet(bat_priv, vis_packet, + skb_headlen(skb)); + break; + + default: /* ignore unknown packet */ + break; + } + + /* We take a copy of the data in the packet, so we should + always free the skbuf. */ + return NET_RX_DROP; +} diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h new file mode 100644 index 000000000000..f108f230bfdb --- /dev/null +++ b/net/batman-adv/routing.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_ROUTING_H_ +#define _NET_BATMAN_ADV_ROUTING_H_ + +#include "types.h" + +void slide_own_bcast_window(struct batman_if *batman_if); +void receive_bat_packet(struct ethhdr *ethhdr, + struct batman_packet *batman_packet, + unsigned char *hna_buff, int hna_buff_len, + struct batman_if *if_incoming); +void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, + struct neigh_node *neigh_node, unsigned char *hna_buff, + int hna_buff_len); +int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, + int hdr_size); +int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if); +int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if); +int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if); +int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if); +int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if); +int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if); +struct neigh_node *find_router(struct bat_priv *bat_priv, + struct orig_node *orig_node, struct batman_if *recv_if); +void update_bonding_candidates(struct bat_priv *bat_priv, + struct orig_node *orig_node); + +#endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c new file mode 100644 index 000000000000..b89b9f7709ae --- /dev/null +++ b/net/batman-adv/send.c @@ -0,0 +1,585 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "send.h" +#include "routing.h" +#include "translation-table.h" +#include "soft-interface.h" +#include "hard-interface.h" +#include "types.h" +#include "vis.h" +#include "aggregation.h" +#include "gateway_common.h" +#include "originator.h" + +static void send_outstanding_bcast_packet(struct work_struct *work); + +/* apply hop penalty for a normal link */ +static uint8_t hop_penalty(const uint8_t tq, struct bat_priv *bat_priv) +{ + int hop_penalty = atomic_read(&bat_priv->hop_penalty); + return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE); +} + +/* when do we schedule our own packet to be sent */ +static unsigned long own_send_time(struct bat_priv *bat_priv) +{ + return jiffies + msecs_to_jiffies( + atomic_read(&bat_priv->orig_interval) - + JITTER + (random32() % 2*JITTER)); +} + +/* when do we schedule a forwarded packet to be sent */ +static unsigned long forward_send_time(struct bat_priv *bat_priv) +{ + return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); +} + +/* send out an already prepared packet to the given address via the + * specified batman interface */ +int send_skb_packet(struct sk_buff *skb, + struct batman_if *batman_if, + uint8_t *dst_addr) +{ + struct ethhdr *ethhdr; + + if (batman_if->if_status != IF_ACTIVE) + goto send_skb_err; + + if (unlikely(!batman_if->net_dev)) + goto send_skb_err; + + if (!(batman_if->net_dev->flags & IFF_UP)) { + pr_warning("Interface %s is not up - can't send packet via " + "that interface!\n", batman_if->net_dev->name); + goto send_skb_err; + } + + /* push to the ethernet header. */ + if (my_skb_head_push(skb, sizeof(struct ethhdr)) < 0) + goto send_skb_err; + + skb_reset_mac_header(skb); + + ethhdr = (struct ethhdr *) skb_mac_header(skb); + memcpy(ethhdr->h_source, batman_if->net_dev->dev_addr, ETH_ALEN); + memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); + ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); + + skb_set_network_header(skb, ETH_HLEN); + skb->priority = TC_PRIO_CONTROL; + skb->protocol = __constant_htons(ETH_P_BATMAN); + + skb->dev = batman_if->net_dev; + + /* dev_queue_xmit() returns a negative result on error. However on + * congestion and traffic shaping, it drops and returns NET_XMIT_DROP + * (which is > 0). This will not be treated as an error. */ + + return dev_queue_xmit(skb); +send_skb_err: + kfree_skb(skb); + return NET_XMIT_DROP; +} + +/* Send a packet to a given interface */ +static void send_packet_to_if(struct forw_packet *forw_packet, + struct batman_if *batman_if) +{ + struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + char *fwd_str; + uint8_t packet_num; + int16_t buff_pos; + struct batman_packet *batman_packet; + struct sk_buff *skb; + + if (batman_if->if_status != IF_ACTIVE) + return; + + packet_num = 0; + buff_pos = 0; + batman_packet = (struct batman_packet *)forw_packet->skb->data; + + /* adjust all flags and log packets */ + while (aggregated_packet(buff_pos, + forw_packet->packet_len, + batman_packet->num_hna)) { + + /* we might have aggregated direct link packets with an + * ordinary base packet */ + if ((forw_packet->direct_link_flags & (1 << packet_num)) && + (forw_packet->if_incoming == batman_if)) + batman_packet->flags |= DIRECTLINK; + else + batman_packet->flags &= ~DIRECTLINK; + + fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ? + "Sending own" : + "Forwarding")); + bat_dbg(DBG_BATMAN, bat_priv, + "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d," + " IDF %s) on interface %s [%pM]\n", + fwd_str, (packet_num > 0 ? "aggregated " : ""), + batman_packet->orig, ntohl(batman_packet->seqno), + batman_packet->tq, batman_packet->ttl, + (batman_packet->flags & DIRECTLINK ? + "on" : "off"), + batman_if->net_dev->name, batman_if->net_dev->dev_addr); + + buff_pos += sizeof(struct batman_packet) + + (batman_packet->num_hna * ETH_ALEN); + packet_num++; + batman_packet = (struct batman_packet *) + (forw_packet->skb->data + buff_pos); + } + + /* create clone because function is called more than once */ + skb = skb_clone(forw_packet->skb, GFP_ATOMIC); + if (skb) + send_skb_packet(skb, batman_if, broadcast_addr); +} + +/* send a batman packet */ +static void send_packet(struct forw_packet *forw_packet) +{ + struct batman_if *batman_if; + struct net_device *soft_iface; + struct bat_priv *bat_priv; + struct batman_packet *batman_packet = + (struct batman_packet *)(forw_packet->skb->data); + unsigned char directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0); + + if (!forw_packet->if_incoming) { + pr_err("Error - can't forward packet: incoming iface not " + "specified\n"); + return; + } + + soft_iface = forw_packet->if_incoming->soft_iface; + bat_priv = netdev_priv(soft_iface); + + if (forw_packet->if_incoming->if_status != IF_ACTIVE) + return; + + /* multihomed peer assumed */ + /* non-primary OGMs are only broadcasted on their interface */ + if ((directlink && (batman_packet->ttl == 1)) || + (forw_packet->own && (forw_packet->if_incoming->if_num > 0))) { + + /* FIXME: what about aggregated packets ? */ + bat_dbg(DBG_BATMAN, bat_priv, + "%s packet (originator %pM, seqno %d, TTL %d) " + "on interface %s [%pM]\n", + (forw_packet->own ? "Sending own" : "Forwarding"), + batman_packet->orig, ntohl(batman_packet->seqno), + batman_packet->ttl, + forw_packet->if_incoming->net_dev->name, + forw_packet->if_incoming->net_dev->dev_addr); + + /* skb is only used once and than forw_packet is free'd */ + send_skb_packet(forw_packet->skb, forw_packet->if_incoming, + broadcast_addr); + forw_packet->skb = NULL; + + return; + } + + /* broadcast on every interface */ + rcu_read_lock(); + list_for_each_entry_rcu(batman_if, &if_list, list) { + if (batman_if->soft_iface != soft_iface) + continue; + + send_packet_to_if(forw_packet, batman_if); + } + rcu_read_unlock(); +} + +static void rebuild_batman_packet(struct bat_priv *bat_priv, + struct batman_if *batman_if) +{ + int new_len; + unsigned char *new_buff; + struct batman_packet *batman_packet; + + new_len = sizeof(struct batman_packet) + + (bat_priv->num_local_hna * ETH_ALEN); + new_buff = kmalloc(new_len, GFP_ATOMIC); + + /* keep old buffer if kmalloc should fail */ + if (new_buff) { + memcpy(new_buff, batman_if->packet_buff, + sizeof(struct batman_packet)); + batman_packet = (struct batman_packet *)new_buff; + + batman_packet->num_hna = hna_local_fill_buffer(bat_priv, + new_buff + sizeof(struct batman_packet), + new_len - sizeof(struct batman_packet)); + + kfree(batman_if->packet_buff); + batman_if->packet_buff = new_buff; + batman_if->packet_len = new_len; + } +} + +void schedule_own_packet(struct batman_if *batman_if) +{ + struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + unsigned long send_time; + struct batman_packet *batman_packet; + int vis_server; + + if ((batman_if->if_status == IF_NOT_IN_USE) || + (batman_if->if_status == IF_TO_BE_REMOVED)) + return; + + vis_server = atomic_read(&bat_priv->vis_mode); + + /** + * the interface gets activated here to avoid race conditions between + * the moment of activating the interface in + * hardif_activate_interface() where the originator mac is set and + * outdated packets (especially uninitialized mac addresses) in the + * packet queue + */ + if (batman_if->if_status == IF_TO_BE_ACTIVATED) + batman_if->if_status = IF_ACTIVE; + + /* if local hna has changed and interface is a primary interface */ + if ((atomic_read(&bat_priv->hna_local_changed)) && + (batman_if == bat_priv->primary_if)) + rebuild_batman_packet(bat_priv, batman_if); + + /** + * NOTE: packet_buff might just have been re-allocated in + * rebuild_batman_packet() + */ + batman_packet = (struct batman_packet *)batman_if->packet_buff; + + /* change sequence number to network order */ + batman_packet->seqno = + htonl((uint32_t)atomic_read(&batman_if->seqno)); + + if (vis_server == VIS_TYPE_SERVER_SYNC) + batman_packet->flags |= VIS_SERVER; + else + batman_packet->flags &= ~VIS_SERVER; + + if ((batman_if == bat_priv->primary_if) && + (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) + batman_packet->gw_flags = + (uint8_t)atomic_read(&bat_priv->gw_bandwidth); + else + batman_packet->gw_flags = 0; + + atomic_inc(&batman_if->seqno); + + slide_own_bcast_window(batman_if); + send_time = own_send_time(bat_priv); + add_bat_packet_to_list(bat_priv, + batman_if->packet_buff, + batman_if->packet_len, + batman_if, 1, send_time); +} + +void schedule_forward_packet(struct orig_node *orig_node, + struct ethhdr *ethhdr, + struct batman_packet *batman_packet, + uint8_t directlink, int hna_buff_len, + struct batman_if *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + unsigned char in_tq, in_ttl, tq_avg = 0; + unsigned long send_time; + + if (batman_packet->ttl <= 1) { + bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n"); + return; + } + + in_tq = batman_packet->tq; + in_ttl = batman_packet->ttl; + + batman_packet->ttl--; + memcpy(batman_packet->prev_sender, ethhdr->h_source, ETH_ALEN); + + /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast + * of our best tq value */ + if ((orig_node->router) && (orig_node->router->tq_avg != 0)) { + + /* rebroadcast ogm of best ranking neighbor as is */ + if (!compare_orig(orig_node->router->addr, ethhdr->h_source)) { + batman_packet->tq = orig_node->router->tq_avg; + + if (orig_node->router->last_ttl) + batman_packet->ttl = orig_node->router->last_ttl + - 1; + } + + tq_avg = orig_node->router->tq_avg; + } + + /* apply hop penalty */ + batman_packet->tq = hop_penalty(batman_packet->tq, bat_priv); + + bat_dbg(DBG_BATMAN, bat_priv, + "Forwarding packet: tq_orig: %i, tq_avg: %i, " + "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n", + in_tq, tq_avg, batman_packet->tq, in_ttl - 1, + batman_packet->ttl); + + batman_packet->seqno = htonl(batman_packet->seqno); + + /* switch of primaries first hop flag when forwarding */ + batman_packet->flags &= ~PRIMARIES_FIRST_HOP; + if (directlink) + batman_packet->flags |= DIRECTLINK; + else + batman_packet->flags &= ~DIRECTLINK; + + send_time = forward_send_time(bat_priv); + add_bat_packet_to_list(bat_priv, + (unsigned char *)batman_packet, + sizeof(struct batman_packet) + hna_buff_len, + if_incoming, 0, send_time); +} + +static void forw_packet_free(struct forw_packet *forw_packet) +{ + if (forw_packet->skb) + kfree_skb(forw_packet->skb); + kfree(forw_packet); +} + +static void _add_bcast_packet_to_list(struct bat_priv *bat_priv, + struct forw_packet *forw_packet, + unsigned long send_time) +{ + INIT_HLIST_NODE(&forw_packet->list); + + /* add new packet to packet list */ + spin_lock_bh(&bat_priv->forw_bcast_list_lock); + hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list); + spin_unlock_bh(&bat_priv->forw_bcast_list_lock); + + /* start timer for this packet */ + INIT_DELAYED_WORK(&forw_packet->delayed_work, + send_outstanding_bcast_packet); + queue_delayed_work(bat_event_workqueue, &forw_packet->delayed_work, + send_time); +} + +#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) +/* add a broadcast packet to the queue and setup timers. broadcast packets + * are sent multiple times to increase probability for beeing received. + * + * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on + * errors. + * + * The skb is not consumed, so the caller should make sure that the + * skb is freed. */ +int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb) +{ + struct forw_packet *forw_packet; + struct bcast_packet *bcast_packet; + + if (!atomic_dec_not_zero(&bat_priv->bcast_queue_left)) { + bat_dbg(DBG_BATMAN, bat_priv, "bcast packet queue full\n"); + goto out; + } + + if (!bat_priv->primary_if) + goto out; + + forw_packet = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC); + + if (!forw_packet) + goto out_and_inc; + + skb = skb_copy(skb, GFP_ATOMIC); + if (!skb) + goto packet_free; + + /* as we have a copy now, it is safe to decrease the TTL */ + bcast_packet = (struct bcast_packet *)skb->data; + bcast_packet->ttl--; + + skb_reset_mac_header(skb); + + forw_packet->skb = skb; + forw_packet->if_incoming = bat_priv->primary_if; + + /* how often did we send the bcast packet ? */ + forw_packet->num_packets = 0; + + _add_bcast_packet_to_list(bat_priv, forw_packet, 1); + return NETDEV_TX_OK; + +packet_free: + kfree(forw_packet); +out_and_inc: + atomic_inc(&bat_priv->bcast_queue_left); +out: + return NETDEV_TX_BUSY; +} + +static void send_outstanding_bcast_packet(struct work_struct *work) +{ + struct batman_if *batman_if; + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct forw_packet *forw_packet = + container_of(delayed_work, struct forw_packet, delayed_work); + struct sk_buff *skb1; + struct net_device *soft_iface = forw_packet->if_incoming->soft_iface; + struct bat_priv *bat_priv = netdev_priv(soft_iface); + + spin_lock_bh(&bat_priv->forw_bcast_list_lock); + hlist_del(&forw_packet->list); + spin_unlock_bh(&bat_priv->forw_bcast_list_lock); + + if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING) + goto out; + + /* rebroadcast packet */ + rcu_read_lock(); + list_for_each_entry_rcu(batman_if, &if_list, list) { + if (batman_if->soft_iface != soft_iface) + continue; + + /* send a copy of the saved skb */ + skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); + if (skb1) + send_skb_packet(skb1, batman_if, broadcast_addr); + } + rcu_read_unlock(); + + forw_packet->num_packets++; + + /* if we still have some more bcasts to send */ + if (forw_packet->num_packets < 3) { + _add_bcast_packet_to_list(bat_priv, forw_packet, + ((5 * HZ) / 1000)); + return; + } + +out: + forw_packet_free(forw_packet); + atomic_inc(&bat_priv->bcast_queue_left); +} + +void send_outstanding_bat_packet(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct forw_packet *forw_packet = + container_of(delayed_work, struct forw_packet, delayed_work); + struct bat_priv *bat_priv; + + bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); + spin_lock_bh(&bat_priv->forw_bat_list_lock); + hlist_del(&forw_packet->list); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING) + goto out; + + send_packet(forw_packet); + + /** + * we have to have at least one packet in the queue + * to determine the queues wake up time unless we are + * shutting down + */ + if (forw_packet->own) + schedule_own_packet(forw_packet->if_incoming); + +out: + /* don't count own packet */ + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + + forw_packet_free(forw_packet); +} + +void purge_outstanding_packets(struct bat_priv *bat_priv, + struct batman_if *batman_if) +{ + struct forw_packet *forw_packet; + struct hlist_node *tmp_node, *safe_tmp_node; + + if (batman_if) + bat_dbg(DBG_BATMAN, bat_priv, + "purge_outstanding_packets(): %s\n", + batman_if->net_dev->name); + else + bat_dbg(DBG_BATMAN, bat_priv, + "purge_outstanding_packets()\n"); + + /* free bcast list */ + spin_lock_bh(&bat_priv->forw_bcast_list_lock); + hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + &bat_priv->forw_bcast_list, list) { + + /** + * if purge_outstanding_packets() was called with an argmument + * we delete only packets belonging to the given interface + */ + if ((batman_if) && + (forw_packet->if_incoming != batman_if)) + continue; + + spin_unlock_bh(&bat_priv->forw_bcast_list_lock); + + /** + * send_outstanding_bcast_packet() will lock the list to + * delete the item from the list + */ + cancel_delayed_work_sync(&forw_packet->delayed_work); + spin_lock_bh(&bat_priv->forw_bcast_list_lock); + } + spin_unlock_bh(&bat_priv->forw_bcast_list_lock); + + /* free batman packet list */ + spin_lock_bh(&bat_priv->forw_bat_list_lock); + hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + &bat_priv->forw_bat_list, list) { + + /** + * if purge_outstanding_packets() was called with an argmument + * we delete only packets belonging to the given interface + */ + if ((batman_if) && + (forw_packet->if_incoming != batman_if)) + continue; + + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + /** + * send_outstanding_bat_packet() will lock the list to + * delete the item from the list + */ + cancel_delayed_work_sync(&forw_packet->delayed_work); + spin_lock_bh(&bat_priv->forw_bat_list_lock); + } + spin_unlock_bh(&bat_priv->forw_bat_list_lock); +} diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h new file mode 100644 index 000000000000..c4cefa8e4f85 --- /dev/null +++ b/net/batman-adv/send.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_SEND_H_ +#define _NET_BATMAN_ADV_SEND_H_ + +#include "types.h" + +int send_skb_packet(struct sk_buff *skb, + struct batman_if *batman_if, + uint8_t *dst_addr); +void schedule_own_packet(struct batman_if *batman_if); +void schedule_forward_packet(struct orig_node *orig_node, + struct ethhdr *ethhdr, + struct batman_packet *batman_packet, + uint8_t directlink, int hna_buff_len, + struct batman_if *if_outgoing); +int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb); +void send_outstanding_bat_packet(struct work_struct *work); +void purge_outstanding_packets(struct bat_priv *bat_priv, + struct batman_if *batman_if); + +#endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c new file mode 100644 index 000000000000..e89ede192ed0 --- /dev/null +++ b/net/batman-adv/soft-interface.c @@ -0,0 +1,697 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "soft-interface.h" +#include "hard-interface.h" +#include "routing.h" +#include "send.h" +#include "bat_debugfs.h" +#include "translation-table.h" +#include "types.h" +#include "hash.h" +#include "gateway_common.h" +#include "gateway_client.h" +#include "send.h" +#include "bat_sysfs.h" +#include <linux/slab.h> +#include <linux/ethtool.h> +#include <linux/etherdevice.h> +#include <linux/if_vlan.h> +#include "unicast.h" +#include "routing.h" + + +static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); +static void bat_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info); +static u32 bat_get_msglevel(struct net_device *dev); +static void bat_set_msglevel(struct net_device *dev, u32 value); +static u32 bat_get_link(struct net_device *dev); +static u32 bat_get_rx_csum(struct net_device *dev); +static int bat_set_rx_csum(struct net_device *dev, u32 data); + +static const struct ethtool_ops bat_ethtool_ops = { + .get_settings = bat_get_settings, + .get_drvinfo = bat_get_drvinfo, + .get_msglevel = bat_get_msglevel, + .set_msglevel = bat_set_msglevel, + .get_link = bat_get_link, + .get_rx_csum = bat_get_rx_csum, + .set_rx_csum = bat_set_rx_csum +}; + +int my_skb_head_push(struct sk_buff *skb, unsigned int len) +{ + int result; + + /** + * TODO: We must check if we can release all references to non-payload + * data using skb_header_release in our skbs to allow skb_cow_header to + * work optimally. This means that those skbs are not allowed to read + * or write any data which is before the current position of skb->data + * after that call and thus allow other skbs with the same data buffer + * to write freely in that area. + */ + result = skb_cow_head(skb, len); + if (result < 0) + return result; + + skb_push(skb, len); + return 0; +} + +static void softif_neigh_free_ref(struct kref *refcount) +{ + struct softif_neigh *softif_neigh; + + softif_neigh = container_of(refcount, struct softif_neigh, refcount); + kfree(softif_neigh); +} + +static void softif_neigh_free_rcu(struct rcu_head *rcu) +{ + struct softif_neigh *softif_neigh; + + softif_neigh = container_of(rcu, struct softif_neigh, rcu); + kref_put(&softif_neigh->refcount, softif_neigh_free_ref); +} + +void softif_neigh_purge(struct bat_priv *bat_priv) +{ + struct softif_neigh *softif_neigh, *softif_neigh_tmp; + struct hlist_node *node, *node_tmp; + + spin_lock_bh(&bat_priv->softif_neigh_lock); + + hlist_for_each_entry_safe(softif_neigh, node, node_tmp, + &bat_priv->softif_neigh_list, list) { + + if ((!time_after(jiffies, softif_neigh->last_seen + + msecs_to_jiffies(SOFTIF_NEIGH_TIMEOUT))) && + (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE)) + continue; + + hlist_del_rcu(&softif_neigh->list); + + if (bat_priv->softif_neigh == softif_neigh) { + bat_dbg(DBG_ROUTES, bat_priv, + "Current mesh exit point '%pM' vanished " + "(vid: %d).\n", + softif_neigh->addr, softif_neigh->vid); + softif_neigh_tmp = bat_priv->softif_neigh; + bat_priv->softif_neigh = NULL; + kref_put(&softif_neigh_tmp->refcount, + softif_neigh_free_ref); + } + + call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu); + } + + spin_unlock_bh(&bat_priv->softif_neigh_lock); +} + +static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, + uint8_t *addr, short vid) +{ + struct softif_neigh *softif_neigh; + struct hlist_node *node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(softif_neigh, node, + &bat_priv->softif_neigh_list, list) { + if (memcmp(softif_neigh->addr, addr, ETH_ALEN) != 0) + continue; + + if (softif_neigh->vid != vid) + continue; + + softif_neigh->last_seen = jiffies; + goto found; + } + + softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC); + if (!softif_neigh) + goto out; + + memcpy(softif_neigh->addr, addr, ETH_ALEN); + softif_neigh->vid = vid; + softif_neigh->last_seen = jiffies; + kref_init(&softif_neigh->refcount); + + INIT_HLIST_NODE(&softif_neigh->list); + spin_lock_bh(&bat_priv->softif_neigh_lock); + hlist_add_head_rcu(&softif_neigh->list, &bat_priv->softif_neigh_list); + spin_unlock_bh(&bat_priv->softif_neigh_lock); + +found: + kref_get(&softif_neigh->refcount); +out: + rcu_read_unlock(); + return softif_neigh; +} + +int softif_neigh_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct bat_priv *bat_priv = netdev_priv(net_dev); + struct softif_neigh *softif_neigh; + struct hlist_node *node; + size_t buf_size, pos; + char *buff; + + if (!bat_priv->primary_if) { + return seq_printf(seq, "BATMAN mesh %s disabled - " + "please specify interfaces to enable it\n", + net_dev->name); + } + + seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name); + + buf_size = 1; + /* Estimate length for: " xx:xx:xx:xx:xx:xx\n" */ + rcu_read_lock(); + hlist_for_each_entry_rcu(softif_neigh, node, + &bat_priv->softif_neigh_list, list) + buf_size += 30; + rcu_read_unlock(); + + buff = kmalloc(buf_size, GFP_ATOMIC); + if (!buff) + return -ENOMEM; + + buff[0] = '\0'; + pos = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(softif_neigh, node, + &bat_priv->softif_neigh_list, list) { + pos += snprintf(buff + pos, 31, "%s %pM (vid: %d)\n", + bat_priv->softif_neigh == softif_neigh + ? "=>" : " ", softif_neigh->addr, + softif_neigh->vid); + } + rcu_read_unlock(); + + seq_printf(seq, "%s", buff); + kfree(buff); + return 0; +} + +static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev, + short vid) +{ + struct bat_priv *bat_priv = netdev_priv(dev); + struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + struct batman_packet *batman_packet; + struct softif_neigh *softif_neigh, *softif_neigh_tmp; + + if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) + batman_packet = (struct batman_packet *) + (skb->data + ETH_HLEN + VLAN_HLEN); + else + batman_packet = (struct batman_packet *)(skb->data + ETH_HLEN); + + if (batman_packet->version != COMPAT_VERSION) + goto err; + + if (batman_packet->packet_type != BAT_PACKET) + goto err; + + if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) + goto err; + + if (is_my_mac(batman_packet->orig)) + goto err; + + softif_neigh = softif_neigh_get(bat_priv, batman_packet->orig, vid); + + if (!softif_neigh) + goto err; + + if (bat_priv->softif_neigh == softif_neigh) + goto out; + + /* we got a neighbor but its mac is 'bigger' than ours */ + if (memcmp(bat_priv->primary_if->net_dev->dev_addr, + softif_neigh->addr, ETH_ALEN) < 0) + goto out; + + /* switch to new 'smallest neighbor' */ + if ((bat_priv->softif_neigh) && + (memcmp(softif_neigh->addr, bat_priv->softif_neigh->addr, + ETH_ALEN) < 0)) { + bat_dbg(DBG_ROUTES, bat_priv, + "Changing mesh exit point from %pM (vid: %d) " + "to %pM (vid: %d).\n", + bat_priv->softif_neigh->addr, + bat_priv->softif_neigh->vid, + softif_neigh->addr, softif_neigh->vid); + softif_neigh_tmp = bat_priv->softif_neigh; + bat_priv->softif_neigh = softif_neigh; + kref_put(&softif_neigh_tmp->refcount, softif_neigh_free_ref); + /* we need to hold the additional reference */ + goto err; + } + + /* close own batX device and use softif_neigh as exit node */ + if ((!bat_priv->softif_neigh) && + (memcmp(softif_neigh->addr, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN) < 0)) { + bat_dbg(DBG_ROUTES, bat_priv, + "Setting mesh exit point to %pM (vid: %d).\n", + softif_neigh->addr, softif_neigh->vid); + bat_priv->softif_neigh = softif_neigh; + /* we need to hold the additional reference */ + goto err; + } + +out: + kref_put(&softif_neigh->refcount, softif_neigh_free_ref); +err: + kfree_skb(skb); + return; +} + +static int interface_open(struct net_device *dev) +{ + netif_start_queue(dev); + return 0; +} + +static int interface_release(struct net_device *dev) +{ + netif_stop_queue(dev); + return 0; +} + +static struct net_device_stats *interface_stats(struct net_device *dev) +{ + struct bat_priv *bat_priv = netdev_priv(dev); + return &bat_priv->stats; +} + +static int interface_set_mac_addr(struct net_device *dev, void *p) +{ + struct bat_priv *bat_priv = netdev_priv(dev); + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + /* only modify hna-table if it has been initialised before */ + if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) { + hna_local_remove(bat_priv, dev->dev_addr, + "mac address changed"); + hna_local_add(dev, addr->sa_data); + } + + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + return 0; +} + +static int interface_change_mtu(struct net_device *dev, int new_mtu) +{ + /* check ranges */ + if ((new_mtu < 68) || (new_mtu > hardif_min_mtu(dev))) + return -EINVAL; + + dev->mtu = new_mtu; + + return 0; +} + +int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) +{ + struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + struct bat_priv *bat_priv = netdev_priv(soft_iface); + struct bcast_packet *bcast_packet; + struct vlan_ethhdr *vhdr; + int data_len = skb->len, ret; + short vid = -1; + bool do_bcast = false; + + if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) + goto dropped; + + soft_iface->trans_start = jiffies; + + switch (ntohs(ethhdr->h_proto)) { + case ETH_P_8021Q: + vhdr = (struct vlan_ethhdr *)skb->data; + vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + + if (ntohs(vhdr->h_vlan_encapsulated_proto) != ETH_P_BATMAN) + break; + + /* fall through */ + case ETH_P_BATMAN: + softif_batman_recv(skb, soft_iface, vid); + goto end; + } + + /** + * if we have a another chosen mesh exit node in range + * it will transport the packets to the mesh + */ + if ((bat_priv->softif_neigh) && (bat_priv->softif_neigh->vid == vid)) + goto dropped; + + /* TODO: check this for locks */ + hna_local_add(soft_iface, ethhdr->h_source); + + if (is_multicast_ether_addr(ethhdr->h_dest)) { + ret = gw_is_target(bat_priv, skb); + + if (ret < 0) + goto dropped; + + if (ret == 0) + do_bcast = true; + } + + /* ethernet packet should be broadcasted */ + if (do_bcast) { + if (!bat_priv->primary_if) + goto dropped; + + if (my_skb_head_push(skb, sizeof(struct bcast_packet)) < 0) + goto dropped; + + bcast_packet = (struct bcast_packet *)skb->data; + bcast_packet->version = COMPAT_VERSION; + bcast_packet->ttl = TTL; + + /* batman packet type: broadcast */ + bcast_packet->packet_type = BAT_BCAST; + + /* hw address of first interface is the orig mac because only + * this mac is known throughout the mesh */ + memcpy(bcast_packet->orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + + /* set broadcast sequence number */ + bcast_packet->seqno = + htonl(atomic_inc_return(&bat_priv->bcast_seqno)); + + add_bcast_packet_to_list(bat_priv, skb); + + /* a copy is stored in the bcast list, therefore removing + * the original skb. */ + kfree_skb(skb); + + /* unicast packet */ + } else { + ret = unicast_send_skb(skb, bat_priv); + if (ret != 0) + goto dropped_freed; + } + + bat_priv->stats.tx_packets++; + bat_priv->stats.tx_bytes += data_len; + goto end; + +dropped: + kfree_skb(skb); +dropped_freed: + bat_priv->stats.tx_dropped++; +end: + return NETDEV_TX_OK; +} + +void interface_rx(struct net_device *soft_iface, + struct sk_buff *skb, struct batman_if *recv_if, + int hdr_size) +{ + struct bat_priv *bat_priv = netdev_priv(soft_iface); + struct unicast_packet *unicast_packet; + struct ethhdr *ethhdr; + struct vlan_ethhdr *vhdr; + short vid = -1; + int ret; + + /* check if enough space is available for pulling, and pull */ + if (!pskb_may_pull(skb, hdr_size)) + goto dropped; + + skb_pull_rcsum(skb, hdr_size); + skb_reset_mac_header(skb); + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + switch (ntohs(ethhdr->h_proto)) { + case ETH_P_8021Q: + vhdr = (struct vlan_ethhdr *)skb->data; + vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + + if (ntohs(vhdr->h_vlan_encapsulated_proto) != ETH_P_BATMAN) + break; + + /* fall through */ + case ETH_P_BATMAN: + goto dropped; + } + + /** + * if we have a another chosen mesh exit node in range + * it will transport the packets to the non-mesh network + */ + if ((bat_priv->softif_neigh) && (bat_priv->softif_neigh->vid == vid)) { + skb_push(skb, hdr_size); + unicast_packet = (struct unicast_packet *)skb->data; + + if ((unicast_packet->packet_type != BAT_UNICAST) && + (unicast_packet->packet_type != BAT_UNICAST_FRAG)) + goto dropped; + + skb_reset_mac_header(skb); + + memcpy(unicast_packet->dest, + bat_priv->softif_neigh->addr, ETH_ALEN); + ret = route_unicast_packet(skb, recv_if, hdr_size); + if (ret == NET_RX_DROP) + goto dropped; + + goto out; + } + + /* skb->dev & skb->pkt_type are set here */ + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto dropped; + skb->protocol = eth_type_trans(skb, soft_iface); + + /* should not be neccesary anymore as we use skb_pull_rcsum() + * TODO: please verify this and remove this TODO + * -- Dec 21st 2009, Simon Wunderlich */ + +/* skb->ip_summed = CHECKSUM_UNNECESSARY;*/ + + bat_priv->stats.rx_packets++; + bat_priv->stats.rx_bytes += skb->len + sizeof(struct ethhdr); + + soft_iface->last_rx = jiffies; + + netif_rx(skb); + return; + +dropped: + kfree_skb(skb); +out: + return; +} + +#ifdef HAVE_NET_DEVICE_OPS +static const struct net_device_ops bat_netdev_ops = { + .ndo_open = interface_open, + .ndo_stop = interface_release, + .ndo_get_stats = interface_stats, + .ndo_set_mac_address = interface_set_mac_addr, + .ndo_change_mtu = interface_change_mtu, + .ndo_start_xmit = interface_tx, + .ndo_validate_addr = eth_validate_addr +}; +#endif + +static void interface_setup(struct net_device *dev) +{ + struct bat_priv *priv = netdev_priv(dev); + char dev_addr[ETH_ALEN]; + + ether_setup(dev); + +#ifdef HAVE_NET_DEVICE_OPS + dev->netdev_ops = &bat_netdev_ops; +#else + dev->open = interface_open; + dev->stop = interface_release; + dev->get_stats = interface_stats; + dev->set_mac_address = interface_set_mac_addr; + dev->change_mtu = interface_change_mtu; + dev->hard_start_xmit = interface_tx; +#endif + dev->destructor = free_netdev; + + /** + * can't call min_mtu, because the needed variables + * have not been initialized yet + */ + dev->mtu = ETH_DATA_LEN; + dev->hard_header_len = BAT_HEADER_LEN; /* reserve more space in the + * skbuff for our header */ + + /* generate random address */ + random_ether_addr(dev_addr); + memcpy(dev->dev_addr, dev_addr, ETH_ALEN); + + SET_ETHTOOL_OPS(dev, &bat_ethtool_ops); + + memset(priv, 0, sizeof(struct bat_priv)); +} + +struct net_device *softif_create(char *name) +{ + struct net_device *soft_iface; + struct bat_priv *bat_priv; + int ret; + + soft_iface = alloc_netdev(sizeof(struct bat_priv) , name, + interface_setup); + + if (!soft_iface) { + pr_err("Unable to allocate the batman interface: %s\n", name); + goto out; + } + + ret = register_netdev(soft_iface); + if (ret < 0) { + pr_err("Unable to register the batman interface '%s': %i\n", + name, ret); + goto free_soft_iface; + } + + bat_priv = netdev_priv(soft_iface); + + atomic_set(&bat_priv->aggregated_ogms, 1); + atomic_set(&bat_priv->bonding, 0); + atomic_set(&bat_priv->vis_mode, VIS_TYPE_CLIENT_UPDATE); + atomic_set(&bat_priv->gw_mode, GW_MODE_OFF); + atomic_set(&bat_priv->gw_sel_class, 20); + atomic_set(&bat_priv->gw_bandwidth, 41); + atomic_set(&bat_priv->orig_interval, 1000); + atomic_set(&bat_priv->hop_penalty, 10); + atomic_set(&bat_priv->log_level, 0); + atomic_set(&bat_priv->fragmentation, 1); + atomic_set(&bat_priv->bcast_queue_left, BCAST_QUEUE_LEN); + atomic_set(&bat_priv->batman_queue_left, BATMAN_QUEUE_LEN); + + atomic_set(&bat_priv->mesh_state, MESH_INACTIVE); + atomic_set(&bat_priv->bcast_seqno, 1); + atomic_set(&bat_priv->hna_local_changed, 0); + + bat_priv->primary_if = NULL; + bat_priv->num_ifaces = 0; + bat_priv->softif_neigh = NULL; + + ret = sysfs_add_meshif(soft_iface); + if (ret < 0) + goto unreg_soft_iface; + + ret = debugfs_add_meshif(soft_iface); + if (ret < 0) + goto unreg_sysfs; + + ret = mesh_init(soft_iface); + if (ret < 0) + goto unreg_debugfs; + + return soft_iface; + +unreg_debugfs: + debugfs_del_meshif(soft_iface); +unreg_sysfs: + sysfs_del_meshif(soft_iface); +unreg_soft_iface: + unregister_netdev(soft_iface); + return NULL; + +free_soft_iface: + free_netdev(soft_iface); +out: + return NULL; +} + +void softif_destroy(struct net_device *soft_iface) +{ + debugfs_del_meshif(soft_iface); + sysfs_del_meshif(soft_iface); + mesh_free(soft_iface); + unregister_netdevice(soft_iface); +} + +/* ethtool */ +static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + cmd->supported = 0; + cmd->advertising = 0; + cmd->speed = SPEED_10; + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_TP; + cmd->phy_address = 0; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_DISABLE; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + + return 0; +} + +static void bat_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strcpy(info->driver, "B.A.T.M.A.N. advanced"); + strcpy(info->version, SOURCE_VERSION); + strcpy(info->fw_version, "N/A"); + strcpy(info->bus_info, "batman"); +} + +static u32 bat_get_msglevel(struct net_device *dev) +{ + return -EOPNOTSUPP; +} + +static void bat_set_msglevel(struct net_device *dev, u32 value) +{ +} + +static u32 bat_get_link(struct net_device *dev) +{ + return 1; +} + +static u32 bat_get_rx_csum(struct net_device *dev) +{ + return 0; +} + +static int bat_set_rx_csum(struct net_device *dev, u32 data) +{ + return -EOPNOTSUPP; +} diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h new file mode 100644 index 000000000000..02b77334d10d --- /dev/null +++ b/net/batman-adv/soft-interface.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_ +#define _NET_BATMAN_ADV_SOFT_INTERFACE_H_ + +int my_skb_head_push(struct sk_buff *skb, unsigned int len); +int softif_neigh_seq_print_text(struct seq_file *seq, void *offset); +void softif_neigh_purge(struct bat_priv *bat_priv); +int interface_tx(struct sk_buff *skb, struct net_device *soft_iface); +void interface_rx(struct net_device *soft_iface, + struct sk_buff *skb, struct batman_if *recv_if, + int hdr_size); +struct net_device *softif_create(char *name); +void softif_destroy(struct net_device *soft_iface); + +#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c new file mode 100644 index 000000000000..a633b5a435e2 --- /dev/null +++ b/net/batman-adv/translation-table.c @@ -0,0 +1,534 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "translation-table.h" +#include "soft-interface.h" +#include "types.h" +#include "hash.h" +#include "originator.h" + +static void hna_local_purge(struct work_struct *work); +static void _hna_global_del_orig(struct bat_priv *bat_priv, + struct hna_global_entry *hna_global_entry, + char *message); + +static void hna_local_start_timer(struct bat_priv *bat_priv) +{ + INIT_DELAYED_WORK(&bat_priv->hna_work, hna_local_purge); + queue_delayed_work(bat_event_workqueue, &bat_priv->hna_work, 10 * HZ); +} + +int hna_local_init(struct bat_priv *bat_priv) +{ + if (bat_priv->hna_local_hash) + return 1; + + bat_priv->hna_local_hash = hash_new(1024); + + if (!bat_priv->hna_local_hash) + return 0; + + atomic_set(&bat_priv->hna_local_changed, 0); + hna_local_start_timer(bat_priv); + + return 1; +} + +void hna_local_add(struct net_device *soft_iface, uint8_t *addr) +{ + struct bat_priv *bat_priv = netdev_priv(soft_iface); + struct hna_local_entry *hna_local_entry; + struct hna_global_entry *hna_global_entry; + int required_bytes; + + spin_lock_bh(&bat_priv->hna_lhash_lock); + hna_local_entry = + ((struct hna_local_entry *)hash_find(bat_priv->hna_local_hash, + compare_orig, choose_orig, + addr)); + spin_unlock_bh(&bat_priv->hna_lhash_lock); + + if (hna_local_entry) { + hna_local_entry->last_seen = jiffies; + return; + } + + /* only announce as many hosts as possible in the batman-packet and + space in batman_packet->num_hna That also should give a limit to + MAC-flooding. */ + required_bytes = (bat_priv->num_local_hna + 1) * ETH_ALEN; + required_bytes += BAT_PACKET_LEN; + + if ((required_bytes > ETH_DATA_LEN) || + (atomic_read(&bat_priv->aggregated_ogms) && + required_bytes > MAX_AGGREGATION_BYTES) || + (bat_priv->num_local_hna + 1 > 255)) { + bat_dbg(DBG_ROUTES, bat_priv, + "Can't add new local hna entry (%pM): " + "number of local hna entries exceeds packet size\n", + addr); + return; + } + + bat_dbg(DBG_ROUTES, bat_priv, + "Creating new local hna entry: %pM\n", addr); + + hna_local_entry = kmalloc(sizeof(struct hna_local_entry), GFP_ATOMIC); + if (!hna_local_entry) + return; + + memcpy(hna_local_entry->addr, addr, ETH_ALEN); + hna_local_entry->last_seen = jiffies; + + /* the batman interface mac address should never be purged */ + if (compare_orig(addr, soft_iface->dev_addr)) + hna_local_entry->never_purge = 1; + else + hna_local_entry->never_purge = 0; + + spin_lock_bh(&bat_priv->hna_lhash_lock); + + hash_add(bat_priv->hna_local_hash, compare_orig, choose_orig, + hna_local_entry); + bat_priv->num_local_hna++; + atomic_set(&bat_priv->hna_local_changed, 1); + + spin_unlock_bh(&bat_priv->hna_lhash_lock); + + /* remove address from global hash if present */ + spin_lock_bh(&bat_priv->hna_ghash_lock); + + hna_global_entry = ((struct hna_global_entry *) + hash_find(bat_priv->hna_global_hash, + compare_orig, choose_orig, addr)); + + if (hna_global_entry) + _hna_global_del_orig(bat_priv, hna_global_entry, + "local hna received"); + + spin_unlock_bh(&bat_priv->hna_ghash_lock); +} + +int hna_local_fill_buffer(struct bat_priv *bat_priv, + unsigned char *buff, int buff_len) +{ + struct hashtable_t *hash = bat_priv->hna_local_hash; + struct hna_local_entry *hna_local_entry; + struct element_t *bucket; + int i; + struct hlist_node *walk; + struct hlist_head *head; + int count = 0; + + spin_lock_bh(&bat_priv->hna_lhash_lock); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + + if (buff_len < (count + 1) * ETH_ALEN) + break; + + hna_local_entry = bucket->data; + memcpy(buff + (count * ETH_ALEN), hna_local_entry->addr, + ETH_ALEN); + + count++; + } + } + + /* if we did not get all new local hnas see you next time ;-) */ + if (count == bat_priv->num_local_hna) + atomic_set(&bat_priv->hna_local_changed, 0); + + spin_unlock_bh(&bat_priv->hna_lhash_lock); + return count; +} + +int hna_local_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct bat_priv *bat_priv = netdev_priv(net_dev); + struct hashtable_t *hash = bat_priv->hna_local_hash; + struct hna_local_entry *hna_local_entry; + int i; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + size_t buf_size, pos; + char *buff; + + if (!bat_priv->primary_if) { + return seq_printf(seq, "BATMAN mesh %s disabled - " + "please specify interfaces to enable it\n", + net_dev->name); + } + + seq_printf(seq, "Locally retrieved addresses (from %s) " + "announced via HNA:\n", + net_dev->name); + + spin_lock_bh(&bat_priv->hna_lhash_lock); + + buf_size = 1; + /* Estimate length for: " * xx:xx:xx:xx:xx:xx\n" */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each(walk, head) + buf_size += 21; + } + + buff = kmalloc(buf_size, GFP_ATOMIC); + if (!buff) { + spin_unlock_bh(&bat_priv->hna_lhash_lock); + return -ENOMEM; + } + buff[0] = '\0'; + pos = 0; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + hna_local_entry = bucket->data; + + pos += snprintf(buff + pos, 22, " * %pM\n", + hna_local_entry->addr); + } + } + + spin_unlock_bh(&bat_priv->hna_lhash_lock); + + seq_printf(seq, "%s", buff); + kfree(buff); + return 0; +} + +static void _hna_local_del(void *data, void *arg) +{ + struct bat_priv *bat_priv = (struct bat_priv *)arg; + + kfree(data); + bat_priv->num_local_hna--; + atomic_set(&bat_priv->hna_local_changed, 1); +} + +static void hna_local_del(struct bat_priv *bat_priv, + struct hna_local_entry *hna_local_entry, + char *message) +{ + bat_dbg(DBG_ROUTES, bat_priv, "Deleting local hna entry (%pM): %s\n", + hna_local_entry->addr, message); + + hash_remove(bat_priv->hna_local_hash, compare_orig, choose_orig, + hna_local_entry->addr); + _hna_local_del(hna_local_entry, bat_priv); +} + +void hna_local_remove(struct bat_priv *bat_priv, + uint8_t *addr, char *message) +{ + struct hna_local_entry *hna_local_entry; + + spin_lock_bh(&bat_priv->hna_lhash_lock); + + hna_local_entry = (struct hna_local_entry *) + hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig, + addr); + + if (hna_local_entry) + hna_local_del(bat_priv, hna_local_entry, message); + + spin_unlock_bh(&bat_priv->hna_lhash_lock); +} + +static void hna_local_purge(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct bat_priv *bat_priv = + container_of(delayed_work, struct bat_priv, hna_work); + struct hashtable_t *hash = bat_priv->hna_local_hash; + struct hna_local_entry *hna_local_entry; + int i; + struct hlist_node *walk, *safe; + struct hlist_head *head; + struct element_t *bucket; + unsigned long timeout; + + spin_lock_bh(&bat_priv->hna_lhash_lock); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { + hna_local_entry = bucket->data; + + timeout = hna_local_entry->last_seen; + timeout += LOCAL_HNA_TIMEOUT * HZ; + + if ((!hna_local_entry->never_purge) && + time_after(jiffies, timeout)) + hna_local_del(bat_priv, hna_local_entry, + "address timed out"); + } + } + + spin_unlock_bh(&bat_priv->hna_lhash_lock); + hna_local_start_timer(bat_priv); +} + +void hna_local_free(struct bat_priv *bat_priv) +{ + if (!bat_priv->hna_local_hash) + return; + + cancel_delayed_work_sync(&bat_priv->hna_work); + hash_delete(bat_priv->hna_local_hash, _hna_local_del, bat_priv); + bat_priv->hna_local_hash = NULL; +} + +int hna_global_init(struct bat_priv *bat_priv) +{ + if (bat_priv->hna_global_hash) + return 1; + + bat_priv->hna_global_hash = hash_new(1024); + + if (!bat_priv->hna_global_hash) + return 0; + + return 1; +} + +void hna_global_add_orig(struct bat_priv *bat_priv, + struct orig_node *orig_node, + unsigned char *hna_buff, int hna_buff_len) +{ + struct hna_global_entry *hna_global_entry; + struct hna_local_entry *hna_local_entry; + int hna_buff_count = 0; + unsigned char *hna_ptr; + + while ((hna_buff_count + 1) * ETH_ALEN <= hna_buff_len) { + spin_lock_bh(&bat_priv->hna_ghash_lock); + + hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); + hna_global_entry = (struct hna_global_entry *) + hash_find(bat_priv->hna_global_hash, compare_orig, + choose_orig, hna_ptr); + + if (!hna_global_entry) { + spin_unlock_bh(&bat_priv->hna_ghash_lock); + + hna_global_entry = + kmalloc(sizeof(struct hna_global_entry), + GFP_ATOMIC); + + if (!hna_global_entry) + break; + + memcpy(hna_global_entry->addr, hna_ptr, ETH_ALEN); + + bat_dbg(DBG_ROUTES, bat_priv, + "Creating new global hna entry: " + "%pM (via %pM)\n", + hna_global_entry->addr, orig_node->orig); + + spin_lock_bh(&bat_priv->hna_ghash_lock); + hash_add(bat_priv->hna_global_hash, compare_orig, + choose_orig, hna_global_entry); + + } + + hna_global_entry->orig_node = orig_node; + spin_unlock_bh(&bat_priv->hna_ghash_lock); + + /* remove address from local hash if present */ + spin_lock_bh(&bat_priv->hna_lhash_lock); + + hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); + hna_local_entry = (struct hna_local_entry *) + hash_find(bat_priv->hna_local_hash, compare_orig, + choose_orig, hna_ptr); + + if (hna_local_entry) + hna_local_del(bat_priv, hna_local_entry, + "global hna received"); + + spin_unlock_bh(&bat_priv->hna_lhash_lock); + + hna_buff_count++; + } + + /* initialize, and overwrite if malloc succeeds */ + orig_node->hna_buff = NULL; + orig_node->hna_buff_len = 0; + + if (hna_buff_len > 0) { + orig_node->hna_buff = kmalloc(hna_buff_len, GFP_ATOMIC); + if (orig_node->hna_buff) { + memcpy(orig_node->hna_buff, hna_buff, hna_buff_len); + orig_node->hna_buff_len = hna_buff_len; + } + } +} + +int hna_global_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct bat_priv *bat_priv = netdev_priv(net_dev); + struct hashtable_t *hash = bat_priv->hna_global_hash; + struct hna_global_entry *hna_global_entry; + int i; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + size_t buf_size, pos; + char *buff; + + if (!bat_priv->primary_if) { + return seq_printf(seq, "BATMAN mesh %s disabled - " + "please specify interfaces to enable it\n", + net_dev->name); + } + + seq_printf(seq, "Globally announced HNAs received via the mesh %s\n", + net_dev->name); + + spin_lock_bh(&bat_priv->hna_ghash_lock); + + buf_size = 1; + /* Estimate length for: " * xx:xx:xx:xx:xx:xx via xx:xx:xx:xx:xx:xx\n"*/ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each(walk, head) + buf_size += 43; + } + + buff = kmalloc(buf_size, GFP_ATOMIC); + if (!buff) { + spin_unlock_bh(&bat_priv->hna_ghash_lock); + return -ENOMEM; + } + buff[0] = '\0'; + pos = 0; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + hna_global_entry = bucket->data; + + pos += snprintf(buff + pos, 44, + " * %pM via %pM\n", + hna_global_entry->addr, + hna_global_entry->orig_node->orig); + } + } + + spin_unlock_bh(&bat_priv->hna_ghash_lock); + + seq_printf(seq, "%s", buff); + kfree(buff); + return 0; +} + +static void _hna_global_del_orig(struct bat_priv *bat_priv, + struct hna_global_entry *hna_global_entry, + char *message) +{ + bat_dbg(DBG_ROUTES, bat_priv, + "Deleting global hna entry %pM (via %pM): %s\n", + hna_global_entry->addr, hna_global_entry->orig_node->orig, + message); + + hash_remove(bat_priv->hna_global_hash, compare_orig, choose_orig, + hna_global_entry->addr); + kfree(hna_global_entry); +} + +void hna_global_del_orig(struct bat_priv *bat_priv, + struct orig_node *orig_node, char *message) +{ + struct hna_global_entry *hna_global_entry; + int hna_buff_count = 0; + unsigned char *hna_ptr; + + if (orig_node->hna_buff_len == 0) + return; + + spin_lock_bh(&bat_priv->hna_ghash_lock); + + while ((hna_buff_count + 1) * ETH_ALEN <= orig_node->hna_buff_len) { + hna_ptr = orig_node->hna_buff + (hna_buff_count * ETH_ALEN); + hna_global_entry = (struct hna_global_entry *) + hash_find(bat_priv->hna_global_hash, compare_orig, + choose_orig, hna_ptr); + + if ((hna_global_entry) && + (hna_global_entry->orig_node == orig_node)) + _hna_global_del_orig(bat_priv, hna_global_entry, + message); + + hna_buff_count++; + } + + spin_unlock_bh(&bat_priv->hna_ghash_lock); + + orig_node->hna_buff_len = 0; + kfree(orig_node->hna_buff); + orig_node->hna_buff = NULL; +} + +static void hna_global_del(void *data, void *arg) +{ + kfree(data); +} + +void hna_global_free(struct bat_priv *bat_priv) +{ + if (!bat_priv->hna_global_hash) + return; + + hash_delete(bat_priv->hna_global_hash, hna_global_del, NULL); + bat_priv->hna_global_hash = NULL; +} + +struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr) +{ + struct hna_global_entry *hna_global_entry; + + spin_lock_bh(&bat_priv->hna_ghash_lock); + hna_global_entry = (struct hna_global_entry *) + hash_find(bat_priv->hna_global_hash, + compare_orig, choose_orig, addr); + spin_unlock_bh(&bat_priv->hna_ghash_lock); + + if (!hna_global_entry) + return NULL; + + return hna_global_entry->orig_node; +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h new file mode 100644 index 000000000000..10c4c5c319b6 --- /dev/null +++ b/net/batman-adv/translation-table.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ +#define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ + +#include "types.h" + +int hna_local_init(struct bat_priv *bat_priv); +void hna_local_add(struct net_device *soft_iface, uint8_t *addr); +void hna_local_remove(struct bat_priv *bat_priv, + uint8_t *addr, char *message); +int hna_local_fill_buffer(struct bat_priv *bat_priv, + unsigned char *buff, int buff_len); +int hna_local_seq_print_text(struct seq_file *seq, void *offset); +void hna_local_free(struct bat_priv *bat_priv); +int hna_global_init(struct bat_priv *bat_priv); +void hna_global_add_orig(struct bat_priv *bat_priv, + struct orig_node *orig_node, + unsigned char *hna_buff, int hna_buff_len); +int hna_global_seq_print_text(struct seq_file *seq, void *offset); +void hna_global_del_orig(struct bat_priv *bat_priv, + struct orig_node *orig_node, char *message); +void hna_global_free(struct bat_priv *bat_priv); +struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr); + +#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h new file mode 100644 index 000000000000..97cb23dd3e69 --- /dev/null +++ b/net/batman-adv/types.h @@ -0,0 +1,271 @@ +/* + * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + + + +#ifndef _NET_BATMAN_ADV_TYPES_H_ +#define _NET_BATMAN_ADV_TYPES_H_ + +#include "packet.h" +#include "bitarray.h" + +#define BAT_HEADER_LEN (sizeof(struct ethhdr) + \ + ((sizeof(struct unicast_packet) > sizeof(struct bcast_packet) ? \ + sizeof(struct unicast_packet) : \ + sizeof(struct bcast_packet)))) + + +struct batman_if { + struct list_head list; + int16_t if_num; + char if_status; + struct net_device *net_dev; + atomic_t seqno; + atomic_t frag_seqno; + unsigned char *packet_buff; + int packet_len; + struct kobject *hardif_obj; + struct kref refcount; + struct packet_type batman_adv_ptype; + struct net_device *soft_iface; + struct rcu_head rcu; +}; + +/** + * orig_node - structure for orig_list maintaining nodes of mesh + * @primary_addr: hosts primary interface address + * @last_valid: when last packet from this node was received + * @bcast_seqno_reset: time when the broadcast seqno window was reset + * @batman_seqno_reset: time when the batman seqno window was reset + * @gw_flags: flags related to gateway class + * @flags: for now only VIS_SERVER flag + * @last_real_seqno: last and best known squence number + * @last_ttl: ttl of last received packet + * @last_bcast_seqno: last broadcast sequence number received by this host + * + * @candidates: how many candidates are available + * @selected: next bonding candidate + */ +struct orig_node { + uint8_t orig[ETH_ALEN]; + uint8_t primary_addr[ETH_ALEN]; + struct neigh_node *router; + unsigned long *bcast_own; + uint8_t *bcast_own_sum; + uint8_t tq_own; + int tq_asym_penalty; + unsigned long last_valid; + unsigned long bcast_seqno_reset; + unsigned long batman_seqno_reset; + uint8_t gw_flags; + uint8_t flags; + unsigned char *hna_buff; + int16_t hna_buff_len; + uint32_t last_real_seqno; + uint8_t last_ttl; + unsigned long bcast_bits[NUM_WORDS]; + uint32_t last_bcast_seqno; + struct list_head neigh_list; + struct list_head frag_list; + unsigned long last_frag_packet; + struct { + uint8_t candidates; + struct neigh_node *selected; + } bond; +}; + +struct gw_node { + struct hlist_node list; + struct orig_node *orig_node; + unsigned long deleted; + struct kref refcount; + struct rcu_head rcu; +}; + +/** + * neigh_node + * @last_valid: when last packet via this neighbor was received + */ +struct neigh_node { + struct list_head list; + uint8_t addr[ETH_ALEN]; + uint8_t real_packet_count; + uint8_t tq_recv[TQ_GLOBAL_WINDOW_SIZE]; + uint8_t tq_index; + uint8_t tq_avg; + uint8_t last_ttl; + struct neigh_node *next_bond_candidate; + unsigned long last_valid; + unsigned long real_bits[NUM_WORDS]; + struct orig_node *orig_node; + struct batman_if *if_incoming; +}; + + +struct bat_priv { + atomic_t mesh_state; + struct net_device_stats stats; + atomic_t aggregated_ogms; /* boolean */ + atomic_t bonding; /* boolean */ + atomic_t fragmentation; /* boolean */ + atomic_t vis_mode; /* VIS_TYPE_* */ + atomic_t gw_mode; /* GW_MODE_* */ + atomic_t gw_sel_class; /* uint */ + atomic_t gw_bandwidth; /* gw bandwidth */ + atomic_t orig_interval; /* uint */ + atomic_t hop_penalty; /* uint */ + atomic_t log_level; /* uint */ + atomic_t bcast_seqno; + atomic_t bcast_queue_left; + atomic_t batman_queue_left; + char num_ifaces; + struct hlist_head softif_neigh_list; + struct softif_neigh *softif_neigh; + struct debug_log *debug_log; + struct batman_if *primary_if; + struct kobject *mesh_obj; + struct dentry *debug_dir; + struct hlist_head forw_bat_list; + struct hlist_head forw_bcast_list; + struct hlist_head gw_list; + struct list_head vis_send_list; + struct hashtable_t *orig_hash; + struct hashtable_t *hna_local_hash; + struct hashtable_t *hna_global_hash; + struct hashtable_t *vis_hash; + spinlock_t orig_hash_lock; /* protects orig_hash */ + spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ + spinlock_t forw_bcast_list_lock; /* protects */ + spinlock_t hna_lhash_lock; /* protects hna_local_hash */ + spinlock_t hna_ghash_lock; /* protects hna_global_hash */ + spinlock_t gw_list_lock; /* protects gw_list */ + spinlock_t vis_hash_lock; /* protects vis_hash */ + spinlock_t vis_list_lock; /* protects vis_info::recv_list */ + spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */ + int16_t num_local_hna; + atomic_t hna_local_changed; + struct delayed_work hna_work; + struct delayed_work orig_work; + struct delayed_work vis_work; + struct gw_node *curr_gw; + struct vis_info *my_vis_info; +}; + +struct socket_client { + struct list_head queue_list; + unsigned int queue_len; + unsigned char index; + spinlock_t lock; /* protects queue_list, queue_len, index */ + wait_queue_head_t queue_wait; + struct bat_priv *bat_priv; +}; + +struct socket_packet { + struct list_head list; + size_t icmp_len; + struct icmp_packet_rr icmp_packet; +}; + +struct hna_local_entry { + uint8_t addr[ETH_ALEN]; + unsigned long last_seen; + char never_purge; +}; + +struct hna_global_entry { + uint8_t addr[ETH_ALEN]; + struct orig_node *orig_node; +}; + +/** + * forw_packet - structure for forw_list maintaining packets to be + * send/forwarded + */ +struct forw_packet { + struct hlist_node list; + unsigned long send_time; + uint8_t own; + struct sk_buff *skb; + uint16_t packet_len; + uint32_t direct_link_flags; + uint8_t num_packets; + struct delayed_work delayed_work; + struct batman_if *if_incoming; +}; + +/* While scanning for vis-entries of a particular vis-originator + * this list collects its interfaces to create a subgraph/cluster + * out of them later + */ +struct if_list_entry { + uint8_t addr[ETH_ALEN]; + bool primary; + struct hlist_node list; +}; + +struct debug_log { + char log_buff[LOG_BUF_LEN]; + unsigned long log_start; + unsigned long log_end; + spinlock_t lock; /* protects log_buff, log_start and log_end */ + wait_queue_head_t queue_wait; +}; + +struct frag_packet_list_entry { + struct list_head list; + uint16_t seqno; + struct sk_buff *skb; +}; + +struct vis_info { + unsigned long first_seen; + struct list_head recv_list; + /* list of server-neighbors we received a vis-packet + * from. we should not reply to them. */ + struct list_head send_list; + struct kref refcount; + struct bat_priv *bat_priv; + /* this packet might be part of the vis send queue. */ + struct sk_buff *skb_packet; + /* vis_info may follow here*/ +} __attribute__((packed)); + +struct vis_info_entry { + uint8_t src[ETH_ALEN]; + uint8_t dest[ETH_ALEN]; + uint8_t quality; /* quality = 0 means HNA */ +} __attribute__((packed)); + +struct recvlist_node { + struct list_head list; + uint8_t mac[ETH_ALEN]; +}; + +struct softif_neigh { + struct hlist_node list; + uint8_t addr[ETH_ALEN]; + unsigned long last_seen; + short vid; + struct kref refcount; + struct rcu_head rcu; +}; + +#endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c new file mode 100644 index 000000000000..dc2e28bed844 --- /dev/null +++ b/net/batman-adv/unicast.c @@ -0,0 +1,343 @@ +/* + * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * + * Andreas Langer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "unicast.h" +#include "send.h" +#include "soft-interface.h" +#include "gateway_client.h" +#include "originator.h" +#include "hash.h" +#include "translation-table.h" +#include "routing.h" +#include "hard-interface.h" + + +static struct sk_buff *frag_merge_packet(struct list_head *head, + struct frag_packet_list_entry *tfp, + struct sk_buff *skb) +{ + struct unicast_frag_packet *up = + (struct unicast_frag_packet *)skb->data; + struct sk_buff *tmp_skb; + struct unicast_packet *unicast_packet; + int hdr_len = sizeof(struct unicast_packet), + uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; + + /* set skb to the first part and tmp_skb to the second part */ + if (up->flags & UNI_FRAG_HEAD) { + tmp_skb = tfp->skb; + } else { + tmp_skb = skb; + skb = tfp->skb; + } + + skb_pull(tmp_skb, sizeof(struct unicast_frag_packet)); + if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) { + /* free buffered skb, skb will be freed later */ + kfree_skb(tfp->skb); + return NULL; + } + + /* move free entry to end */ + tfp->skb = NULL; + tfp->seqno = 0; + list_move_tail(&tfp->list, head); + + memcpy(skb_put(skb, tmp_skb->len), tmp_skb->data, tmp_skb->len); + kfree_skb(tmp_skb); + + memmove(skb->data + uni_diff, skb->data, hdr_len); + unicast_packet = (struct unicast_packet *) skb_pull(skb, uni_diff); + unicast_packet->packet_type = BAT_UNICAST; + + return skb; +} + +static void frag_create_entry(struct list_head *head, struct sk_buff *skb) +{ + struct frag_packet_list_entry *tfp; + struct unicast_frag_packet *up = + (struct unicast_frag_packet *)skb->data; + + /* free and oldest packets stand at the end */ + tfp = list_entry((head)->prev, typeof(*tfp), list); + kfree_skb(tfp->skb); + + tfp->seqno = ntohs(up->seqno); + tfp->skb = skb; + list_move(&tfp->list, head); + return; +} + +static int frag_create_buffer(struct list_head *head) +{ + int i; + struct frag_packet_list_entry *tfp; + + for (i = 0; i < FRAG_BUFFER_SIZE; i++) { + tfp = kmalloc(sizeof(struct frag_packet_list_entry), + GFP_ATOMIC); + if (!tfp) { + frag_list_free(head); + return -ENOMEM; + } + tfp->skb = NULL; + tfp->seqno = 0; + INIT_LIST_HEAD(&tfp->list); + list_add(&tfp->list, head); + } + + return 0; +} + +static struct frag_packet_list_entry *frag_search_packet(struct list_head *head, + struct unicast_frag_packet *up) +{ + struct frag_packet_list_entry *tfp; + struct unicast_frag_packet *tmp_up = NULL; + uint16_t search_seqno; + + if (up->flags & UNI_FRAG_HEAD) + search_seqno = ntohs(up->seqno)+1; + else + search_seqno = ntohs(up->seqno)-1; + + list_for_each_entry(tfp, head, list) { + + if (!tfp->skb) + continue; + + if (tfp->seqno == ntohs(up->seqno)) + goto mov_tail; + + tmp_up = (struct unicast_frag_packet *)tfp->skb->data; + + if (tfp->seqno == search_seqno) { + + if ((tmp_up->flags & UNI_FRAG_HEAD) != + (up->flags & UNI_FRAG_HEAD)) + return tfp; + else + goto mov_tail; + } + } + return NULL; + +mov_tail: + list_move_tail(&tfp->list, head); + return NULL; +} + +void frag_list_free(struct list_head *head) +{ + struct frag_packet_list_entry *pf, *tmp_pf; + + if (!list_empty(head)) { + + list_for_each_entry_safe(pf, tmp_pf, head, list) { + kfree_skb(pf->skb); + list_del(&pf->list); + kfree(pf); + } + } + return; +} + +/* frag_reassemble_skb(): + * returns NET_RX_DROP if the operation failed - skb is left intact + * returns NET_RX_SUCCESS if the fragment was buffered (skb_new will be NULL) + * or the skb could be reassembled (skb_new will point to the new packet and + * skb was freed) + */ +int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, + struct sk_buff **new_skb) +{ + struct orig_node *orig_node; + struct frag_packet_list_entry *tmp_frag_entry; + int ret = NET_RX_DROP; + struct unicast_frag_packet *unicast_packet = + (struct unicast_frag_packet *)skb->data; + + *new_skb = NULL; + spin_lock_bh(&bat_priv->orig_hash_lock); + orig_node = ((struct orig_node *) + hash_find(bat_priv->orig_hash, compare_orig, choose_orig, + unicast_packet->orig)); + + if (!orig_node) { + pr_debug("couldn't find originator in orig_hash\n"); + goto out; + } + + orig_node->last_frag_packet = jiffies; + + if (list_empty(&orig_node->frag_list) && + frag_create_buffer(&orig_node->frag_list)) { + pr_debug("couldn't create frag buffer\n"); + goto out; + } + + tmp_frag_entry = frag_search_packet(&orig_node->frag_list, + unicast_packet); + + if (!tmp_frag_entry) { + frag_create_entry(&orig_node->frag_list, skb); + ret = NET_RX_SUCCESS; + goto out; + } + + *new_skb = frag_merge_packet(&orig_node->frag_list, tmp_frag_entry, + skb); + /* if not, merge failed */ + if (*new_skb) + ret = NET_RX_SUCCESS; +out: + spin_unlock_bh(&bat_priv->orig_hash_lock); + + return ret; +} + +int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, + struct batman_if *batman_if, uint8_t dstaddr[]) +{ + struct unicast_packet tmp_uc, *unicast_packet; + struct sk_buff *frag_skb; + struct unicast_frag_packet *frag1, *frag2; + int uc_hdr_len = sizeof(struct unicast_packet); + int ucf_hdr_len = sizeof(struct unicast_frag_packet); + int data_len = skb->len; + + if (!bat_priv->primary_if) + goto dropped; + + unicast_packet = (struct unicast_packet *) skb->data; + + memcpy(&tmp_uc, unicast_packet, uc_hdr_len); + frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); + skb_split(skb, frag_skb, data_len / 2); + + if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 || + my_skb_head_push(frag_skb, ucf_hdr_len) < 0) + goto drop_frag; + + frag1 = (struct unicast_frag_packet *)skb->data; + frag2 = (struct unicast_frag_packet *)frag_skb->data; + + memcpy(frag1, &tmp_uc, sizeof(struct unicast_packet)); + + frag1->ttl--; + frag1->version = COMPAT_VERSION; + frag1->packet_type = BAT_UNICAST_FRAG; + + memcpy(frag1->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + memcpy(frag2, frag1, sizeof(struct unicast_frag_packet)); + + frag1->flags |= UNI_FRAG_HEAD; + frag2->flags &= ~UNI_FRAG_HEAD; + + frag1->seqno = htons((uint16_t)atomic_inc_return( + &batman_if->frag_seqno)); + frag2->seqno = htons((uint16_t)atomic_inc_return( + &batman_if->frag_seqno)); + + send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(frag_skb, batman_if, dstaddr); + return NET_RX_SUCCESS; + +drop_frag: + kfree_skb(frag_skb); +dropped: + kfree_skb(skb); + return NET_RX_DROP; +} + +int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) +{ + struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + struct unicast_packet *unicast_packet; + struct orig_node *orig_node; + struct batman_if *batman_if; + struct neigh_node *router; + int data_len = skb->len; + uint8_t dstaddr[6]; + + spin_lock_bh(&bat_priv->orig_hash_lock); + + /* get routing information */ + if (is_multicast_ether_addr(ethhdr->h_dest)) + orig_node = (struct orig_node *)gw_get_selected(bat_priv); + else + orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, + compare_orig, + choose_orig, + ethhdr->h_dest)); + + /* check for hna host */ + if (!orig_node) + orig_node = transtable_search(bat_priv, ethhdr->h_dest); + + router = find_router(bat_priv, orig_node, NULL); + + if (!router) + goto unlock; + + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + + batman_if = router->if_incoming; + memcpy(dstaddr, router->addr, ETH_ALEN); + + spin_unlock_bh(&bat_priv->orig_hash_lock); + + if (batman_if->if_status != IF_ACTIVE) + goto dropped; + + if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0) + goto dropped; + + unicast_packet = (struct unicast_packet *)skb->data; + + unicast_packet->version = COMPAT_VERSION; + /* batman packet type: unicast */ + unicast_packet->packet_type = BAT_UNICAST; + /* set unicast ttl */ + unicast_packet->ttl = TTL; + /* copy the destination for faster routing */ + memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); + + if (atomic_read(&bat_priv->fragmentation) && + data_len + sizeof(struct unicast_packet) > + batman_if->net_dev->mtu) { + /* send frag skb decreases ttl */ + unicast_packet->ttl++; + return frag_send_skb(skb, bat_priv, batman_if, + dstaddr); + } + send_skb_packet(skb, batman_if, dstaddr); + return 0; + +unlock: + spin_unlock_bh(&bat_priv->orig_hash_lock); +dropped: + kfree_skb(skb); + return 1; +} diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h new file mode 100644 index 000000000000..e32b7867a9a4 --- /dev/null +++ b/net/batman-adv/unicast.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * + * Andreas Langer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_UNICAST_H_ +#define _NET_BATMAN_ADV_UNICAST_H_ + +#define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ +#define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ + +int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, + struct sk_buff **new_skb); +void frag_list_free(struct list_head *head); +int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv); +int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, + struct batman_if *batman_if, uint8_t dstaddr[]); + +#endif /* _NET_BATMAN_ADV_UNICAST_H_ */ diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c new file mode 100644 index 000000000000..cd4c4231fa48 --- /dev/null +++ b/net/batman-adv/vis.c @@ -0,0 +1,949 @@ +/* + * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: + * + * Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "send.h" +#include "translation-table.h" +#include "vis.h" +#include "soft-interface.h" +#include "hard-interface.h" +#include "hash.h" +#include "originator.h" + +#define MAX_VIS_PACKET_SIZE 1000 + +/* Returns the smallest signed integer in two's complement with the sizeof x */ +#define smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u))) + +/* Checks if a sequence number x is a predecessor/successor of y. + * they handle overflows/underflows and can correctly check for a + * predecessor/successor unless the variable sequence number has grown by + * more then 2**(bitwidth(x)-1)-1. + * This means that for a uint8_t with the maximum value 255, it would think: + * - when adding nothing - it is neither a predecessor nor a successor + * - before adding more than 127 to the starting value - it is a predecessor, + * - when adding 128 - it is neither a predecessor nor a successor, + * - after adding more than 127 to the starting value - it is a successor */ +#define seq_before(x, y) ({typeof(x) _dummy = (x - y); \ + _dummy > smallest_signed_int(_dummy); }) +#define seq_after(x, y) seq_before(y, x) + +static void start_vis_timer(struct bat_priv *bat_priv); + +/* free the info */ +static void free_info(struct kref *ref) +{ + struct vis_info *info = container_of(ref, struct vis_info, refcount); + struct bat_priv *bat_priv = info->bat_priv; + struct recvlist_node *entry, *tmp; + + list_del_init(&info->send_list); + spin_lock_bh(&bat_priv->vis_list_lock); + list_for_each_entry_safe(entry, tmp, &info->recv_list, list) { + list_del(&entry->list); + kfree(entry); + } + + spin_unlock_bh(&bat_priv->vis_list_lock); + kfree_skb(info->skb_packet); +} + +/* Compare two vis packets, used by the hashing algorithm */ +static int vis_info_cmp(void *data1, void *data2) +{ + struct vis_info *d1, *d2; + struct vis_packet *p1, *p2; + d1 = data1; + d2 = data2; + p1 = (struct vis_packet *)d1->skb_packet->data; + p2 = (struct vis_packet *)d2->skb_packet->data; + return compare_orig(p1->vis_orig, p2->vis_orig); +} + +/* hash function to choose an entry in a hash table of given size */ +/* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ +static int vis_info_choose(void *data, int size) +{ + struct vis_info *vis_info = data; + struct vis_packet *packet; + unsigned char *key; + uint32_t hash = 0; + size_t i; + + packet = (struct vis_packet *)vis_info->skb_packet->data; + key = packet->vis_orig; + for (i = 0; i < ETH_ALEN; i++) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash % size; +} + +/* insert interface to the list of interfaces of one originator, if it + * does not already exist in the list */ +static void vis_data_insert_interface(const uint8_t *interface, + struct hlist_head *if_list, + bool primary) +{ + struct if_list_entry *entry; + struct hlist_node *pos; + + hlist_for_each_entry(entry, pos, if_list, list) { + if (compare_orig(entry->addr, (void *)interface)) + return; + } + + /* its a new address, add it to the list */ + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return; + memcpy(entry->addr, interface, ETH_ALEN); + entry->primary = primary; + hlist_add_head(&entry->list, if_list); +} + +static ssize_t vis_data_read_prim_sec(char *buff, struct hlist_head *if_list) +{ + struct if_list_entry *entry; + struct hlist_node *pos; + size_t len = 0; + + hlist_for_each_entry(entry, pos, if_list, list) { + if (entry->primary) + len += sprintf(buff + len, "PRIMARY, "); + else + len += sprintf(buff + len, "SEC %pM, ", entry->addr); + } + + return len; +} + +static size_t vis_data_count_prim_sec(struct hlist_head *if_list) +{ + struct if_list_entry *entry; + struct hlist_node *pos; + size_t count = 0; + + hlist_for_each_entry(entry, pos, if_list, list) { + if (entry->primary) + count += 9; + else + count += 23; + } + + return count; +} + +/* read an entry */ +static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry, + uint8_t *src, bool primary) +{ + /* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */ + if (primary && entry->quality == 0) + return sprintf(buff, "HNA %pM, ", entry->dest); + else if (compare_orig(entry->src, src)) + return sprintf(buff, "TQ %pM %d, ", entry->dest, + entry->quality); + + return 0; +} + +int vis_seq_print_text(struct seq_file *seq, void *offset) +{ + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + struct vis_info *info; + struct vis_packet *packet; + struct vis_info_entry *entries; + struct net_device *net_dev = (struct net_device *)seq->private; + struct bat_priv *bat_priv = netdev_priv(net_dev); + struct hashtable_t *hash = bat_priv->vis_hash; + HLIST_HEAD(vis_if_list); + struct if_list_entry *entry; + struct hlist_node *pos, *n; + int i, j; + int vis_server = atomic_read(&bat_priv->vis_mode); + size_t buff_pos, buf_size; + char *buff; + int compare; + + if ((!bat_priv->primary_if) || + (vis_server == VIS_TYPE_CLIENT_UPDATE)) + return 0; + + buf_size = 1; + /* Estimate length */ + spin_lock_bh(&bat_priv->vis_hash_lock); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + info = bucket->data; + packet = (struct vis_packet *)info->skb_packet->data; + entries = (struct vis_info_entry *) + ((char *)packet + sizeof(struct vis_packet)); + + for (j = 0; j < packet->entries; j++) { + if (entries[j].quality == 0) + continue; + compare = + compare_orig(entries[j].src, packet->vis_orig); + vis_data_insert_interface(entries[j].src, + &vis_if_list, + compare); + } + + hlist_for_each_entry(entry, pos, &vis_if_list, list) { + buf_size += 18 + 26 * packet->entries; + + /* add primary/secondary records */ + if (compare_orig(entry->addr, packet->vis_orig)) + buf_size += + vis_data_count_prim_sec(&vis_if_list); + + buf_size += 1; + } + + hlist_for_each_entry_safe(entry, pos, n, &vis_if_list, + list) { + hlist_del(&entry->list); + kfree(entry); + } + } + } + + buff = kmalloc(buf_size, GFP_ATOMIC); + if (!buff) { + spin_unlock_bh(&bat_priv->vis_hash_lock); + return -ENOMEM; + } + buff[0] = '\0'; + buff_pos = 0; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + info = bucket->data; + packet = (struct vis_packet *)info->skb_packet->data; + entries = (struct vis_info_entry *) + ((char *)packet + sizeof(struct vis_packet)); + + for (j = 0; j < packet->entries; j++) { + if (entries[j].quality == 0) + continue; + compare = + compare_orig(entries[j].src, packet->vis_orig); + vis_data_insert_interface(entries[j].src, + &vis_if_list, + compare); + } + + hlist_for_each_entry(entry, pos, &vis_if_list, list) { + buff_pos += sprintf(buff + buff_pos, "%pM,", + entry->addr); + + for (i = 0; i < packet->entries; i++) + buff_pos += vis_data_read_entry( + buff + buff_pos, + &entries[i], + entry->addr, + entry->primary); + + /* add primary/secondary records */ + if (compare_orig(entry->addr, packet->vis_orig)) + buff_pos += + vis_data_read_prim_sec(buff + buff_pos, + &vis_if_list); + + buff_pos += sprintf(buff + buff_pos, "\n"); + } + + hlist_for_each_entry_safe(entry, pos, n, &vis_if_list, + list) { + hlist_del(&entry->list); + kfree(entry); + } + } + } + + spin_unlock_bh(&bat_priv->vis_hash_lock); + + seq_printf(seq, "%s", buff); + kfree(buff); + + return 0; +} + +/* add the info packet to the send list, if it was not + * already linked in. */ +static void send_list_add(struct bat_priv *bat_priv, struct vis_info *info) +{ + if (list_empty(&info->send_list)) { + kref_get(&info->refcount); + list_add_tail(&info->send_list, &bat_priv->vis_send_list); + } +} + +/* delete the info packet from the send list, if it was + * linked in. */ +static void send_list_del(struct vis_info *info) +{ + if (!list_empty(&info->send_list)) { + list_del_init(&info->send_list); + kref_put(&info->refcount, free_info); + } +} + +/* tries to add one entry to the receive list. */ +static void recv_list_add(struct bat_priv *bat_priv, + struct list_head *recv_list, char *mac) +{ + struct recvlist_node *entry; + + entry = kmalloc(sizeof(struct recvlist_node), GFP_ATOMIC); + if (!entry) + return; + + memcpy(entry->mac, mac, ETH_ALEN); + spin_lock_bh(&bat_priv->vis_list_lock); + list_add_tail(&entry->list, recv_list); + spin_unlock_bh(&bat_priv->vis_list_lock); +} + +/* returns 1 if this mac is in the recv_list */ +static int recv_list_is_in(struct bat_priv *bat_priv, + struct list_head *recv_list, char *mac) +{ + struct recvlist_node *entry; + + spin_lock_bh(&bat_priv->vis_list_lock); + list_for_each_entry(entry, recv_list, list) { + if (memcmp(entry->mac, mac, ETH_ALEN) == 0) { + spin_unlock_bh(&bat_priv->vis_list_lock); + return 1; + } + } + spin_unlock_bh(&bat_priv->vis_list_lock); + return 0; +} + +/* try to add the packet to the vis_hash. return NULL if invalid (e.g. too old, + * broken.. ). vis hash must be locked outside. is_new is set when the packet + * is newer than old entries in the hash. */ +static struct vis_info *add_packet(struct bat_priv *bat_priv, + struct vis_packet *vis_packet, + int vis_info_len, int *is_new, + int make_broadcast) +{ + struct vis_info *info, *old_info; + struct vis_packet *search_packet, *old_packet; + struct vis_info search_elem; + struct vis_packet *packet; + int hash_added; + + *is_new = 0; + /* sanity check */ + if (!bat_priv->vis_hash) + return NULL; + + /* see if the packet is already in vis_hash */ + search_elem.skb_packet = dev_alloc_skb(sizeof(struct vis_packet)); + if (!search_elem.skb_packet) + return NULL; + search_packet = (struct vis_packet *)skb_put(search_elem.skb_packet, + sizeof(struct vis_packet)); + + memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN); + old_info = hash_find(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, + &search_elem); + kfree_skb(search_elem.skb_packet); + + if (old_info) { + old_packet = (struct vis_packet *)old_info->skb_packet->data; + if (!seq_after(ntohl(vis_packet->seqno), + ntohl(old_packet->seqno))) { + if (old_packet->seqno == vis_packet->seqno) { + recv_list_add(bat_priv, &old_info->recv_list, + vis_packet->sender_orig); + return old_info; + } else { + /* newer packet is already in hash. */ + return NULL; + } + } + /* remove old entry */ + hash_remove(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, + old_info); + send_list_del(old_info); + kref_put(&old_info->refcount, free_info); + } + + info = kmalloc(sizeof(struct vis_info), GFP_ATOMIC); + if (!info) + return NULL; + + info->skb_packet = dev_alloc_skb(sizeof(struct vis_packet) + + vis_info_len + sizeof(struct ethhdr)); + if (!info->skb_packet) { + kfree(info); + return NULL; + } + skb_reserve(info->skb_packet, sizeof(struct ethhdr)); + packet = (struct vis_packet *)skb_put(info->skb_packet, + sizeof(struct vis_packet) + + vis_info_len); + + kref_init(&info->refcount); + INIT_LIST_HEAD(&info->send_list); + INIT_LIST_HEAD(&info->recv_list); + info->first_seen = jiffies; + info->bat_priv = bat_priv; + memcpy(packet, vis_packet, sizeof(struct vis_packet) + vis_info_len); + + /* initialize and add new packet. */ + *is_new = 1; + + /* Make it a broadcast packet, if required */ + if (make_broadcast) + memcpy(packet->target_orig, broadcast_addr, ETH_ALEN); + + /* repair if entries is longer than packet. */ + if (packet->entries * sizeof(struct vis_info_entry) > vis_info_len) + packet->entries = vis_info_len / sizeof(struct vis_info_entry); + + recv_list_add(bat_priv, &info->recv_list, packet->sender_orig); + + /* try to add it */ + hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, + info); + if (hash_added < 0) { + /* did not work (for some reason) */ + kref_put(&old_info->refcount, free_info); + info = NULL; + } + + return info; +} + +/* handle the server sync packet, forward if needed. */ +void receive_server_sync_packet(struct bat_priv *bat_priv, + struct vis_packet *vis_packet, + int vis_info_len) +{ + struct vis_info *info; + int is_new, make_broadcast; + int vis_server = atomic_read(&bat_priv->vis_mode); + + make_broadcast = (vis_server == VIS_TYPE_SERVER_SYNC); + + spin_lock_bh(&bat_priv->vis_hash_lock); + info = add_packet(bat_priv, vis_packet, vis_info_len, + &is_new, make_broadcast); + if (!info) + goto end; + + /* only if we are server ourselves and packet is newer than the one in + * hash.*/ + if (vis_server == VIS_TYPE_SERVER_SYNC && is_new) + send_list_add(bat_priv, info); +end: + spin_unlock_bh(&bat_priv->vis_hash_lock); +} + +/* handle an incoming client update packet and schedule forward if needed. */ +void receive_client_update_packet(struct bat_priv *bat_priv, + struct vis_packet *vis_packet, + int vis_info_len) +{ + struct vis_info *info; + struct vis_packet *packet; + int is_new; + int vis_server = atomic_read(&bat_priv->vis_mode); + int are_target = 0; + + /* clients shall not broadcast. */ + if (is_broadcast_ether_addr(vis_packet->target_orig)) + return; + + /* Are we the target for this VIS packet? */ + if (vis_server == VIS_TYPE_SERVER_SYNC && + is_my_mac(vis_packet->target_orig)) + are_target = 1; + + spin_lock_bh(&bat_priv->vis_hash_lock); + info = add_packet(bat_priv, vis_packet, vis_info_len, + &is_new, are_target); + + if (!info) + goto end; + /* note that outdated packets will be dropped at this point. */ + + packet = (struct vis_packet *)info->skb_packet->data; + + /* send only if we're the target server or ... */ + if (are_target && is_new) { + packet->vis_type = VIS_TYPE_SERVER_SYNC; /* upgrade! */ + send_list_add(bat_priv, info); + + /* ... we're not the recipient (and thus need to forward). */ + } else if (!is_my_mac(packet->target_orig)) { + send_list_add(bat_priv, info); + } + +end: + spin_unlock_bh(&bat_priv->vis_hash_lock); +} + +/* Walk the originators and find the VIS server with the best tq. Set the packet + * address to its address and return the best_tq. + * + * Must be called with the originator hash locked */ +static int find_best_vis_server(struct bat_priv *bat_priv, + struct vis_info *info) +{ + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + struct orig_node *orig_node; + struct vis_packet *packet; + int best_tq = -1, i; + + packet = (struct vis_packet *)info->skb_packet->data; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + orig_node = bucket->data; + if ((orig_node) && (orig_node->router) && + (orig_node->flags & VIS_SERVER) && + (orig_node->router->tq_avg > best_tq)) { + best_tq = orig_node->router->tq_avg; + memcpy(packet->target_orig, orig_node->orig, + ETH_ALEN); + } + } + } + + return best_tq; +} + +/* Return true if the vis packet is full. */ +static bool vis_packet_full(struct vis_info *info) +{ + struct vis_packet *packet; + packet = (struct vis_packet *)info->skb_packet->data; + + if (MAX_VIS_PACKET_SIZE / sizeof(struct vis_info_entry) + < packet->entries + 1) + return true; + return false; +} + +/* generates a packet of own vis data, + * returns 0 on success, -1 if no packet could be generated */ +static int generate_vis_packet(struct bat_priv *bat_priv) +{ + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + struct orig_node *orig_node; + struct neigh_node *neigh_node; + struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info; + struct vis_packet *packet = (struct vis_packet *)info->skb_packet->data; + struct vis_info_entry *entry; + struct hna_local_entry *hna_local_entry; + int best_tq = -1, i; + + info->first_seen = jiffies; + packet->vis_type = atomic_read(&bat_priv->vis_mode); + + spin_lock_bh(&bat_priv->orig_hash_lock); + memcpy(packet->target_orig, broadcast_addr, ETH_ALEN); + packet->ttl = TTL; + packet->seqno = htonl(ntohl(packet->seqno) + 1); + packet->entries = 0; + skb_trim(info->skb_packet, sizeof(struct vis_packet)); + + if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) { + best_tq = find_best_vis_server(bat_priv, info); + + if (best_tq < 0) { + spin_unlock_bh(&bat_priv->orig_hash_lock); + return -1; + } + } + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + orig_node = bucket->data; + neigh_node = orig_node->router; + + if (!neigh_node) + continue; + + if (!compare_orig(neigh_node->addr, orig_node->orig)) + continue; + + if (neigh_node->if_incoming->if_status != IF_ACTIVE) + continue; + + if (neigh_node->tq_avg < 1) + continue; + + /* fill one entry into buffer. */ + entry = (struct vis_info_entry *) + skb_put(info->skb_packet, sizeof(*entry)); + memcpy(entry->src, + neigh_node->if_incoming->net_dev->dev_addr, + ETH_ALEN); + memcpy(entry->dest, orig_node->orig, ETH_ALEN); + entry->quality = neigh_node->tq_avg; + packet->entries++; + + if (vis_packet_full(info)) { + spin_unlock_bh(&bat_priv->orig_hash_lock); + return 0; + } + } + } + + spin_unlock_bh(&bat_priv->orig_hash_lock); + + hash = bat_priv->hna_local_hash; + + spin_lock_bh(&bat_priv->hna_lhash_lock); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + hna_local_entry = bucket->data; + entry = (struct vis_info_entry *) + skb_put(info->skb_packet, + sizeof(*entry)); + memset(entry->src, 0, ETH_ALEN); + memcpy(entry->dest, hna_local_entry->addr, ETH_ALEN); + entry->quality = 0; /* 0 means HNA */ + packet->entries++; + + if (vis_packet_full(info)) { + spin_unlock_bh(&bat_priv->hna_lhash_lock); + return 0; + } + } + } + + spin_unlock_bh(&bat_priv->hna_lhash_lock); + return 0; +} + +/* free old vis packets. Must be called with this vis_hash_lock + * held */ +static void purge_vis_packets(struct bat_priv *bat_priv) +{ + int i; + struct hashtable_t *hash = bat_priv->vis_hash; + struct hlist_node *walk, *safe; + struct hlist_head *head; + struct element_t *bucket; + struct vis_info *info; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { + info = bucket->data; + + /* never purge own data. */ + if (info == bat_priv->my_vis_info) + continue; + + if (time_after(jiffies, + info->first_seen + VIS_TIMEOUT * HZ)) { + hlist_del(walk); + kfree(bucket); + send_list_del(info); + kref_put(&info->refcount, free_info); + } + } + } +} + +static void broadcast_vis_packet(struct bat_priv *bat_priv, + struct vis_info *info) +{ + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk; + struct hlist_head *head; + struct element_t *bucket; + struct orig_node *orig_node; + struct vis_packet *packet; + struct sk_buff *skb; + struct batman_if *batman_if; + uint8_t dstaddr[ETH_ALEN]; + int i; + + + spin_lock_bh(&bat_priv->orig_hash_lock); + packet = (struct vis_packet *)info->skb_packet->data; + + /* send to all routers in range. */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry(bucket, walk, head, hlist) { + orig_node = bucket->data; + + /* if it's a vis server and reachable, send it. */ + if ((!orig_node) || (!orig_node->router)) + continue; + if (!(orig_node->flags & VIS_SERVER)) + continue; + /* don't send it if we already received the packet from + * this node. */ + if (recv_list_is_in(bat_priv, &info->recv_list, + orig_node->orig)) + continue; + + memcpy(packet->target_orig, orig_node->orig, ETH_ALEN); + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); + + skb = skb_clone(info->skb_packet, GFP_ATOMIC); + if (skb) + send_skb_packet(skb, batman_if, dstaddr); + + spin_lock_bh(&bat_priv->orig_hash_lock); + } + + } + + spin_unlock_bh(&bat_priv->orig_hash_lock); +} + +static void unicast_vis_packet(struct bat_priv *bat_priv, + struct vis_info *info) +{ + struct orig_node *orig_node; + struct sk_buff *skb; + struct vis_packet *packet; + struct batman_if *batman_if; + uint8_t dstaddr[ETH_ALEN]; + + spin_lock_bh(&bat_priv->orig_hash_lock); + packet = (struct vis_packet *)info->skb_packet->data; + orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, + compare_orig, choose_orig, + packet->target_orig)); + + if ((!orig_node) || (!orig_node->router)) + goto out; + + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); + + skb = skb_clone(info->skb_packet, GFP_ATOMIC); + if (skb) + send_skb_packet(skb, batman_if, dstaddr); + + return; + +out: + spin_unlock_bh(&bat_priv->orig_hash_lock); +} + +/* only send one vis packet. called from send_vis_packets() */ +static void send_vis_packet(struct bat_priv *bat_priv, struct vis_info *info) +{ + struct vis_packet *packet; + + packet = (struct vis_packet *)info->skb_packet->data; + if (packet->ttl < 2) { + pr_debug("Error - can't send vis packet: ttl exceeded\n"); + return; + } + + memcpy(packet->sender_orig, bat_priv->primary_if->net_dev->dev_addr, + ETH_ALEN); + packet->ttl--; + + if (is_broadcast_ether_addr(packet->target_orig)) + broadcast_vis_packet(bat_priv, info); + else + unicast_vis_packet(bat_priv, info); + packet->ttl++; /* restore TTL */ +} + +/* called from timer; send (and maybe generate) vis packet. */ +static void send_vis_packets(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct bat_priv *bat_priv = + container_of(delayed_work, struct bat_priv, vis_work); + struct vis_info *info, *temp; + + spin_lock_bh(&bat_priv->vis_hash_lock); + purge_vis_packets(bat_priv); + + if (generate_vis_packet(bat_priv) == 0) { + /* schedule if generation was successful */ + send_list_add(bat_priv, bat_priv->my_vis_info); + } + + list_for_each_entry_safe(info, temp, &bat_priv->vis_send_list, + send_list) { + + kref_get(&info->refcount); + spin_unlock_bh(&bat_priv->vis_hash_lock); + + if (bat_priv->primary_if) + send_vis_packet(bat_priv, info); + + spin_lock_bh(&bat_priv->vis_hash_lock); + send_list_del(info); + kref_put(&info->refcount, free_info); + } + spin_unlock_bh(&bat_priv->vis_hash_lock); + start_vis_timer(bat_priv); +} + +/* init the vis server. this may only be called when if_list is already + * initialized (e.g. bat0 is initialized, interfaces have been added) */ +int vis_init(struct bat_priv *bat_priv) +{ + struct vis_packet *packet; + int hash_added; + + if (bat_priv->vis_hash) + return 1; + + spin_lock_bh(&bat_priv->vis_hash_lock); + + bat_priv->vis_hash = hash_new(256); + if (!bat_priv->vis_hash) { + pr_err("Can't initialize vis_hash\n"); + goto err; + } + + bat_priv->my_vis_info = kmalloc(MAX_VIS_PACKET_SIZE, GFP_ATOMIC); + if (!bat_priv->my_vis_info) { + pr_err("Can't initialize vis packet\n"); + goto err; + } + + bat_priv->my_vis_info->skb_packet = dev_alloc_skb( + sizeof(struct vis_packet) + + MAX_VIS_PACKET_SIZE + + sizeof(struct ethhdr)); + if (!bat_priv->my_vis_info->skb_packet) + goto free_info; + + skb_reserve(bat_priv->my_vis_info->skb_packet, sizeof(struct ethhdr)); + packet = (struct vis_packet *)skb_put( + bat_priv->my_vis_info->skb_packet, + sizeof(struct vis_packet)); + + /* prefill the vis info */ + bat_priv->my_vis_info->first_seen = jiffies - + msecs_to_jiffies(VIS_INTERVAL); + INIT_LIST_HEAD(&bat_priv->my_vis_info->recv_list); + INIT_LIST_HEAD(&bat_priv->my_vis_info->send_list); + kref_init(&bat_priv->my_vis_info->refcount); + bat_priv->my_vis_info->bat_priv = bat_priv; + packet->version = COMPAT_VERSION; + packet->packet_type = BAT_VIS; + packet->ttl = TTL; + packet->seqno = 0; + packet->entries = 0; + + INIT_LIST_HEAD(&bat_priv->vis_send_list); + + hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, + bat_priv->my_vis_info); + if (hash_added < 0) { + pr_err("Can't add own vis packet into hash\n"); + /* not in hash, need to remove it manually. */ + kref_put(&bat_priv->my_vis_info->refcount, free_info); + goto err; + } + + spin_unlock_bh(&bat_priv->vis_hash_lock); + start_vis_timer(bat_priv); + return 1; + +free_info: + kfree(bat_priv->my_vis_info); + bat_priv->my_vis_info = NULL; +err: + spin_unlock_bh(&bat_priv->vis_hash_lock); + vis_quit(bat_priv); + return 0; +} + +/* Decrease the reference count on a hash item info */ +static void free_info_ref(void *data, void *arg) +{ + struct vis_info *info = data; + + send_list_del(info); + kref_put(&info->refcount, free_info); +} + +/* shutdown vis-server */ +void vis_quit(struct bat_priv *bat_priv) +{ + if (!bat_priv->vis_hash) + return; + + cancel_delayed_work_sync(&bat_priv->vis_work); + + spin_lock_bh(&bat_priv->vis_hash_lock); + /* properly remove, kill timers ... */ + hash_delete(bat_priv->vis_hash, free_info_ref, NULL); + bat_priv->vis_hash = NULL; + bat_priv->my_vis_info = NULL; + spin_unlock_bh(&bat_priv->vis_hash_lock); +} + +/* schedule packets for (re)transmission */ +static void start_vis_timer(struct bat_priv *bat_priv) +{ + INIT_DELAYED_WORK(&bat_priv->vis_work, send_vis_packets); + queue_delayed_work(bat_event_workqueue, &bat_priv->vis_work, + msecs_to_jiffies(VIS_INTERVAL)); +} diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h new file mode 100644 index 000000000000..2c3b33089a9b --- /dev/null +++ b/net/batman-adv/vis.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: + * + * Simon Wunderlich, Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_VIS_H_ +#define _NET_BATMAN_ADV_VIS_H_ + +#define VIS_TIMEOUT 200 /* timeout of vis packets in seconds */ + +int vis_seq_print_text(struct seq_file *seq, void *offset); +void receive_server_sync_packet(struct bat_priv *bat_priv, + struct vis_packet *vis_packet, + int vis_info_len); +void receive_client_update_packet(struct bat_priv *bat_priv, + struct vis_packet *vis_packet, + int vis_info_len); +int vis_init(struct bat_priv *bat_priv); +void vis_quit(struct bat_priv *bat_priv); + +#endif /* _NET_BATMAN_ADV_VIS_H_ */ diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index d1e433f7d673..250f954f0213 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -10,4 +10,4 @@ obj-$(CONFIG_BT_BNEP) += bnep/ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ -bluetooth-objs := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o +bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index f10b41fb05a0..5868597534e5 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -648,6 +648,7 @@ int bnep_del_connection(struct bnep_conndel_req *req) static void __bnep_copy_ci(struct bnep_conninfo *ci, struct bnep_session *s) { + memset(ci, 0, sizeof(*ci)); memcpy(ci->dst, s->eh.h_source, ETH_ALEN); strcpy(ci->device, s->dev->name); ci->flags = s->flags; diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index ec0a1347f933..8e5f292529ac 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -78,6 +78,7 @@ static void __cmtp_unlink_session(struct cmtp_session *session) static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci) { + memset(ci, 0, sizeof(*ci)); bacpy(&ci->bdaddr, &session->bdaddr); ci->flags = session->flags; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 0b1e460fe440..6b90a4191734 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -39,7 +39,7 @@ #include <net/sock.h> #include <asm/system.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -66,7 +66,8 @@ void hci_acl_connect(struct hci_conn *conn) bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; - if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) { + ie = hci_inquiry_cache_lookup(hdev, &conn->dst); + if (ie) { if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { cp.pscan_rep_mode = ie->data.pscan_rep_mode; cp.pscan_mode = ie->data.pscan_mode; @@ -368,8 +369,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 BT_DBG("%s dst %s", hdev->name, batostr(dst)); - if (!(acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst))) { - if (!(acl = hci_conn_add(hdev, ACL_LINK, dst))) + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); + if (!acl) { + acl = hci_conn_add(hdev, ACL_LINK, dst); + if (!acl) return NULL; } @@ -389,8 +392,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (type == ACL_LINK) return acl; - if (!(sco = hci_conn_hash_lookup_ba(hdev, type, dst))) { - if (!(sco = hci_conn_add(hdev, type, dst))) { + sco = hci_conn_hash_lookup_ba(hdev, type, dst); + if (!sco) { + sco = hci_conn_add(hdev, type, dst); + if (!sco) { hci_conn_put(acl); return NULL; } @@ -647,10 +652,12 @@ int hci_get_conn_list(void __user *arg) size = sizeof(req) + req.conn_num * sizeof(*ci); - if (!(cl = kmalloc(size, GFP_KERNEL))) + cl = kmalloc(size, GFP_KERNEL); + if (!cl) return -ENOMEM; - if (!(hdev = hci_dev_get(req.dev_id))) { + hdev = hci_dev_get(req.dev_id); + if (!hdev) { kfree(cl); return -ENODEV; } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc2a052e518b..8b602d881fd7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -44,7 +44,7 @@ #include <net/sock.h> #include <asm/system.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -91,9 +91,16 @@ static void hci_notify(struct hci_dev *hdev, int event) /* ---- HCI requests ---- */ -void hci_req_complete(struct hci_dev *hdev, int result) +void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) { - BT_DBG("%s result 0x%2.2x", hdev->name, result); + BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result); + + /* If the request has set req_last_cmd (typical for multi-HCI + * command requests) check if the completed command matches + * this, and if not just return. Single HCI command requests + * typically leave req_last_cmd as 0 */ + if (hdev->req_last_cmd && cmd != hdev->req_last_cmd) + return; if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; @@ -149,7 +156,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, break; } - hdev->req_status = hdev->req_result = 0; + hdev->req_last_cmd = hdev->req_status = hdev->req_result = 0; BT_DBG("%s end: err %d", hdev->name, err); @@ -252,6 +259,8 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Connection accept timeout ~20 secs */ param = cpu_to_le16(0x7d00); hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); + + hdev->req_last_cmd = HCI_OP_WRITE_CA_TIMEOUT; } static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) @@ -349,20 +358,23 @@ struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *b void hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data) { struct inquiry_cache *cache = &hdev->inq_cache; - struct inquiry_entry *e; + struct inquiry_entry *ie; BT_DBG("cache %p, %s", cache, batostr(&data->bdaddr)); - if (!(e = hci_inquiry_cache_lookup(hdev, &data->bdaddr))) { + ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr); + if (!ie) { /* Entry not in the cache. Add new one. */ - if (!(e = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC))) + ie = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC); + if (!ie) return; - e->next = cache->list; - cache->list = e; + + ie->next = cache->list; + cache->list = ie; } - memcpy(&e->data, data, sizeof(*data)); - e->timestamp = jiffies; + memcpy(&ie->data, data, sizeof(*data)); + ie->timestamp = jiffies; cache->timestamp = jiffies; } @@ -422,16 +434,20 @@ int hci_inquiry(void __user *arg) hci_dev_lock_bh(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || - inquiry_cache_empty(hdev) || - ir.flags & IREQ_CACHE_FLUSH) { + inquiry_cache_empty(hdev) || + ir.flags & IREQ_CACHE_FLUSH) { inquiry_cache_flush(hdev); do_inquiry = 1; } hci_dev_unlock_bh(hdev); timeo = ir.length * msecs_to_jiffies(2000); - if (do_inquiry && (err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo)) < 0) - goto done; + + if (do_inquiry) { + err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo); + if (err < 0) + goto done; + } /* for unlimited number of responses we will use buffer with 255 entries */ max_rsp = (ir.num_rsp == 0) ? 255 : ir.num_rsp; @@ -439,7 +455,8 @@ int hci_inquiry(void __user *arg) /* cache_dump can't sleep. Therefore we allocate temp buffer and then * copy it to the user space. */ - if (!(buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL))) { + buf = kmalloc(sizeof(struct inquiry_info) *max_rsp, GFP_KERNEL); + if (!buf) { err = -ENOMEM; goto done; } @@ -611,7 +628,8 @@ int hci_dev_close(__u16 dev) struct hci_dev *hdev; int err; - if (!(hdev = hci_dev_get(dev))) + hdev = hci_dev_get(dev); + if (!hdev) return -ENODEV; err = hci_dev_do_close(hdev); hci_dev_put(hdev); @@ -623,7 +641,8 @@ int hci_dev_reset(__u16 dev) struct hci_dev *hdev; int ret = 0; - if (!(hdev = hci_dev_get(dev))) + hdev = hci_dev_get(dev); + if (!hdev) return -ENODEV; hci_req_lock(hdev); @@ -663,7 +682,8 @@ int hci_dev_reset_stat(__u16 dev) struct hci_dev *hdev; int ret = 0; - if (!(hdev = hci_dev_get(dev))) + hdev = hci_dev_get(dev); + if (!hdev) return -ENODEV; memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); @@ -682,7 +702,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (copy_from_user(&dr, arg, sizeof(dr))) return -EFAULT; - if (!(hdev = hci_dev_get(dr.dev_id))) + hdev = hci_dev_get(dr.dev_id); + if (!hdev) return -ENODEV; switch (cmd) { @@ -763,7 +784,8 @@ int hci_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*dr); - if (!(dl = kzalloc(size, GFP_KERNEL))) + dl = kzalloc(size, GFP_KERNEL); + if (!dl) return -ENOMEM; dr = dl->dev_req; @@ -797,7 +819,8 @@ int hci_get_dev_info(void __user *arg) if (copy_from_user(&di, arg, sizeof(di))) return -EFAULT; - if (!(hdev = hci_dev_get(di.dev_id))) + hdev = hci_dev_get(di.dev_id); + if (!hdev) return -ENODEV; strcpy(di.name, hdev->name); @@ -905,7 +928,7 @@ int hci_register_dev(struct hci_dev *hdev) hdev->sniff_max_interval = 800; hdev->sniff_min_interval = 80; - tasklet_init(&hdev->cmd_task, hci_cmd_task,(unsigned long) hdev); + tasklet_init(&hdev->cmd_task, hci_cmd_task, (unsigned long) hdev); tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev); tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev); @@ -946,6 +969,7 @@ int hci_register_dev(struct hci_dev *hdev) } } + mgmt_index_added(hdev->id); hci_notify(hdev, HCI_DEV_REG); return id; @@ -975,6 +999,7 @@ int hci_unregister_dev(struct hci_dev *hdev) for (i = 0; i < NUM_REASSEMBLY; i++) kfree_skb(hdev->reassembly[i]); + mgmt_index_removed(hdev->id); hci_notify(hdev, HCI_DEV_UNREG); if (hdev->rfkill) { @@ -1368,7 +1393,8 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); - if (!(list = skb_shinfo(skb)->frag_list)) { + list = skb_shinfo(skb)->frag_list; + if (!list) { /* Non fragmented */ BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len); @@ -1609,7 +1635,8 @@ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_enter_active_mode(conn); /* Send to upper protocol */ - if ((hp = hci_proto[HCI_PROTO_L2CAP]) && hp->recv_acldata) { + hp = hci_proto[HCI_PROTO_L2CAP]; + if (hp && hp->recv_acldata) { hp->recv_acldata(conn, skb, flags); return; } @@ -1644,7 +1671,8 @@ static inline void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) register struct hci_proto *hp; /* Send to upper protocol */ - if ((hp = hci_proto[HCI_PROTO_SCO]) && hp->recv_scodata) { + hp = hci_proto[HCI_PROTO_SCO]; + if (hp && hp->recv_scodata) { hp->recv_scodata(conn, skb); return; } @@ -1727,7 +1755,8 @@ static void hci_cmd_task(unsigned long arg) if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) { kfree_skb(hdev->sent_cmd); - if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) { + hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC); + if (hdev->sent_cmd) { atomic_dec(&hdev->cmd_cnt); hci_send_frame(skb); hdev->cmd_last_tx = jiffies; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index bfef5bae0b3a..38100170d380 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -39,7 +39,7 @@ #include <net/sock.h> #include <asm/system.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -58,7 +58,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_INQUIRY, &hdev->flags); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); hci_conn_check_pending(hdev); } @@ -174,7 +174,7 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev, struct sk_buff *s if (!status) hdev->link_policy = get_unaligned_le16(sent); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status); } static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) @@ -183,7 +183,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_RESET, status); } static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -235,7 +235,7 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_AUTH, &hdev->flags); } - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status); } static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -258,7 +258,7 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_ENCRYPT, &hdev->flags); } - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status); } static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) @@ -285,7 +285,7 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) set_bit(HCI_PSCAN, &hdev->flags); } - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); } static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) @@ -383,7 +383,7 @@ static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status); } static void hci_cc_read_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -536,7 +536,16 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) if (!rp->status) bacpy(&hdev->bdaddr, &rp->bdaddr); - hci_req_complete(hdev, rp->status); + hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); +} + +static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) @@ -544,7 +553,7 @@ static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) BT_DBG("%s status 0x%x", hdev->name, status); if (status) { - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); } else @@ -677,9 +686,50 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) hci_dev_unlock(hdev); } +static int hci_outgoing_auth_needed(struct hci_dev *hdev, + struct hci_conn *conn) +{ + if (conn->state != BT_CONFIG || !conn->out) + return 0; + + if (conn->sec_level == BT_SECURITY_SDP) + return 0; + + /* Only request authentication for SSP connections or non-SSP + * devices with sec_level HIGH */ + if (!(hdev->ssp_mode > 0 && conn->ssp_mode > 0) && + conn->sec_level != BT_SECURITY_HIGH) + return 0; + + return 1; +} + static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) { + struct hci_cp_remote_name_req *cp; + struct hci_conn *conn; + BT_DBG("%s status 0x%x", hdev->name, status); + + /* If successful wait for the name req complete event before + * checking for the need to do authentication */ + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_REMOTE_NAME_REQ); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); + if (conn && hci_outgoing_auth_needed(hdev, conn)) { + struct hci_cp_auth_requested cp; + cp.handle = __cpu_to_le16(conn->handle); + hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); + } + + hci_dev_unlock(hdev); } static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) @@ -830,7 +880,7 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff clear_bit(HCI_INQUIRY, &hdev->flags); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); } @@ -955,12 +1005,14 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk hci_dev_lock(hdev); - if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); + if (ie) memcpy(ie->data.dev_class, ev->dev_class, 3); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { - if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { + conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr); + if (!conn) { BT_ERR("No memory for new connection"); hci_dev_unlock(hdev); return; @@ -1090,9 +1142,23 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s static inline void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_remote_name *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); hci_conn_check_pending(hdev); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn && hci_outgoing_auth_needed(hdev, conn)) { + struct hci_cp_auth_requested cp; + cp.handle = __cpu_to_le16(conn->handle); + hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); + } + + hci_dev_unlock(hdev); } static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1162,27 +1228,39 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) { - if (!ev->status) - memcpy(conn->features, ev->features, 8); + if (!conn) + goto unlock; - if (conn->state == BT_CONFIG) { - if (!ev->status && lmp_ssp_capable(hdev) && - lmp_ssp_capable(conn)) { - struct hci_cp_read_remote_ext_features cp; - cp.handle = ev->handle; - cp.page = 0x01; - hci_send_cmd(hdev, - HCI_OP_READ_REMOTE_EXT_FEATURES, + if (!ev->status) + memcpy(conn->features, ev->features, 8); + + if (conn->state != BT_CONFIG) + goto unlock; + + if (!ev->status && lmp_ssp_capable(hdev) && lmp_ssp_capable(conn)) { + struct hci_cp_read_remote_ext_features cp; + cp.handle = ev->handle; + cp.page = 0x01; + hci_send_cmd(hdev, HCI_OP_READ_REMOTE_EXT_FEATURES, sizeof(cp), &cp); - } else { - conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); - } - } + goto unlock; + } + + if (!ev->status) { + struct hci_cp_remote_name_req cp; + memset(&cp, 0, sizeof(cp)); + bacpy(&cp.bdaddr, &conn->dst); + cp.pscan_rep_mode = 0x02; + hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp); + } + + if (!hci_outgoing_auth_needed(hdev, conn)) { + conn->state = BT_CONNECTED; + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); } +unlock: hci_dev_unlock(hdev); } @@ -1310,6 +1388,10 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_read_bd_addr(hdev, skb); break; + case HCI_OP_WRITE_CA_TIMEOUT: + hci_cc_write_ca_timeout(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; @@ -1443,10 +1525,12 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s conn->sent -= count; if (conn->type == ACL_LINK) { - if ((hdev->acl_cnt += count) > hdev->acl_pkts) + hdev->acl_cnt += count; + if (hdev->acl_cnt > hdev->acl_pkts) hdev->acl_cnt = hdev->acl_pkts; } else { - if ((hdev->sco_cnt += count) > hdev->sco_pkts) + hdev->sco_cnt += count; + if (hdev->sco_cnt > hdev->sco_pkts) hdev->sco_cnt = hdev->sco_pkts; } } @@ -1541,7 +1625,8 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk if (conn && !ev->status) { struct inquiry_entry *ie; - if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) { + ie = hci_inquiry_cache_lookup(hdev, &conn->dst); + if (ie) { ie->data.clock_offset = ev->clock_offset; ie->timestamp = jiffies; } @@ -1575,7 +1660,8 @@ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff * hci_dev_lock(hdev); - if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) { + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); + if (ie) { ie->data.pscan_rep_mode = ev->pscan_rep_mode; ie->timestamp = jiffies; } @@ -1640,32 +1726,37 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) { - if (!ev->status && ev->page == 0x01) { - struct inquiry_entry *ie; + if (!conn) + goto unlock; - if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) - ie->data.ssp_mode = (ev->features[0] & 0x01); + if (!ev->status && ev->page == 0x01) { + struct inquiry_entry *ie; - conn->ssp_mode = (ev->features[0] & 0x01); - } + ie = hci_inquiry_cache_lookup(hdev, &conn->dst); + if (ie) + ie->data.ssp_mode = (ev->features[0] & 0x01); - if (conn->state == BT_CONFIG) { - if (!ev->status && hdev->ssp_mode > 0 && - conn->ssp_mode > 0 && conn->out && - conn->sec_level != BT_SECURITY_SDP) { - struct hci_cp_auth_requested cp; - cp.handle = ev->handle; - hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, - sizeof(cp), &cp); - } else { - conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); - } - } + conn->ssp_mode = (ev->features[0] & 0x01); } + if (conn->state != BT_CONFIG) + goto unlock; + + if (!ev->status) { + struct hci_cp_remote_name_req cp; + memset(&cp, 0, sizeof(cp)); + bacpy(&cp.bdaddr, &conn->dst); + cp.pscan_rep_mode = 0x02; + hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp); + } + + if (!hci_outgoing_auth_needed(hdev, conn)) { + conn->state = BT_CONNECTED; + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } + +unlock: hci_dev_unlock(hdev); } @@ -1815,7 +1906,8 @@ static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_ hci_dev_lock(hdev); - if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); + if (ie) ie->data.ssp_mode = (ev->features[0] & 0x01); hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 83acd164d39e..29827c77f6ce 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -43,12 +43,14 @@ #include <net/sock.h> #include <asm/system.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +static int enable_mgmt; + /* ----- HCI socket interface ----- */ static inline int hci_test_bit(int nr, void *addr) @@ -102,6 +104,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (skb->sk == sk) continue; + if (bt_cb(skb)->channel != hci_pi(sk)->channel) + continue; + + if (bt_cb(skb)->channel == HCI_CHANNEL_CONTROL) + goto clone; + /* Apply filter */ flt = &hci_pi(sk)->filter; @@ -125,11 +133,14 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; } - if (!(nskb = skb_clone(skb, GFP_ATOMIC))) +clone: + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) continue; /* Put type byte before the data */ - memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1); + if (bt_cb(skb)->channel == HCI_CHANNEL_RAW) + memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1); if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); @@ -352,25 +363,39 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long a static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { - struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr; + struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; - int err = 0; + int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); - if (!haddr || haddr->hci_family != AF_BLUETOOTH) + if (!addr) + return -EINVAL; + + memset(&haddr, 0, sizeof(haddr)); + len = min_t(unsigned int, sizeof(haddr), addr_len); + memcpy(&haddr, addr, len); + + if (haddr.hci_family != AF_BLUETOOTH) + return -EINVAL; + + if (haddr.hci_channel > HCI_CHANNEL_CONTROL) + return -EINVAL; + + if (haddr.hci_channel == HCI_CHANNEL_CONTROL && !enable_mgmt) return -EINVAL; lock_sock(sk); - if (hci_pi(sk)->hdev) { + if (sk->sk_state == BT_BOUND || hci_pi(sk)->hdev) { err = -EALREADY; goto done; } - if (haddr->hci_dev != HCI_DEV_NONE) { - if (!(hdev = hci_dev_get(haddr->hci_dev))) { + if (haddr.hci_dev != HCI_DEV_NONE) { + hdev = hci_dev_get(haddr.hci_dev); + if (!hdev) { err = -ENODEV; goto done; } @@ -378,6 +403,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le atomic_inc(&hdev->promisc); } + hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; sk->sk_state = BT_BOUND; @@ -457,7 +483,8 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state == BT_CLOSED) return 0; - if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) return err; msg->msg_namelen = 0; @@ -499,7 +526,19 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, lock_sock(sk); - if (!(hdev = hci_pi(sk)->hdev)) { + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: + break; + case HCI_CHANNEL_CONTROL: + err = mgmt_control(sk, msg, len); + goto done; + default: + err = -EINVAL; + goto done; + } + + hdev = hci_pi(sk)->hdev; + if (!hdev) { err = -EBADFD; goto done; } @@ -509,7 +548,8 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto done; } - if (!(skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err))) + skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) goto done; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { @@ -826,3 +866,6 @@ void __exit hci_sock_cleanup(void) proto_unregister(&hci_sk_proto); } + +module_param(enable_mgmt, bool, 0644); +MODULE_PARM_DESC(enable_mgmt, "Enable Management interface"); diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig index 98fdfa1fbddd..86a91543172a 100644 --- a/net/bluetooth/hidp/Kconfig +++ b/net/bluetooth/hidp/Kconfig @@ -1,6 +1,6 @@ config BT_HIDP tristate "HIDP protocol support" - depends on BT && BT_L2CAP && INPUT + depends on BT && BT_L2CAP && INPUT && HID_SUPPORT select HID help HIDP (Human Interface Device Protocol) is a transport layer diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index c0ee8b3928ed..29544c21f4b5 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -107,6 +107,7 @@ static void __hidp_unlink_session(struct hidp_session *session) static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci) { + memset(ci, 0, sizeof(*ci)); bacpy(&ci->bdaddr, &session->bdaddr); ci->flags = session->flags; @@ -115,7 +116,6 @@ static void __hidp_copy_session(struct hidp_session *session, struct hidp_connin ci->vendor = 0x0000; ci->product = 0x0000; ci->version = 0x0000; - memset(ci->name, 0, 128); if (session->input) { ci->vendor = session->input->id.vendor; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index daa7a988d9a6..c791fcda7b2d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -57,7 +57,7 @@ #define VERSION "2.15" -static int disable_ertm = 0; +static int disable_ertm; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; @@ -83,6 +83,18 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); /* ---- L2CAP timers ---- */ +static void l2cap_sock_set_timer(struct sock *sk, long timeout) +{ + BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); + sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); +} + +static void l2cap_sock_clear_timer(struct sock *sk) +{ + BT_DBG("sock %p state %d", sk, sk->sk_state); + sk_stop_timer(sk, &sk->sk_timer); +} + static void l2cap_sock_timeout(unsigned long arg) { struct sock *sk = (struct sock *) arg; @@ -92,6 +104,14 @@ static void l2cap_sock_timeout(unsigned long arg) bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* sk is owned by user. Try again later */ + l2cap_sock_set_timer(sk, HZ / 5); + bh_unlock_sock(sk); + sock_put(sk); + return; + } + if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG) reason = ECONNREFUSED; else if (sk->sk_state == BT_CONNECT && @@ -108,18 +128,6 @@ static void l2cap_sock_timeout(unsigned long arg) sock_put(sk); } -static void l2cap_sock_set_timer(struct sock *sk, long timeout) -{ - BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); -} - -static void l2cap_sock_clear_timer(struct sock *sk) -{ - BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); -} - /* ---- L2CAP channels ---- */ static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) { @@ -743,11 +751,13 @@ found: /* Find socket with psm and source bdaddr. * Returns closest match. */ -static struct sock *__l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) +static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; struct hlist_node *node; + read_lock(&l2cap_sk_list.lock); + sk_for_each(sk, node, &l2cap_sk_list.head) { if (state && sk->sk_state != state) continue; @@ -762,20 +772,10 @@ static struct sock *__l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src sk1 = sk; } } - return node ? sk : sk1; -} -/* Find socket with given address (psm, src). - * Returns locked socket */ -static inline struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) -{ - struct sock *s; - read_lock(&l2cap_sk_list.lock); - s = __l2cap_get_sock_by_psm(state, psm, src); - if (s) - bh_lock_sock(s); read_unlock(&l2cap_sk_list.lock); - return s; + + return node ? sk : sk1; } static void l2cap_sock_destruct(struct sock *sk) @@ -2421,11 +2421,11 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned break; case 2: - *val = __le16_to_cpu(*((__le16 *) opt->val)); + *val = get_unaligned_le16(opt->val); break; case 4: - *val = __le32_to_cpu(*((__le32 *) opt->val)); + *val = get_unaligned_le32(opt->val); break; default: @@ -2452,11 +2452,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) break; case 2: - *((__le16 *) opt->val) = cpu_to_le16(val); + put_unaligned_le16(val, opt->val); break; case 4: - *((__le32 *) opt->val) = cpu_to_le32(val); + put_unaligned_le32(val, opt->val); break; default: @@ -2926,6 +2926,8 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto sendresp; } + bh_lock_sock(parent); + /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(0x0001) && !hci_conn_check_link_mode(conn->hcon)) { @@ -3078,6 +3080,14 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd break; default: + /* don't delete l2cap channel if sk is owned by user */ + if (sock_owned_by_user(sk)) { + sk->sk_state = BT_DISCONN; + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 5); + break; + } + l2cap_chan_del(sk, ECONNREFUSED); break; } @@ -3114,8 +3124,14 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (!sk) return -ENOENT; - if (sk->sk_state == BT_DISCONN) + if (sk->sk_state != BT_CONFIG) { + struct l2cap_cmd_rej rej; + + rej.reason = cpu_to_le16(0x0002); + l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, + sizeof(rej), &rej); goto unlock; + } /* Reject if config buffer is too small. */ len = cmd_len - sizeof(*req); @@ -3283,6 +3299,15 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd sk->sk_shutdown = SHUTDOWN_MASK; + /* don't delete l2cap channel if sk is owned by user */ + if (sock_owned_by_user(sk)) { + sk->sk_state = BT_DISCONN; + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 5); + bh_unlock_sock(sk); + return 0; + } + l2cap_chan_del(sk, ECONNRESET); bh_unlock_sock(sk); @@ -3305,6 +3330,15 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd if (!sk) return 0; + /* don't delete l2cap channel if sk is owned by user */ + if (sock_owned_by_user(sk)) { + sk->sk_state = BT_DISCONN; + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 5); + bh_unlock_sock(sk); + return 0; + } + l2cap_chan_del(sk, 0); bh_unlock_sock(sk); @@ -4134,11 +4168,10 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) __mod_retrans_timer(); pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) l2cap_send_ack(pi); - } else { + else l2cap_ertm_send(sk); - } } } @@ -4430,6 +4463,8 @@ static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, str if (!sk) goto drop; + bh_lock_sock(sk); + BT_DBG("sk %p, len %d", sk, skb->len); if (sk->sk_state != BT_BOUND && sk->sk_state != BT_CONNECTED) @@ -4841,8 +4876,10 @@ static int __init l2cap_init(void) return err; _busy_wq = create_singlethread_workqueue("l2cap"); - if (!_busy_wq) - goto error; + if (!_busy_wq) { + proto_unregister(&l2cap_proto); + return -ENOMEM; + } err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); if (err < 0) { @@ -4870,6 +4907,7 @@ static int __init l2cap_init(void) return 0; error: + destroy_workqueue(_busy_wq); proto_unregister(&l2cap_proto); return err; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c new file mode 100644 index 000000000000..f827fd908380 --- /dev/null +++ b/net/bluetooth/mgmt.c @@ -0,0 +1,308 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2010 Nokia Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +/* Bluetooth HCI Management interface */ + +#include <asm/uaccess.h> +#include <asm/unaligned.h> + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include <net/bluetooth/mgmt.h> + +#define MGMT_VERSION 0 +#define MGMT_REVISION 1 + +static int cmd_status(struct sock *sk, u16 cmd, u8 status) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_status *ev; + + BT_DBG("sock %p", sk); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); + hdr->len = cpu_to_le16(sizeof(*ev)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + ev->status = status; + put_unaligned_le16(cmd, &ev->opcode); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + return 0; +} + +static int read_version(struct sock *sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_read_version *rp; + + BT_DBG("sock %p", sk); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_READ_VERSION, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + rp->version = MGMT_VERSION; + put_unaligned_le16(MGMT_REVISION, &rp->revision); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + return 0; +} + +static int read_index_list(struct sock *sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_read_index_list *rp; + struct list_head *p; + size_t body_len; + u16 count; + int i; + + BT_DBG("sock %p", sk); + + read_lock(&hci_dev_list_lock); + + count = 0; + list_for_each(p, &hci_dev_list) { + count++; + } + + body_len = sizeof(*ev) + sizeof(*rp) + (2 * count); + skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(body_len); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_READ_INDEX_LIST, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp) + (2 * count)); + put_unaligned_le16(count, &rp->num_controllers); + + i = 0; + list_for_each(p, &hci_dev_list) { + struct hci_dev *d = list_entry(p, struct hci_dev, list); + put_unaligned_le16(d->id, &rp->index[i++]); + BT_DBG("Added hci%u", d->id); + } + + read_unlock(&hci_dev_list_lock); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + return 0; +} + +static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_read_info *rp; + struct mgmt_cp_read_info *cp; + struct hci_dev *hdev; + u16 dev_id; + + BT_DBG("sock %p", sk); + + if (len != 2) + return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_READ_INFO, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) { + kfree_skb(skb); + return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV); + } + + hci_dev_lock_bh(hdev); + + put_unaligned_le16(hdev->id, &rp->index); + rp->type = hdev->dev_type; + + rp->powered = test_bit(HCI_UP, &hdev->flags); + rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags); + rp->pairable = test_bit(HCI_PSCAN, &hdev->flags); + + if (test_bit(HCI_AUTH, &hdev->flags)) + rp->sec_mode = 3; + else if (hdev->ssp_mode > 0) + rp->sec_mode = 4; + else + rp->sec_mode = 2; + + bacpy(&rp->bdaddr, &hdev->bdaddr); + memcpy(rp->features, hdev->features, 8); + memcpy(rp->dev_class, hdev->dev_class, 3); + put_unaligned_le16(hdev->manufacturer, &rp->manufacturer); + rp->hci_ver = hdev->hci_ver; + put_unaligned_le16(hdev->hci_rev, &rp->hci_rev); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + return 0; +} + +int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) +{ + unsigned char *buf; + struct mgmt_hdr *hdr; + u16 opcode, len; + int err; + + BT_DBG("got %zu bytes", msglen); + + if (msglen < sizeof(*hdr)) + return -EINVAL; + + buf = kmalloc(msglen, GFP_ATOMIC); + if (!buf) + return -ENOMEM; + + if (memcpy_fromiovec(buf, msg->msg_iov, msglen)) { + err = -EFAULT; + goto done; + } + + hdr = (struct mgmt_hdr *) buf; + opcode = get_unaligned_le16(&hdr->opcode); + len = get_unaligned_le16(&hdr->len); + + if (len != msglen - sizeof(*hdr)) { + err = -EINVAL; + goto done; + } + + switch (opcode) { + case MGMT_OP_READ_VERSION: + err = read_version(sk); + break; + case MGMT_OP_READ_INDEX_LIST: + err = read_index_list(sk); + break; + case MGMT_OP_READ_INFO: + err = read_controller_info(sk, buf + sizeof(*hdr), len); + break; + default: + BT_DBG("Unknown op %u", opcode); + err = cmd_status(sk, opcode, 0x01); + break; + } + + if (err < 0) + goto done; + + err = msglen; + +done: + kfree(buf); + return err; +} + +static int mgmt_event(u16 event, void *data, u16 data_len) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + + skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + bt_cb(skb)->channel = HCI_CHANNEL_CONTROL; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(event); + hdr->len = cpu_to_le16(data_len); + + memcpy(skb_put(skb, data_len), data, data_len); + + hci_send_to_sock(NULL, skb); + kfree_skb(skb); + + return 0; +} + +int mgmt_index_added(u16 index) +{ + struct mgmt_ev_index_added ev; + + put_unaligned_le16(index, &ev.index); + + return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev)); +} + +int mgmt_index_removed(u16 index) +{ + struct mgmt_ev_index_added ev; + + put_unaligned_le16(index, &ev.index); + + return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev)); +} diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 39a5d87e33b4..ff8aaa736650 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -41,7 +41,7 @@ #include <linux/slab.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -51,10 +51,10 @@ #define VERSION "1.11" -static int disable_cfc = 0; +static int disable_cfc; +static int l2cap_ertm; static int channel_mtu = -1; static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; -static int l2cap_ertm = 0; static struct task_struct *rfcomm_thread; @@ -79,7 +79,10 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr); static void rfcomm_process_connect(struct rfcomm_session *s); -static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err); +static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, + bdaddr_t *dst, + u8 sec_level, + int *err); static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst); static void rfcomm_session_del(struct rfcomm_session *s); @@ -308,6 +311,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) d->state = BT_OPEN; d->flags = 0; d->mscex = 0; + d->sec_level = BT_SECURITY_LOW; d->mtu = RFCOMM_DEFAULT_MTU; d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV; @@ -401,7 +405,7 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, s = rfcomm_session_get(src, dst); if (!s) { - s = rfcomm_session_create(src, dst, &err); + s = rfcomm_session_create(src, dst, d->sec_level, &err); if (!s) return err; } @@ -679,7 +683,10 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err) rfcomm_session_put(s); } -static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err) +static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, + bdaddr_t *dst, + u8 sec_level, + int *err) { struct rfcomm_session *s = NULL; struct sockaddr_l2 addr; @@ -704,6 +711,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst sk = sock->sk; lock_sock(sk); l2cap_pi(sk)->imtu = l2cap_mtu; + l2cap_pi(sk)->sec_level = sec_level; if (l2cap_ertm) l2cap_pi(sk)->mode = L2CAP_MODE_ERTM; release_sock(sk); @@ -1894,7 +1902,7 @@ static inline void rfcomm_check_connection(struct rfcomm_session *s) BT_DBG("%p state %ld", s, s->state); - switch(sk->sk_state) { + switch (sk->sk_state) { case BT_CONNECTED: s->state = BT_CONNECT; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index aec505f934df..66cc1f0c3df8 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -45,7 +45,7 @@ #include <net/sock.h> #include <asm/system.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -140,11 +140,13 @@ static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) /* Find socket with channel and source bdaddr. * Returns closest match. */ -static struct sock *__rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) +static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; struct hlist_node *node; + read_lock(&rfcomm_sk_list.lock); + sk_for_each(sk, node, &rfcomm_sk_list.head) { if (state && sk->sk_state != state) continue; @@ -159,19 +161,10 @@ static struct sock *__rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t sk1 = sk; } } - return node ? sk : sk1; -} -/* Find socket with given address (channel, src). - * Returns locked socket */ -static inline struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) -{ - struct sock *s; - read_lock(&rfcomm_sk_list.lock); - s = __rfcomm_get_sock_by_channel(state, channel, src); - if (s) bh_lock_sock(s); read_unlock(&rfcomm_sk_list.lock); - return s; + + return node ? sk : sk1; } static void rfcomm_sock_destruct(struct sock *sk) @@ -895,7 +888,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how) BT_DBG("sock %p, sk %p", sock, sk); - if (!sk) return 0; + if (!sk) + return 0; lock_sock(sk); if (!sk->sk_shutdown) { @@ -945,6 +939,8 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * if (!parent) return 0; + bh_lock_sock(parent); + /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { BT_DBG("backlog full %d", parent->sk_ack_backlog); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index a9b81f5dacd1..2575c2db6404 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -58,9 +58,9 @@ struct rfcomm_dev { bdaddr_t src; bdaddr_t dst; - u8 channel; + u8 channel; - uint modem_status; + uint modem_status; struct rfcomm_dlc *dlc; struct tty_struct *tty; @@ -69,7 +69,7 @@ struct rfcomm_dev { struct device *tty_dev; - atomic_t wmem_alloc; + atomic_t wmem_alloc; struct sk_buff_head pending; }; @@ -431,7 +431,8 @@ static int rfcomm_release_dev(void __user *arg) BT_DBG("dev_id %d flags 0x%x", req.dev_id, req.flags); - if (!(dev = rfcomm_dev_get(req.dev_id))) + dev = rfcomm_dev_get(req.dev_id); + if (!dev) return -ENODEV; if (dev->flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN)) { @@ -470,7 +471,8 @@ static int rfcomm_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*di); - if (!(dl = kmalloc(size, GFP_KERNEL))) + dl = kmalloc(size, GFP_KERNEL); + if (!dl) return -ENOMEM; di = dl->dev_info; @@ -513,7 +515,8 @@ static int rfcomm_get_dev_info(void __user *arg) if (copy_from_user(&di, arg, sizeof(di))) return -EFAULT; - if (!(dev = rfcomm_dev_get(di.id))) + dev = rfcomm_dev_get(di.id); + if (!dev) return -ENODEV; di.flags = dev->flags; @@ -561,7 +564,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) return; } - if (!(tty = dev->tty) || !skb_queue_empty(&dev->pending)) { + tty = dev->tty; + if (!tty || !skb_queue_empty(&dev->pending)) { skb_queue_tail(&dev->pending, skb); return; } @@ -796,7 +800,8 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in memcpy(skb_put(skb, size), buf + sent, size); - if ((err = rfcomm_dlc_send(dlc, skb)) < 0) { + err = rfcomm_dlc_send(dlc, skb); + if (err < 0) { kfree_skb(skb); break; } @@ -892,7 +897,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) /* Parity on/off and when on, odd/even */ if (((old->c_cflag & PARENB) != (new->c_cflag & PARENB)) || - ((old->c_cflag & PARODD) != (new->c_cflag & PARODD)) ) { + ((old->c_cflag & PARODD) != (new->c_cflag & PARODD))) { changes |= RFCOMM_RPN_PM_PARITY; BT_DBG("Parity change detected."); } @@ -937,11 +942,10 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) /* POSIX does not support 1.5 stop bits and RFCOMM does not * support 2 stop bits. So a request for 2 stop bits gets * translated to 1.5 stop bits */ - if (new->c_cflag & CSTOPB) { + if (new->c_cflag & CSTOPB) stop_bits = RFCOMM_RPN_STOP_15; - } else { + else stop_bits = RFCOMM_RPN_STOP_1; - } /* Handle number of data bits [5-8] */ if ((old->c_cflag & CSIZE) != (new->c_cflag & CSIZE)) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d0927d1fdada..960c6d1637da 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -44,7 +44,7 @@ #include <net/sock.h> #include <asm/system.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -52,7 +52,7 @@ #define VERSION "0.6" -static int disable_esco = 0; +static int disable_esco; static const struct proto_ops sco_sock_ops; @@ -138,16 +138,17 @@ static inline struct sock *sco_chan_get(struct sco_conn *conn) static int sco_conn_del(struct hci_conn *hcon, int err) { - struct sco_conn *conn; + struct sco_conn *conn = hcon->sco_data; struct sock *sk; - if (!(conn = hcon->sco_data)) + if (!conn) return 0; BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); /* Kill socket */ - if ((sk = sco_chan_get(conn))) { + sk = sco_chan_get(conn); + if (sk) { bh_lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); @@ -185,7 +186,8 @@ static int sco_connect(struct sock *sk) BT_DBG("%s -> %s", batostr(src), batostr(dst)); - if (!(hdev = hci_get_route(dst, src))) + hdev = hci_get_route(dst, src); + if (!hdev) return -EHOSTUNREACH; hci_dev_lock_bh(hdev); @@ -510,7 +512,8 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen /* Set destination address and psm */ bacpy(&bt_sk(sk)->dst, &sa->sco_bdaddr); - if ((err = sco_connect(sk))) + err = sco_connect(sk); + if (err) goto done; err = bt_sock_wait_state(sk, BT_CONNECTED, @@ -828,13 +831,14 @@ static void sco_chan_del(struct sock *sk, int err) static void sco_conn_ready(struct sco_conn *conn) { - struct sock *parent, *sk; + struct sock *parent; + struct sock *sk = conn->sk; BT_DBG("conn %p", conn); sco_conn_lock(conn); - if ((sk = conn->sk)) { + if (sk) { sco_sock_clear_timer(sk); bh_lock_sock(sk); sk->sk_state = BT_CONNECTED; @@ -882,7 +886,7 @@ static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type) int lm = 0; if (type != SCO_LINK && type != ESCO_LINK) - return 0; + return -EINVAL; BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); @@ -908,7 +912,7 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) - return 0; + return -EINVAL; if (!status) { struct sco_conn *conn; @@ -927,7 +931,7 @@ static int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) BT_DBG("hcon %p reason %d", hcon, reason); if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) - return 0; + return -EINVAL; sco_conn_del(hcon, bt_err(reason)); diff --git a/net/bridge/br.c b/net/bridge/br.c index c8436fa31344..84bbb82599b2 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -22,8 +22,6 @@ #include "br_private.h" -int (*br_should_route_hook)(struct sk_buff *skb); - static const struct stp_proto br_stp_proto = { .rcv = br_stp_rcv, }; @@ -102,8 +100,6 @@ static void __exit br_deinit(void) br_fdb_fini(); } -EXPORT_SYMBOL(br_should_route_hook); - module_init(br_init) module_exit(br_deinit) MODULE_LICENSE("GPL"); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 17cb0b633576..556443566e9c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -141,7 +141,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) #ifdef CONFIG_BRIDGE_NETFILTER /* remember the MTU in the rtable for PMTU */ - br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu; + dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu); #endif return 0; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 90512ccfd3e9..2872393b2939 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -238,15 +238,18 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) { struct net_bridge_fdb_entry *fdb; + struct net_bridge_port *port; int ret; - if (!br_port_exists(dev)) - return 0; - rcu_read_lock(); - fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr); - ret = fdb && fdb->dst->dev != dev && - fdb->dst->state == BR_STATE_FORWARDING; + port = br_port_get_rcu(dev); + if (!port) + ret = 0; + else { + fdb = __br_fdb_get(port->br, addr); + ret = fdb && fdb->dst->dev != dev && + fdb->dst->state == BR_STATE_FORWARDING; + } rcu_read_unlock(); return ret; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index cbfe87f0f34a..ee64287f1290 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -41,17 +41,13 @@ static inline unsigned packet_length(const struct sk_buff *skb) int br_dev_queue_push_xmit(struct sk_buff *skb) { - /* drop mtu oversized packets except gso */ - if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) + /* ip_fragment doesn't copy the MAC header */ + if (nf_bridge_maybe_copy_header(skb) || + (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))) { kfree_skb(skb); - else { - /* ip_fragment doesn't copy the MAC header */ - if (nf_bridge_maybe_copy_header(skb)) - kfree_skb(skb); - else { - skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); - } + } else { + skb_push(skb, ETH_HLEN); + dev_queue_xmit(skb); } return 0; @@ -223,7 +219,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct net_bridge_port_group *p; struct hlist_node *rp; - rp = rcu_dereference(br->router_list.first); + rp = rcu_dereference(hlist_first_rcu(&br->router_list)); p = mdst ? rcu_dereference(mdst->ports) : NULL; while (p || rp) { struct net_bridge_port *port, *lport, *rport; @@ -242,7 +238,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, if ((unsigned long)lport >= (unsigned long)port) p = rcu_dereference(p->next); if ((unsigned long)rport >= (unsigned long)port) - rp = rcu_dereference(rp->next); + rp = rcu_dereference(hlist_next_rcu(rp)); } if (!prev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 89ad25a76202..d9d1e2bac1d6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -475,11 +475,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; - if (!br_port_exists(dev)) - return -EINVAL; - - p = br_port_get(dev); - if (p->br != br) + p = br_port_get_rtnl(dev); + if (!p || p->br != br) return -EINVAL; del_nbp(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 25207a1f182b..6f6d8e1b776f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -21,6 +21,10 @@ /* Bridge group multicast address 802.1d (pg 51). */ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +/* Hook for brouter */ +br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; +EXPORT_SYMBOL(br_should_route_hook); + static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; @@ -139,7 +143,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb) { struct net_bridge_port *p; const unsigned char *dest = eth_hdr(skb)->h_dest; - int (*rhook)(struct sk_buff *skb); + br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return skb; @@ -173,8 +177,8 @@ forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); - if (rhook != NULL) { - if (rhook(skb)) + if (rhook) { + if ((*rhook)(skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index eb5b256ffc88..f701a21acb34 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -33,6 +33,9 @@ #include "br_private.h" +#define mlock_dereference(X, br) \ + rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline int ipv6_is_local_multicast(const struct in6_addr *addr) { @@ -135,7 +138,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get( struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { - struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb); struct br_ip ip; if (br->multicast_disabled) @@ -235,7 +238,8 @@ static void br_multicast_group_expired(unsigned long data) if (mp->ports) goto out; - mdb = br->mdb; + mdb = mlock_dereference(br->mdb, br); + hlist_del_rcu(&mp->hlist[mdb->ver]); mdb->size--; @@ -249,16 +253,20 @@ out: static void br_multicast_del_pg(struct net_bridge *br, struct net_bridge_port_group *pg) { - struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; - struct net_bridge_port_group **pp; + struct net_bridge_port_group __rcu **pp; + + mdb = mlock_dereference(br->mdb, br); mp = br_mdb_ip_get(mdb, &pg->addr); if (WARN_ON(!mp)) return; - for (pp = &mp->ports; (p = *pp); pp = &p->next) { + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { if (p != pg) continue; @@ -294,10 +302,10 @@ out: spin_unlock(&br->multicast_lock); } -static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max, +static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max, int elasticity) { - struct net_bridge_mdb_htable *old = *mdbp; + struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1); struct net_bridge_mdb_htable *mdb; int err; @@ -437,7 +445,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h = ipv6_hdr(skb); *(__force __be32 *)ip6h = htonl(0x60000000); - ip6h->payload_len = 8 + sizeof(*mldq); + ip6h->payload_len = htons(8 + sizeof(*mldq)); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); @@ -569,7 +577,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group, int hash) { - struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; struct hlist_node *p; unsigned count = 0; @@ -577,6 +585,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( int elasticity; int err; + mdb = rcu_dereference_protected(br->mdb, 1); hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { count++; if (unlikely(br_ip_equal(group, &mp->addr))) @@ -642,13 +651,16 @@ static struct net_bridge_mdb_entry *br_multicast_new_group( struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) { - struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; int hash; + int err; + mdb = rcu_dereference_protected(br->mdb, 1); if (!mdb) { - if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0)) - return NULL; + err = br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0); + if (err) + return ERR_PTR(err); goto rehash; } @@ -660,7 +672,7 @@ static struct net_bridge_mdb_entry *br_multicast_new_group( case -EAGAIN: rehash: - mdb = br->mdb; + mdb = rcu_dereference_protected(br->mdb, 1); hash = br_ip_hash(mdb, group); break; @@ -670,7 +682,7 @@ rehash: mp = kzalloc(sizeof(*mp), GFP_ATOMIC); if (unlikely(!mp)) - goto out; + return ERR_PTR(-ENOMEM); mp->br = br; mp->addr = *group; @@ -692,7 +704,7 @@ static int br_multicast_add_group(struct net_bridge *br, { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; - struct net_bridge_port_group **pp; + struct net_bridge_port_group __rcu **pp; unsigned long now = jiffies; int err; @@ -703,7 +715,7 @@ static int br_multicast_add_group(struct net_bridge *br, mp = br_multicast_new_group(br, port, group); err = PTR_ERR(mp); - if (unlikely(IS_ERR(mp) || !mp)) + if (IS_ERR(mp)) goto err; if (!port) { @@ -712,7 +724,9 @@ static int br_multicast_add_group(struct net_bridge *br, goto out; } - for (pp = &mp->ports; (p = *pp); pp = &p->next) { + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { if (p->port == port) goto found; if ((unsigned long)p->port < (unsigned long)port) @@ -1106,7 +1120,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct igmpv3_query *ih3; struct net_bridge_port_group *p; - struct net_bridge_port_group **pp; + struct net_bridge_port_group __rcu **pp; unsigned long max_delay; unsigned long now = jiffies; __be32 group; @@ -1145,7 +1159,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip4_get(br->mdb, group); + mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group); if (!mp) goto out; @@ -1157,7 +1171,9 @@ static int br_ip4_multicast_query(struct net_bridge *br, try_to_del_timer_sync(&mp->timer) >= 0)) mod_timer(&mp->timer, now + max_delay); - for (pp = &mp->ports; (p = *pp); pp = &p->next) { + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) @@ -1178,7 +1194,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb); struct net_bridge_mdb_entry *mp; struct mld2_query *mld2q; - struct net_bridge_port_group *p, **pp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; unsigned long max_delay; unsigned long now = jiffies; struct in6_addr *group = NULL; @@ -1214,7 +1231,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip6_get(br->mdb, group); + mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group); if (!mp) goto out; @@ -1225,7 +1242,9 @@ static int br_ip6_multicast_query(struct net_bridge *br, try_to_del_timer_sync(&mp->timer) >= 0)) mod_timer(&mp->timer, now + max_delay); - for (pp = &mp->ports; (p = *pp); pp = &p->next) { + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) @@ -1254,7 +1273,7 @@ static void br_multicast_leave_group(struct net_bridge *br, timer_pending(&br->multicast_querier_timer)) goto out; - mdb = br->mdb; + mdb = mlock_dereference(br->mdb, br); mp = br_mdb_ip_get(mdb, group); if (!mp) goto out; @@ -1277,7 +1296,9 @@ static void br_multicast_leave_group(struct net_bridge *br, goto out; } - for (p = mp->ports; p; p = p->next) { + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { if (p->port != port) continue; @@ -1430,7 +1451,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { - struct sk_buff *skb2 = skb; + struct sk_buff *skb2; struct ipv6hdr *ip6h; struct icmp6hdr *icmp6h; u8 nexthdr; @@ -1469,15 +1490,15 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, if (!skb2) return -ENOMEM; + err = -EINVAL; + if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) + goto out; + len -= offset - skb_network_offset(skb2); __skb_pull(skb2, offset); skb_reset_transport_header(skb2); - err = -EINVAL; - if (!pskb_may_pull(skb2, sizeof(*icmp6h))) - goto out; - icmp6h = icmp6_hdr(skb2); switch (icmp6h->icmp6_type) { @@ -1516,7 +1537,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, switch (icmp6h->icmp6_type) { case ICMPV6_MGM_REPORT: { - struct mld_msg *mld = (struct mld_msg *)icmp6h; + struct mld_msg *mld; + if (!pskb_may_pull(skb2, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; @@ -1529,15 +1555,18 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; case ICMPV6_MGM_REDUCTION: { - struct mld_msg *mld = (struct mld_msg *)icmp6h; + struct mld_msg *mld; + if (!pskb_may_pull(skb2, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *)skb_transport_header(skb2); br_ip6_multicast_leave_group(br, port, &mld->mld_mca); } } out: - __skb_push(skb2, offset); - if (skb2 != skb) - kfree_skb(skb2); + kfree_skb(skb2); return err; } #endif @@ -1625,7 +1654,7 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->multicast_query_timer); spin_lock_bh(&br->multicast_lock); - mdb = br->mdb; + mdb = mlock_dereference(br->mdb, br); if (!mdb) goto out; @@ -1729,6 +1758,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; int err = 0; + struct net_bridge_mdb_htable *mdb; spin_lock(&br->multicast_lock); if (br->multicast_disabled == !val) @@ -1741,15 +1771,16 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) if (!netif_running(br->dev)) goto unlock; - if (br->mdb) { - if (br->mdb->old) { + mdb = mlock_dereference(br->mdb, br); + if (mdb) { + if (mdb->old) { err = -EEXIST; rollback: br->multicast_disabled = !!val; goto unlock; } - err = br_mdb_rehash(&br->mdb, br->mdb->max, + err = br_mdb_rehash(&br->mdb, mdb->max, br->hash_elasticity); if (err) goto rollback; @@ -1774,6 +1805,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) { int err = -ENOENT; u32 old; + struct net_bridge_mdb_htable *mdb; spin_lock(&br->multicast_lock); if (!netif_running(br->dev)) @@ -1782,7 +1814,9 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) err = -EINVAL; if (!is_power_of_2(val)) goto unlock; - if (br->mdb && val < br->mdb->size) + + mdb = mlock_dereference(br->mdb, br); + if (mdb && val < mdb->size) goto unlock; err = 0; @@ -1790,8 +1824,8 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) old = br->hash_max; br->hash_max = val; - if (br->mdb) { - if (br->mdb->old) { + if (mdb) { + if (mdb->old) { err = -EEXIST; rollback: br->hash_max = old; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 865fd7634b67..4b5b66d07bba 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -124,24 +124,25 @@ void br_netfilter_rtable_init(struct net_bridge *br) atomic_set(&rt->dst.__refcnt, 1); rt->dst.dev = br->dev; rt->dst.path = &rt->dst; - rt->dst.metrics[RTAX_MTU - 1] = 1500; + dst_metric_set(&rt->dst, RTAX_MTU, 1500); rt->dst.flags = DST_NOXFRM; rt->dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { - if (!br_port_exists(dev)) - return NULL; - return &br_port_get_rcu(dev)->br->fake_rtable; + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? &port->br->fake_rtable : NULL; } static inline struct net_device *bridge_parent(const struct net_device *dev) { - if (!br_port_exists(dev)) - return NULL; + struct net_bridge_port *port; - return br_port_get_rcu(dev)->br->dev; + port = br_port_get_rcu(dev); + return port ? port->br->dev : NULL; } static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) @@ -412,13 +413,8 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (dnat_took_place(skb)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = 0, - .tos = RT_TOS(iph->tos) }, - }, - .proto = 0, + .fl4_dst = iph->daddr, + .fl4_tos = RT_TOS(iph->tos), }; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -566,26 +562,26 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, u32 pkt_len; if (skb->len < sizeof(struct ipv6hdr)) - goto inhdr_error; + return NF_DROP; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto inhdr_error; + return NF_DROP; hdr = ipv6_hdr(skb); if (hdr->version != 6) - goto inhdr_error; + return NF_DROP; pkt_len = ntohs(hdr->payload_len); if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - goto inhdr_error; + return NF_DROP; if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) - goto inhdr_error; + return NF_DROP; } if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) - goto inhdr_error; + return NF_DROP; nf_bridge_put(skb->nf_bridge); if (!nf_bridge_alloc(skb)) @@ -598,9 +594,6 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, br_nf_pre_routing_finish_ipv6); return NF_STOLEN; - -inhdr_error: - return NF_DROP; } /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. @@ -619,11 +612,11 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, __u32 len = nf_bridge_encap_header_len(skb); if (unlikely(!pskb_may_pull(skb, len))) - goto out; + return NF_DROP; p = br_port_get_rcu(in); if (p == NULL) - goto out; + return NF_DROP; br = p->br; if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || @@ -645,8 +638,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, nf_bridge_pull_encap_header_rcsum(skb); if (br_parse_ip_options(skb)) - /* Drop invalid packet */ - goto out; + return NF_DROP; nf_bridge_put(skb->nf_bridge); if (!nf_bridge_alloc(skb)) @@ -660,9 +652,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, br_nf_pre_routing_finish); return NF_STOLEN; - -out: - return NF_DROP; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4a6a378c84e3..f8bf4c7f842c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -119,11 +119,13 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; for_each_netdev(net, dev) { + struct net_bridge_port *port = br_port_get_rtnl(dev); + /* not a bridge port */ - if (!br_port_exists(dev) || idx < cb->args[0]) + if (!port || idx < cb->args[0]) goto skip; - if (br_fill_ifinfo(skb, br_port_get(dev), + if (br_fill_ifinfo(skb, port, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) @@ -169,9 +171,9 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev) return -ENODEV; - if (!br_port_exists(dev)) + p = br_port_get_rtnl(dev); + if (!p) return -EINVAL; - p = br_port_get(dev); /* if kernel STP is running, don't allow changes */ if (p->br->stp_enabled == BR_KERNEL_STP) diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 404d4e14c6a7..7d337c9b6082 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -32,15 +32,15 @@ struct notifier_block br_device_notifier = { static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct net_bridge_port *p = br_port_get(dev); + struct net_bridge_port *p; struct net_bridge *br; int err; /* not a port of a bridge */ - if (!br_port_exists(dev)) + p = br_port_get_rtnl(dev); + if (!p) return NOTIFY_DONE; - p = br_port_get(dev); br = p->br; switch (event) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 75c90edaf7db..84aac7734bfc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -72,7 +72,7 @@ struct net_bridge_fdb_entry struct net_bridge_port_group { struct net_bridge_port *port; - struct net_bridge_port_group *next; + struct net_bridge_port_group __rcu *next; struct hlist_node mglist; struct rcu_head rcu; struct timer_list timer; @@ -86,7 +86,7 @@ struct net_bridge_mdb_entry struct hlist_node hlist[2]; struct hlist_node mglist; struct net_bridge *br; - struct net_bridge_port_group *ports; + struct net_bridge_port_group __rcu *ports; struct rcu_head rcu; struct timer_list timer; struct timer_list query_timer; @@ -151,11 +151,20 @@ struct net_bridge_port #endif }; -#define br_port_get_rcu(dev) \ - ((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data)) -#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data) #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) +static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) +{ + struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data); + return br_port_exists(dev) ? port : NULL; +} + +static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev) +{ + return br_port_exists(dev) ? + rtnl_dereference(dev->rx_handler_data) : NULL; +} + struct br_cpu_netstats { u64 rx_packets; u64 rx_bytes; @@ -227,7 +236,7 @@ struct net_bridge unsigned long multicast_startup_query_interval; spinlock_t multicast_lock; - struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_htable __rcu *mdb; struct hlist_head router_list; struct hlist_head mglist; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 35cf27087b56..289646ec9b7b 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -50,6 +50,8 @@ static void br_send_bpdu(struct net_bridge_port *p, llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); + skb_reset_mac_header(skb); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); } @@ -141,10 +143,6 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_bridge *br; const unsigned char *buf; - if (!br_port_exists(dev)) - goto err; - p = br_port_get_rcu(dev); - if (!pskb_may_pull(skb, 4)) goto err; @@ -153,6 +151,10 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; + p = br_port_get_rcu(dev); + if (!p) + goto err; + br = p->br; spin_lock(&br->lock); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 1d8826914cbf..79372d4a4055 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -145,7 +145,7 @@ static void br_stp_stop(struct net_bridge *br) char *envp[] = { NULL }; if (br->stp_enabled == BR_USER_STP) { - r = call_usermodehelper(BR_STP_PROG, argv, envp, 1); + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); br_info(br, "userspace STP stopped, return code %d\n", r); /* To start timers on any ports left in blocking */ diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 50a46afc2bcc..2ed0056a39a8 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -22,9 +22,15 @@ #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ip6.h> -struct tcpudphdr { - __be16 src; - __be16 dst; +union pkthdr { + struct { + __be16 src; + __be16 dst; + } tcpudphdr; + struct { + u8 type; + u8 code; + } icmphdr; }; static bool @@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct ebt_ip6_info *info = par->matchinfo; const struct ipv6hdr *ih6; struct ipv6hdr _ip6h; - const struct tcpudphdr *pptr; - struct tcpudphdr _ports; + const union pkthdr *pptr; + union pkthdr _pkthdr; ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) @@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) return false; - if (!(info->bitmask & EBT_IP6_DPORT) && - !(info->bitmask & EBT_IP6_SPORT)) + if (!(info->bitmask & ( EBT_IP6_DPORT | + EBT_IP6_SPORT | EBT_IP6_ICMP6))) return true; - pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), - &_ports); + + /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ + pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), + &_pkthdr); if (pptr == NULL) return false; if (info->bitmask & EBT_IP6_DPORT) { - u32 dst = ntohs(pptr->dst); + u16 dst = ntohs(pptr->tcpudphdr.dst); if (FWINV(dst < info->dport[0] || dst > info->dport[1], EBT_IP6_DPORT)) return false; } if (info->bitmask & EBT_IP6_SPORT) { - u32 src = ntohs(pptr->src); + u16 src = ntohs(pptr->tcpudphdr.src); if (FWINV(src < info->sport[0] || src > info->sport[1], EBT_IP6_SPORT)) return false; } - return true; + if ((info->bitmask & EBT_IP6_ICMP6) && + FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || + pptr->icmphdr.type > info->icmpv6_type[1] || + pptr->icmphdr.code < info->icmpv6_code[0] || + pptr->icmphdr.code > info->icmpv6_code[1], + EBT_IP6_ICMP6)) + return false; } return true; } @@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) return -EINVAL; + if (info->bitmask & EBT_IP6_ICMP6) { + if ((info->invflags & EBT_IP6_PROTO) || + info->protocol != IPPROTO_ICMPV6) + return -EINVAL; + if (info->icmpv6_type[0] > info->icmpv6_type[1] || + info->icmpv6_code[0] > info->icmpv6_code[1]) + return -EINVAL; + } return 0; } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index ae3f106c3908..1bcaf36ad612 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -87,7 +87,8 @@ static int __init ebtable_broute_init(void) if (ret < 0) return ret; /* see br_input.c */ - rcu_assign_pointer(br_should_route_hook, ebt_broute); + rcu_assign_pointer(br_should_route_hook, + (br_should_route_hook_t *)ebt_broute); return 0; } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index a1dcf83f0d58..5f1825df9dca 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -128,6 +128,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out) { const struct ethhdr *h = eth_hdr(skb); + const struct net_bridge_port *p; __be16 ethproto; int verdict, i; @@ -148,13 +149,11 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) return 1; /* rcu_read_lock()ed by nf_hook_slow */ - if (in && br_port_exists(in) && - FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev), - EBT_ILOGICALIN)) + if (in && (p = br_port_get_rcu(in)) != NULL && + FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN)) return 1; - if (out && br_port_exists(out) && - FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev), - EBT_ILOGICALOUT)) + if (out && (p = br_port_get_rcu(out)) != NULL && + FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT)) return 1; if (e->bitmask & EBT_SOURCEMAC) { @@ -1148,7 +1147,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) void *p; if (input_table == NULL || (repl = input_table->table) == NULL || - repl->entries == 0 || repl->entries_size == 0 || + repl->entries == NULL || repl->entries_size == 0 || repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); return ERR_PTR(-EINVAL); @@ -1765,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info, newinfo->entries_size = size; + xt_compat_init_offsets(AF_INET, info->nentries); return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); } diff --git a/net/caif/Makefile b/net/caif/Makefile index f87481fb0e65..9d38e406e4a4 100644 --- a/net/caif/Makefile +++ b/net/caif/Makefile @@ -1,8 +1,6 @@ -ifeq ($(CONFIG_CAIF_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG -caif-objs := caif_dev.o \ +caif-y := caif_dev.o \ cfcnfg.o cfmuxl.o cfctrl.o \ cffrml.o cfveil.o cfdbgl.o\ cfserl.o cfdgml.o \ @@ -13,4 +11,4 @@ obj-$(CONFIG_CAIF) += caif.o obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o obj-$(CONFIG_CAIF) += caif_socket.o -export-objs := caif.o +export-y := caif.o diff --git a/net/can/Makefile b/net/can/Makefile index 9cd3c4b3abda..2d3894b32742 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -3,10 +3,10 @@ # obj-$(CONFIG_CAN) += can.o -can-objs := af_can.o proc.o +can-y := af_can.o proc.o obj-$(CONFIG_CAN_RAW) += can-raw.o -can-raw-objs := raw.o +can-raw-y := raw.o obj-$(CONFIG_CAN_BCM) += can-bcm.o -can-bcm-objs := bcm.o +can-bcm-y := bcm.o diff --git a/net/can/bcm.c b/net/can/bcm.c index 08ffe9e4be20..9d5e8accfab1 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -125,7 +125,7 @@ struct bcm_sock { struct list_head tx_ops; unsigned long dropped_usr_msgs; struct proc_dir_entry *bcm_proc_read; - char procname [9]; /* pointer printed in ASCII with \0 */ + char procname [32]; /* inode number in decimal with \0 */ }; static inline struct bcm_sock *bcm_sk(const struct sock *sk) @@ -1521,7 +1521,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, if (proc_dir) { /* unique socket address as filename */ - sprintf(bo->procname, "%p", sock); + sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, proc_dir, &bcm_proc_fops, sk); diff --git a/net/ceph/Makefile b/net/ceph/Makefile index aab1cabb8035..e87ef435e11b 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -1,12 +1,9 @@ # # Makefile for CEPH filesystem. # - -ifneq ($(KERNELRELEASE),) - obj-$(CONFIG_CEPH_LIB) += libceph.o -libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ +libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ mon_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ @@ -16,22 +13,3 @@ libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ ceph_fs.o ceph_strings.o ceph_hash.o \ pagevec.o -else -#Otherwise we were called directly from the command -# line; invoke the kernel build system. - -KERNELDIR ?= /lib/modules/$(shell uname -r)/build -PWD := $(shell pwd) - -default: all - -all: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules - -modules_install: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install - -clean: - $(MAKE) -C $(KERNELDIR) M=$(PWD) clean - -endif diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c index 53d8abfa25d5..bf3e6a13c215 100644 --- a/net/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -19,7 +19,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) if (b->vec.iov_base) { b->is_vmalloc = false; } else { - b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); + b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL); if (!b->vec.iov_base) { kfree(b); return NULL; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0e8157ee5d43..b6ff4a1519ab 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -97,11 +97,9 @@ struct workqueue_struct *ceph_msgr_wq; int ceph_msgr_init(void) { ceph_msgr_wq = create_workqueue("ceph-msgr"); - if (IS_ERR(ceph_msgr_wq)) { - int ret = PTR_ERR(ceph_msgr_wq); - pr_err("msgr_init failed to create workqueue: %d\n", ret); - ceph_msgr_wq = NULL; - return ret; + if (!ceph_msgr_wq) { + pr_err("msgr_init failed to create workqueue\n"); + return -ENOMEM; } return 0; } @@ -540,8 +538,7 @@ static void prepare_write_message(struct ceph_connection *con) /* initialize page iterator */ con->out_msg_pos.page = 0; if (m->pages) - con->out_msg_pos.page_pos = - le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; + con->out_msg_pos.page_pos = m->page_alignment; else con->out_msg_pos.page_pos = 0; con->out_msg_pos.data_pos = 0; @@ -1491,7 +1488,7 @@ static int read_partial_message(struct ceph_connection *con) struct ceph_msg *m = con->in_msg; int ret; int to, left; - unsigned front_len, middle_len, data_len, data_off; + unsigned front_len, middle_len, data_len; int datacrc = con->msgr->nocrc; int skip; u64 seq; @@ -1527,19 +1524,17 @@ static int read_partial_message(struct ceph_connection *con) data_len = le32_to_cpu(con->in_hdr.data_len); if (data_len > CEPH_MSG_MAX_DATA_LEN) return -EIO; - data_off = le16_to_cpu(con->in_hdr.data_off); /* verify seq# */ seq = le64_to_cpu(con->in_hdr.seq); if ((s64)seq - (s64)con->in_seq < 1) { - pr_info("skipping %s%lld %s seq %lld, expected %lld\n", + pr_info("skipping %s%lld %s seq %lld expected %lld\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; - con->in_seq++; return 0; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", @@ -1576,7 +1571,7 @@ static int read_partial_message(struct ceph_connection *con) con->in_msg_pos.page = 0; if (m->pages) - con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; + con->in_msg_pos.page_pos = m->page_alignment; else con->in_msg_pos.page_pos = 0; con->in_msg_pos.data_pos = 0; @@ -2301,6 +2296,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) /* data */ m->nr_pages = 0; + m->page_alignment = 0; m->pages = NULL; m->pagelist = NULL; m->bio = NULL; @@ -2370,6 +2366,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, type, front_len); return NULL; } + msg->page_alignment = le16_to_cpu(hdr->data_off); } memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 79391994b3ed..3e20a122ffa2 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -71,6 +71,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, op->extent.length = objlen; } req->r_num_pages = calc_pages_for(off, *plen); + req->r_page_alignment = off & ~PAGE_MASK; if (op->op == CEPH_OSD_OP_WRITE) op->payload_len = *plen; @@ -390,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, req->r_request->hdr.data_len = cpu_to_le32(data_len); } + req->r_request->page_alignment = req->r_page_alignment; + BUG_ON(p > msg->front.iov_base + msg->front.iov_len); msg_size = p - msg->front.iov_base; msg->front.iov_len = msg_size; @@ -419,7 +422,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply) + bool use_mempool, int num_reply, + int page_align) { struct ceph_osd_req_op ops[3]; struct ceph_osd_request *req; @@ -447,6 +451,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, calc_layout(osdc, vino, layout, off, plen, req, ops); req->r_file_layout = *layout; /* keep a copy */ + /* in case it differs from natural alignment that calc_layout + filled in for us */ + req->r_page_alignment = page_align; + ceph_osdc_build_request(req, off, plen, ops, snapc, mtime, @@ -1489,7 +1497,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_vino vino, struct ceph_file_layout *layout, u64 off, u64 *plen, u32 truncate_seq, u64 truncate_size, - struct page **pages, int num_pages) + struct page **pages, int num_pages, int page_align) { struct ceph_osd_request *req; int rc = 0; @@ -1499,15 +1507,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, req = ceph_osdc_new_request(osdc, layout, vino, off, plen, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, - false, 1); + false, 1, page_align); if (!req) return -ENOMEM; /* it may be a short read due to an object boundary */ req->r_pages = pages; - dout("readpages final extent is %llu~%llu (%d pages)\n", - off, *plen, req->r_num_pages); + dout("readpages final extent is %llu~%llu (%d pages align %d)\n", + off, *plen, req->r_num_pages, page_align); rc = ceph_osdc_start_request(osdc, req, false); if (!rc) @@ -1533,6 +1541,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, { struct ceph_osd_request *req; int rc = 0; + int page_align = off & ~PAGE_MASK; BUG_ON(vino.snap != CEPH_NOSNAP); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, @@ -1541,7 +1550,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, CEPH_OSD_FLAG_WRITE, snapc, do_sync, truncate_seq, truncate_size, mtime, - nofail, 1); + nofail, 1, page_align); if (!req) return -ENOMEM; @@ -1638,8 +1647,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, m = ceph_msg_get(req->r_reply); if (data_len > 0) { - unsigned data_off = le16_to_cpu(hdr->data_off); - int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); + int want = calc_pages_for(req->r_page_alignment, data_len); if (unlikely(req->r_num_pages < want)) { pr_warning("tid %lld reply %d > expected %d pages\n", @@ -1651,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, } m->pages = req->r_pages; m->nr_pages = req->r_num_pages; + m->page_alignment = req->r_page_alignment; #ifdef CONFIG_BLOCK m->bio = req->r_bio; #endif diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 54caf0687155..1a040e64c69f 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -13,8 +13,7 @@ * build a vector of user pages */ struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages, - loff_t off, size_t len) + int num_pages, bool write_page) { struct page **pages; int rc; @@ -25,24 +24,27 @@ struct page **ceph_get_direct_page_vector(const char __user *data, down_read(¤t->mm->mmap_sem); rc = get_user_pages(current, current->mm, (unsigned long)data, - num_pages, 0, 0, pages, NULL); + num_pages, write_page, 0, pages, NULL); up_read(¤t->mm->mmap_sem); - if (rc < 0) + if (rc < num_pages) goto fail; return pages; fail: - kfree(pages); + ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); return ERR_PTR(rc); } EXPORT_SYMBOL(ceph_get_direct_page_vector); -void ceph_put_page_vector(struct page **pages, int num_pages) +void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty) { int i; - for (i = 0; i < num_pages; i++) + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty_lock(pages[i]); put_page(pages[i]); + } kfree(pages); } EXPORT_SYMBOL(ceph_put_page_vector); diff --git a/net/core/datagram.c b/net/core/datagram.c index cd1e039c8755..18ac112ea7ae 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, * interrupt level will suddenly eat the receive_queue. * * Look at current nfs client by the way... - * However, this function was corrent in any case. 8) + * However, this function was correct in any case. 8) */ unsigned long cpu_flags; diff --git a/net/core/dev.c b/net/core/dev.c index 5968c822c999..a215269d2e35 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) EXPORT_SYMBOL(dev_get_by_index); /** - * dev_getbyhwaddr - find a device by its hardware address + * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace * @type: media type of device * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold the - * rtnl semaphore. The returned device has not had its ref count increased + * is not found or a pointer to the device. The caller must hold RCU + * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * - * BUGS: - * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, + const char *ha) { struct net_device *dev; - ASSERT_RTNL(); - - for_each_netdev(net, dev) + for_each_netdev_rcu(net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; return NULL; } -EXPORT_SYMBOL(dev_getbyhwaddr); +EXPORT_SYMBOL(dev_getbyhwaddr_rcu); struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { @@ -1225,52 +1222,90 @@ int dev_open(struct net_device *dev) } EXPORT_SYMBOL(dev_open); -static int __dev_close(struct net_device *dev) +static int __dev_close_many(struct list_head *head) { - const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *dev; ASSERT_RTNL(); might_sleep(); - /* - * Tell people we are going down, so that they can - * prepare to death, when device is still operating. - */ - call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); + list_for_each_entry(dev, head, unreg_list) { + /* + * Tell people we are going down, so that they can + * prepare to death, when device is still operating. + */ + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); - clear_bit(__LINK_STATE_START, &dev->state); + clear_bit(__LINK_STATE_START, &dev->state); - /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(). - * - * dev->stop() will invoke napi_disable() on all of it's - * napi_struct instances on this device. - */ - smp_mb__after_clear_bit(); /* Commit netif_running(). */ + /* Synchronize to scheduled poll. We cannot touch poll list, it + * can be even on different cpu. So just clear netif_running(). + * + * dev->stop() will invoke napi_disable() on all of it's + * napi_struct instances on this device. + */ + smp_mb__after_clear_bit(); /* Commit netif_running(). */ + } - dev_deactivate(dev); + dev_deactivate_many(head); - /* - * Call the device specific close. This cannot fail. - * Only if device is UP - * - * We allow it to be called even after a DETACH hot-plug - * event. - */ - if (ops->ndo_stop) - ops->ndo_stop(dev); + list_for_each_entry(dev, head, unreg_list) { + const struct net_device_ops *ops = dev->netdev_ops; - /* - * Device is now down. - */ + /* + * Call the device specific close. This cannot fail. + * Only if device is UP + * + * We allow it to be called even after a DETACH hot-plug + * event. + */ + if (ops->ndo_stop) + ops->ndo_stop(dev); + + /* + * Device is now down. + */ + + dev->flags &= ~IFF_UP; + + /* + * Shutdown NET_DMA + */ + net_dmaengine_put(); + } + + return 0; +} + +static int __dev_close(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + return __dev_close_many(&single); +} + +int dev_close_many(struct list_head *head) +{ + struct net_device *dev, *tmp; + LIST_HEAD(tmp_list); + + list_for_each_entry_safe(dev, tmp, head, unreg_list) + if (!(dev->flags & IFF_UP)) + list_move(&dev->unreg_list, &tmp_list); - dev->flags &= ~IFF_UP; + __dev_close_many(head); /* - * Shutdown NET_DMA + * Tell people we are down */ - net_dmaengine_put(); + list_for_each_entry(dev, head, unreg_list) { + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + call_netdevice_notifiers(NETDEV_DOWN, dev); + } + /* rollback_registered_many needs the complete original list */ + list_splice(&tmp_list, head); return 0; } @@ -1285,16 +1320,10 @@ static int __dev_close(struct net_device *dev) */ int dev_close(struct net_device *dev) { - if (!(dev->flags & IFF_UP)) - return 0; - - __dev_close(dev); + LIST_HEAD(single); - /* - * Tell people we are down - */ - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); - call_netdevice_notifiers(NETDEV_DOWN, dev); + list_add(&dev->unreg_list, &single); + dev_close_many(&single); return 0; } @@ -1499,6 +1528,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(dev_forward_skb); +static inline int deliver_skb(struct sk_buff *skb, + struct packet_type *pt_prev, + struct net_device *orig_dev) +{ + atomic_inc(&skb->users); + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); +} + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -1507,13 +1544,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb); static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; - -#ifdef CONFIG_NET_CLS_ACT - if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) - net_timestamp_set(skb); -#else - net_timestamp_set(skb); -#endif + struct sk_buff *skb2 = NULL; + struct packet_type *pt_prev = NULL; rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1523,10 +1555,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if ((ptype->dev == dev || !ptype->dev) && (ptype->af_packet_priv == NULL || (struct sock *)ptype->af_packet_priv != skb->sk)) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; + continue; + } + + skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) break; + net_timestamp_set(skb2); + /* skb->nh should be correctly set by sender, so that the second statement is just protection against buggy protocols. @@ -1545,9 +1585,11 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->transport_header = skb2->network_header; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype, skb->dev); + pt_prev = ptype; } } + if (pt_prev) + pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); rcu_read_unlock(); } @@ -1557,12 +1599,19 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { + int rc; + if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); + rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, + txq); + if (rc) + return rc; + if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -1757,7 +1806,7 @@ int skb_checksum_help(struct sk_buff *skb) goto out_set_summed; } - offset = skb->csum_start - skb_headroom(skb); + offset = skb_checksum_start_offset(skb); BUG_ON(offset >= skb_headlen(skb)); csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -1794,16 +1843,18 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; __be16 type = skb->protocol; + int vlan_depth = ETH_HLEN; int err; - if (type == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh; + while (type == htons(ETH_P_8021Q)) { + struct vlan_hdr *vh; - if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) + if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) return ERR_PTR(-EINVAL); - veh = (struct vlan_ethhdr *)skb->data; - type = veh->h_vlan_encapsulated_proto; + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; } skb_reset_mac_header(skb); @@ -1966,6 +2017,23 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } +int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +{ + __be16 protocol = skb->protocol; + + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else if (!skb->vlan_tci) + return dev->features; + + if (protocol != htons(ETH_P_8021Q)) + return dev->features & dev->vlan_features; + else + return 0; +} +EXPORT_SYMBOL(netif_get_vlan_features); + /* * Returns true if either: * 1. skb has frag_list and the device doesn't support FRAGLIST, or @@ -1976,15 +2044,20 @@ static inline void skb_orphan_try(struct sk_buff *skb) static inline int skb_needs_linearize(struct sk_buff *skb, struct net_device *dev) { - int features = dev->features; + if (skb_is_nonlinear(skb)) { + int features = dev->features; - if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb)) - features &= dev->vlan_features; + if (vlan_tx_tag_present(skb)) + features &= dev->vlan_features; - return skb_is_nonlinear(skb) && - ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) || - illegal_highdma(dev, skb)))); + return (skb_has_frag_list(skb) && + !(features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && + (!(features & NETIF_F_SG) || + illegal_highdma(dev, skb))); + } + + return 0; } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -1994,9 +2067,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); - /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -2004,6 +2074,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(skb, dev); + skb_orphan_try(skb); if (vlan_tx_tag_present(skb) && @@ -2030,8 +2103,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, * checksumming here. */ if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, skb->csum_start - - skb_headroom(skb)); + skb_set_transport_header(skb, + skb_checksum_start_offset(skb)); if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) goto out_kfree_skb; @@ -2084,14 +2157,19 @@ out: static u32 hashrnd __read_mostly; -u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) +/* + * Returns a Tx hash based on the given packet descriptor a Tx queues' number + * to be used as a distribution range. + */ +u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, + unsigned int num_tx_queues) { u32 hash; if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); - while (unlikely(hash >= dev->real_num_tx_queues)) - hash -= dev->real_num_tx_queues; + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; return hash; } @@ -2101,9 +2179,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); - return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); + return (u16) (((u64) hash * num_tx_queues) >> 32); } -EXPORT_SYMBOL(skb_tx_hash); +EXPORT_SYMBOL(__skb_tx_hash); static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { @@ -2118,26 +2196,70 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) return queue_index; } +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[raw_smp_processor_id()]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else { + u32 hash; + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol ^ + skb->rxhash; + hash = jhash_1word(hash, hashrnd); + queue_index = map->queues[ + ((u64)hash * map->len) >> 32]; + } + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { int queue_index; const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_select_queue) { + if (dev->real_num_tx_queues == 1) + queue_index = 0; + else if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb); queue_index = dev_cap_txqueue(dev, queue_index); } else { struct sock *sk = skb->sk; queue_index = sk_tx_queue_get(sk); - if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) { - queue_index = 0; - if (dev->real_num_tx_queues > 1) + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int old_index = queue_index; + + queue_index = get_xps_queue(dev, skb); + if (queue_index < 0) queue_index = skb_tx_hash(dev, skb); - if (sk) { - struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); + if (queue_index != old_index && sk) { + struct dst_entry *dst = + rcu_dereference_check(sk->sk_dst_cache, 1); if (dst && skb_dst(skb) == dst) sk_tx_queue_set(sk, queue_index); @@ -2711,14 +2833,6 @@ static void net_tx_action(struct softirq_action *h) } } -static inline int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev, - struct net_device *orig_dev) -{ - atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); -} - #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) /* This hook is defined here for ATM LANE */ @@ -4886,10 +5000,12 @@ static void rollback_registered_many(struct list_head *head) } BUG_ON(dev->reg_state != NETREG_REGISTERED); + } - /* If device is running, close it first. */ - dev_close(dev); + /* If device is running, close it first. */ + dev_close_many(head); + list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ unlist_netdevice(dev); @@ -4966,10 +5082,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } if (features & NETIF_F_UFO) { - if (!(features & NETIF_F_GEN_CSUM)) { + /* maybe split UFO into V4 and V6? */ + if (!((features & NETIF_F_GEN_CSUM) || + (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { if (name) printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_HW_CSUM feature.\n", + "since no checksum offload features.\n", name); features &= ~NETIF_F_UFO; } @@ -5013,9 +5132,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); +#ifdef CONFIG_RPS static int netif_alloc_rx_queues(struct net_device *dev) { -#ifdef CONFIG_RPS unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; @@ -5028,15 +5147,22 @@ static int netif_alloc_rx_queues(struct net_device *dev) } dev->_rx = rx; - /* - * Set a pointer to first element in the array which holds the - * reference count. - */ for (i = 0; i < count; i++) - rx[i].first = rx; -#endif + rx[i].dev = dev; return 0; } +#endif + +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue, void *_unused) +{ + /* Initialize queue lock */ + spin_lock_init(&queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); + queue->xmit_lock_owner = -1; + netdev_queue_numa_node_write(queue, NUMA_NO_NODE); + queue->dev = dev; +} static int netif_alloc_netdev_queues(struct net_device *dev) { @@ -5052,25 +5178,11 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return -ENOMEM; } dev->_tx = tx; - return 0; -} -static void netdev_init_one_queue(struct net_device *dev, - struct netdev_queue *queue, - void *_unused) -{ - queue->dev = dev; - - /* Initialize queue lock */ - spin_lock_init(&queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); - queue->xmit_lock_owner = -1; -} - -static void netdev_init_queues(struct net_device *dev) -{ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); spin_lock_init(&dev->tx_global_lock); + + return 0; } /** @@ -5109,16 +5221,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - ret = netif_alloc_rx_queues(dev); - if (ret) - goto out; - - ret = netif_alloc_netdev_queues(dev); - if (ret) - goto out; - - netdev_init_queues(dev); - /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5576,10 +5678,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; + if (netif_alloc_netdev_queues(dev)) + goto free_pcpu; #ifdef CONFIG_RPS dev->num_rx_queues = queue_count; dev->real_num_rx_queues = queue_count; + if (netif_alloc_rx_queues(dev)) + goto free_pcpu; #endif dev->gso_max_size = GSO_MAX_SIZE; @@ -5596,6 +5702,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, free_pcpu: free_percpu(dev->pcpu_refcnt); + kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif + free_p: kfree(p); return NULL; @@ -5617,6 +5728,9 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif kfree(rcu_dereference_raw(dev->ingress_queue)); diff --git a/net/core/dst.c b/net/core/dst.c index 8abe628b79f1..b99c7c7ffce2 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, static struct notifier_block dst_dev_notifier = { .notifier_call = dst_dev_event, + .priority = -10, /* must be called after other network notifiers */ }; void __init dst_init(void) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 956a9f4971cb..17741782a345 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -891,6 +891,20 @@ static int ethtool_nway_reset(struct net_device *dev) return dev->ethtool_ops->nway_reset(dev); } +static int ethtool_get_link(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { .cmd = ETHTOOL_GLINK }; + + if (!dev->ethtool_ops->get_link) + return -EOPNOTSUPP; + + edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; @@ -1171,7 +1185,9 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) return -EFAULT; if (edata.data && !(dev->features & NETIF_F_SG)) return -EINVAL; - if (edata.data && !(dev->features & NETIF_F_HW_CSUM)) + if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) return -EINVAL; return dev->ethtool_ops->set_ufo(dev, edata.data); } @@ -1528,8 +1544,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_nway_reset(dev); break; case ETHTOOL_GLINK: - rc = ethtool_get_value(dev, useraddr, ethcmd, - dev->ethtool_ops->get_link); + rc = ethtool_get_link(dev, useraddr); break; case ETHTOOL_GEEPROM: rc = ethtool_get_eeprom(dev, useraddr); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 82a4369ae150..a20e5d3bbfa0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,8 +181,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif) && - !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF)) + if (rule->iifindex && (rule->iifindex != fl->iif)) goto out; if (rule->oifindex && (rule->oifindex != fl->oif)) diff --git a/net/core/filter.c b/net/core/filter.c index 7beaec36b541..2b27d4efdd48 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -37,9 +37,69 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> #include <linux/filter.h> +#include <linux/reciprocal_div.h> + +enum { + BPF_S_RET_K = 1, + BPF_S_RET_A, + BPF_S_ALU_ADD_K, + BPF_S_ALU_ADD_X, + BPF_S_ALU_SUB_K, + BPF_S_ALU_SUB_X, + BPF_S_ALU_MUL_K, + BPF_S_ALU_MUL_X, + BPF_S_ALU_DIV_X, + BPF_S_ALU_AND_K, + BPF_S_ALU_AND_X, + BPF_S_ALU_OR_K, + BPF_S_ALU_OR_X, + BPF_S_ALU_LSH_K, + BPF_S_ALU_LSH_X, + BPF_S_ALU_RSH_K, + BPF_S_ALU_RSH_X, + BPF_S_ALU_NEG, + BPF_S_LD_W_ABS, + BPF_S_LD_H_ABS, + BPF_S_LD_B_ABS, + BPF_S_LD_W_LEN, + BPF_S_LD_W_IND, + BPF_S_LD_H_IND, + BPF_S_LD_B_IND, + BPF_S_LD_IMM, + BPF_S_LDX_W_LEN, + BPF_S_LDX_B_MSH, + BPF_S_LDX_IMM, + BPF_S_MISC_TAX, + BPF_S_MISC_TXA, + BPF_S_ALU_DIV_K, + BPF_S_LD_MEM, + BPF_S_LDX_MEM, + BPF_S_ST, + BPF_S_STX, + BPF_S_JMP_JA, + BPF_S_JMP_JEQ_K, + BPF_S_JMP_JEQ_X, + BPF_S_JMP_JGE_K, + BPF_S_JMP_JGE_X, + BPF_S_JMP_JGT_K, + BPF_S_JMP_JGT_X, + BPF_S_JMP_JSET_K, + BPF_S_JMP_JSET_X, + /* Ancillary data */ + BPF_S_ANC_PROTOCOL, + BPF_S_ANC_PKTTYPE, + BPF_S_ANC_IFINDEX, + BPF_S_ANC_NLATTR, + BPF_S_ANC_NLATTR_NEST, + BPF_S_ANC_MARK, + BPF_S_ANC_QUEUE, + BPF_S_ANC_HATYPE, + BPF_S_ANC_RXHASH, + BPF_S_ANC_CPU, +}; /* No hurry in this branch */ -static void *__load_pointer(struct sk_buff *skb, int k) +static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) { u8 *ptr = NULL; @@ -48,21 +108,17 @@ static void *__load_pointer(struct sk_buff *skb, int k) else if (k >= SKF_LL_OFF) ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - if (ptr >= skb->head && ptr < skb_tail_pointer(skb)) + if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; return NULL; } -static inline void *load_pointer(struct sk_buff *skb, int k, +static inline void *load_pointer(const struct sk_buff *skb, int k, unsigned int size, void *buffer) { if (k >= 0) return skb_header_pointer(skb, k, size, buffer); - else { - if (k >= SKF_AD_OFF) - return NULL; - return __load_pointer(skb, k); - } + return __load_pointer(skb, k, size); } /** @@ -89,7 +145,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock_bh(); filter = rcu_dereference_bh(sk->sk_filter); if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); + unsigned int pkt_len = sk_run_filter(skb, filter->insns); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -103,48 +159,52 @@ EXPORT_SYMBOL(sk_filter); * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on * @filter: filter to apply - * @flen: length of filter * * Decode and apply filter instructions to the skb->data. - * Return length to keep, 0 for none. skb is the data we are - * filtering, filter is the array of filter instructions, and - * len is the number of filter blocks in the array. + * Return length to keep, 0 for none. @skb is the data we are + * filtering, @filter is the array of filter instructions. + * Because all jumps are guaranteed to be before last instruction, + * and last instruction guaranteed to be a RET, we dont need to check + * flen. (We used to pass to this function the length of filter) */ -unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) +unsigned int sk_run_filter(const struct sk_buff *skb, + const struct sock_filter *fentry) { - struct sock_filter *fentry; /* We walk down these */ void *ptr; u32 A = 0; /* Accumulator */ u32 X = 0; /* Index Register */ u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ u32 tmp; int k; - int pc; /* * Process array of filter instructions. */ - for (pc = 0; pc < flen; pc++) { - fentry = &filter[pc]; + for (;; fentry++) { +#if defined(CONFIG_X86_32) +#define K (fentry->k) +#else + const u32 K = fentry->k; +#endif switch (fentry->code) { case BPF_S_ALU_ADD_X: A += X; continue; case BPF_S_ALU_ADD_K: - A += fentry->k; + A += K; continue; case BPF_S_ALU_SUB_X: A -= X; continue; case BPF_S_ALU_SUB_K: - A -= fentry->k; + A -= K; continue; case BPF_S_ALU_MUL_X: A *= X; continue; case BPF_S_ALU_MUL_K: - A *= fentry->k; + A *= K; continue; case BPF_S_ALU_DIV_X: if (X == 0) @@ -152,89 +212,89 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int A /= X; continue; case BPF_S_ALU_DIV_K: - A /= fentry->k; + A = reciprocal_divide(A, K); continue; case BPF_S_ALU_AND_X: A &= X; continue; case BPF_S_ALU_AND_K: - A &= fentry->k; + A &= K; continue; case BPF_S_ALU_OR_X: A |= X; continue; case BPF_S_ALU_OR_K: - A |= fentry->k; + A |= K; continue; case BPF_S_ALU_LSH_X: A <<= X; continue; case BPF_S_ALU_LSH_K: - A <<= fentry->k; + A <<= K; continue; case BPF_S_ALU_RSH_X: A >>= X; continue; case BPF_S_ALU_RSH_K: - A >>= fentry->k; + A >>= K; continue; case BPF_S_ALU_NEG: A = -A; continue; case BPF_S_JMP_JA: - pc += fentry->k; + fentry += K; continue; case BPF_S_JMP_JGT_K: - pc += (A > fentry->k) ? fentry->jt : fentry->jf; + fentry += (A > K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGE_K: - pc += (A >= fentry->k) ? fentry->jt : fentry->jf; + fentry += (A >= K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JEQ_K: - pc += (A == fentry->k) ? fentry->jt : fentry->jf; + fentry += (A == K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JSET_K: - pc += (A & fentry->k) ? fentry->jt : fentry->jf; + fentry += (A & K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGT_X: - pc += (A > X) ? fentry->jt : fentry->jf; + fentry += (A > X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGE_X: - pc += (A >= X) ? fentry->jt : fentry->jf; + fentry += (A >= X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JEQ_X: - pc += (A == X) ? fentry->jt : fentry->jf; + fentry += (A == X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JSET_X: - pc += (A & X) ? fentry->jt : fentry->jf; + fentry += (A & X) ? fentry->jt : fentry->jf; continue; case BPF_S_LD_W_ABS: - k = fentry->k; + k = K; load_w: ptr = load_pointer(skb, k, 4, &tmp); if (ptr != NULL) { A = get_unaligned_be32(ptr); continue; } - break; + return 0; case BPF_S_LD_H_ABS: - k = fentry->k; + k = K; load_h: ptr = load_pointer(skb, k, 2, &tmp); if (ptr != NULL) { A = get_unaligned_be16(ptr); continue; } - break; + return 0; case BPF_S_LD_B_ABS: - k = fentry->k; + k = K; load_b: ptr = load_pointer(skb, k, 1, &tmp); if (ptr != NULL) { A = *(u8 *)ptr; continue; } - break; + return 0; case BPF_S_LD_W_LEN: A = skb->len; continue; @@ -242,32 +302,32 @@ load_b: X = skb->len; continue; case BPF_S_LD_W_IND: - k = X + fentry->k; + k = X + K; goto load_w; case BPF_S_LD_H_IND: - k = X + fentry->k; + k = X + K; goto load_h; case BPF_S_LD_B_IND: - k = X + fentry->k; + k = X + K; goto load_b; case BPF_S_LDX_B_MSH: - ptr = load_pointer(skb, fentry->k, 1, &tmp); + ptr = load_pointer(skb, K, 1, &tmp); if (ptr != NULL) { X = (*(u8 *)ptr & 0xf) << 2; continue; } return 0; case BPF_S_LD_IMM: - A = fentry->k; + A = K; continue; case BPF_S_LDX_IMM: - X = fentry->k; + X = K; continue; case BPF_S_LD_MEM: - A = mem[fentry->k]; + A = mem[K]; continue; case BPF_S_LDX_MEM: - X = mem[fentry->k]; + X = mem[K]; continue; case BPF_S_MISC_TAX: X = A; @@ -276,48 +336,44 @@ load_b: A = X; continue; case BPF_S_RET_K: - return fentry->k; + return K; case BPF_S_RET_A: return A; case BPF_S_ST: - mem[fentry->k] = A; + mem[K] = A; continue; case BPF_S_STX: - mem[fentry->k] = X; + mem[K] = X; continue; - default: - WARN_ON(1); - return 0; - } - - /* - * Handle ancillary data, which are impossible - * (or very difficult) to get parsing packet contents. - */ - switch (k-SKF_AD_OFF) { - case SKF_AD_PROTOCOL: + case BPF_S_ANC_PROTOCOL: A = ntohs(skb->protocol); continue; - case SKF_AD_PKTTYPE: + case BPF_S_ANC_PKTTYPE: A = skb->pkt_type; continue; - case SKF_AD_IFINDEX: + case BPF_S_ANC_IFINDEX: if (!skb->dev) return 0; A = skb->dev->ifindex; continue; - case SKF_AD_MARK: + case BPF_S_ANC_MARK: A = skb->mark; continue; - case SKF_AD_QUEUE: + case BPF_S_ANC_QUEUE: A = skb->queue_mapping; continue; - case SKF_AD_HATYPE: + case BPF_S_ANC_HATYPE: if (!skb->dev) return 0; A = skb->dev->type; continue; - case SKF_AD_NLATTR: { + case BPF_S_ANC_RXHASH: + A = skb->rxhash; + continue; + case BPF_S_ANC_CPU: + A = raw_smp_processor_id(); + continue; + case BPF_S_ANC_NLATTR: { struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -333,7 +389,7 @@ load_b: A = 0; continue; } - case SKF_AD_NLATTR_NEST: { + case BPF_S_ANC_NLATTR_NEST: { struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -353,6 +409,7 @@ load_b: continue; } default: + WARN_ON(1); return 0; } } @@ -361,6 +418,66 @@ load_b: } EXPORT_SYMBOL(sk_run_filter); +/* + * Security : + * A BPF program is able to use 16 cells of memory to store intermediate + * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) + * As we dont want to clear mem[] array for each packet going through + * sk_run_filter(), we check that filter loaded by user never try to read + * a cell if not previously written, and we check all branches to be sure + * a malicious user doesnt try to abuse us. + */ +static int check_load_and_stores(struct sock_filter *filter, int flen) +{ + u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ + int pc, ret = 0; + + BUILD_BUG_ON(BPF_MEMWORDS > 16); + masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); + if (!masks) + return -ENOMEM; + memset(masks, 0xff, flen * sizeof(*masks)); + + for (pc = 0; pc < flen; pc++) { + memvalid &= masks[pc]; + + switch (filter[pc].code) { + case BPF_S_ST: + case BPF_S_STX: + memvalid |= (1 << filter[pc].k); + break; + case BPF_S_LD_MEM: + case BPF_S_LDX_MEM: + if (!(memvalid & (1 << filter[pc].k))) { + ret = -EINVAL; + goto error; + } + break; + case BPF_S_JMP_JA: + /* a jump must set masks on target */ + masks[pc + 1 + filter[pc].k] &= memvalid; + memvalid = ~0; + break; + case BPF_S_JMP_JEQ_K: + case BPF_S_JMP_JEQ_X: + case BPF_S_JMP_JGE_K: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JSET_X: + case BPF_S_JMP_JSET_K: + /* a jump must set masks on targets */ + masks[pc + 1 + filter[pc].jt] &= memvalid; + masks[pc + 1 + filter[pc].jf] &= memvalid; + memvalid = ~0; + break; + } + } +error: + kfree(masks); + return ret; +} + /** * sk_chk_filter - verify socket filter code * @filter: filter to verify @@ -377,7 +494,57 @@ EXPORT_SYMBOL(sk_run_filter); */ int sk_chk_filter(struct sock_filter *filter, int flen) { - struct sock_filter *ftest; + /* + * Valid instructions are initialized to non-0. + * Invalid instructions are initialized to 0. + */ + static const u8 codes[] = { + [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K, + [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X, + [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K, + [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X, + [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, + [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, + [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, + [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, + [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, + [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, + [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, + [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, + [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, + [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, + [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X, + [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG, + [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS, + [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS, + [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS, + [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN, + [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND, + [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND, + [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND, + [BPF_LD|BPF_IMM] = BPF_S_LD_IMM, + [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN, + [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH, + [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM, + [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX, + [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA, + [BPF_RET|BPF_K] = BPF_S_RET_K, + [BPF_RET|BPF_A] = BPF_S_RET_A, + [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K, + [BPF_LD|BPF_MEM] = BPF_S_LD_MEM, + [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM, + [BPF_ST] = BPF_S_ST, + [BPF_STX] = BPF_S_STX, + [BPF_JMP|BPF_JA] = BPF_S_JMP_JA, + [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K, + [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X, + [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K, + [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X, + [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K, + [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X, + [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K, + [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, + }; int pc; if (flen == 0 || flen > BPF_MAXINSNS) @@ -385,136 +552,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen) /* check the filter code now */ for (pc = 0; pc < flen; pc++) { - ftest = &filter[pc]; - - /* Only allow valid instructions */ - switch (ftest->code) { - case BPF_ALU|BPF_ADD|BPF_K: - ftest->code = BPF_S_ALU_ADD_K; - break; - case BPF_ALU|BPF_ADD|BPF_X: - ftest->code = BPF_S_ALU_ADD_X; - break; - case BPF_ALU|BPF_SUB|BPF_K: - ftest->code = BPF_S_ALU_SUB_K; - break; - case BPF_ALU|BPF_SUB|BPF_X: - ftest->code = BPF_S_ALU_SUB_X; - break; - case BPF_ALU|BPF_MUL|BPF_K: - ftest->code = BPF_S_ALU_MUL_K; - break; - case BPF_ALU|BPF_MUL|BPF_X: - ftest->code = BPF_S_ALU_MUL_X; - break; - case BPF_ALU|BPF_DIV|BPF_X: - ftest->code = BPF_S_ALU_DIV_X; - break; - case BPF_ALU|BPF_AND|BPF_K: - ftest->code = BPF_S_ALU_AND_K; - break; - case BPF_ALU|BPF_AND|BPF_X: - ftest->code = BPF_S_ALU_AND_X; - break; - case BPF_ALU|BPF_OR|BPF_K: - ftest->code = BPF_S_ALU_OR_K; - break; - case BPF_ALU|BPF_OR|BPF_X: - ftest->code = BPF_S_ALU_OR_X; - break; - case BPF_ALU|BPF_LSH|BPF_K: - ftest->code = BPF_S_ALU_LSH_K; - break; - case BPF_ALU|BPF_LSH|BPF_X: - ftest->code = BPF_S_ALU_LSH_X; - break; - case BPF_ALU|BPF_RSH|BPF_K: - ftest->code = BPF_S_ALU_RSH_K; - break; - case BPF_ALU|BPF_RSH|BPF_X: - ftest->code = BPF_S_ALU_RSH_X; - break; - case BPF_ALU|BPF_NEG: - ftest->code = BPF_S_ALU_NEG; - break; - case BPF_LD|BPF_W|BPF_ABS: - ftest->code = BPF_S_LD_W_ABS; - break; - case BPF_LD|BPF_H|BPF_ABS: - ftest->code = BPF_S_LD_H_ABS; - break; - case BPF_LD|BPF_B|BPF_ABS: - ftest->code = BPF_S_LD_B_ABS; - break; - case BPF_LD|BPF_W|BPF_LEN: - ftest->code = BPF_S_LD_W_LEN; - break; - case BPF_LD|BPF_W|BPF_IND: - ftest->code = BPF_S_LD_W_IND; - break; - case BPF_LD|BPF_H|BPF_IND: - ftest->code = BPF_S_LD_H_IND; - break; - case BPF_LD|BPF_B|BPF_IND: - ftest->code = BPF_S_LD_B_IND; - break; - case BPF_LD|BPF_IMM: - ftest->code = BPF_S_LD_IMM; - break; - case BPF_LDX|BPF_W|BPF_LEN: - ftest->code = BPF_S_LDX_W_LEN; - break; - case BPF_LDX|BPF_B|BPF_MSH: - ftest->code = BPF_S_LDX_B_MSH; - break; - case BPF_LDX|BPF_IMM: - ftest->code = BPF_S_LDX_IMM; - break; - case BPF_MISC|BPF_TAX: - ftest->code = BPF_S_MISC_TAX; - break; - case BPF_MISC|BPF_TXA: - ftest->code = BPF_S_MISC_TXA; - break; - case BPF_RET|BPF_K: - ftest->code = BPF_S_RET_K; - break; - case BPF_RET|BPF_A: - ftest->code = BPF_S_RET_A; - break; + struct sock_filter *ftest = &filter[pc]; + u16 code = ftest->code; + if (code >= ARRAY_SIZE(codes)) + return -EINVAL; + code = codes[code]; + if (!code) + return -EINVAL; /* Some instructions need special checks */ - + switch (code) { + case BPF_S_ALU_DIV_K: /* check for division by zero */ - case BPF_ALU|BPF_DIV|BPF_K: if (ftest->k == 0) return -EINVAL; - ftest->code = BPF_S_ALU_DIV_K; - break; - - /* check for invalid memory addresses */ - case BPF_LD|BPF_MEM: - if (ftest->k >= BPF_MEMWORDS) - return -EINVAL; - ftest->code = BPF_S_LD_MEM; + ftest->k = reciprocal_value(ftest->k); break; - case BPF_LDX|BPF_MEM: - if (ftest->k >= BPF_MEMWORDS) - return -EINVAL; - ftest->code = BPF_S_LDX_MEM; - break; - case BPF_ST: - if (ftest->k >= BPF_MEMWORDS) - return -EINVAL; - ftest->code = BPF_S_ST; - break; - case BPF_STX: + case BPF_S_LD_MEM: + case BPF_S_LDX_MEM: + case BPF_S_ST: + case BPF_S_STX: + /* check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; - ftest->code = BPF_S_STX; break; - - case BPF_JMP|BPF_JA: + case BPF_S_JMP_JA: /* * Note, the large ftest->k might cause loops. * Compare this with conditional jumps below, @@ -522,40 +584,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen) */ if (ftest->k >= (unsigned)(flen-pc-1)) return -EINVAL; - ftest->code = BPF_S_JMP_JA; - break; - - case BPF_JMP|BPF_JEQ|BPF_K: - ftest->code = BPF_S_JMP_JEQ_K; - break; - case BPF_JMP|BPF_JEQ|BPF_X: - ftest->code = BPF_S_JMP_JEQ_X; - break; - case BPF_JMP|BPF_JGE|BPF_K: - ftest->code = BPF_S_JMP_JGE_K; - break; - case BPF_JMP|BPF_JGE|BPF_X: - ftest->code = BPF_S_JMP_JGE_X; break; - case BPF_JMP|BPF_JGT|BPF_K: - ftest->code = BPF_S_JMP_JGT_K; - break; - case BPF_JMP|BPF_JGT|BPF_X: - ftest->code = BPF_S_JMP_JGT_X; - break; - case BPF_JMP|BPF_JSET|BPF_K: - ftest->code = BPF_S_JMP_JSET_K; - break; - case BPF_JMP|BPF_JSET|BPF_X: - ftest->code = BPF_S_JMP_JSET_X; - break; - - default: - return -EINVAL; - } - - /* for conditionals both must be safe */ - switch (ftest->code) { case BPF_S_JMP_JEQ_K: case BPF_S_JMP_JEQ_X: case BPF_S_JMP_JGE_K: @@ -564,42 +593,54 @@ int sk_chk_filter(struct sock_filter *filter, int flen) case BPF_S_JMP_JGT_X: case BPF_S_JMP_JSET_X: case BPF_S_JMP_JSET_K: + /* for conditionals both must be safe */ if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; + break; + case BPF_S_LD_W_ABS: + case BPF_S_LD_H_ABS: + case BPF_S_LD_B_ABS: +#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ + code = BPF_S_ANC_##CODE; \ + break + switch (ftest->k) { + ANCILLARY(PROTOCOL); + ANCILLARY(PKTTYPE); + ANCILLARY(IFINDEX); + ANCILLARY(NLATTR); + ANCILLARY(NLATTR_NEST); + ANCILLARY(MARK); + ANCILLARY(QUEUE); + ANCILLARY(HATYPE); + ANCILLARY(RXHASH); + ANCILLARY(CPU); + } } + ftest->code = code; } /* last instruction must be a RET code */ switch (filter[flen - 1].code) { case BPF_S_RET_K: case BPF_S_RET_A: - return 0; - break; - default: - return -EINVAL; - } + return check_load_and_stores(filter, flen); + } + return -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); /** - * sk_filter_rcu_release: Release a socket filter by rcu_head + * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free */ -static void sk_filter_rcu_release(struct rcu_head *rcu) +void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); - sk_filter_release(fp); -} - -static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) -{ - unsigned int size = sk_filter_len(fp); - - atomic_sub(size, &sk->sk_omem_alloc); - call_rcu_bh(&fp->rcu, sk_filter_rcu_release); + kfree(fp); } +EXPORT_SYMBOL(sk_filter_release_rcu); /** * sk_attach_filter - attach a socket filter @@ -643,7 +684,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) rcu_assign_pointer(sk->sk_filter, fp); if (old_fp) - sk_filter_delayed_uncharge(sk, old_fp); + sk_filter_uncharge(sk, old_fp); return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); @@ -657,7 +698,7 @@ int sk_detach_filter(struct sock *sk) sock_owned_by_user(sk)); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_delayed_uncharge(sk, filter); + sk_filter_uncharge(sk, filter); ret = 0; } return ret; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8cc8f9a79db9..60a902913429 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -41,7 +41,6 @@ #define NEIGH_PRINTK(x...) printk(x) #define NEIGH_NOPRINTK(x...) do { ; } while(0) -#define NEIGH_PRINTK0 NEIGH_PRINTK #define NEIGH_PRINTK1 NEIGH_NOPRINTK #define NEIGH_PRINTK2 NEIGH_NOPRINTK diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a5ff5a89f376..e23c01be5a5b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -706,21 +706,24 @@ static struct attribute *rx_queue_default_attrs[] = { static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); - struct netdev_rx_queue *first = queue->first; struct rps_map *map; struct rps_dev_flow_table *flow_table; map = rcu_dereference_raw(queue->rps_map); - if (map) + if (map) { + RCU_INIT_POINTER(queue->rps_map, NULL); call_rcu(&map->rcu, rps_map_release); + } flow_table = rcu_dereference_raw(queue->rps_flow_table); - if (flow_table) + if (flow_table) { + RCU_INIT_POINTER(queue->rps_flow_table, NULL); call_rcu(&flow_table->rcu, rps_dev_flow_table_release); + } - if (atomic_dec_and_test(&first->count)) - kfree(first); + memset(kobj, 0, sizeof(*kobj)); + dev_put(queue->dev); } static struct kobj_type rx_queue_ktype = { @@ -732,7 +735,6 @@ static struct kobj_type rx_queue_ktype = { static int rx_queue_add_kobject(struct net_device *net, int index) { struct netdev_rx_queue *queue = net->_rx + index; - struct netdev_rx_queue *first = queue->first; struct kobject *kobj = &queue->kobj; int error = 0; @@ -745,14 +747,16 @@ static int rx_queue_add_kobject(struct net_device *net, int index) } kobject_uevent(kobj, KOBJ_ADD); - atomic_inc(&first->count); + dev_hold(queue->dev); return error; } +#endif /* CONFIG_RPS */ int net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { +#ifdef CONFIG_RPS int i; int error = 0; @@ -768,23 +772,423 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) kobject_put(&net->_rx[i].kobj); return error; +#else + return 0; +#endif +} + +#ifdef CONFIG_XPS +/* + * netdev_queue sysfs structures and functions. + */ +struct netdev_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, const char *buf, size_t len); +}; +#define to_netdev_queue_attr(_attr) container_of(_attr, \ + struct netdev_queue_attribute, attr) + +#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) + +static ssize_t netdev_queue_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(queue, attribute, buf); +} + +static ssize_t netdev_queue_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(queue, attribute, buf, count); +} + +static const struct sysfs_ops netdev_queue_sysfs_ops = { + .show = netdev_queue_attr_show, + .store = netdev_queue_attr_store, +}; + +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +{ + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; } -static int rx_queue_register_kobjects(struct net_device *net) + +static ssize_t show_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, char *buf) { + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + cpumask_var_t mask; + unsigned long index; + size_t len = 0; + int i; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + for_each_possible_cpu(i) { + struct xps_map *map = + rcu_dereference(dev_maps->cpu_map[i]); + if (map) { + int j; + for (j = 0; j < map->len; j++) { + if (map->queues[j] == index) { + cpumask_set_cpu(i, mask); + break; + } + } + } + } + } + rcu_read_unlock(); + + len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); + if (PAGE_SIZE - len < 3) { + free_cpumask_var(mask); + return -EINVAL; + } + + free_cpumask_var(mask); + len += sprintf(buf + len, "\n"); + return len; +} + +static void xps_map_release(struct rcu_head *rcu) +{ + struct xps_map *map = container_of(rcu, struct xps_map, rcu); + + kfree(map); +} + +static void xps_dev_maps_release(struct rcu_head *rcu) +{ + struct xps_dev_maps *dev_maps = + container_of(rcu, struct xps_dev_maps, rcu); + + kfree(dev_maps); +} + +static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) + +static ssize_t store_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + cpumask_var_t mask; + int err, i, cpu, pos, map_len, alloc_len, need_set; + unsigned long index; + struct xps_map *map, *new_map; + struct xps_dev_maps *dev_maps, *new_dev_maps; + int nonempty = 0; + int numa_node = -2; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); + if (err) { + free_cpumask_var(mask); + return err; + } + + new_dev_maps = kzalloc(max_t(unsigned, + XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); + if (!new_dev_maps) { + free_cpumask_var(mask); + return -ENOMEM; + } + + mutex_lock(&xps_map_mutex); + + dev_maps = xmap_dereference(dev->xps_maps); + + for_each_possible_cpu(cpu) { + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + new_map = map; + if (map) { + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + map_len = map->len; + alloc_len = map->alloc_len; + } else + pos = map_len = alloc_len = 0; + + need_set = cpu_isset(cpu, *mask) && cpu_online(cpu); +#ifdef CONFIG_NUMA + if (need_set) { + if (numa_node == -2) + numa_node = cpu_to_node(cpu); + else if (numa_node != cpu_to_node(cpu)) + numa_node = -1; + } +#endif + if (need_set && pos >= map_len) { + /* Need to add queue to this CPU's map */ + if (map_len >= alloc_len) { + alloc_len = alloc_len ? + 2 * alloc_len : XPS_MIN_MAP_ALLOC; + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), + GFP_KERNEL, + cpu_to_node(cpu)); + if (!new_map) + goto error; + new_map->alloc_len = alloc_len; + for (i = 0; i < map_len; i++) + new_map->queues[i] = map->queues[i]; + new_map->len = map_len; + } + new_map->queues[new_map->len++] = index; + } else if (!need_set && pos < map_len) { + /* Need to remove queue from this CPU's map */ + if (map_len > 1) + new_map->queues[pos] = + new_map->queues[--new_map->len]; + else + new_map = NULL; + } + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); + } + + /* Cleanup old maps */ + for_each_possible_cpu(cpu) { + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) + call_rcu(&map->rcu, xps_map_release); + if (new_dev_maps->cpu_map[cpu]) + nonempty = 1; + } + + if (nonempty) + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + else { + kfree(new_dev_maps); + rcu_assign_pointer(dev->xps_maps, NULL); + } + + if (dev_maps) + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + + netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : + NUMA_NO_NODE); + + mutex_unlock(&xps_map_mutex); + + free_cpumask_var(mask); + return len; + +error: + mutex_unlock(&xps_map_mutex); + + if (new_dev_maps) + for_each_possible_cpu(i) + kfree(rcu_dereference_protected( + new_dev_maps->cpu_map[i], + 1)); + kfree(new_dev_maps); + free_cpumask_var(mask); + return -ENOMEM; +} + +static struct netdev_queue_attribute xps_cpus_attribute = + __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); + +static struct attribute *netdev_queue_default_attrs[] = { + &xps_cpus_attribute.attr, + NULL +}; + +static void netdev_queue_release(struct kobject *kobj) +{ + struct netdev_queue *queue = to_netdev_queue(kobj); + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + struct xps_map *map; + unsigned long index; + int i, pos, nonempty = 0; + + index = get_netdev_queue_index(queue); + + mutex_lock(&xps_map_mutex); + dev_maps = xmap_dereference(dev->xps_maps); + + if (dev_maps) { + for_each_possible_cpu(i) { + map = xmap_dereference(dev_maps->cpu_map[i]); + if (!map) + continue; + + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + + if (pos < map->len) { + if (map->len > 1) + map->queues[pos] = + map->queues[--map->len]; + else { + RCU_INIT_POINTER(dev_maps->cpu_map[i], + NULL); + call_rcu(&map->rcu, xps_map_release); + map = NULL; + } + } + if (map) + nonempty = 1; + } + + if (!nonempty) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + } + } + + mutex_unlock(&xps_map_mutex); + + memset(kobj, 0, sizeof(*kobj)); + dev_put(queue->dev); +} + +static struct kobj_type netdev_queue_ktype = { + .sysfs_ops = &netdev_queue_sysfs_ops, + .release = netdev_queue_release, + .default_attrs = netdev_queue_default_attrs, +}; + +static int netdev_queue_add_kobject(struct net_device *net, int index) +{ + struct netdev_queue *queue = net->_tx + index; + struct kobject *kobj = &queue->kobj; + int error = 0; + + kobj->kset = net->queues_kset; + error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, + "tx-%u", index); + if (error) { + kobject_put(kobj); + return error; + } + + kobject_uevent(kobj, KOBJ_ADD); + dev_hold(queue->dev); + + return error; +} +#endif /* CONFIG_XPS */ + +int +netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) +{ +#ifdef CONFIG_XPS + int i; + int error = 0; + + for (i = old_num; i < new_num; i++) { + error = netdev_queue_add_kobject(net, i); + if (error) { + new_num = old_num; + break; + } + } + + while (--i >= new_num) + kobject_put(&net->_tx[i].kobj); + + return error; +#else + return 0; +#endif +} + +static int register_queue_kobjects(struct net_device *net) +{ + int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; + +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; - return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); +#endif + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; + + error = net_rx_queue_update_kobjects(net, 0, real_rx); + if (error) + goto error; + rxq = real_rx; + + error = netdev_queue_update_kobjects(net, 0, real_tx); + if (error) + goto error; + txq = real_tx; + + return 0; + +error: + netdev_queue_update_kobjects(net, txq, 0); + net_rx_queue_update_kobjects(net, rxq, 0); + return error; } -static void rx_queue_remove_kobjects(struct net_device *net) +static void remove_queue_kobjects(struct net_device *net) { - net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); + int real_rx = 0, real_tx = 0; + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; + + net_rx_queue_update_kobjects(net, real_rx, 0); + netdev_queue_update_kobjects(net, real_tx, 0); +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) kset_unregister(net->queues_kset); +#endif } -#endif /* CONFIG_RPS */ static const void *net_current_ns(void) { @@ -883,9 +1287,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); -#ifdef CONFIG_RPS - rx_queue_remove_kobjects(net); -#endif + remove_queue_kobjects(net); device_del(dev); } @@ -924,13 +1326,11 @@ int netdev_register_kobject(struct net_device *net) if (error) return error; -#ifdef CONFIG_RPS - error = rx_queue_register_kobjects(net); + error = register_queue_kobjects(net); if (error) { device_del(dev); return error; } -#endif return error; } diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 778e1571548d..bd7751ec1c4d 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -4,8 +4,8 @@ int netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); -#ifdef CONFIG_RPS int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); -#endif +int netdev_queue_update_kobjects(struct net_device *net, + int old_num, int new_num); #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4e98ffac3af0..02dc2cbcbe86 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -35,7 +35,6 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 -#define MAX_QUEUE_DEPTH (MAX_SKBS / 2) static struct sk_buff_head skb_pool; @@ -76,8 +75,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); __netif_tx_lock(txq, smp_processor_id()); - if (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq) || + if (netif_tx_queue_frozen_or_stopped(txq) || ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); @@ -925,7 +923,7 @@ void __netpoll_cleanup(struct netpoll *np) skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); - cancel_rearming_delayed_work(&npinfo->tx_work); + cancel_delayed_work_sync(&npinfo->tx_work); /* clean after last, unfinished work */ __skb_queue_purge(&npinfo->txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 33bc3823ac6f..a9e7fc4c461f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -378,6 +378,7 @@ struct pktgen_dev { u16 queue_map_min; u16 queue_map_max; + __u32 skb_priority; /* skb priority field */ int node; /* Memory node */ #ifdef CONFIG_XFRM @@ -394,6 +395,8 @@ struct pktgen_hdr { __be32 tv_usec; }; +static bool pktgen_exiting __read_mostly; + struct pktgen_thread { spinlock_t if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ @@ -547,6 +550,10 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->queue_map_min, pkt_dev->queue_map_max); + if (pkt_dev->skb_priority) + seq_printf(seq, " skb_priority: %u\n", + pkt_dev->skb_priority); + if (pkt_dev->flags & F_IPV6) { char b1[128], b2[128], b3[128]; fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); @@ -1711,6 +1718,18 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + if (!strcmp(name, "skb_priority")) { + len = num_arg(&user_buffer[i], 9, &value); + if (len < 0) + return len; + + i += len; + pkt_dev->skb_priority = value; + sprintf(pg_result, "OK: skb_priority=%i", + pkt_dev->skb_priority); + return count; + } + sprintf(pkt_dev->result, "No such parameter \"%s\"", name); return -EINVAL; } @@ -2641,6 +2660,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, sprintf(pkt_dev->result, "No memory"); return NULL; } + prefetchw(skb->data); skb_reserve(skb, datalen); @@ -2671,6 +2691,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); + skb->priority = pkt_dev->skb_priority; + iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2986,6 +3008,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, sprintf(pkt_dev->result, "No memory"); return NULL; } + prefetchw(skb->data); skb_reserve(skb, 16); @@ -3016,6 +3039,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); + skb->priority = pkt_dev->skb_priority; iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3431,11 +3455,6 @@ static void pktgen_rem_thread(struct pktgen_thread *t) remove_proc_entry(t->tsk->comm, pg_proc_dir); - mutex_lock(&pktgen_thread_lock); - - list_del(&t->th_list); - - mutex_unlock(&pktgen_thread_lock); } static void pktgen_resched(struct pktgen_dev *pkt_dev) @@ -3510,7 +3529,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) __netif_tx_lock_bh(txq); - if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) { + if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) { ret = NETDEV_TX_BUSY; pkt_dev->last_ok = 0; goto unlock; @@ -3534,8 +3553,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) break; default: /* Drivers are not supposed to return other values! */ if (net_ratelimit()) - pr_info("pktgen: %s xmit error: %d\n", - pkt_dev->odevname, ret); + pr_info("%s xmit error: %d\n", pkt_dev->odevname, ret); pkt_dev->errors++; /* fallthru */ case NETDEV_TX_LOCKED: @@ -3582,6 +3600,8 @@ static int pktgen_thread_worker(void *arg) pkt_dev = next_to_run(t); if (unlikely(!pkt_dev && t->control == 0)) { + if (pktgen_exiting) + break; wait_event_interruptible_timeout(t->queue, t->control != 0, HZ/10); @@ -3634,6 +3654,13 @@ static int pktgen_thread_worker(void *arg) pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); + /* Wait for kthread_stop */ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + __set_current_state(TASK_RUNNING); + return 0; } @@ -3908,6 +3935,7 @@ static void __exit pg_cleanup(void) struct list_head *q, *n; /* Stop all interfaces & threads */ + pktgen_exiting = true; list_for_each_safe(q, n, &pktgen_threads) { t = list_entry(q, struct pktgen_thread, th_list); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 7552495aff7a..182236b2510a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -33,6 +33,7 @@ * Note : Dont forget somaxconn that may limit backlog too. */ int sysctl_max_syn_backlog = 256; +EXPORT_SYMBOL(sysctl_max_syn_backlog); int reqsk_queue_alloc(struct request_sock_queue *queue, unsigned int nr_table_entries) @@ -45,9 +46,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); lopt_size += nr_table_entries * sizeof(struct request_sock *); if (lopt_size > PAGE_SIZE) - lopt = __vmalloc(lopt_size, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + lopt = vzalloc(lopt_size); else lopt = kzalloc(lopt_size, GFP_KERNEL); if (lopt == NULL) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8121268ddbdd..750db57f3bb3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -347,16 +347,106 @@ static size_t rtnl_link_get_size(const struct net_device *dev) if (!ops) return 0; - size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ - nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ + size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ + nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ if (ops->get_size) /* IFLA_INFO_DATA + nested data */ - size += nlmsg_total_size(sizeof(struct nlattr)) + + size += nla_total_size(sizeof(struct nlattr)) + ops->get_size(dev); if (ops->get_xstats_size) - size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ + /* IFLA_INFO_XSTATS */ + size += nla_total_size(ops->get_xstats_size(dev)); + + return size; +} + +static LIST_HEAD(rtnl_af_ops); + +static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +{ + const struct rtnl_af_ops *ops; + + list_for_each_entry(ops, &rtnl_af_ops, list) { + if (ops->family == family) + return ops; + } + + return NULL; +} + +/** + * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * The caller must hold the rtnl_mutex. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_af_register(struct rtnl_af_ops *ops) +{ + list_add_tail(&ops->list, &rtnl_af_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__rtnl_af_register); + +/** + * rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_af_register(struct rtnl_af_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_af_register(ops); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(rtnl_af_register); + +/** + * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + list_del(&ops->list); +} +EXPORT_SYMBOL_GPL(__rtnl_af_unregister); + +/** + * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + */ +void rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + rtnl_lock(); + __rtnl_af_unregister(ops); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(rtnl_af_unregister); + +static size_t rtnl_link_get_af_size(const struct net_device *dev) +{ + struct rtnl_af_ops *af_ops; + size_t size; + + /* IFLA_AF_SPEC */ + size = nla_total_size(sizeof(struct nlattr)); + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->get_link_af_size) { + /* AF_* + nested data */ + size += nla_total_size(sizeof(struct nlattr)) + + af_ops->get_link_af_size(dev); + } + } return size; } @@ -670,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ - + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ + + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -756,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; - struct nlattr *attr; + struct nlattr *attr, *af_spec; + struct rtnl_af_ops *af_ops; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -865,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + goto nla_put_failure; + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->fill_link_af) { + struct nlattr *af; + int err; + + if (!(af = nla_nest_start(skb, af_ops->family))) + goto nla_put_failure; + + err = af_ops->fill_link_af(skb, dev); + + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. + */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, af); + } + } + + nla_nest_end(skb, af_spec); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -923,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, + [IFLA_AF_SPEC] = { .type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -984,6 +1107,28 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EINVAL; } + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem, err; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + return -EAFNOSUPPORT; + + if (!af_ops->set_link_af) + return -EOPNOTSUPP; + + if (af_ops->validate_link_af) { + err = af_ops->validate_link_af(dev, + tb[IFLA_AF_SPEC]); + if (err < 0) + return err; + } + } + } + return 0; } @@ -1224,6 +1369,24 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; modified = 1; } + + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + BUG(); + + err = af_ops->set_link_af(dev, af); + if (err < 0) + goto errout; + + modified = 1; + } + } err = 0; errout: diff --git a/net/core/scm.c b/net/core/scm.c index 413cab89017d..bbe454450801 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) return -ENOMEM; *fplp = fpl; fpl->count = 0; + fpl->max = SCM_MAX_FD; } fpp = &fpl->fp[fpl->count]; - if (fpl->count + num > SCM_MAX_FD) + if (fpl->count + num > fpl->max) return -EINVAL; /* @@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) if (!fpl) return NULL; - new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); + new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), + GFP_KERNEL); if (new_fpl) { - for (i=fpl->count-1; i>=0; i--) + for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); - memcpy(new_fpl, fpl, sizeof(*fpl)); + new_fpl->max = new_fpl->count; } return new_fpl; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 74ebf4b5258a..d31bb36ae0dc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -780,6 +780,28 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, size = SKB_DATA_ALIGN(size); + /* Check if we can avoid taking references on fragments if we own + * the last reference on skb->head. (see skb_release_data()) + */ + if (!skb->cloned) + fastpath = true; + else { + int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; + + fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; + } + + if (fastpath && + size + sizeof(struct skb_shared_info) <= ksize(skb->head)) { + memmove(skb->head + size, skb_shinfo(skb), + offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + memmove(skb->head + nhead, skb->head, + skb_tail_pointer(skb) - skb->head); + off = nhead; + goto adjust_others; + } + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); if (!data) goto nodata; @@ -793,17 +815,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); - /* Check if we can avoid taking references on fragments if we own - * the last reference on skb->head. (see skb_release_data()) - */ - if (!skb->cloned) - fastpath = true; - else { - int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; - - fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; - } - if (fastpath) { kfree(skb->head); } else { @@ -818,6 +829,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, off = (data + nhead) - skb->head; skb->head = data; +adjust_others: skb->data += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->end = size; @@ -1814,7 +1826,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) long csstart; if (skb->ip_summed == CHECKSUM_PARTIAL) - csstart = skb->csum_start - skb_headroom(skb); + csstart = skb_checksum_start_offset(skb); else csstart = skb_headlen(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 3eed5424e659..a658aeb6d554 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -992,23 +992,54 @@ static inline void sock_lock_init(struct sock *sk) /* * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, * even temporarly, because of RCU lookups. sk_node should also be left as is. + * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end */ static void sock_copy(struct sock *nsk, const struct sock *osk) { #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif - BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != - sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + - sizeof(osk->sk_tx_queue_mapping)); - memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, - osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); + memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); + + memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, + osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); + #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); #endif } +/* + * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes + * un-modified. Special care is taken when initializing object to zero. + */ +static inline void sk_prot_clear_nulls(struct sock *sk, int size) +{ + if (offsetof(struct sock, sk_node.next) != 0) + memset(sk, 0, offsetof(struct sock, sk_node.next)); + memset(&sk->sk_node.pprev, 0, + size - offsetof(struct sock, sk_node.pprev)); +} + +void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) +{ + unsigned long nulls1, nulls2; + + nulls1 = offsetof(struct sock, __sk_common.skc_node.next); + nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); + if (nulls1 > nulls2) + swap(nulls1, nulls2); + + if (nulls1 != 0) + memset((char *)sk, 0, nulls1); + memset((char *)sk + nulls1 + sizeof(void *), 0, + nulls2 - nulls1 - sizeof(void *)); + memset((char *)sk + nulls2 + sizeof(void *), 0, + size - nulls2 - sizeof(void *)); +} +EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); + static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, int family) { @@ -1021,19 +1052,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!sk) return sk; if (priority & __GFP_ZERO) { - /* - * caches using SLAB_DESTROY_BY_RCU should let - * sk_node.next un-modified. Special care is taken - * when initializing object to zero. - */ - if (offsetof(struct sock, sk_node.next) != 0) - memset(sk, 0, offsetof(struct sock, sk_node.next)); - memset(&sk->sk_node.pprev, 0, - prot->obj_size - offsetof(struct sock, - sk_node.pprev)); + if (prot->clear_sk) + prot->clear_sk(sk, prot->obj_size); + else + sk_prot_clear_nulls(sk, prot->obj_size); } - } - else + } else sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { @@ -1653,10 +1677,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) { struct proto *prot = sk->sk_prot; int amt = sk_mem_pages(size); - int allocated; + long allocated; sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - allocated = atomic_add_return(amt, prot->memory_allocated); + allocated = atomic_long_add_return(amt, prot->memory_allocated); /* Under limit. */ if (allocated <= prot->sysctl_mem[0]) { @@ -1714,7 +1738,7 @@ suppress_allocation: /* Alas. Undo changes. */ sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; - atomic_sub(amt, prot->memory_allocated); + atomic_long_sub(amt, prot->memory_allocated); return 0; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -1727,12 +1751,12 @@ void __sk_mem_reclaim(struct sock *sk) { struct proto *prot = sk->sk_prot; - atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, + atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, prot->memory_allocated); sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; if (prot->memory_pressure && *prot->memory_pressure && - (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) + (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0])) *prot->memory_pressure = 0; } EXPORT_SYMBOL(__sk_mem_reclaim); @@ -1884,7 +1908,7 @@ static void sock_def_readable(struct sock *sk, int len) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (wq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); @@ -2452,12 +2476,12 @@ static char proto_method_implemented(const void *method) static void proto_seq_printf(struct seq_file *seq, struct proto *proto) { - seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s " + seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", proto->name, proto->obj_size, sock_prot_inuse_get(seq_file_net(seq), proto), - proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, + proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L, proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", proto->max_header, proto->slab == NULL ? "no" : "yes", diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 0ae6c22da85b..7e7ca375d431 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -26,12 +26,12 @@ static struct sock_filter ptp_filter[] = { PTP_FILTER }; -static unsigned int classify(struct sk_buff *skb) +static unsigned int classify(const struct sk_buff *skb) { if (likely(skb->dev && skb->dev->phydev && skb->dev->phydev->drv)) - return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); + return sk_run_filter(skb, ptp_filter); else return PTP_CLASS_NONE; } @@ -96,11 +96,13 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) struct phy_device *phydev; unsigned int type; - skb_push(skb, ETH_HLEN); + if (skb_headroom(skb) < ETH_HLEN) + return false; + __skb_push(skb, ETH_HLEN); type = classify(skb); - skb_pull(skb, ETH_HLEN); + __skb_pull(skb, ETH_HLEN); switch (type) { case PTP_CLASS_V1_IPV4: diff --git a/net/dcb/Makefile b/net/dcb/Makefile index 9930f4cde818..c1282c9e64fa 100644 --- a/net/dcb/Makefile +++ b/net/dcb/Makefile @@ -1 +1 @@ -obj-$(CONFIG_DCB) += dcbnl.o +obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c new file mode 100644 index 000000000000..665a8802105a --- /dev/null +++ b/net/dcb/dcbevent.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2010, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: John Fastabend <john.r.fastabend@intel.com> + */ + +#include <linux/rtnetlink.h> +#include <linux/notifier.h> + +static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); + +int register_dcbevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&dcbevent_notif_chain, nb); +} +EXPORT_SYMBOL(register_dcbevent_notifier); + +int unregister_dcbevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&dcbevent_notif_chain, nb); +} +EXPORT_SYMBOL(unregister_dcbevent_notifier); + +int call_dcbevent_notifiers(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&dcbevent_notif_chain, val, v); +} diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 19ac2b985485..d900ab99814a 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -23,6 +23,7 @@ #include <net/netlink.h> #include <net/rtnetlink.h> #include <linux/dcbnl.h> +#include <net/dcbevent.h> #include <linux/rtnetlink.h> #include <net/sock.h> @@ -66,6 +67,9 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_PFC_STATE] = {.type = NLA_U8}, [DCB_ATTR_BCN] = {.type = NLA_NESTED}, [DCB_ATTR_APP] = {.type = NLA_NESTED}, + [DCB_ATTR_IEEE] = {.type = NLA_NESTED}, + [DCB_ATTR_DCBX] = {.type = NLA_U8}, + [DCB_ATTR_FEATCFG] = {.type = NLA_NESTED}, }; /* DCB priority flow control to User Priority nested attributes */ @@ -122,6 +126,7 @@ static const struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { [DCB_CAP_ATTR_PFC_TCS] = {.type = NLA_U8}, [DCB_CAP_ATTR_GSP] = {.type = NLA_U8}, [DCB_CAP_ATTR_BCN] = {.type = NLA_U8}, + [DCB_CAP_ATTR_DCBX] = {.type = NLA_U8}, }; /* DCB capabilities nested attributes. */ @@ -167,6 +172,28 @@ static const struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = { [DCB_APP_ATTR_PRIORITY] = {.type = NLA_U8}, }; +/* IEEE 802.1Qaz nested attributes. */ +static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = { + [DCB_ATTR_IEEE_ETS] = {.len = sizeof(struct ieee_ets)}, + [DCB_ATTR_IEEE_PFC] = {.len = sizeof(struct ieee_pfc)}, + [DCB_ATTR_IEEE_APP_TABLE] = {.type = NLA_NESTED}, +}; + +static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { + [DCB_ATTR_IEEE_APP] = {.len = sizeof(struct dcb_app)}, +}; + +/* DCB number of traffic classes nested attributes. */ +static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = { + [DCB_FEATCFG_ATTR_ALL] = {.type = NLA_FLAG}, + [DCB_FEATCFG_ATTR_PG] = {.type = NLA_U8}, + [DCB_FEATCFG_ATTR_PFC] = {.type = NLA_U8}, + [DCB_FEATCFG_ATTR_APP] = {.type = NLA_U8}, +}; + +static LIST_HEAD(dcb_app_list); +static DEFINE_SPINLOCK(dcb_lock); + /* standard netlink reply call */ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, u32 seq, u16 flags) @@ -622,12 +649,12 @@ out: static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags) { - int ret = -EINVAL; + int err, ret = -EINVAL; u16 id; u8 up, idtype; struct nlattr *app_tb[DCB_APP_ATTR_MAX + 1]; - if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->setapp) + if (!tb[DCB_ATTR_APP]) goto out; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], @@ -651,9 +678,18 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb, id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]); up = nla_get_u8(app_tb[DCB_APP_ATTR_PRIORITY]); - ret = dcbnl_reply(netdev->dcbnl_ops->setapp(netdev, idtype, id, up), - RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP, - pid, seq, flags); + if (netdev->dcbnl_ops->setapp) { + err = netdev->dcbnl_ops->setapp(netdev, idtype, id, up); + } else { + struct dcb_app app; + app.selector = idtype; + app.protocol = id; + app.priority = up; + err = dcb_setapp(netdev, &app); + } + + ret = dcbnl_reply(err, RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP, + pid, seq, flags); out: return ret; } @@ -1118,6 +1154,281 @@ err: return ret; } +/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not + * be completed the entire msg is aborted and error value is returned. + * No attempt is made to reconcile the case where only part of the + * cmd can be completed. + */ +static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int err = -EOPNOTSUPP; + + if (!ops) + goto err; + + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + if (err) + goto err; + + if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) { + struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]); + err = ops->ieee_setets(netdev, ets); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) { + struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); + err = ops->ieee_setpfc(netdev, pfc); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { + struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { + struct dcb_app *app_data; + if (nla_type(attr) != DCB_ATTR_IEEE_APP) + continue; + app_data = nla_data(attr); + if (ops->ieee_setapp) + err = ops->ieee_setapp(netdev, app_data); + else + err = dcb_setapp(netdev, app_data); + if (err) + goto err; + } + } + +err: + dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, + pid, seq, flags); + return err; +} + + +/* Handle IEEE 802.1Qaz GET commands. */ +static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *ieee, *app; + struct dcb_app_type *itr; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_IEEE_GET; + + NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); + + ieee = nla_nest_start(skb, DCB_ATTR_IEEE); + if (!ieee) + goto nla_put_failure; + + if (ops->ieee_getets) { + struct ieee_ets ets; + err = ops->ieee_getets(netdev, &ets); + if (!err) + NLA_PUT(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets); + } + + if (ops->ieee_getpfc) { + struct ieee_pfc pfc; + err = ops->ieee_getpfc(netdev, &pfc); + if (!err) + NLA_PUT(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc); + } + + app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE); + if (!app) + goto nla_put_failure; + + spin_lock(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { + err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app), + &itr->app); + if (err) { + spin_unlock(&dcb_lock); + goto nla_put_failure; + } + } + } + spin_unlock(&dcb_lock); + nla_nest_end(skb, app); + + nla_nest_end(skb, ieee); + nlmsg_end(skb, nlh); + + return rtnl_unicast(skb, &init_net, pid); +nla_put_failure: + nlmsg_cancel(skb, nlh); +nlmsg_failure: + kfree_skb(skb); + return -1; +} + +/* DCBX configuration */ +static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret; + + if (!netdev->dcbnl_ops->getdcbx) + return -EOPNOTSUPP; + + ret = dcbnl_reply(netdev->dcbnl_ops->getdcbx(netdev), RTM_GETDCB, + DCB_CMD_GDCBX, DCB_ATTR_DCBX, pid, seq, flags); + + return ret; +} + +static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret; + u8 value; + + if (!netdev->dcbnl_ops->setdcbx) + return -EOPNOTSUPP; + + if (!tb[DCB_ATTR_DCBX]) + return -EINVAL; + + value = nla_get_u8(tb[DCB_ATTR_DCBX]); + + ret = dcbnl_reply(netdev->dcbnl_ops->setdcbx(netdev, value), + RTM_SETDCB, DCB_CMD_SDCBX, DCB_ATTR_DCBX, + pid, seq, flags); + + return ret; +} + +static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1], *nest; + u8 value; + int ret, i; + int getall = 0; + + if (!netdev->dcbnl_ops->getfeatcfg) + return -EOPNOTSUPP; + + if (!tb[DCB_ATTR_FEATCFG]) + return -EINVAL; + + ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], + dcbnl_featcfg_nest); + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) { + ret = -ENOBUFS; + goto err_out; + } + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_GFEATCFG; + + nest = nla_nest_start(dcbnl_skb, DCB_ATTR_FEATCFG); + if (!nest) { + ret = -EMSGSIZE; + goto nla_put_failure; + } + + if (data[DCB_FEATCFG_ATTR_ALL]) + getall = 1; + + for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) { + if (!getall && !data[i]) + continue; + + ret = netdev->dcbnl_ops->getfeatcfg(netdev, i, &value); + if (!ret) + ret = nla_put_u8(dcbnl_skb, i, value); + + if (ret) { + nla_nest_cancel(dcbnl_skb, nest); + goto nla_put_failure; + } + } + nla_nest_end(dcbnl_skb, nest); + + nlmsg_end(dcbnl_skb, nlh); + + return rtnl_unicast(dcbnl_skb, &init_net, pid); +nla_put_failure: + nlmsg_cancel(dcbnl_skb, nlh); +nlmsg_failure: + kfree_skb(dcbnl_skb); +err_out: + return ret; +} + +static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1]; + int ret, i; + u8 value; + + if (!netdev->dcbnl_ops->setfeatcfg) + return -ENOTSUPP; + + if (!tb[DCB_ATTR_FEATCFG]) + return -EINVAL; + + ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], + dcbnl_featcfg_nest); + + if (ret) + goto err; + + for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) { + if (data[i] == NULL) + continue; + + value = nla_get_u8(data[i]); + + ret = netdev->dcbnl_ops->setfeatcfg(netdev, i, value); + + if (ret) + goto err; + } +err: + dcbnl_reply(ret, RTM_SETDCB, DCB_CMD_SFEATCFG, DCB_ATTR_FEATCFG, + pid, seq, flags); + + return ret; +} + static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1223,6 +1534,30 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_setapp(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_IEEE_SET: + ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_IEEE_GET: + ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GDCBX: + ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SDCBX: + ret = dcbnl_setdcbx(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GFEATCFG: + ret = dcbnl_getfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SFEATCFG: + ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } @@ -1233,8 +1568,95 @@ out: return ret; } +/** + * dcb_getapp - retrieve the DCBX application user priority + * + * On success returns a non-zero 802.1p user priority bitmap + * otherwise returns 0 as the invalid user priority bitmap to + * indicate an error. + */ +u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app_type *itr; + u8 prio = 0; + + spin_lock(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == app->selector && + itr->app.protocol == app->protocol && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + prio = itr->app.priority; + break; + } + } + spin_unlock(&dcb_lock); + + return prio; +} +EXPORT_SYMBOL(dcb_getapp); + +/** + * ixgbe_dcbnl_setapp - add dcb application data to app list + * + * Priority 0 is the default priority this removes applications + * from the app list if the priority is set to zero. + */ +u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) +{ + struct dcb_app_type *itr; + + spin_lock(&dcb_lock); + /* Search for existing match and replace */ + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == new->selector && + itr->app.protocol == new->protocol && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + if (new->priority) + itr->app.priority = new->priority; + else { + list_del(&itr->list); + kfree(itr); + } + goto out; + } + } + /* App type does not exist add new application type */ + if (new->priority) { + struct dcb_app_type *entry; + entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC); + if (!entry) { + spin_unlock(&dcb_lock); + return -ENOMEM; + } + + memcpy(&entry->app, new, sizeof(*new)); + strncpy(entry->name, dev->name, IFNAMSIZ); + list_add(&entry->list, &dcb_app_list); + } +out: + spin_unlock(&dcb_lock); + call_dcbevent_notifiers(DCB_APP_EVENT, new); + return 0; +} +EXPORT_SYMBOL(dcb_setapp); + +static void dcb_flushapp(void) +{ + struct dcb_app_type *app; + struct dcb_app_type *tmp; + + spin_lock(&dcb_lock); + list_for_each_entry_safe(app, tmp, &dcb_app_list, list) { + list_del(&app->list); + kfree(app); + } + spin_unlock(&dcb_lock); +} + static int __init dcbnl_init(void) { + INIT_LIST_HEAD(&dcb_app_list); + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL); rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL); @@ -1246,7 +1668,6 @@ static void __exit dcbnl_exit(void) { rtnl_unregister(PF_UNSPEC, RTM_GETDCB); rtnl_unregister(PF_UNSPEC, RTM_SETDCB); + dcb_flushapp(); } module_exit(dcbnl_exit); - - diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 2991efcc8dea..5c8362b037ed 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o -dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o - +dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ + qpolicy.o # # CCID algorithms to be used by dccp.ko # diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index abaf241c7353..25b7a8d1ad58 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -9,18 +9,10 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; version 2 of the License; */ - -#include "ackvec.h" #include "dccp.h" - -#include <linux/init.h> -#include <linux/errno.h> #include <linux/kernel.h> -#include <linux/skbuff.h> #include <linux/slab.h> -#include <net/sock.h> - static struct kmem_cache *dccp_ackvec_slab; static struct kmem_cache *dccp_ackvec_record_slab; @@ -92,6 +84,24 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum) return 0; } +static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list, + const u64 ackno) +{ + struct dccp_ackvec_record *avr; + /* + * Exploit that records are inserted in descending order of sequence + * number, start with the oldest record first. If @ackno is `before' + * the earliest ack_ackno, the packet is too old to be considered. + */ + list_for_each_entry_reverse(avr, av_list, avr_node) { + if (avr->avr_ack_seqno == ackno) + return avr; + if (before48(ackno, avr->avr_ack_seqno)) + break; + } + return NULL; +} + /* * Buffer index and length computation using modulo-buffersize arithmetic. * Note that, as pointers move from right to left, head is `before' tail. @@ -113,248 +123,253 @@ u16 dccp_ackvec_buflen(const struct dccp_ackvec *av) return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head); } -/* - * If several packets are missing, the HC-Receiver may prefer to enter multiple - * bytes with run length 0, rather than a single byte with a larger run length; - * this simplifies table updates if one of the missing packets arrives. +/** + * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1 + * @av: non-empty buffer to update + * @distance: negative or zero distance of @seqno from buf_ackno downward + * @seqno: the (old) sequence number whose record is to be updated + * @state: state in which packet carrying @seqno was received */ -static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, - const unsigned int packets, - const unsigned char state) +static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance, + u64 seqno, enum dccp_ackvec_states state) { - long gap; - long new_head; + u16 ptr = av->av_buf_head; - if (av->av_vec_len + packets > DCCPAV_MAX_ACKVEC_LEN) - return -ENOBUFS; + BUG_ON(distance > 0); + if (unlikely(dccp_ackvec_is_empty(av))) + return; - gap = packets - 1; - new_head = av->av_buf_head - packets; + do { + u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr); - if (new_head < 0) { - if (gap > 0) { - memset(av->av_buf, DCCPAV_NOT_RECEIVED, - gap + new_head + 1); - gap = -new_head; + if (distance + runlen >= 0) { + /* + * Only update the state if packet has not been received + * yet. This is OK as per the second table in RFC 4340, + * 11.4.1; i.e. here we are using the following table: + * RECEIVED + * 0 1 3 + * S +---+---+---+ + * T 0 | 0 | 0 | 0 | + * O +---+---+---+ + * R 1 | 1 | 1 | 1 | + * E +---+---+---+ + * D 3 | 0 | 1 | 3 | + * +---+---+---+ + * The "Not Received" state was set by reserve_seats(). + */ + if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED) + av->av_buf[ptr] = state; + else + dccp_pr_debug("Not changing %llu state to %u\n", + (unsigned long long)seqno, state); + break; } - new_head += DCCPAV_MAX_ACKVEC_LEN; - } - av->av_buf_head = new_head; + distance += runlen + 1; + ptr = __ackvec_idx_add(ptr, 1); - if (gap > 0) - memset(av->av_buf + av->av_buf_head + 1, - DCCPAV_NOT_RECEIVED, gap); + } while (ptr != av->av_buf_tail); +} - av->av_buf[av->av_buf_head] = state; - av->av_vec_len += packets; - return 0; +/* Mark @num entries after buf_head as "Not yet received". */ +static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num) +{ + u16 start = __ackvec_idx_add(av->av_buf_head, 1), + len = DCCPAV_MAX_ACKVEC_LEN - start; + + /* check for buffer wrap-around */ + if (num > len) { + memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len); + start = 0; + num -= len; + } + if (num) + memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num); } -/* - * Implements the RFC 4340, Appendix A +/** + * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer + * @av: container of buffer to update (can be empty or non-empty) + * @num_packets: number of packets to register (must be >= 1) + * @seqno: sequence number of the first packet in @num_packets + * @state: state in which packet carrying @seqno was received */ -int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state) +static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets, + u64 seqno, enum dccp_ackvec_states state) { - u8 *cur_head = av->av_buf + av->av_buf_head, - *buf_end = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; - /* - * Check at the right places if the buffer is full, if it is, tell the - * caller to start dropping packets till the HC-Sender acks our ACK - * vectors, when we will free up space in av_buf. - * - * We may well decide to do buffer compression, etc, but for now lets - * just drop. - * - * From Appendix A.1.1 (`New Packets'): - * - * Of course, the circular buffer may overflow, either when the - * HC-Sender is sending data at a very high rate, when the - * HC-Receiver's acknowledgements are not reaching the HC-Sender, - * or when the HC-Sender is forgetting to acknowledge those acks - * (so the HC-Receiver is unable to clean up old state). In this - * case, the HC-Receiver should either compress the buffer (by - * increasing run lengths when possible), transfer its state to - * a larger buffer, or, as a last resort, drop all received - * packets, without processing them whatsoever, until its buffer - * shrinks again. - */ + u32 num_cells = num_packets; - /* See if this is the first ackno being inserted */ - if (av->av_vec_len == 0) { - *cur_head = state; - av->av_vec_len = 1; - } else if (after48(ackno, av->av_buf_ackno)) { - const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno); + if (num_packets > DCCPAV_BURST_THRESH) { + u32 lost_packets = num_packets - 1; + DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets); /* - * Look if the state of this packet is the same as the - * previous ackno and if so if we can bump the head len. + * We received 1 packet and have a loss of size "num_packets-1" + * which we squeeze into num_cells-1 rather than reserving an + * entire byte for each lost packet. + * The reason is that the vector grows in O(burst_length); when + * it grows too large there will no room left for the payload. + * This is a trade-off: if a few packets out of the burst show + * up later, their state will not be changed; it is simply too + * costly to reshuffle/reallocate/copy the buffer each time. + * Should such problems persist, we will need to switch to a + * different underlying data structure. */ - if (delta == 1 && dccp_ackvec_state(cur_head) == state && - dccp_ackvec_runlen(cur_head) < DCCPAV_MAX_RUNLEN) - *cur_head += 1; - else if (dccp_ackvec_set_buf_head_state(av, delta, state)) - return -ENOBUFS; - } else { - /* - * A.1.2. Old Packets - * - * When a packet with Sequence Number S <= buf_ackno - * arrives, the HC-Receiver will scan the table for - * the byte corresponding to S. (Indexing structures - * could reduce the complexity of this scan.) - */ - u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno); + for (num_packets = num_cells = 1; lost_packets; ++num_cells) { + u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN); - while (1) { - const u8 len = dccp_ackvec_runlen(cur_head); - /* - * valid packets not yet in av_buf have a reserved - * entry, with a len equal to 0. - */ - if (*cur_head == DCCPAV_NOT_RECEIVED && delta == 0) { - dccp_pr_debug("Found %llu reserved seat!\n", - (unsigned long long)ackno); - *cur_head = state; - goto out; - } - /* len == 0 means one packet */ - if (delta < len + 1) - goto out_duplicate; - - delta -= len + 1; - if (++cur_head == buf_end) - cur_head = av->av_buf; + av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1); + av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len; + + lost_packets -= len; } } - av->av_buf_ackno = ackno; -out: - return 0; + if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) { + DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n"); + av->av_overflow = true; + } + + av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets); + if (av->av_overflow) + av->av_buf_tail = av->av_buf_head; + + av->av_buf[av->av_buf_head] = state; + av->av_buf_ackno = seqno; -out_duplicate: - /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost " - "packet: %llu\n", (unsigned long long)ackno); - return -EILSEQ; + if (num_packets > 1) + dccp_ackvec_reserve_seats(av, num_packets - 1); } -static void dccp_ackvec_throw_record(struct dccp_ackvec *av, - struct dccp_ackvec_record *avr) +/** + * dccp_ackvec_input - Register incoming packet in the buffer + */ +void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb) { - struct dccp_ackvec_record *next; + u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq; + enum dccp_ackvec_states state = DCCPAV_RECEIVED; - /* sort out vector length */ - if (av->av_buf_head <= avr->avr_ack_ptr) - av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head; - else - av->av_vec_len = DCCPAV_MAX_ACKVEC_LEN - 1 - - av->av_buf_head + avr->avr_ack_ptr; + if (dccp_ackvec_is_empty(av)) { + dccp_ackvec_add_new(av, 1, seqno, state); + av->av_tail_ackno = seqno; - /* free records */ - list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { - list_del(&avr->avr_node); - kmem_cache_free(dccp_ackvec_record_slab, avr); - } -} + } else { + s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno); + u8 *current_head = av->av_buf + av->av_buf_head; -void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, - const u64 ackno) -{ - struct dccp_ackvec_record *avr; + if (num_packets == 1 && + dccp_ackvec_state(current_head) == state && + dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) { - /* - * If we traverse backwards, it should be faster when we have large - * windows. We will be receiving ACKs for stuff we sent a while back - * -sorbo. - */ - list_for_each_entry_reverse(avr, &av->av_records, avr_node) { - if (ackno == avr->avr_ack_seqno) { - dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu, ACKED!\n", - dccp_role(sk), avr->avr_ack_runlen, - (unsigned long long)avr->avr_ack_seqno, - (unsigned long long)avr->avr_ack_ackno); - dccp_ackvec_throw_record(av, avr); - break; - } else if (avr->avr_ack_seqno > ackno) - break; /* old news */ + *current_head += 1; + av->av_buf_ackno = seqno; + + } else if (num_packets > 0) { + dccp_ackvec_add_new(av, num_packets, seqno, state); + } else { + dccp_ackvec_update_old(av, num_packets, seqno, state); + } } } -static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, - struct sock *sk, u64 *ackno, - const unsigned char len, - const unsigned char *vector) +/** + * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection + * This routine is called when the peer acknowledges the receipt of Ack Vectors + * up to and including @ackno. While based on on section A.3 of RFC 4340, here + * are additional precautions to prevent corrupted buffer state. In particular, + * we use tail_ackno to identify outdated records; it always marks the earliest + * packet of group (2) in 11.4.2. + */ +void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno) { - unsigned char i; - struct dccp_ackvec_record *avr; + struct dccp_ackvec_record *avr, *next; + u8 runlen_now, eff_runlen; + s64 delta; - /* Check if we actually sent an ACK vector */ - if (list_empty(&av->av_records)) + avr = dccp_ackvec_lookup(&av->av_records, ackno); + if (avr == NULL) return; + /* + * Deal with outdated acknowledgments: this arises when e.g. there are + * several old records and the acks from the peer come in slowly. In + * that case we may still have records that pre-date tail_ackno. + */ + delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno); + if (delta < 0) + goto free_records; + /* + * Deal with overlapping Ack Vectors: don't subtract more than the + * number of packets between tail_ackno and ack_ackno. + */ + eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen; - i = len; + runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr); /* - * XXX - * I think it might be more efficient to work backwards. See comment on - * rcv_ackno. -sorbo. + * The run length of Ack Vector cells does not decrease over time. If + * the run length is the same as at the time the Ack Vector was sent, we + * free the ack_ptr cell. That cell can however not be freed if the run + * length has increased: in this case we need to move the tail pointer + * backwards (towards higher indices), to its next-oldest neighbour. */ - avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node); - while (i--) { - const u8 rl = dccp_ackvec_runlen(vector); - u64 ackno_end_rl; + if (runlen_now > eff_runlen) { - dccp_set_seqno(&ackno_end_rl, *ackno - rl); + av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1; + av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1); + /* This move may not have cleared the overflow flag. */ + if (av->av_overflow) + av->av_overflow = (av->av_buf_head == av->av_buf_tail); + } else { + av->av_buf_tail = avr->avr_ack_ptr; /* - * If our AVR sequence number is greater than the ack, go - * forward in the AVR list until it is not so. + * We have made sure that avr points to a valid cell within the + * buffer. This cell is either older than head, or equals head + * (empty buffer): in both cases we no longer have any overflow. */ - list_for_each_entry_from(avr, &av->av_records, avr_node) { - if (!after48(avr->avr_ack_seqno, *ackno)) - goto found; - } - /* End of the av_records list, not found, exit */ - break; -found: - if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) { - if (dccp_ackvec_state(vector) != DCCPAV_NOT_RECEIVED) { - dccp_pr_debug("%s ACK vector 0, len=%d, " - "ack_seqno=%llu, ack_ackno=%llu, " - "ACKED!\n", - dccp_role(sk), len, - (unsigned long long) - avr->avr_ack_seqno, - (unsigned long long) - avr->avr_ack_ackno); - dccp_ackvec_throw_record(av, avr); - break; - } - /* - * If it wasn't received, continue scanning... we might - * find another one. - */ - } + av->av_overflow = 0; + } - dccp_set_seqno(ackno, ackno_end_rl - 1); - ++vector; + /* + * The peer has acknowledged up to and including ack_ackno. Hence the + * first packet in group (2) of 11.4.2 is the successor of ack_ackno. + */ + av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1); + +free_records: + list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { + list_del(&avr->avr_node); + kmem_cache_free(dccp_ackvec_record_slab, avr); } } -int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - u64 *ackno, const u8 opt, const u8 *value, const u8 len) +/* + * Routines to keep track of Ack Vectors received in an skb + */ +int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce) { - if (len > DCCP_SINGLE_OPT_MAXLEN) - return -1; + struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC); + + if (new == NULL) + return -ENOBUFS; + new->vec = vec; + new->len = len; + new->nonce = nonce; - /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ - dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, - ackno, len, value); + list_add_tail(&new->node, head); return 0; } +EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add); + +void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks) +{ + struct dccp_ackvec_parsed *cur, *next; + + list_for_each_entry_safe(cur, next, parsed_chunks, node) + kfree(cur); + INIT_LIST_HEAD(parsed_chunks); +} +EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup); int __init dccp_ackvec_init(void) { diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 23880be8fc29..e2ab0627a5ff 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -29,6 +29,9 @@ /* Estimated minimum average Ack Vector length - used for updating MPS */ #define DCCPAV_MIN_OPTLEN 16 +/* Threshold for coping with large bursts of losses */ +#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8) + enum dccp_ackvec_states { DCCPAV_RECEIVED = 0x00, DCCPAV_ECN_MARKED = 0x40, @@ -61,7 +64,6 @@ static inline u8 dccp_ackvec_state(const u8 *cell) * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously) - * @av_veclen: length of the live portion of @av_buf */ struct dccp_ackvec { u8 av_buf[DCCPAV_MAX_ACKVEC_LEN]; @@ -72,7 +74,6 @@ struct dccp_ackvec { bool av_buf_nonce[DCCPAV_NUM_ACKVECS]; u8 av_overflow:1; struct list_head av_records; - u16 av_vec_len; }; /** struct dccp_ackvec_record - Records information about sent Ack Vectors @@ -98,29 +99,38 @@ struct dccp_ackvec_record { u8 avr_ack_nonce:1; }; -struct sock; -struct sk_buff; - extern int dccp_ackvec_init(void); extern void dccp_ackvec_exit(void); extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); extern void dccp_ackvec_free(struct dccp_ackvec *av); -extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state); - -extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, - struct sock *sk, const u64 ackno); -extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - u64 *ackno, const u8 opt, - const u8 *value, const u8 len); - +extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); +extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) { return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail; } + +/** + * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb + * @vec: start of vector (offset into skb) + * @len: length of @vec + * @nonce: whether @vec had an ECN nonce of 0 or 1 + * @node: FIFO - arranged in descending order of ack_ackno + * This structure is used by CCIDs to access Ack Vectors in a received skb. + */ +struct dccp_ackvec_parsed { + u8 *vec, + len, + nonce:1; + struct list_head node; +}; + +extern int dccp_ackvec_parsed_add(struct list_head *head, + u8 *vec, u8 len, u8 nonce); +extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); #endif /* _ACKVEC_H */ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index cb1b4a0d1877..e96d5e810039 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -246,68 +246,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) #endif } -/* XXX Lame code duplication! - * returns -1 if none was found. - * else returns the next offset to use in the function call. - */ -static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, - unsigned char **vec, unsigned char *veclen) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); - unsigned char *opt_ptr; - const unsigned char *opt_end = (unsigned char *)dh + - (dh->dccph_doff * 4); - unsigned char opt, len; - unsigned char *value; - - BUG_ON(offset < 0); - options += offset; - opt_ptr = options; - if (opt_ptr >= opt_end) - return -1; - - while (opt_ptr != opt_end) { - opt = *opt_ptr++; - len = 0; - value = NULL; - - /* Check if this isn't a single byte option */ - if (opt > DCCPO_MAX_RESERVED) { - if (opt_ptr == opt_end) - goto out_invalid_option; - - len = *opt_ptr++; - if (len < 3) - goto out_invalid_option; - /* - * Remove the type and len fields, leaving - * just the value size - */ - len -= 2; - value = opt_ptr; - opt_ptr += len; - - if (opt_ptr > opt_end) - goto out_invalid_option; - } - - switch (opt) { - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - *vec = value; - *veclen = len; - return offset + (opt_ptr - options); - } - } - - return -1; - -out_invalid_option: - DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); - return -1; -} - /** * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm * This code is almost identical with TCP's tcp_rtt_estimator(), since @@ -432,16 +370,28 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); } +static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, + u8 option, u8 *optval, u8 optlen) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + + switch (option) { + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen, + option - DCCPO_ACK_VECTOR_0); + } + return 0; +} + static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); + struct dccp_ackvec_parsed *avp; u64 ackno, seqno; struct ccid2_seq *seqp; - unsigned char *vector; - unsigned char veclen; - int offset = 0; int done = 0; unsigned int maxincr = 0; @@ -475,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* check forward path congestion */ - /* still didn't send out new data packets */ - if (hc->tx_seqh == hc->tx_seqt) + if (dccp_packet_without_ack(skb)) return; - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case DCCP_PKT_ACK: - case DCCP_PKT_DATAACK: - break; - default: - return; - } + /* still didn't send out new data packets */ + if (hc->tx_seqh == hc->tx_seqt) + goto done; ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; if (after48(ackno, hc->tx_high_ack)) @@ -509,15 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); /* go through all ack vectors */ - while ((offset = ccid2_ackvector(sk, skb, offset, - &vector, &veclen)) != -1) { + list_for_each_entry(avp, &hc->tx_av_chunks, node) { /* go through this ack vector */ - while (veclen--) { - u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector)); + for (; avp->len--; avp->vec++) { + u64 ackno_end_rl = SUB48(ackno, + dccp_ackvec_runlen(avp->vec)); - ccid2_pr_debug("ackvec start:%llu end:%llu\n", + ccid2_pr_debug("ackvec %llu |%u,%u|\n", (unsigned long long)ackno, - (unsigned long long)ackno_end_rl); + dccp_ackvec_state(avp->vec) >> 6, + dccp_ackvec_runlen(avp->vec)); /* if the seqno we are analyzing is larger than the * current ackno, then move towards the tail of our * seqnos. @@ -536,7 +482,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = dccp_ackvec_state(vector); + const u8 state = dccp_ackvec_state(avp->vec); /* new packet received or marked */ if (state != DCCPAV_NOT_RECEIVED && @@ -563,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) break; ackno = SUB48(ackno_end_rl, 1); - vector++; } if (done) break; @@ -631,10 +576,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) sk_stop_timer(sk, &hc->tx_rtotimer); else sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); - +done: /* check if incoming Acks allow pending packets to be sent */ if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) @@ -663,6 +609,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_last_cong = ccid2_time_stamp; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); + INIT_LIST_HEAD(&hc->tx_av_chunks); return 0; } @@ -696,16 +643,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } struct ccid_operations ccid2_ops = { - .ccid_id = DCCPC_CCID2, - .ccid_name = "TCP-like", - .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), - .ccid_hc_tx_init = ccid2_hc_tx_init, - .ccid_hc_tx_exit = ccid2_hc_tx_exit, - .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, - .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, - .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, - .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), - .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, + .ccid_id = DCCPC_CCID2, + .ccid_name = "TCP-like", + .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), + .ccid_hc_tx_init = ccid2_hc_tx_init, + .ccid_hc_tx_exit = ccid2_hc_tx_exit, + .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, + .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, + .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, + .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, + .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), + .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 25cb6b216eda..e9985dafc2c7 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -55,6 +55,7 @@ struct ccid2_seq { * @tx_rtt_seq: to decay RTTVAR at most once per flight * @tx_rpseq: last consecutive seqno * @tx_rpdupack: dupacks since rpseq + * @tx_av_chunks: list of Ack Vectors received on current skb */ struct ccid2_hc_tx_sock { u32 tx_cwnd; @@ -79,6 +80,7 @@ struct ccid2_hc_tx_sock { int tx_rpdupack; u32 tx_last_cong; u64 tx_high_ack; + struct list_head tx_av_chunks; }; static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 19fafd597465..45087052d894 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -93,9 +93,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); #define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5) #define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC) -/* Maximal interval between probes for local resources. */ -#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) - /* sysctl variables for DCCP */ extern int sysctl_dccp_request_retries; extern int sysctl_dccp_retries1; @@ -203,12 +200,7 @@ struct dccp_mib { DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) #define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) -#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) #define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) -#define DCCP_ADD_STATS_BH(field, val) \ - SNMP_ADD_STATS_BH(dccp_statistics, field, val) -#define DCCP_ADD_STATS_USER(field, val) \ - SNMP_ADD_STATS_USER(dccp_statistics, field, val) /* * Checksumming routines @@ -243,6 +235,19 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); +/* + * TX Packet Dequeueing Interface + */ +extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); +extern bool dccp_qpolicy_full(struct sock *sk); +extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); +extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); +extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); +extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); + +/* + * TX Packet Output and TX Timers + */ extern void dccp_write_xmit(struct sock *sk); extern void dccp_write_space(struct sock *sk); extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); diff --git a/net/dccp/input.c b/net/dccp/input.c index c7aeeba859d4..15af247ea007 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) dccp_time_wait(sk, DCCP_TIME_WAIT, 0); } -static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) +static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec; - if (dp->dccps_hc_rx_ackvec != NULL) - dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + if (av == NULL) + return; + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvec_input(av, skb); } static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) @@ -239,7 +241,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && - (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) + ackno != DCCP_PKT_WITHOUT_ACK_SEQ && + after48(ackno, dp->dccps_gar)) dp->dccps_gar = ackno; } else { unsigned long now = jiffies; @@ -365,21 +368,13 @@ discard: int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned len) { - struct dccp_sock *dp = dccp_sk(sk); - if (dccp_check_seqno(sk, skb)) goto discard; if (dccp_parse_options(sk, NULL, skb)) return 1; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_event_ack_recv(sk, skb); - - if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED)) - goto discard; + dccp_handle_ackvec_processing(sk, skb); dccp_deliver_input_to_ccids(sk, skb); return __dccp_rcv_established(sk, skb, dh, len); @@ -631,14 +626,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, NULL, skb)) return 1; - if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_event_ack_recv(sk, skb); - - if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED)) - goto discard; - + dccp_handle_ackvec_processing(sk, skb); dccp_deliver_input_to_ccids(sk, skb); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f69ea114829..45a434f94169 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -462,15 +462,12 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, { struct rtable *rt; struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, - .nl_u = { .ip4_u = - { .daddr = ip_hdr(skb)->saddr, - .saddr = ip_hdr(skb)->daddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ip_hdr(skb)->saddr, + .fl4_src = ip_hdr(skb)->daddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = dccp_hdr(skb)->dccph_dport, - .dport = dccp_hdr(skb)->dccph_sport } - } + .fl_ip_sport = dccp_hdr(skb)->dccph_dport, + .fl_ip_dport = dccp_hdr(skb)->dccph_sport }; security_skb_classify_flow(skb, &fl); diff --git a/net/dccp/options.c b/net/dccp/options.c index 5adeeed5e0d2..f06ffcfc8d71 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, struct dccp_sock *dp = dccp_sk(sk); const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; - u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); unsigned char *opt_ptr = options; const unsigned char *opt_end = (unsigned char *)dh + @@ -129,14 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, if (rc) goto out_featneg_failed; break; - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ - break; - if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) - goto out_invalid_option; - break; case DCCPO_TIMESTAMP: if (len != 4) goto out_invalid_option; @@ -226,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, pkt_type, opt, value, len)) goto out_invalid_option; break; + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ + break; + /* + * Ack vectors are processed by the TX CCID if it is + * interested. The RX CCID need not parse Ack Vectors, + * since it is only interested in clearing old state. + * Fall through. + */ case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, pkt_type, opt, value, len)) @@ -429,6 +430,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const u16 buflen = dccp_ackvec_buflen(av); /* Figure out how many options do we need to represent the ackvec */ const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); @@ -437,10 +439,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) const unsigned char *tail, *from; unsigned char *to; - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, + dccp_packet_name(dcb->dccpd_type)); return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; + } + /* + * Since Ack Vectors are variable-length, we can not always predict + * their size. To catch exception cases where the space is running out + * on the skb, a separate Sync is scheduled to carry the Ack Vector. + */ + if (len > DCCPAV_MIN_OPTLEN && + len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { + DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " + "MPS=%u ==> reduce payload size?\n", len, skb->len, + dcb->dccpd_opt_len, dp->dccps_mss_cache); + dp->dccps_sync_scheduled = 1; + return 0; + } + dcb->dccpd_opt_len += len; to = skb_push(skb, len); len = buflen; @@ -481,7 +498,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) /* * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. */ - if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce)) + if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) return -ENOBUFS; return 0; } diff --git a/net/dccp/output.c b/net/dccp/output.c index 45b91853f5ae..784d30210543 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk) { int err, len; struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); + struct sk_buff *skb = dccp_qpolicy_pop(sk); if (unlikely(skb == NULL)) return; @@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk) * any local drop will eventually be reported via receiver feedback. */ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); + + /* + * If the CCID needs to transfer additional header options out-of-band + * (e.g. Ack Vectors or feature-negotiation options), it activates this + * flag to schedule a Sync. The Sync will automatically incorporate all + * currently pending header options, thus clearing the backlog. + */ + if (dp->dccps_sync_scheduled) + dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); } /** @@ -336,7 +345,7 @@ void dccp_write_xmit(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - while ((skb = skb_peek(&sk->sk_write_queue))) { + while ((skb = dccp_qpolicy_top(sk))) { int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); switch (ccid_packet_dequeue_eval(rc)) { @@ -350,8 +359,7 @@ void dccp_write_xmit(struct sock *sk) dccp_xmit_packet(sk); break; case CCID_PACKET_ERR: - skb_dequeue(&sk->sk_write_queue); - kfree_skb(skb); + dccp_qpolicy_drop(sk, skb); dccp_pr_debug("packet discarded due to err=%d\n", rc); } } @@ -636,6 +644,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; + /* + * Clear the flag in case the Sync was scheduled for out-of-band data, + * such as carrying a long Ack Vector. + */ + dccp_sk(sk)->dccps_sync_scheduled = 0; + dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ef343d53fcea..152975d942d9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; + dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); @@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_RECV_CSCOV: err = dccp_setsockopt_cscov(sk, val, true); break; + case DCCP_SOCKOPT_QPOLICY_ID: + if (sk->sk_state != DCCP_CLOSED) + err = -EISCONN; + else if (val < 0 || val >= DCCPQ_POLICY_MAX) + err = -EINVAL; + else + dp->dccps_qpolicy = val; + break; + case DCCP_SOCKOPT_QPOLICY_TXQLEN: + if (val < 0) + err = -EINVAL; + else + dp->dccps_tx_qlen = val; + break; default: err = -ENOPROTOOPT; break; @@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_RECV_CSCOV: val = dp->dccps_pcrlen; break; + case DCCP_SOCKOPT_QPOLICY_ID: + val = dp->dccps_qpolicy; + break; + case DCCP_SOCKOPT_QPOLICY_TXQLEN: + val = dp->dccps_tx_qlen; + break; case 128 ... 191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); @@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); #endif +static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) +{ + struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); + + /* + * Assign an (opaque) qpolicy priority value to skb->priority. + * + * We are overloading this skb field for use with the qpolicy subystem. + * The skb->priority is normally used for the SO_PRIORITY option, which + * is initialised from sk_priority. Since the assignment of sk_priority + * to skb->priority happens later (on layer 3), we overload this field + * for use with queueing priorities as long as the skb is on layer 4. + * The default priority value (if nothing is set) is 0. + */ + skb->priority = 0; + + for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + if (cmsg->cmsg_level != SOL_DCCP) + continue; + + if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && + !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) + return -EINVAL; + + switch (cmsg->cmsg_type) { + case DCCP_SCM_PRIORITY: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) + return -EINVAL; + skb->priority = *(__u32 *)CMSG_DATA(cmsg); + break; + default: + return -EINVAL; + } + } + return 0; +} + int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); - if (sysctl_dccp_tx_qlen && - (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { + if (dccp_qpolicy_full(sk)) { rc = -EAGAIN; goto out_release; } @@ -725,7 +786,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - skb_queue_tail(&sk->sk_write_queue, skb); + rc = dccp_msghdr_parse(msg, skb); + if (rc != 0) + goto out_discard; + + dccp_qpolicy_push(sk, skb); /* * The xmit_timer is set if the TX CCID is rate-based and will expire * when congestion control permits to release further packets into the diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 000000000000..63c30bfa4703 --- /dev/null +++ b/net/dccp/qpolicy.c @@ -0,0 +1,137 @@ +/* + * net/dccp/qpolicy.c + * + * Policy-based packet dequeueing interface for DCCP. + * + * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License v2 + * as published by the Free Software Foundation. + */ +#include "dccp.h" + +/* + * Simple Dequeueing Policy: + * If tx_qlen is different from 0, enqueue up to tx_qlen elements. + */ +static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) +{ + skb_queue_tail(&sk->sk_write_queue, skb); +} + +static bool qpolicy_simple_full(struct sock *sk) +{ + return dccp_sk(sk)->dccps_tx_qlen && + sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; +} + +static struct sk_buff *qpolicy_simple_top(struct sock *sk) +{ + return skb_peek(&sk->sk_write_queue); +} + +/* + * Priority-based Dequeueing Policy: + * If tx_qlen is different from 0 and the queue has reached its upper bound + * of tx_qlen elements, replace older packets lowest-priority-first. + */ +static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) +{ + struct sk_buff *skb, *best = NULL; + + skb_queue_walk(&sk->sk_write_queue, skb) + if (best == NULL || skb->priority > best->priority) + best = skb; + return best; +} + +static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) +{ + struct sk_buff *skb, *worst = NULL; + + skb_queue_walk(&sk->sk_write_queue, skb) + if (worst == NULL || skb->priority < worst->priority) + worst = skb; + return worst; +} + +static bool qpolicy_prio_full(struct sock *sk) +{ + if (qpolicy_simple_full(sk)) + dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); + return false; +} + +/** + * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface + * @push: add a new @skb to the write queue + * @full: indicates that no more packets will be admitted + * @top: peeks at whatever the queueing policy defines as its `top' + */ +static struct dccp_qpolicy_operations { + void (*push) (struct sock *sk, struct sk_buff *skb); + bool (*full) (struct sock *sk); + struct sk_buff* (*top) (struct sock *sk); + __be32 params; + +} qpol_table[DCCPQ_POLICY_MAX] = { + [DCCPQ_POLICY_SIMPLE] = { + .push = qpolicy_simple_push, + .full = qpolicy_simple_full, + .top = qpolicy_simple_top, + .params = 0, + }, + [DCCPQ_POLICY_PRIO] = { + .push = qpolicy_simple_push, + .full = qpolicy_prio_full, + .top = qpolicy_prio_best_skb, + .params = DCCP_SCM_PRIORITY, + }, +}; + +/* + * Externally visible interface + */ +void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) +{ + qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); +} + +bool dccp_qpolicy_full(struct sock *sk) +{ + return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); +} + +void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) +{ + if (skb != NULL) { + skb_unlink(skb, &sk->sk_write_queue); + kfree_skb(skb); + } +} + +struct sk_buff *dccp_qpolicy_top(struct sock *sk) +{ + return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); +} + +struct sk_buff *dccp_qpolicy_pop(struct sock *sk) +{ + struct sk_buff *skb = dccp_qpolicy_top(sk); + + if (skb != NULL) { + /* Clear any skb fields that we used internally */ + skb->priority = 0; + skb_unlink(skb, &sk->sk_write_queue); + } + return skb; +} + +bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param) +{ + /* check if exactly one bit is set */ + if (!param || (param & (param - 1))) + return false; + return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param; +} diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 18b8a2cbdf77..2af15b15d1fa 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_ops; static DEFINE_RWLOCK(dn_hash_lock); static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; static struct hlist_head dn_wild_sk; -static atomic_t decnet_memory_allocated; +static atomic_long_t decnet_memory_allocated; static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags); static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags); @@ -829,7 +829,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) return -EINVAL; scp->state = DN_CC; - scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS); + scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); dn_send_conn_conf(sk, allocation); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -958,7 +958,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, sk->sk_route_caps = sk->sk_dst_cache->dev->features; sock->state = SS_CONNECTING; scp->state = DN_CI; - scp->segsize_loc = dst_metric(sk->sk_dst_cache, RTAX_ADVMSS); + scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache); dn_nsp_send_conninit(sk, NSP_CI); err = -EINPROGRESS; @@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us if (r_len > sizeof(struct linkinfo_dn)) r_len = sizeof(struct linkinfo_dn); + memset(&link, 0, sizeof(link)); + switch(sock->state) { case SS_CONNECTING: link.idn_linkstate = LL_CONNECTING; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 474d54dd08c2..5e636365d33c 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -110,6 +110,8 @@ static unsigned long dn_rt_deadline; static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); +static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); +static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); @@ -129,6 +131,8 @@ static struct dst_ops dn_dst_ops = { .gc_thresh = 128, .gc = dn_dst_gc, .check = dn_dst_check, + .default_advmss = dn_dst_default_advmss, + .default_mtu = dn_dst_default_mtu, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, @@ -240,13 +244,14 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) { if (!(dst_metric_locked(dst, RTAX_MTU))) { - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); dst_set_expires(dst, dn_rt_mtu_expires); } if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; - if (dst_metric(dst, RTAX_ADVMSS) > mss) - dst->metrics[RTAX_ADVMSS-1] = mss; + u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS); + if (!existing_mss || existing_mss > mss) + dst_metric_set(dst, RTAX_ADVMSS, mss); } } } @@ -271,10 +276,10 @@ static void dn_dst_link_failure(struct sk_buff *skb) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) | - (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) | + return ((fl1->fld_dst ^ fl2->fld_dst) | + (fl1->fld_src ^ fl2->fld_src) | (fl1->mark ^ fl2->mark) | - (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) | + (fl1->fld_scope ^ fl2->fld_scope) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } @@ -795,19 +800,28 @@ static int dn_rt_bug(struct sk_buff *skb) return NET_RX_DROP; } +static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) +{ + return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); +} + +static unsigned int dn_dst_default_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; struct net_device *dev = rt->dst.dev; struct neighbour *n; - unsigned mss; + unsigned int metric; if (fi) { if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - memcpy(rt->dst.metrics, fi->fib_metrics, - sizeof(rt->dst.metrics)); + dst_import_metrics(&rt->dst, fi->fib_metrics); } rt->rt_type = res->type; @@ -818,13 +832,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) rt->dst.neighbour = n; } - if (dst_metric(&rt->dst, RTAX_MTU) == 0 || - dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) - rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; - mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); - if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || - dst_metric(&rt->dst, RTAX_ADVMSS) > mss) - rt->dst.metrics[RTAX_ADVMSS-1] = mss; + if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) + dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); + metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); + if (metric) { + unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); + if (metric > mss) + dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); + } return 0; } @@ -882,11 +897,9 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) { - struct flowi fl = { .nl_u = { .dn_u = - { .daddr = oldflp->fld_dst, - .saddr = oldflp->fld_src, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fld_dst = oldflp->fld_dst, + .fld_src = oldflp->fld_src, + .fld_scope = RT_SCOPE_UNIVERSE, .mark = oldflp->mark, .iif = init_net.loopback_dev->ifindex, .oif = oldflp->oif }; @@ -1230,11 +1243,9 @@ static int dn_route_input_slow(struct sk_buff *skb) int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .nl_u = { .dn_u = - { .daddr = cb->dst, - .saddr = cb->src, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fld_dst = cb->dst, + .fld_src = cb->src, + .fld_scope = RT_SCOPE_UNIVERSE, .mark = skb->mark, .iif = skb->dev->ifindex }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; @@ -1506,7 +1517,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); if (rt->rt_daddr != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto rtattr_failure; expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 48fdf10be7a1..6eb91df3c550 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -175,7 +175,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, unsigned dnet_addr_type(__le16 addr) { - struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; + struct flowi fl = { .fld_dst = addr }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index be3eb8e23288..28f8b5e5f73b 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -38,7 +38,7 @@ int decnet_log_martians = 1; int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW; /* Reasonable defaults, I hope, based on tcp's defaults */ -int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 }; +long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 }; int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; @@ -324,7 +324,7 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_mem, .maxlen = sizeof(sysctl_decnet_mem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax }, { .procname = "decnet_rmem", diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile index c0ef4e71dc49..d5c13c2eb36d 100644 --- a/net/dns_resolver/Makefile +++ b/net/dns_resolver/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o -dns_resolver-objs := dns_key.o dns_query.o +dns_resolver-y := dns_key.o dns_query.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 6112a12578b2..0c877a74e1f4 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -390,7 +390,7 @@ static int dsa_remove(struct platform_device *pdev) if (dst->link_poll_needed) del_timer_sync(&dst->link_poll_timer); - flush_scheduled_work(); + flush_work_sync(&dst->link_poll_work); for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; diff --git a/net/econet/Makefile b/net/econet/Makefile index 39f0a77abdbd..05fae8be2fed 100644 --- a/net/econet/Makefile +++ b/net/econet/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ECONET) += econet.o -econet-objs := af_econet.o +econet-y := af_econet.o diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index f8c1ae4b41f0..15dcc1a586b4 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -31,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/udp.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/sock.h> #include <net/inet_common.h> #include <linux/stat.h> @@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, #endif #ifdef CONFIG_ECONET_AUNUDP struct msghdr udpmsg; - struct iovec iov[msg->msg_iovlen+1]; + struct iovec iov[2]; struct aunhdr ah; struct sockaddr_in udpdest; __kernel_size_t size; - int i; mm_segment_t oldfs; + char *userbuf; #endif /* @@ -297,23 +298,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&econet_mutex); - if (saddr == NULL) { - struct econet_sock *eo = ec_sk(sk); - - addr.station = eo->station; - addr.net = eo->net; - port = eo->port; - cb = eo->cb; - } else { - if (msg->msg_namelen < sizeof(struct sockaddr_ec)) { - mutex_unlock(&econet_mutex); - return -EINVAL; - } - addr.station = saddr->addr.station; - addr.net = saddr->addr.net; - port = saddr->port; - cb = saddr->cb; - } + if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) { + mutex_unlock(&econet_mutex); + return -EINVAL; + } + addr.station = saddr->addr.station; + addr.net = saddr->addr.net; + port = saddr->port; + cb = saddr->cb; /* Look for a device with the right network number. */ dev = net2dev_map[addr.net]; @@ -328,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, } } - if (len + 15 > dev->mtu) { - mutex_unlock(&econet_mutex); - return -EMSGSIZE; - } - if (dev->type == ARPHRD_ECONET) { /* Real hardware Econet. We're not worthy etc. */ #ifdef CONFIG_ECONET_NATIVE unsigned short proto = 0; int res; + if (len + 15 > dev->mtu) { + mutex_unlock(&econet_mutex); + return -EMSGSIZE; + } + dev_hold(dev); skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), @@ -351,7 +343,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, eb = (struct ec_cb *)&skb->cb; - /* BUG: saddr may be NULL */ eb->cookie = saddr->cookie; eb->sec = *saddr; eb->sent = ec_tx_done; @@ -415,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, return -ENETDOWN; /* No socket - can't send */ } + if (len > 32768) { + err = -E2BIG; + goto error; + } + /* Make up a UDP datagram and hand it off to some higher intellect. */ memset(&udpdest, 0, sizeof(udpdest)); @@ -446,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, /* tack our header on the front of the iovec */ size = sizeof(struct aunhdr); - /* - * XXX: that is b0rken. We can't mix userland and kernel pointers - * in iovec, since on a lot of platforms copy_from_user() will - * *not* work with the kernel and userland ones at the same time, - * regardless of what we do with set_fs(). And we are talking about - * econet-over-ethernet here, so "it's only ARM anyway" doesn't - * apply. Any suggestions on fixing that code? -- AV - */ iov[0].iov_base = (void *)&ah; iov[0].iov_len = size; - for (i = 0; i < msg->msg_iovlen; i++) { - void __user *base = msg->msg_iov[i].iov_base; - size_t iov_len = msg->msg_iov[i].iov_len; - /* Check it now since we switch to KERNEL_DS later. */ - if (!access_ok(VERIFY_READ, base, iov_len)) { - mutex_unlock(&econet_mutex); - return -EFAULT; - } - iov[i+1].iov_base = base; - iov[i+1].iov_len = iov_len; - size += iov_len; + + userbuf = vmalloc(len); + if (userbuf == NULL) { + err = -ENOMEM; + goto error; } + iov[1].iov_base = userbuf; + iov[1].iov_len = len; + err = memcpy_fromiovec(userbuf, msg->msg_iov, len); + if (err) + goto error_free_buf; + /* Get a skbuff (no data, just holds our cb information) */ if ((skb = sock_alloc_send_skb(sk, 0, msg->msg_flags & MSG_DONTWAIT, - &err)) == NULL) { - mutex_unlock(&econet_mutex); - return err; - } + &err)) == NULL) + goto error_free_buf; eb = (struct ec_cb *)&skb->cb; @@ -491,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpmsg.msg_name = (void *)&udpdest; udpmsg.msg_namelen = sizeof(udpdest); udpmsg.msg_iov = &iov[0]; - udpmsg.msg_iovlen = msg->msg_iovlen + 1; + udpmsg.msg_iovlen = 2; udpmsg.msg_control = NULL; udpmsg.msg_controllen = 0; udpmsg.msg_flags=0; @@ -499,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */ err = sock_sendmsg(udpsock, &udpmsg, size); set_fs(oldfs); + +error_free_buf: + vfree(userbuf); #else err = -EPROTOTYPE; #endif + error: mutex_unlock(&econet_mutex); return err; @@ -671,6 +661,11 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) err = 0; switch (cmd) { case SIOCSIFADDR: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + edev = dev->ec_ptr; if (edev == NULL) { /* Magic up a new one. */ @@ -856,9 +851,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) { struct iphdr *ip = ip_hdr(skb); unsigned char stn = ntohl(ip->saddr) & 0xff; + struct dst_entry *dst = skb_dst(skb); + struct ec_device *edev = NULL; struct sock *sk = NULL; struct sk_buff *newskb; - struct ec_device *edev = skb->dev->ec_ptr; + + if (dst) + edev = dst->dev->ec_ptr; if (! edev) goto bad; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 93c91b633a56..6df6ecf49708 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -52,11 +52,11 @@ struct net_device *ieee802154_get_dev(struct net *net, switch (addr->addr_type) { case IEEE802154_ADDR_LONG: - rtnl_lock(); - dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr); + rcu_read_lock(); + dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr); if (dev) dev_hold(dev); - rtnl_unlock(); + rcu_read_unlock(); break; case IEEE802154_ADDR_SHORT: if (addr->pan_id == 0xffff || diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f581f77d1097..f2b61107df6c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1148,21 +1148,13 @@ int inet_sk_rebuild_header(struct sock *sk) struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { - .ip4_u = { - .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk), - }, - }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { - .ports = { - .sport = inet->inet_sport, - .dport = inet->inet_dport, - }, - }, + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport, }; security_sk_classify_flow(sk, &fl); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index d8e540c5b071..a2fc7b961dbc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -433,8 +433,8 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, - .saddr = tip } } }; + struct flowi fl = { .fl4_dst = sip, + .fl4_src = tip }; struct rtable *rt; int flag = 0; /*unsigned long now; */ @@ -883,7 +883,7 @@ static int arp_process(struct sk_buff *skb) dont_send = arp_ignore(in_dev, sip, tip); if (!dont_send && IN_DEV_ARPFILTER(in_dev)) - dont_send |= arp_filter(sip, tip, dev); + dont_send = arp_filter(sip, tip, dev); if (!dont_send) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) { @@ -1017,13 +1017,14 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rtnl(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); + if (__in_dev_get_rcu(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on); return 0; } return -ENXIO; } +/* must be called with rcu_read_lock() */ static int arp_req_set_public(struct net *net, struct arpreq *r, struct net_device *dev) { @@ -1033,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, + dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; @@ -1061,8 +1062,8 @@ static int arp_req_set(struct net *net, struct arpreq *r, if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (dev == NULL) { - struct flowi fl = { .nl_u.ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = RTO_ONLINK }; struct rtable *rt; err = ip_route_output_key(net, &rt, &fl); if (err != 0) @@ -1169,8 +1170,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (dev == NULL) { - struct flowi fl = { .nl_u.ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = RTO_ONLINK }; struct rtable *rt; err = ip_route_output_key(net, &rt, &fl); if (err != 0) @@ -1225,10 +1226,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!(r.arp_flags & ATF_NETMASK)) ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = htonl(0xFFFFFFFFUL); - rtnl_lock(); + rcu_read_lock(); if (r.arp_dev[0]) { err = -ENODEV; - dev = __dev_get_by_name(net, r.arp_dev); + dev = dev_get_by_name_rcu(net, r.arp_dev); if (dev == NULL) goto out; @@ -1252,12 +1253,12 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; case SIOCGARP: err = arp_req_get(&r, dev); - if (!err && copy_to_user(arg, &r, sizeof(r))) - err = -EFAULT; break; } out: - rtnl_unlock(); + rcu_read_unlock(); + if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) + err = -EFAULT; return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dc94b0316b78..748cb5b337bd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1256,6 +1256,87 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } +static size_t inet_get_link_af_size(const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rtnl(dev); + + if (!in_dev) + return 0; + + return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ +} + +static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct nlattr *nla; + int i; + + if (!in_dev) + return -ENODATA; + + nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); + if (nla == NULL) + return -EMSGSIZE; + + for (i = 0; i < IPV4_DEVCONF_MAX; i++) + ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; + + return 0; +} + +static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { + [IFLA_INET_CONF] = { .type = NLA_NESTED }, +}; + +static int inet_validate_link_af(const struct net_device *dev, + const struct nlattr *nla) +{ + struct nlattr *a, *tb[IFLA_INET_MAX+1]; + int err, rem; + + if (dev && !__in_dev_get_rtnl(dev)) + return -EAFNOSUPPORT; + + err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); + if (err < 0) + return err; + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { + int cfgid = nla_type(a); + + if (nla_len(a) < 4) + return -EINVAL; + + if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) + return -EINVAL; + } + } + + return 0; +} + +static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ + struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct nlattr *a, *tb[IFLA_INET_MAX+1]; + int rem; + + if (!in_dev) + return -EAFNOSUPPORT; + + if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) + BUG(); + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) + ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); + } + + return 0; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1349,9 +1430,9 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, return ret; } -int ipv4_doint_and_flush(ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) +static int ipv4_doint_and_flush(ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; @@ -1619,6 +1700,14 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; +static struct rtnl_af_ops inet_af_ops = { + .family = AF_INET, + .fill_link_af = inet_fill_link_af, + .get_link_af_size = inet_get_link_af_size, + .validate_link_af = inet_validate_link_af, + .set_link_af = inet_set_link_af, +}; + void __init devinet_init(void) { register_pernet_subsys(&devinet_ops); @@ -1626,6 +1715,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + rtnl_af_register(&inet_af_ops); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 14ca1f1c3fb0..e42a905180f0 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -23,6 +23,8 @@ struct esp_skb_cb { #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) +static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); + /* * Allocate an AEAD request structure with extra space for SG and IV. * @@ -117,25 +119,35 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int blksize; int clen; int alen; + int plen; + int tfclen; int nfrags; /* skb is pure payload to encrypt */ err = -ENOMEM; - /* Round to block size */ - clen = skb->len; - esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); + tfclen = 0; + if (x->tfcpad) { + struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); + u32 padto; + + padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached)); + if (skb->len < padto) + tfclen = padto - skb->len; + } blksize = ALIGN(crypto_aead_blocksize(aead), 4); - clen = ALIGN(clen + 2, blksize); + clen = ALIGN(skb->len + 2 + tfclen, blksize); if (esp->padlen) clen = ALIGN(clen, esp->padlen); + plen = clen - skb->len - tfclen; - if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0) + err = skb_cow_data(skb, tfclen + plen + alen, &trailer); + if (err < 0) goto error; nfrags = err; @@ -150,13 +162,17 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* Fill padding... */ tail = skb_tail_pointer(trailer); + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } do { int i; - for (i=0; i<clen-skb->len - 2; i++) + for (i = 0; i < plen - 2; i++) tail[i] = i + 1; } while (0); - tail[clen - skb->len - 2] = (clen - skb->len) - 2; - tail[clen - skb->len - 1] = *skb_mac_header(skb); + tail[plen - 2] = plen - 2; + tail[plen - 1] = *skb_mac_header(skb); pskb_put(skb, trailer, clen - skb->len + alen); skb_push(skb, -skb_network_offset(skb)); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb6f69a8f27a..1d2cdd43a878 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -158,18 +158,20 @@ static void fib_flush(struct net *net) struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = addr - } - }, - .flags = FLOWI_FLAG_MATCH_ANY_IIF + .fl4_dst = addr, }; struct fib_result res = { 0 }; struct net_device *dev = NULL; + struct fib_table *local_table; + +#ifdef CONFIG_IP_MULTIPLE_TABLES + res.r = NULL; +#endif rcu_read_lock(); - if (fib_lookup(net, &fl, &res)) { + local_table = fib_get_table(net, RT_TABLE_LOCAL); + if (!local_table || + fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { rcu_read_unlock(); return NULL; } @@ -193,7 +195,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; + struct flowi fl = { .fl4_dst = addr }; struct fib_result res; unsigned ret = RTN_BROADCAST; struct fib_table *local_table; @@ -247,13 +249,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, { struct in_device *in_dev; struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = src, - .saddr = dst, - .tos = tos - } - }, + .fl4_dst = src, + .fl4_src = dst, + .fl4_tos = tos, .mark = mark, .iif = oif }; @@ -853,13 +851,9 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) struct fib_result res; struct flowi fl = { .mark = frn->fl_mark, - .nl_u = { - .ip4_u = { - .daddr = frn->fl_addr, - .tos = frn->fl_tos, - .scope = frn->fl_scope - } - } + .fl4_dst = frn->fl_addr, + .fl4_tos = frn->fl_tos, + .fl4_scope = frn->fl_scope, }; #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -999,7 +993,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(dev_net(dev), 0); break; case NETDEV_UNREGISTER_BATCH: - rt_cache_flush_batch(); + /* The batch unregister is only called on the first + * device in the list of devices being unregistered. + * Therefore we should not pass dev_net(dev) in here. + */ + rt_cache_flush_batch(NULL); break; } return NOTIFY_DONE; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a72c62d03106..9aff11d7278f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -563,12 +563,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, rcu_read_lock(); { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = nh->nh_gw, - .scope = cfg->fc_scope + 1, - }, - }, + .fl4_dst = nh->nh_gw, + .fl4_scope = cfg->fc_scope + 1, .oif = nh->nh_oif, }; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 200eb538fbb3..0f280348e0fd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size) if (size <= PAGE_SIZE) return kzalloc(size, GFP_KERNEL); else - return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + return vzalloc(size); } static void __tnode_vfree(struct work_struct *arg) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c6e2affafbd3..4aa1b7f01ea0 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -386,10 +386,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = rt->rt_spec_dst, - .tos = RT_TOS(ip_hdr(skb)->tos) } }, + struct flowi fl = { .fl4_dst= daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), .proto = IPPROTO_ICMP }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(net, &rt, &fl)) @@ -542,22 +541,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = icmp_param.replyopts.srr ? - icmp_param.replyopts.faddr : - iph->saddr, - .saddr = saddr, - .tos = RT_TOS(tos) - } - }, + .fl4_dst = icmp_param.replyopts.srr ? + icmp_param.replyopts.faddr : iph->saddr, + .fl4_src = saddr, + .fl4_tos = RT_TOS(tos), .proto = IPPROTO_ICMP, - .uli_u = { - .icmpt = { - .type = type, - .code = code - } - } + .fl_icmp_type = type, + .fl_icmp_code = code, }; int err; struct rtable *rt2; @@ -569,6 +559,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* No need to clone since we're just using its address. */ rt2 = rt; + if (!fl.nl_u.ip4_u.saddr) + fl.nl_u.ip4_u.saddr = rt->rt_src; + err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); switch (err) { case 0: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 08d0d81ffc15..e0e77e297de3 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -149,21 +149,37 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc); static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta); + +static void ip_mc_list_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_mc_list, rcu)); +} + static void ip_ma_put(struct ip_mc_list *im) { if (atomic_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); - kfree(im); + call_rcu(&im->rcu, ip_mc_list_reclaim); } } +#define for_each_pmc_rcu(in_dev, pmc) \ + for (pmc = rcu_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next_rcu)) + +#define for_each_pmc_rtnl(in_dev, pmc) \ + for (pmc = rtnl_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rtnl_dereference(pmc->next_rcu)) + #ifdef CONFIG_IP_MULTICAST /* * Timer management */ -static __inline__ void igmp_stop_timer(struct ip_mc_list *im) +static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) @@ -284,6 +300,8 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } +#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb)) + static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) { struct sk_buff *skb; @@ -292,14 +310,20 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct igmpv3_report *pig; struct net *net = dev_net(dev); - skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); - if (skb == NULL) - return NULL; + while (1) { + skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), + GFP_ATOMIC | __GFP_NOWARN); + if (skb) + break; + size >>= 1; + if (size < 256) + return NULL; + } + igmp_skb_size(skb) = size; { struct flowi fl = { .oif = dev->ifindex, - .nl_u = { .ip4_u = { - .daddr = IGMPV3_ALL_MCR } }, + .fl4_dst = IGMPV3_ALL_MCR, .proto = IPPROTO_IGMP }; if (ip_route_output_key(net, &rt, &fl)) { kfree_skb(skb); @@ -384,7 +408,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \ +#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \ skb_tailroom(skb)) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, @@ -502,8 +526,8 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) int type; if (!pmc) { - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; spin_lock_bh(&pmc->lock); @@ -514,7 +538,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } else { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -556,7 +580,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) struct sk_buff *skb = NULL; int type, dtype; - read_lock(&in_dev->mc_list_lock); + rcu_read_lock(); spin_lock_bh(&in_dev->mc_tomb_lock); /* deleted MCA's */ @@ -593,7 +617,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) spin_unlock_bh(&in_dev->mc_tomb_lock); /* change recs */ - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(in_dev, pmc) { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -616,7 +640,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) } spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); if (!skb) return; @@ -644,7 +668,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, { struct flowi fl = { .oif = dev->ifindex, - .nl_u = { .ip4_u = { .daddr = dst } }, + .fl4_dst = dst, .proto = IPPROTO_IGMP }; if (ip_route_output_key(net, &rt, &fl)) return -1; @@ -813,14 +837,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == group) { igmp_stop_timer(im); break; } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, @@ -906,8 +930,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * - Use the igmp->igmp_code field as the maximum * delay possible */ - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { int changed; if (group && group != im->multiaddr) @@ -925,7 +949,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (changed) igmp_mod_timer(im, max_delay); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } /* called in rcu_read_lock() section */ @@ -1110,8 +1134,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(pmc); } /* clear dead sources, too */ - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { struct ip_sf_list *psf, *psf_next; spin_lock_bh(&pmc->lock); @@ -1123,7 +1147,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(psf); } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } #endif @@ -1209,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - for (im=in_dev->mc_list; im; im=im->next) { + for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == addr) { im->users++; ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0); @@ -1217,7 +1241,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) } } - im = kmalloc(sizeof(*im), GFP_KERNEL); + im = kzalloc(sizeof(*im), GFP_KERNEL); if (!im) goto out; @@ -1227,26 +1251,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; - im->sfcount[MCAST_INCLUDE] = 0; im->sfcount[MCAST_EXCLUDE] = 1; - im->sources = NULL; - im->tomb = NULL; - im->crcount = 0; atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - im->tm_running = 0; setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; - im->reporter = 0; - im->gsquery = 0; #endif - im->loaded = 0; - write_lock_bh(&in_dev->mc_list_lock); - im->next = in_dev->mc_list; - in_dev->mc_list = im; + + im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - write_unlock_bh(&in_dev->mc_list_lock); + rcu_assign_pointer(in_dev->mc_list, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1260,26 +1276,32 @@ EXPORT_SYMBOL(ip_mc_inc_group); /* * Resend IGMP JOIN report; used for bonding. + * Called with rcu_read_lock() */ -void ip_mc_rejoin_group(struct ip_mc_list *im) +void ip_mc_rejoin_groups(struct in_device *in_dev) { #ifdef CONFIG_IP_MULTICAST - struct in_device *in_dev = im->interface; + struct ip_mc_list *im; + int type; - if (im->multiaddr == IGMP_ALL_HOSTS) - return; + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == IGMP_ALL_HOSTS) + continue; - /* a failover is happening and switches - * must be notified immediately */ - if (IGMP_V1_SEEN(in_dev)) - igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT); - else if (IGMP_V2_SEEN(in_dev)) - igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT); - else - igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT); + /* a failover is happening and switches + * must be notified immediately + */ + if (IGMP_V1_SEEN(in_dev)) + type = IGMP_HOST_MEMBERSHIP_REPORT; + else if (IGMP_V2_SEEN(in_dev)) + type = IGMPV2_HOST_MEMBERSHIP_REPORT; + else + type = IGMPV3_HOST_MEMBERSHIP_REPORT; + igmp_send_report(in_dev, im, type); + } #endif } -EXPORT_SYMBOL(ip_mc_rejoin_group); +EXPORT_SYMBOL(ip_mc_rejoin_groups); /* * A socket has left a multicast group on device dev @@ -1287,17 +1309,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group); void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { - struct ip_mc_list *i, **ip; + struct ip_mc_list *i; + struct ip_mc_list __rcu **ip; ASSERT_RTNL(); - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { + for (ip = &in_dev->mc_list; + (i = rtnl_dereference(*ip)) != NULL; + ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { - write_lock_bh(&in_dev->mc_list_lock); - *ip = i->next; + *ip = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); if (!in_dev->dead) @@ -1316,34 +1339,34 @@ EXPORT_SYMBOL(ip_mc_dec_group); void ip_mc_unmap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); } void ip_mc_remap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* Device going down */ void ip_mc_down(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST in_dev->mr_ifc_count = 0; @@ -1374,7 +1397,6 @@ void ip_mc_init_dev(struct in_device *in_dev) in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; #endif - rwlock_init(&in_dev->mc_list_lock); spin_lock_init(&in_dev->mc_tomb_lock); } @@ -1382,14 +1404,14 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* @@ -1405,24 +1427,19 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); - write_lock_bh(&in_dev->mc_list_lock); - while ((i = in_dev->mc_list) != NULL) { - in_dev->mc_list = i->next; + while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { + in_dev->mc_list = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); + igmp_group_dropped(i); ip_ma_put(i); - - write_lock_bh(&in_dev->mc_list_lock); } - write_unlock_bh(&in_dev->mc_list_lock); } /* RTNL is locked */ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = imr->imr_multiaddr.s_addr } } }; + struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; struct rtable *rt; struct net_device *dev = NULL; struct in_device *idev = NULL; @@ -1513,18 +1530,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif @@ -1685,18 +1702,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); @@ -1793,7 +1810,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -EADDRINUSE; ifindex = imr->imr_ifindex; - for (i = inet->mc_list; i; i = i->next) { + for_each_pmc_rtnl(inet, i) { if (i->multi.imr_multiaddr.s_addr == addr && i->multi.imr_ifindex == ifindex) goto done; @@ -1807,7 +1824,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); - iml->next = inet->mc_list; + iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; rcu_assign_pointer(inet->mc_list, iml); @@ -1821,17 +1838,14 @@ EXPORT_SYMBOL(ip_mc_join_group); static void ip_sf_socklist_reclaim(struct rcu_head *rp) { - struct ip_sf_socklist *psf; - - psf = container_of(rp, struct ip_sf_socklist, rcu); + kfree(container_of(rp, struct ip_sf_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(psf); } static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { - struct ip_sf_socklist *psf = iml->sflist; + struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; if (psf == NULL) { @@ -1851,11 +1865,8 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, static void ip_mc_socklist_reclaim(struct rcu_head *rp) { - struct ip_mc_socklist *iml; - - iml = container_of(rp, struct ip_mc_socklist, rcu); + kfree(container_of(rp, struct ip_mc_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(iml); } @@ -1866,7 +1877,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp) int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); - struct ip_mc_socklist *iml, **imlp; + struct ip_mc_socklist *iml; + struct ip_mc_socklist __rcu **imlp; struct in_device *in_dev; struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; @@ -1876,7 +1888,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; - for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { + for (imlp = &inet->mc_list; + (iml = rtnl_dereference(*imlp)) != NULL; + imlp = &iml->next_rcu) { if (iml->multi.imr_multiaddr.s_addr != group) continue; if (ifindex) { @@ -1888,7 +1902,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - rcu_assign_pointer(*imlp, iml->next); + *imlp = iml->next_rcu; if (in_dev) ip_mc_dec_group(in_dev, group); @@ -1934,7 +1948,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if ((pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr) && (pmc->multi.imr_ifindex == imr.imr_ifindex)) @@ -1958,7 +1972,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct pmc->sfmode = omode; } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ @@ -2077,7 +2091,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2107,7 +2121,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, 0, NULL, 0); } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); @@ -2155,7 +2169,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2163,7 +2177,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!pmc) /* must have a prior join */ goto done; msf->imsf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); if (!psl) { len = 0; @@ -2208,7 +2222,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; @@ -2216,7 +2230,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!pmc) /* must have a prior join */ goto done; gsf->gf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; @@ -2257,7 +2271,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) goto out; rcu_read_lock(); - for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { + for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; @@ -2265,7 +2279,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) ret = inet->mc_all; if (!pmc) goto unlock; - psl = pmc->sflist; + psl = rcu_dereference(pmc->sflist); ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) goto unlock; @@ -2300,16 +2314,14 @@ void ip_mc_drop_socket(struct sock *sk) return; rtnl_lock(); - while ((iml = inet->mc_list) != NULL) { + while ((iml = rtnl_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; - rcu_assign_pointer(inet->mc_list, iml->next); + inet->mc_list = iml->next_rcu; in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); - if (in_dev != NULL) { + if (in_dev != NULL) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); - in_dev_put(in_dev); - } /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); call_rcu(&iml->rcu, ip_mc_socklist_reclaim); @@ -2323,8 +2335,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p struct ip_sf_list *psf; int rv = 0; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; } @@ -2345,7 +2357,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p } else rv = 1; /* unspecified source; tentatively allow */ } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return rv; } @@ -2371,13 +2383,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) in_dev = __in_dev_get_rcu(state->dev); if (!in_dev) continue; - read_lock(&in_dev->mc_list_lock); - im = in_dev->mc_list; + im = rcu_dereference(in_dev->mc_list); if (im) { state->in_dev = in_dev; break; } - read_unlock(&in_dev->mc_list_lock); } return im; } @@ -2385,11 +2395,9 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - im = im->next; - while (!im) { - if (likely(state->in_dev != NULL)) - read_unlock(&state->in_dev->mc_list_lock); + im = rcu_dereference(im->next_rcu); + while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->in_dev = NULL; @@ -2398,8 +2406,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li state->in_dev = __in_dev_get_rcu(state->dev); if (!state->in_dev) continue; - read_lock(&state->in_dev->mc_list_lock); - im = state->in_dev->mc_list; + im = rcu_dereference(state->in_dev->mc_list); } return im; } @@ -2435,10 +2442,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->mc_list_lock); - state->in_dev = NULL; - } + + state->in_dev = NULL; state->dev = NULL; rcu_read_unlock(); } @@ -2460,7 +2465,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) querier = "NONE"; #endif - if (state->in_dev->mc_list == im) { + if (rcu_dereference(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } @@ -2519,8 +2524,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) idev = __in_dev_get_rcu(state->dev); if (unlikely(idev == NULL)) continue; - read_lock(&idev->mc_list_lock); - im = idev->mc_list; + im = rcu_dereference(idev->mc_list); if (likely(im != NULL)) { spin_lock_bh(&im->lock); psf = im->sources; @@ -2531,7 +2535,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) } spin_unlock_bh(&im->lock); } - read_unlock(&idev->mc_list_lock); } return psf; } @@ -2545,9 +2548,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l spin_unlock_bh(&state->im->lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) - read_unlock(&state->idev->mc_list_lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2556,8 +2556,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l state->idev = __in_dev_get_rcu(state->dev); if (!state->idev) continue; - read_lock(&state->idev->mc_list_lock); - state->im = state->idev->mc_list; + state->im = rcu_dereference(state->idev->mc_list); } if (!state->im) break; @@ -2603,10 +2602,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) spin_unlock_bh(&state->im->lock); state->im = NULL; } - if (likely(state->idev != NULL)) { - read_unlock(&state->idev->mc_list_lock); - state->idev = NULL; - } + state->idev = NULL; state->dev = NULL; rcu_read_unlock(); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7174370b1195..25e318153f14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -55,7 +55,6 @@ EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { - const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -75,9 +74,9 @@ int inet_csk_bind_conflict(const struct sock *sk, sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) + const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); + if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || + sk2_rcv_saddr == sk_rcv_saddr(sk)) break; } } @@ -358,17 +357,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct ip_options *opt = inet_rsk(req)->opt; struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : - ireq->rmt_addr), - .saddr = ireq->loc_addr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet_sk(sk)->inet_sport, - .dport = ireq->rmt_port } } }; + .fl_ip_sport = inet_sk(sk)->inet_sport, + .fl_ip_dport = ireq->rmt_port }; struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 1b344f30b463..3c0369a3a663 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) } } } - sk_add_bind_node(child, &tb->owners); - inet_csk(child)->icsk_bind_hash = tb; + inet_bind_hash(child, tb, port); spin_unlock(&head->lock); return 0; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9e94d7cf4f8a..d9bc85751c74 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -63,7 +63,7 @@ * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * dtime: unused node list lock - * v4daddr: unchangeable + * daddr: unchangeable * ip_id_count: atomic value (no lock needed) */ @@ -79,15 +79,24 @@ static const struct inet_peer peer_fake_node = { .avl_height = 0 }; -static struct { +struct inet_peer_base { struct inet_peer __rcu *root; spinlock_t lock; int total; -} peers = { +}; + +static struct inet_peer_base v4_peers = { + .root = peer_avl_empty_rcu, + .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), + .total = 0, +}; + +static struct inet_peer_base v6_peers = { .root = peer_avl_empty_rcu, - .lock = __SPIN_LOCK_UNLOCKED(peers.lock), + .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), .total = 0, }; + #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -152,28 +161,45 @@ static void unlink_from_unused(struct inet_peer *p) } } +static int addr_compare(const struct inetpeer_addr *a, + const struct inetpeer_addr *b) +{ + int i, n = (a->family == AF_INET ? 1 : 4); + + for (i = 0; i < n; i++) { + if (a->a6[i] == b->a6[i]) + continue; + if (a->a6[i] < b->a6[i]) + return -1; + return 1; + } + + return 0; +} + /* * Called with local BH disabled and the pool lock held. */ -#define lookup(_daddr, _stack) \ +#define lookup(_daddr, _stack, _base) \ ({ \ struct inet_peer *u; \ struct inet_peer __rcu **v; \ \ stackptr = _stack; \ - *stackptr++ = &peers.root; \ - for (u = rcu_dereference_protected(peers.root, \ - lockdep_is_held(&peers.lock)); \ + *stackptr++ = &_base->root; \ + for (u = rcu_dereference_protected(_base->root, \ + lockdep_is_held(&_base->lock)); \ u != peer_avl_empty; ) { \ - if (_daddr == u->v4daddr) \ + int cmp = addr_compare(_daddr, &u->daddr); \ + if (cmp == 0) \ break; \ - if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ + if (cmp == -1) \ v = &u->avl_left; \ else \ v = &u->avl_right; \ *stackptr++ = v; \ u = rcu_dereference_protected(*v, \ - lockdep_is_held(&peers.lock)); \ + lockdep_is_held(&_base->lock)); \ } \ u; \ }) @@ -185,13 +211,15 @@ static void unlink_from_unused(struct inet_peer *p) * But every pointer we follow is guaranteed to be valid thanks to RCU. * We exit from this function if number of links exceeds PEER_MAXDEPTH */ -static struct inet_peer *lookup_rcu_bh(__be32 daddr) +static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, + struct inet_peer_base *base) { - struct inet_peer *u = rcu_dereference_bh(peers.root); + struct inet_peer *u = rcu_dereference_bh(base->root); int count = 0; while (u != peer_avl_empty) { - if (daddr == u->v4daddr) { + int cmp = addr_compare(daddr, &u->daddr); + if (cmp == 0) { /* Before taking a reference, check if this entry was * deleted, unlink_from_pool() sets refcnt=-1 to make * distinction between an unused entry (refcnt=0) and @@ -201,7 +229,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) u = NULL; return u; } - if ((__force __u32)daddr < (__force __u32)u->v4daddr) + if (cmp == -1) u = rcu_dereference_bh(u->avl_left); else u = rcu_dereference_bh(u->avl_right); @@ -212,19 +240,19 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) } /* Called with local BH disabled and the pool lock held. */ -#define lookup_rightempty(start) \ +#define lookup_rightempty(start, base) \ ({ \ struct inet_peer *u; \ struct inet_peer __rcu **v; \ *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ for (u = rcu_dereference_protected(*v, \ - lockdep_is_held(&peers.lock)); \ + lockdep_is_held(&base->lock)); \ u->avl_right != peer_avl_empty_rcu; ) { \ v = &u->avl_right; \ *stackptr++ = v; \ u = rcu_dereference_protected(*v, \ - lockdep_is_held(&peers.lock)); \ + lockdep_is_held(&base->lock)); \ } \ u; \ }) @@ -234,7 +262,8 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) * Look into mm/map_avl.c for more detail description of the ideas. */ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], - struct inet_peer __rcu ***stackend) + struct inet_peer __rcu ***stackend, + struct inet_peer_base *base) { struct inet_peer __rcu **nodep; struct inet_peer *node, *l, *r; @@ -243,20 +272,20 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], while (stackend > stack) { nodep = *--stackend; node = rcu_dereference_protected(*nodep, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); l = rcu_dereference_protected(node->avl_left, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); r = rcu_dereference_protected(node->avl_right, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); lh = node_height(l); rh = node_height(r); if (lh > rh + 1) { /* l: RH+2 */ struct inet_peer *ll, *lr, *lrl, *lrr; int lrh; ll = rcu_dereference_protected(l->avl_left, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); lr = rcu_dereference_protected(l->avl_right, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); lrh = node_height(lr); if (lrh <= node_height(ll)) { /* ll: RH+1 */ RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ @@ -268,9 +297,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], RCU_INIT_POINTER(*nodep, l); } else { /* ll: RH, lr: RH+1 */ lrl = rcu_dereference_protected(lr->avl_left, - lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */ + lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ lrr = rcu_dereference_protected(lr->avl_right, - lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */ + lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */ RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ node->avl_height = rh + 1; /* node: RH+1 */ @@ -286,9 +315,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], struct inet_peer *rr, *rl, *rlr, *rll; int rlh; rr = rcu_dereference_protected(r->avl_right, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); rl = rcu_dereference_protected(r->avl_left, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); rlh = node_height(rl); if (rlh <= node_height(rr)) { /* rr: LH+1 */ RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ @@ -300,9 +329,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], RCU_INIT_POINTER(*nodep, r); } else { /* rr: RH, rl: RH+1 */ rlr = rcu_dereference_protected(rl->avl_right, - lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */ + lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ rll = rcu_dereference_protected(rl->avl_left, - lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */ + lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */ RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ node->avl_height = lh + 1; /* node: LH+1 */ @@ -321,14 +350,14 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], } /* Called with local BH disabled and the pool lock held. */ -#define link_to_pool(n) \ +#define link_to_pool(n, base) \ do { \ n->avl_height = 1; \ n->avl_left = peer_avl_empty_rcu; \ n->avl_right = peer_avl_empty_rcu; \ /* lockless readers can catch us now */ \ rcu_assign_pointer(**--stackptr, n); \ - peer_avl_rebalance(stack, stackptr); \ + peer_avl_rebalance(stack, stackptr, base); \ } while (0) static void inetpeer_free_rcu(struct rcu_head *head) @@ -337,13 +366,13 @@ static void inetpeer_free_rcu(struct rcu_head *head) } /* May be called with local BH enabled. */ -static void unlink_from_pool(struct inet_peer *p) +static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) { int do_free; do_free = 0; - spin_lock_bh(&peers.lock); + spin_lock_bh(&base->lock); /* Check the reference counter. It was artificially incremented by 1 * in cleanup() function to prevent sudden disappearing. If we can * atomically (because of lockless readers) take this last reference, @@ -353,7 +382,7 @@ static void unlink_from_pool(struct inet_peer *p) if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { struct inet_peer __rcu **stack[PEER_MAXDEPTH]; struct inet_peer __rcu ***stackptr, ***delp; - if (lookup(p->v4daddr, stack) != p) + if (lookup(&p->daddr, stack, base) != p) BUG(); delp = stackptr - 1; /* *delp[0] == p */ if (p->avl_left == peer_avl_empty_rcu) { @@ -362,11 +391,11 @@ static void unlink_from_pool(struct inet_peer *p) } else { /* look for a node to insert instead of p */ struct inet_peer *t; - t = lookup_rightempty(p); + t = lookup_rightempty(p, base); BUG_ON(rcu_dereference_protected(*stackptr[-1], - lockdep_is_held(&peers.lock)) != t); + lockdep_is_held(&base->lock)) != t); **--stackptr = t->avl_left; - /* t is removed, t->v4daddr > x->v4daddr for any + /* t is removed, t->daddr > x->daddr for any * x in p->avl_left subtree. * Put t in the old place of p. */ RCU_INIT_POINTER(*delp[0], t); @@ -376,11 +405,11 @@ static void unlink_from_pool(struct inet_peer *p) BUG_ON(delp[1] != &p->avl_left); delp[1] = &t->avl_left; /* was &p->avl_left */ } - peer_avl_rebalance(stack, stackptr); - peers.total--; + peer_avl_rebalance(stack, stackptr, base); + base->total--; do_free = 1; } - spin_unlock_bh(&peers.lock); + spin_unlock_bh(&base->lock); if (do_free) call_rcu_bh(&p->rcu, inetpeer_free_rcu); @@ -395,6 +424,16 @@ static void unlink_from_pool(struct inet_peer *p) inet_putpeer(p); } +static struct inet_peer_base *family_to_base(int family) +{ + return (family == AF_INET ? &v4_peers : &v6_peers); +} + +static struct inet_peer_base *peer_to_base(struct inet_peer *p) +{ + return family_to_base(p->daddr.family); +} + /* May be called with local BH enabled. */ static int cleanup_once(unsigned long ttl) { @@ -428,21 +467,22 @@ static int cleanup_once(unsigned long ttl) * happen because of entry limits in route cache. */ return -1; - unlink_from_pool(p); + unlink_from_pool(p, peer_to_base(p)); return 0; } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(__be32 daddr, int create) +struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) { - struct inet_peer *p; struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; + struct inet_peer_base *base = family_to_base(AF_INET); + struct inet_peer *p; /* Look up for the address quickly, lockless. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock_bh(); - p = lookup_rcu_bh(daddr); + p = lookup_rcu_bh(daddr, base); rcu_read_unlock_bh(); if (p) { @@ -456,50 +496,57 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ - spin_lock_bh(&peers.lock); - p = lookup(daddr, stack); + spin_lock_bh(&base->lock); + p = lookup(daddr, stack, base); if (p != peer_avl_empty) { atomic_inc(&p->refcnt); - spin_unlock_bh(&peers.lock); + spin_unlock_bh(&base->lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); return p; } p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { - p->v4daddr = daddr; + p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, secure_ip_id(daddr)); + atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); p->tcp_ts_stamp = 0; INIT_LIST_HEAD(&p->unused); /* Link the node. */ - link_to_pool(p); - peers.total++; + link_to_pool(p, base); + base->total++; } - spin_unlock_bh(&peers.lock); + spin_unlock_bh(&base->lock); - if (peers.total >= inet_peer_threshold) + if (base->total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ cleanup_once(0); return p; } +static int compute_total(void) +{ + return v4_peers.total + v6_peers.total; +} +EXPORT_SYMBOL_GPL(inet_getpeer); + /* Called with local BH disabled. */ static void peer_check_expire(unsigned long dummy) { unsigned long now = jiffies; - int ttl; + int ttl, total; - if (peers.total >= inet_peer_threshold) + total = compute_total(); + if (total >= inet_peer_threshold) ttl = inet_peer_minttl; else ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * - peers.total / inet_peer_threshold * HZ; + total / inet_peer_threshold * HZ; while (!cleanup_once(ttl)) { if (jiffies != now) break; @@ -508,13 +555,14 @@ static void peer_check_expire(unsigned long dummy) /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). */ - if (peers.total >= inet_peer_threshold) + total = compute_total(); + if (total >= inet_peer_threshold) peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; else peer_periodic_timer.expires = jiffies + inet_peer_gc_maxtime - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peers.total / inet_peer_threshold * HZ; + total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } @@ -530,3 +578,4 @@ void inet_putpeer(struct inet_peer *p) local_bh_enable(); } +EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 168440834ade..a1151b8adf3c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -45,6 +45,7 @@ #include <linux/udp.h> #include <linux/inet.h> #include <linux/netfilter_ipv4.h> +#include <net/inet_ecn.h> /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c @@ -70,11 +71,28 @@ struct ipq { __be32 daddr; __be16 id; u8 protocol; + u8 ecn; /* RFC3168 support */ int iif; unsigned int rid; struct inet_peer *peer; }; +#define IPFRAG_ECN_CLEAR 0x01 /* one frag had INET_ECN_NOT_ECT */ +#define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */ + +static inline u8 ip4_frag_ecn(u8 tos) +{ + tos = (tos & INET_ECN_MASK) + 1; + /* + * After the last operation we have (in binary): + * INET_ECN_NOT_ECT => 001 + * INET_ECN_ECT_1 => 010 + * INET_ECN_ECT_0 => 011 + * INET_ECN_CE => 100 + */ + return (tos & 2) ? 0 : tos; +} + static struct inet_frags ip4_frags; int ip_frag_nqueues(struct net *net) @@ -137,11 +155,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->protocol = arg->iph->protocol; qp->id = arg->iph->id; + qp->ecn = ip4_frag_ecn(arg->iph->tos); qp->saddr = arg->iph->saddr; qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer(arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) @@ -316,6 +335,7 @@ static int ip_frag_reinit(struct ipq *qp) qp->q.fragments = NULL; qp->q.fragments_tail = NULL; qp->iif = 0; + qp->ecn = 0; return 0; } @@ -328,6 +348,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) int flags, offset; int ihl, end; int err = -ENOENT; + u8 ecn; if (qp->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -339,6 +360,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; } + ecn = ip4_frag_ecn(ip_hdr(skb)->tos); offset = ntohs(ip_hdr(skb)->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; @@ -472,6 +494,7 @@ found: } qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; + qp->ecn |= ecn; atomic_add(skb->truesize, &qp->q.net->mem); if (offset == 0) qp->q.last_in |= INET_FRAG_FIRST_IN; @@ -583,6 +606,17 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); + /* RFC3168 5.3 Fragmentation support + * If one fragment had INET_ECN_NOT_ECT, + * reassembled frame also has INET_ECN_NOT_ECT + * Elif one fragment had INET_ECN_CE + * reassembled frame also has INET_ECN_CE + */ + if (qp->ecn & IPFRAG_ECN_CLEAR) + iph->tos &= ~INET_ECN_MASK; + else if (qp->ecn & IPFRAG_ECN_SET_CE) + iph->tos |= INET_ECN_CE; + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index cab2057d5430..eb68a0e34e49 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -405,11 +405,11 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, if (parms->name[0]) strlcpy(name, parms->name, IFNAMSIZ); else - sprintf(name, "gre%%d"); + strcpy(name, "gre%d"); dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); if (!dev) - return NULL; + return NULL; dev_net_set(dev, net); @@ -772,16 +772,11 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) - } - }, - .proto = IPPROTO_GRE - } -; + .fl4_dst = dst, + .fl4_src = tiph->saddr, + .fl4_tos = RT_TOS(tos), + .fl_gre_key = tunnel->parms.o_key + }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { dev->stats.tx_carrier_errors++; goto tx_error; @@ -823,7 +818,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev !ipv4_is_multicast(tunnel->parms.iph.daddr)) || rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; - skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); } } @@ -895,7 +890,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else - iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); + iph->ttl = ip4_dst_hoplimit(&rt->dst); } ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; @@ -951,14 +946,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) - } - }, - .proto = IPPROTO_GRE + .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), + .proto = IPPROTO_GRE, + .fl_gre_key = tunnel->parms.o_key }; struct rtable *rt; @@ -1216,14 +1208,11 @@ static int ipgre_open(struct net_device *dev) if (ipv4_is_multicast(t->parms.iph.daddr)) { struct flowi fl = { .oif = t->parms.link, - .nl_u = { - .ip4_u = { - .daddr = t->parms.iph.daddr, - .saddr = t->parms.iph.saddr, - .tos = RT_TOS(t->parms.iph.tos) - } - }, - .proto = IPPROTO_GRE + .fl4_dst = t->parms.iph.daddr, + .fl4_src = t->parms.iph.saddr, + .fl4_tos = RT_TOS(t->parms.iph.tos), + .proto = IPPROTO_GRE, + .fl_gre_key = t->parms.o_key }; struct rtable *rt; @@ -1775,3 +1764,4 @@ module_exit(ipgre_fini); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); +MODULE_ALIAS("gre0"); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 439d2a34ee44..04c7b3ba6b39 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -82,6 +82,7 @@ #include <linux/tcp.h> int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; +EXPORT_SYMBOL(sysctl_ip_default_ttl); /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) @@ -130,7 +131,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) int ttl = inet->uc_ttl; if (ttl < 0) - ttl = dst_metric(dst, RTAX_HOPLIMIT); + ttl = ip4_dst_hoplimit(dst); return ttl; } @@ -341,15 +342,13 @@ int ip_queue_xmit(struct sk_buff *skb) { struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet->inet_sport, - .dport = inet->inet_dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport }; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times @@ -1404,14 +1403,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { struct flowi fl = { .oif = arg->bound_dev_if, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = rt->rt_spec_dst, - .tos = RT_TOS(ip_hdr(skb)->tos) } }, - /* Not quite clean, but right. */ - .uli_u = { .ports = - { .sport = tcp_hdr(skb)->dest, - .dport = tcp_hdr(skb)->source } }, + .fl4_dst = daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), + .fl_ip_sport = tcp_hdr(skb)->dest, + .fl_ip_dport = tcp_hdr(skb)->source, .proto = sk->sk_protocol, .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 3a6e1ec5e9ae..2b097752426b 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1191,13 +1191,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - printk(","); + printk(KERN_CONT ","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - printk(" OK\n"); + printk(KERN_CONT " OK\n"); break; } @@ -1205,7 +1205,7 @@ static int __init ic_dynamic(void) continue; if (! --retries) { - printk(" timed out!\n"); + printk(KERN_CONT " timed out!\n"); break; } @@ -1215,7 +1215,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - printk("."); + printk(KERN_CONT "."); } #ifdef IPCONFIG_BOOTP @@ -1236,7 +1236,7 @@ static int __init ic_dynamic(void) ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), &ic_servaddr); - printk("my address is %pI4\n", &ic_myaddr); + printk(KERN_CONT "my address is %pI4\n", &ic_myaddr); return 0; } @@ -1468,19 +1468,19 @@ static int __init ip_auto_config(void) /* * Clue in the operator. */ - printk("IP-Config: Complete:"); - printk("\n device=%s", ic_dev->name); - printk(", addr=%pI4", &ic_myaddr); - printk(", mask=%pI4", &ic_netmask); - printk(", gw=%pI4", &ic_gateway); - printk(",\n host=%s, domain=%s, nis-domain=%s", + printk("IP-Config: Complete:\n"); + printk(" device=%s", ic_dev->name); + printk(KERN_CONT ", addr=%pI4", &ic_myaddr); + printk(KERN_CONT ", mask=%pI4", &ic_netmask); + printk(KERN_CONT ", gw=%pI4", &ic_gateway); + printk(KERN_CONT ",\n host=%s, domain=%s, nis-domain=%s", utsname()->nodename, ic_domain, utsname()->domainname); - printk(",\n bootserver=%pI4", &ic_servaddr); - printk(", rootserver=%pI4", &root_server_addr); - printk(", rootpath=%s", root_server_path); + printk(KERN_CONT ",\n bootserver=%pI4", &ic_servaddr); + printk(KERN_CONT ", rootserver=%pI4", &root_server_addr); + printk(KERN_CONT ", rootpath=%s", root_server_path); if (ic_dev_mtu) - printk(", mtu=%d", ic_dev_mtu); - printk("\n"); + printk(KERN_CONT ", mtu=%d", ic_dev_mtu); + printk(KERN_CONT "\n"); #endif /* !SILENT */ return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index cd300aaee78f..988f52fba54a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -463,13 +463,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) - } - }, + .fl4_dst = dst, + .fl4_src= tiph->saddr, + .fl4_tos = RT_TOS(tos), .proto = IPPROTO_IPIP }; @@ -589,13 +585,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; struct rtable *rt; @@ -921,3 +913,4 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS("tunl0"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ef2b0089e0ea..3f3a9afd73e0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1537,13 +1537,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (vif->flags & VIFF_TUNNEL) { struct flowi fl = { .oif = vif->link, - .nl_u = { - .ip4_u = { - .daddr = vif->remote, - .saddr = vif->local, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = vif->remote, + .fl4_src = vif->local, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; @@ -1553,12 +1549,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, } else { struct flowi fl = { .oif = vif->link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = iph->daddr, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index d88a46c54fd1..994a1f29ebbc 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -31,10 +31,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { - fl.nl_u.ip4_u.daddr = iph->daddr; + fl.fl4_dst = iph->daddr; if (type == RTN_LOCAL) - fl.nl_u.ip4_u.saddr = iph->saddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.fl4_src = iph->saddr; + fl.fl4_tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; @@ -47,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->saddr; + fl.fl4_dst = iph->saddr; if (ip_route_output_key(net, &rt, &fl) != 0) return -1; diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 48111594ee9b..19eb59d01037 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -3,15 +3,15 @@ # # objects for l3 independent conntrack -nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o +nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) ifeq ($(CONFIG_PROC_FS),y) nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o endif endif -nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o -iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o +nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o +iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3fac340a28d5..47e5178b998b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -883,6 +883,7 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(NFPROTO_ARP, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1350,6 +1351,7 @@ static int translate_compat_table(const char *name, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); + xt_compat_init_offsets(NFPROTO_ARP, number); /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a846d633b3b6..c5a75d70970f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1080,6 +1080,7 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(AF_INET, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1681,6 +1682,7 @@ translate_compat_table(struct net *net, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); + xt_compat_init_offsets(AF_INET, number); /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 43eec80c0e7c..1ff79e557f96 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -116,7 +116,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; - niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); + niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index ab9c05c9734e..5585980fce2e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -100,7 +100,7 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) ret = security_secid_to_secctx(ct->secmark, &secctx, &len); if (ret) - return ret; + return 0; ret = seq_printf(s, "secctx=%s ", secctx); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4ae1f203f7cb..b14ec7d03b6e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) local_bh_enable(); socket_seq_show(seq); - seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", + seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, tcp_death_row.tw_count, sockets, - atomic_read(&tcp_memory_allocated)); - seq_printf(seq, "UDP: inuse %d mem %d\n", + atomic_long_read(&tcp_memory_allocated)); + seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), - atomic_read(&udp_memory_allocated)); + atomic_long_read(&udp_memory_allocated)); seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", @@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), + SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1f85ef289895..a3d5ab786e81 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -549,10 +549,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct flowi fl = { .oif = ipc.oif, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = saddr, - .tos = tos } }, + .fl4_dst = daddr, + .fl4_src = saddr, + .fl4_tos = tos, .proto = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f70ae1bccb8a..3e5b7cc2db4f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -139,6 +139,8 @@ static unsigned long expires_ljiffies; */ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); +static unsigned int ipv4_default_advmss(const struct dst_entry *dst); +static unsigned int ipv4_default_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); @@ -155,6 +157,8 @@ static struct dst_ops ipv4_dst_ops = { .protocol = cpu_to_be16(ETH_P_IP), .gc = rt_garbage_collect, .check = ipv4_dst_check, + .default_advmss = ipv4_default_advmss, + .default_mtu = ipv4_default_mtu, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, @@ -383,8 +387,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) (__force u32)r->rt_gateway, r->rt_flags, atomic_read(&r->dst.__refcnt), r->dst.__use, 0, (__force u32)r->rt_src, - (dst_metric(&r->dst, RTAX_ADVMSS) ? - (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), + dst_metric_advmss(&r->dst) + 40, dst_metric(&r->dst, RTAX_WINDOW), (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + dst_metric(&r->dst, RTAX_RTTVAR)), @@ -684,17 +687,17 @@ static inline bool rt_caching(const struct net *net) static inline bool compare_hash_inputs(const struct flowi *fl1, const struct flowi *fl2) { - return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | + return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | + ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | (fl1->iif ^ fl2->iif)) == 0); } static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | + return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | + ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | + (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } @@ -714,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth) * Can be called by a softirq or a process. * In the later case, we want to be reschedule if necessary */ -static void rt_do_flush(int process_context) +static void rt_do_flush(struct net *net, int process_context) { unsigned int i; struct rtable *rth, *next; - struct rtable * tail; for (i = 0; i <= rt_hash_mask; i++) { + struct rtable __rcu **pprev; + struct rtable *list; + if (process_context && need_resched()) cond_resched(); rth = rcu_dereference_raw(rt_hash_table[i].chain); @@ -728,50 +733,32 @@ static void rt_do_flush(int process_context) continue; spin_lock_bh(rt_hash_lock_addr(i)); -#ifdef CONFIG_NET_NS - { - struct rtable __rcu **prev; - struct rtable *p; - rth = rcu_dereference_protected(rt_hash_table[i].chain, + list = NULL; + pprev = &rt_hash_table[i].chain; + rth = rcu_dereference_protected(*pprev, lockdep_is_held(rt_hash_lock_addr(i))); - /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; - tail = rcu_dereference_protected(tail->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i)))) - if (!rt_is_expired(tail)) - break; - if (rth != tail) - rt_hash_table[i].chain = tail; - - /* call rt_free on entries after the tail requiring flush */ - prev = &rt_hash_table[i].chain; - for (p = rcu_dereference_protected(*prev, + while (rth) { + next = rcu_dereference_protected(rth->dst.rt_next, lockdep_is_held(rt_hash_lock_addr(i))); - p != NULL; - p = next) { - next = rcu_dereference_protected(p->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i))); - if (!rt_is_expired(p)) { - prev = &p->dst.rt_next; + + if (!net || + net_eq(dev_net(rth->dst.dev), net)) { + rcu_assign_pointer(*pprev, next); + rcu_assign_pointer(rth->dst.rt_next, list); + list = rth; } else { - *prev = next; - rt_free(p); + pprev = &rth->dst.rt_next; } + rth = next; } - } -#else - rth = rcu_dereference_protected(rt_hash_table[i].chain, - lockdep_is_held(rt_hash_lock_addr(i))); - rcu_assign_pointer(rt_hash_table[i].chain, NULL); - tail = NULL; -#endif + spin_unlock_bh(rt_hash_lock_addr(i)); - for (; rth != tail; rth = next) { - next = rcu_dereference_protected(rth->dst.rt_next, 1); - rt_free(rth); + for (; list; list = next) { + next = rcu_dereference_protected(list->dst.rt_next, 1); + rt_free(list); } } } @@ -919,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay) { rt_cache_invalidate(net); if (delay >= 0) - rt_do_flush(!in_softirq()); + rt_do_flush(net, !in_softirq()); } /* Flush previous cache invalidated entries from the cache */ -void rt_cache_flush_batch(void) +void rt_cache_flush_batch(struct net *net) { - rt_do_flush(!in_softirq()); + rt_do_flush(net, !in_softirq()); } static void rt_emergency_hash_rebuild(struct net *net) @@ -1289,7 +1276,7 @@ void rt_bind_peer(struct rtable *rt, int create) { struct inet_peer *peer; - peer = inet_getpeer(rt->rt_dst, create); + peer = inet_getpeer_v4(rt->rt_dst, create); if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); @@ -1686,11 +1673,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, if (mtu < dst_mtu(&rth->dst)) { dst_confirm(&rth->dst); if (mtu < ip_rt_min_pmtu) { + u32 lock = dst_metric(&rth->dst, + RTAX_LOCK); mtu = ip_rt_min_pmtu; - rth->dst.metrics[RTAX_LOCK-1] |= - (1 << RTAX_MTU); + lock |= (1 << RTAX_MTU); + dst_metric_set(&rth->dst, RTAX_LOCK, + lock); } - rth->dst.metrics[RTAX_MTU-1] = mtu; + dst_metric_set(&rth->dst, RTAX_MTU, mtu); dst_set_expires(&rth->dst, ip_rt_mtu_expires); } @@ -1708,10 +1698,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (dst_mtu(dst) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { + u32 lock = dst_metric(dst, RTAX_LOCK); mtu = ip_rt_min_pmtu; - dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); + dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); } - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); dst_set_expires(dst, ip_rt_mtu_expires); call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } @@ -1794,38 +1785,55 @@ static void set_class_tag(struct rtable *rt, u32 tag) } #endif +static unsigned int ipv4_default_advmss(const struct dst_entry *dst) +{ + unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); + + if (advmss == 0) { + advmss = max_t(unsigned int, dst->dev->mtu - 40, + ip_rt_min_advmss); + if (advmss > 65535 - 40) + advmss = 65535 - 40; + } + return advmss; +} + +static unsigned int ipv4_default_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = dst->dev->mtu; + + if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { + const struct rtable *rt = (const struct rtable *) dst; + + if (rt->rt_gateway != rt->rt_dst && mtu > 576) + mtu = 576; + } + + if (mtu > IP_MAX_MTU) + mtu = IP_MAX_MTU; + + return mtu; +} + static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) { + struct dst_entry *dst = &rt->dst; struct fib_info *fi = res->fi; if (fi) { if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - memcpy(rt->dst.metrics, fi->fib_metrics, - sizeof(rt->dst.metrics)); - if (fi->fib_mtu == 0) { - rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; - if (dst_metric_locked(&rt->dst, RTAX_MTU) && - rt->rt_gateway != rt->rt_dst && - rt->dst.dev->mtu > 576) - rt->dst.metrics[RTAX_MTU-1] = 576; - } + dst_import_metrics(dst, fi->fib_metrics); #ifdef CONFIG_IP_ROUTE_CLASSID - rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif - } else - rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; - - if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) - rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_mtu(&rt->dst) > IP_MAX_MTU) - rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) - rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, - ip_rt_min_advmss); - if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) - rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; + } + + if (dst_mtu(dst) > IP_MAX_MTU) + dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); + if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) + dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -2089,12 +2097,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = saddr, - .tos = tos, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fl4_dst = daddr, + .fl4_src = saddr, + .fl4_tos = tos, + .fl4_scope = RT_SCOPE_UNIVERSE, .mark = skb->mark, .iif = dev->ifindex }; unsigned flags = 0; @@ -2480,14 +2486,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = oldflp->fl4_dst, - .saddr = oldflp->fl4_src, - .tos = tos & IPTOS_RT_MASK, - .scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : - RT_SCOPE_UNIVERSE), - } }, + struct flowi fl = { .fl4_dst = oldflp->fl4_dst, + .fl4_src = oldflp->fl4_src, + .fl4_tos = tos & IPTOS_RT_MASK, + .fl4_scope = ((tos & RTO_ONLINK) ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), .mark = oldflp->mark, .iif = net->loopback_dev->ifindex, .oif = oldflp->oif }; @@ -2559,9 +2562,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, goto out; /* RACE: Check return value of inet_select_addr instead. */ - if (rcu_dereference(dev_out->ip_ptr) == NULL) - goto out; /* Wrong error code */ - + if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { + err = -ENETUNREACH; + goto out; + } if (ipv4_is_local_multicast(oldflp->fl4_dst) || ipv4_is_lbcast(oldflp->fl4_dst)) { if (!fl.fl4_src) @@ -2622,8 +2626,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, } if (res.type == RTN_LOCAL) { - if (!fl.fl4_src) - fl.fl4_src = fl.fl4_dst; + if (!fl.fl4_src) { + if (res.fi->fib_prefsrc) + fl.fl4_src = res.fi->fib_prefsrc; + else + fl.fl4_src = fl.fl4_dst; + } dev_out = net->loopback_dev; fl.oif = dev_out->ifindex; res.fi = NULL; @@ -2725,7 +2733,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(new, &ort->dst); new->dev = ort->dst.dev; if (new->dev) @@ -2832,7 +2840,7 @@ static int rt_fill_info(struct net *net, if (rt->rt_dst != rt->rt_gateway) NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; if (rt->fl.mark) @@ -2944,13 +2952,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void err = -rt->dst.error; } else { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = src, - .tos = rtm->rtm_tos, - }, - }, + .fl4_dst = dst, + .fl4_src = src, + .fl4_tos = rtm->rtm_tos, .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, .mark = mark, }; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 650cace2180d..47519205a014 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -346,17 +346,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, */ { struct flowi fl = { .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : - ireq->rmt_addr), - .saddr = ireq->loc_addr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = IPPROTO_TCP, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = th->dest, - .dport = th->source } } }; + .fl_ip_sport = th->dest, + .fl_ip_dport = th->source }; security_req_classify_flow(req, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d96c1da4b17c..1a456652086b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,6 +26,10 @@ static int zero; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; +static int tcp_adv_win_scale_min = -31; +static int tcp_adv_win_scale_max = 31; +static int ip_ttl_min = 1; +static int ip_ttl_max = 255; /* Update system visible IP port range */ static void set_local_port_range(int range[2]) @@ -153,8 +157,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_default_ttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = ipv4_doint_and_flush, - .extra2 = &init_net, + .proc_handler = proc_dointvec_minmax, + .extra1 = &ip_ttl_min, + .extra2 = &ip_ttl_max, }, { .procname = "ip_no_pmtu_disc", @@ -398,7 +403,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_mem, .maxlen = sizeof(sysctl_tcp_mem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_doulongvec_minmax }, { .procname = "tcp_wmem", @@ -426,7 +431,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_adv_win_scale_min, + .extra2 = &tcp_adv_win_scale_max, }, { .procname = "tcp_tw_reuse", @@ -602,8 +609,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .proc_handler = proc_doulongvec_minmax, }, { .procname = "udp_rmem_min", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5f738c5c0dc4..6c11eece262c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); -int sysctl_tcp_mem[3] __read_mostly; +long sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; @@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); -atomic_t tcp_memory_allocated; /* Current allocated memory. */ +atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); /* @@ -2244,7 +2244,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < 8 || val > MAX_TCP_WINDOW) { + if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { err = -EINVAL; break; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3357f69e353d..2549b29b062d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk) int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); - if (sk->sk_sndbuf < 3 * sndmem) - sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]); + if (sk->sk_sndbuf < 3 * sndmem) { + sk->sk_sndbuf = 3 * sndmem; + if (sk->sk_sndbuf > sysctl_tcp_wmem[2]) + sk->sk_sndbuf = sysctl_tcp_wmem[2]; + } } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk) if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_memory_pressure && - atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sysctl_tcp_rmem[2]); } @@ -731,7 +734,7 @@ void tcp_update_metrics(struct sock *sk) * Reset our results. */ if (!(dst_metric_locked(dst, RTAX_RTT))) - dst->metrics[RTAX_RTT - 1] = 0; + dst_metric_set(dst, RTAX_RTT, 0); return; } @@ -773,34 +776,38 @@ void tcp_update_metrics(struct sock *sk) if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1; + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); if (!dst_metric_locked(dst, RTAX_CWND) && tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd; + dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); } else if (tp->snd_cwnd > tp->snd_ssthresh && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!dst_metric_locked(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = - max(tp->snd_cwnd >> 1, tp->snd_ssthresh); + dst_metric_set(dst, RTAX_SSTHRESH, + max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1; + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_cwnd) >> 1); } else { /* Else slow start did not finish, cwnd is non-sense, ssthresh may be also invalid. */ if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1; + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_ssthresh) >> 1); if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); } if (!dst_metric_locked(dst, RTAX_REORDERING)) { if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && tp->reordering != sysctl_tcp_reordering) - dst->metrics[RTAX_REORDERING-1] = tp->reordering; + dst_metric_set(dst, RTAX_REORDERING, tp->reordering); } } } @@ -909,25 +916,20 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } tcp_set_rto(sk); - if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) - goto reset; - -cwnd: - tp->snd_cwnd = tcp_init_cwnd(tp, dst); - tp->snd_cwnd_stamp = tcp_time_stamp; - return; - + if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) { reset: - /* Play conservative. If timestamps are not - * supported, TCP will fail to recalculate correct - * rtt, if initial rto is too small. FORGET ALL AND RESET! - */ - if (!tp->rx_opt.saw_tstamp && tp->srtt) { - tp->srtt = 0; - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + /* Play conservative. If timestamps are not + * supported, TCP will fail to recalculate correct + * rtt, if initial rto is too small. FORGET ALL AND RESET! + */ + if (!tp->rx_opt.saw_tstamp && tp->srtt) { + tp->srtt = 0; + tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + } } - goto cwnd; + tp->snd_cwnd = tcp_init_cwnd(tp, dst); + tp->snd_cwnd_stamp = tcp_time_stamp; } static void tcp_update_reordering(struct sock *sk, const int metric, @@ -4861,7 +4863,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk) return 0; /* If we are under soft global TCP memory pressure, do not expand. */ - if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) + if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) return 0; /* If we filled the congestion window, do not expand. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8f8527d41682..856f68466d49 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) !icsk->icsk_backoff) break; + if (sock_owned_by_user(sk)) + break; + icsk->icsk_backoff--; inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << icsk->icsk_backoff; @@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (remaining) { inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, TCP_RTO_MAX); - } else if (sock_owned_by_user(sk)) { - /* RTO revert clocked out retransmission, - * but socket is locked. Will defer. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - HZ/20, TCP_RTO_MAX); } else { /* RTO revert clocked out retransmission. * Will retransmit now */ @@ -1212,12 +1210,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { }; #endif -static struct timewait_sock_ops tcp_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct tcp_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, -}; - int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; @@ -1349,7 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && - peer->v4daddr == saddr) { + peer->daddr.a4 == saddr) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > @@ -1444,7 +1436,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); - newtp->advmss = dst_metric(dst, RTAX_ADVMSS); + newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; @@ -1765,64 +1757,40 @@ do_time_wait: goto discard_it; } -/* VJ's idea. Save last timestamp seen from this destination - * and hold it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter synchronized - * state. - */ - -int tcp_v4_remember_stamp(struct sock *sk) +struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) { + struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_get(sk); - struct inet_peer *peer = NULL; - int release_it = 0; + struct inet_peer *peer; if (!rt || rt->rt_dst != inet->inet_daddr) { - peer = inet_getpeer(inet->inet_daddr, 1); - release_it = 1; + peer = inet_getpeer_v4(inet->inet_daddr, 1); + *release_it = true; } else { if (!rt->peer) rt_bind_peer(rt, 1); peer = rt->peer; + *release_it = false; } - if (peer) { - if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; - peer->tcp_ts = tp->rx_opt.ts_recent; - } - if (release_it) - inet_putpeer(peer); - return 1; - } - - return 0; + return peer; } -EXPORT_SYMBOL(tcp_v4_remember_stamp); +EXPORT_SYMBOL(tcp_v4_get_peer); -int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) +void *tcp_v4_tw_get_peer(struct sock *sk) { - struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); - - if (peer) { - const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; - peer->tcp_ts = tcptw->tw_ts_recent; - } - inet_putpeer(peer); - return 1; - } + struct inet_timewait_sock *tw = inet_twsk(sk); - return 0; + return inet_getpeer_v4(tw->tw_daddr, 1); } +EXPORT_SYMBOL(tcp_v4_tw_get_peer); + +static struct timewait_sock_ops tcp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, + .twsk_getpeer = tcp_v4_tw_get_peer, +}; const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, @@ -1830,7 +1798,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, - .remember_stamp = tcp_v4_remember_stamp, + .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, @@ -2032,7 +2000,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) get_req: req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; } - sk = sk_next(st->syn_wait_sk); + sk = sk_nulls_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { @@ -2041,11 +2009,13 @@ get_req: if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - sk = sk_next(sk); + sk = sk_nulls_next(sk); } get_sk: sk_nulls_for_each_from(sk, node) { - if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { + if (!net_eq(sock_net(sk), net)) + continue; + if (sk->sk_family == st->family) { cur = sk; goto out; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 43cf901d7659..80b1f80759ab 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -49,6 +49,56 @@ struct inet_timewait_death_row tcp_death_row = { }; EXPORT_SYMBOL_GPL(tcp_death_row); +/* VJ's idea. Save last timestamp seen from this destination + * and hold it at least for normal timewait interval to use for duplicate + * segment detection in subsequent connections, before they enter synchronized + * state. + */ + +static int tcp_remember_stamp(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_peer *peer; + bool release_it; + + peer = icsk->icsk_af_ops->get_peer(sk, &release_it); + if (peer) { + if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; + peer->tcp_ts = tp->rx_opt.ts_recent; + } + if (release_it) + inet_putpeer(peer); + return 1; + } + + return 0; +} + +static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw) +{ + struct sock *sk = (struct sock *) tw; + struct inet_peer *peer; + + peer = twsk_getpeer(sk); + if (peer) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + + if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; + peer->tcp_ts = tcptw->tw_ts_recent; + } + inet_putpeer(peer); + return 1; + } + return 0; +} + static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -149,14 +199,9 @@ kill_with_rst: tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } - /* I am shamed, but failed to make it more elegant. - * Yes, it is direct reference to IP, which is impossible - * to generalize to IPv6. Taking into account that IPv6 - * do not understand recycling in any case, it not - * a big problem in practice. --ANK */ - if (tw->tw_family == AF_INET && - tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && - tcp_v4_tw_remember_stamp(tw)) + if (tcp_death_row.sysctl_tw_recycle && + tcptw->tw_ts_recent_stamp && + tcp_tw_remember_stamp(tw)) inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, TCP_TIMEWAIT_LEN); else @@ -274,7 +319,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) int recycle_ok = 0; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = icsk->icsk_af_ops->remember_stamp(sk); + recycle_ok = tcp_remember_stamp(sk); if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); @@ -347,7 +392,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n"); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 05b1ecf36763..dc7c096ddfef 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -55,7 +55,7 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; int sysctl_tcp_tso_win_divisor __read_mostly = 3; int sysctl_tcp_mtu_probing __read_mostly = 0; -int sysctl_tcp_base_mss __read_mostly = 512; +int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; @@ -119,9 +119,13 @@ static __u16 tcp_advertise_mss(struct sock *sk) struct dst_entry *dst = __sk_dst_get(sk); int mss = tp->advmss; - if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) { - mss = dst_metric(dst, RTAX_ADVMSS); - tp->advmss = mss; + if (dst) { + unsigned int metric = dst_metric_advmss(dst); + + if (metric < mss) { + mss = metric; + tp->advmss = mss; + } } return (__u16)mss; @@ -224,18 +228,22 @@ void tcp_select_initial_window(int __space, __u32 mss, } } - /* Set initial window to value enough for senders, following RFC5681. */ + /* Set initial window to a value enough for senders starting with + * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place + * a limit on the initial window when mss is larger than 1460. + */ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = rfc3390_bytes_to_packets(mss); - + int init_cwnd = TCP_DEFAULT_INIT_RCVWND; + if (mss > 1460) + init_cwnd = + max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); /* when initializing use the value from init_rcv_wnd * rather than the default from above */ - if (init_rcv_wnd && - (*rcv_wnd > init_rcv_wnd * mss)) - *rcv_wnd = init_rcv_wnd * mss; - else if (*rcv_wnd > init_cwnd * mss) - *rcv_wnd = init_cwnd * mss; + if (init_rcv_wnd) + *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); + else + *rcv_wnd = min(*rcv_wnd, init_cwnd * mss); } /* Set the clamp no higher than max representable value */ @@ -386,27 +394,30 @@ struct tcp_out_options { */ static u8 tcp_cookie_size_check(u8 desired) { - if (desired > 0) { + int cookie_size; + + if (desired > 0) /* previously specified */ return desired; - } - if (sysctl_tcp_cookie_size <= 0) { + + cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); + if (cookie_size <= 0) /* no default specified */ return 0; - } - if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) { + + if (cookie_size <= TCP_COOKIE_MIN) /* value too small, specify minimum */ return TCP_COOKIE_MIN; - } - if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) { + + if (cookie_size >= TCP_COOKIE_MAX) /* value too large, specify maximum */ return TCP_COOKIE_MAX; - } - if (0x1 & sysctl_tcp_cookie_size) { + + if (cookie_size & 1) /* 8-bit multiple, illegal, fix it */ - return (u8)(sysctl_tcp_cookie_size + 0x1); - } - return (u8)sysctl_tcp_cookie_size; + cookie_size++; + + return (u8)cookie_size; } /* Write previously computed TCP options to the packet. @@ -822,8 +833,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, &md5); tcp_header_size = tcp_options_size + sizeof(struct tcphdr); - if (tcp_packets_in_flight(tp) == 0) + if (tcp_packets_in_flight(tp) == 0) { tcp_ca_event(sk, CA_EVENT_TX_START); + skb->ooo_okay = 1; + } else + skb->ooo_okay = 0; skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -1513,6 +1527,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 send_win, cong_win, limit, in_flight; + int win_divisor; if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) goto send_now; @@ -1544,13 +1559,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; - if (sysctl_tcp_tso_win_divisor) { + win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); + if (win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); /* If at least some fraction of a window is available, * just use it. */ - chunk /= sysctl_tcp_tso_win_divisor; + chunk /= win_divisor; if (limit >= chunk) goto send_now; } else { @@ -2415,7 +2431,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_dst_set(skb, dst_clone(dst)); - mss = dst_metric(dst, RTAX_ADVMSS); + mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) mss = tp->rx_opt.user_mss; @@ -2549,7 +2565,7 @@ static void tcp_connect_init(struct sock *sk) if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); - tp->advmss = dst_metric(dst, RTAX_ADVMSS); + tp->advmss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) tp->advmss = tp->rx_opt.user_mss; @@ -2592,6 +2608,7 @@ int tcp_connect(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; + int err; tcp_connect_init(sk); @@ -2614,7 +2631,9 @@ int tcp_connect(struct sock *sk) sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); tp->packets_out += tcp_skb_pcount(buff); - tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); + err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); + if (err == -ECONNREFUSED) + return err; /* We change tp->snd_nxt after the tcp_transmit_skb() call * in order to make this packet get counted in tcpOutSegs. diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 6211e2114173..85ee7eb7e38e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -154,7 +154,7 @@ static int tcpprobe_sprint(char *tbuf, int n) struct timespec tv = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); - return snprintf(tbuf, n, + return scnprintf(tbuf, n, "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec, @@ -174,7 +174,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, return -EINVAL; while (cnt < len) { - char tbuf[128]; + char tbuf[164]; int width; /* Wait for data in buffer */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 28cb2d733a3c..8157b17959ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -110,7 +110,7 @@ struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); -int sysctl_udp_mem[3] __read_mostly; +long sysctl_udp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_udp_mem); int sysctl_udp_rmem_min __read_mostly; @@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min); int sysctl_udp_wmem_min __read_mostly; EXPORT_SYMBOL(sysctl_udp_wmem_min); -atomic_t udp_memory_allocated; +atomic_long_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); #define MAX_UDP_PORTS 65536 @@ -430,7 +430,7 @@ begin: if (result) { exact_match: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, daddr, hnum, dif) < badness)) { @@ -500,7 +500,7 @@ begin: goto begin; if (result) { - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { @@ -890,15 +890,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rt == NULL) { struct flowi fl = { .oif = ipc.oif, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = faddr, - .saddr = saddr, - .tos = tos } }, + .fl4_dst = faddr, + .fl4_src = saddr, + .fl4_tos = tos, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet->inet_sport, - .dport = dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = dport }; struct net *net = sock_net(sk); security_sk_classify_flow(sk, &fl); @@ -1899,6 +1897,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udp_prot); @@ -2228,7 +2227,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) /* Do software UFO. Complete and fill in the UDP checksum as HW cannot * do checksum of UDP packets sent as multiple IP fragments. */ - offset = skb->csum_start - skb_headroom(skb); + offset = skb_checksum_start_offset(skb); csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; *(__sum16 *)(skb->data + offset) = csum_fold(csum); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index ab76aa928fa9..aee9963f7f5a 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -57,6 +57,7 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 6f368413eb0e..534972e114ac 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -56,7 +56,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); ip_select_ident(top_iph, dst->child, NULL); - top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); + top_iph->ttl = ip4_dst_hoplimit(dst->child); top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index dd1fd8c473fc..b057d40addec 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -11,6 +11,7 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/inetdevice.h> +#include <linux/if_tunnel.h> #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> @@ -22,12 +23,8 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, xfrm_address_t *daddr) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .tos = tos, - .daddr = daddr->a4, - }, - }, + .fl4_dst = daddr->a4, + .fl4_tos = tos, }; struct dst_entry *dst; struct rtable *rt; @@ -154,6 +151,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; + + case IPPROTO_GRE: + if (pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags = (__be16 *)xprth; + __be32 *gre_hdr = (__be32 *)xprth; + + if (greflags[0] & GRE_KEY) { + if (greflags[0] & GRE_CSUM) + gre_hdr++; + fl->fl_gre_key = gre_hdr[1]; + } + } + break; + default: fl->fl_ipsec_spi = 0; break; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e048ec62d109..5b189c97c2fc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -98,7 +98,11 @@ #endif #define INFINITY_LIFE_TIME 0xFFFFFFFF -#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b))) + +static inline u32 cstamp_delta(unsigned long cstamp) +{ + return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; +} #define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) #define ADDRCONF_TIMER_FUZZ (HZ / 4) @@ -2665,8 +2669,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - rt6_ifdown(net, dev); - neigh_ifdown(&nd_tbl, dev); + /* Flush routes if device is being removed or it is not loopback */ + if (how || !(dev->flags & IFF_LOOPBACK)) + rt6_ifdown(net, dev); idev = __in6_dev_get(dev); if (idev == NULL) @@ -2740,10 +2745,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Flag it for later restoration when link comes up */ ifa->flags |= IFA_F_TENTATIVE; ifa->state = INET6_IFADDR_STATE_DAD; - - write_unlock_bh(&idev->lock); - - in6_ifa_hold(ifa); } else { list_del(&ifa->if_list); @@ -2758,19 +2759,15 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa->state = INET6_IFADDR_STATE_DEAD; spin_unlock_bh(&ifa->state_lock); - if (state == INET6_IFADDR_STATE_DEAD) - goto put_ifa; - } - - __ipv6_ifa_notify(RTM_DELADDR, ifa); - if (ifa->state == INET6_IFADDR_STATE_DEAD) - atomic_notifier_call_chain(&inet6addr_chain, - NETDEV_DOWN, ifa); - -put_ifa: - in6_ifa_put(ifa); + if (state != INET6_IFADDR_STATE_DEAD) { + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, + NETDEV_DOWN, ifa); + } - write_lock_bh(&idev->lock); + in6_ifa_put(ifa); + write_lock_bh(&idev->lock); + } } list_splice(&keep_list, &idev->addr_list); @@ -3452,10 +3449,8 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, { struct ifa_cacheinfo ci; - ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + ci.cstamp = cstamp_delta(cstamp); + ci.tstamp = cstamp_delta(tstamp); ci.ifa_prefered = preferred; ci.ifa_valid = valid; @@ -3806,8 +3801,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_AUTOCONF] = cnf->autoconf; array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; - array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval; - array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay; + array[DEVCONF_RTR_SOLICIT_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_DELAY] = + jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; #ifdef CONFIG_IPV6_PRIVACY array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; @@ -3821,7 +3818,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; - array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; + array[DEVCONF_RTR_PROBE_INTERVAL] = + jiffies_to_msecs(cnf->rtr_probe_interval); #ifdef CONFIG_IPV6_ROUTE_INFO array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif @@ -3839,6 +3837,15 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; } +static inline size_t inet6_ifla6_size(void) +{ + return nla_total_size(4) /* IFLA_INET6_FLAGS */ + + nla_total_size(sizeof(struct ifla_cacheinfo)) + + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ + + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ +} + static inline size_t inet6_if_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) @@ -3846,13 +3853,7 @@ static inline size_t inet6_if_nlmsg_size(void) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ - + nla_total_size( /* IFLA_PROTINFO */ - nla_total_size(4) /* IFLA_INET6_FLAGS */ - + nla_total_size(sizeof(struct ifla_cacheinfo)) - + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ - + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ - ); + + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */ } static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, @@ -3899,15 +3900,70 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, } } +static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) +{ + struct nlattr *nla; + struct ifla_cacheinfo ci; + + NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); + + ci.max_reasm_len = IPV6_MAXPLEN; + ci.tstamp = cstamp_delta(idev->tstamp); + ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); + ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); + NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); + + nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); + if (nla == NULL) + goto nla_put_failure; + ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); + + /* XXX - MC not implemented */ + + nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); + if (nla == NULL) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); + + nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); + if (nla == NULL) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static size_t inet6_get_link_af_size(const struct net_device *dev) +{ + if (!__in6_dev_get(dev)) + return 0; + + return inet6_ifla6_size(); +} + +static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +{ + struct inet6_dev *idev = __in6_dev_get(dev); + + if (!idev) + return -ENODATA; + + if (inet6_fill_ifla6_attrs(skb, idev) < 0) + return -EMSGSIZE; + + return 0; +} + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 pid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; - struct nlattr *nla; struct ifinfomsg *hdr; struct nlmsghdr *nlh; void *protoinfo; - struct ifla_cacheinfo ci; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) @@ -3934,31 +3990,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, if (protoinfo == NULL) goto nla_put_failure; - NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); - - ci.max_reasm_len = IPV6_MAXPLEN; - ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.reachable_time = idev->nd_parms->reachable_time; - ci.retrans_time = idev->nd_parms->retrans_time; - NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); - - nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); - if (nla == NULL) - goto nla_put_failure; - ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); - - /* XXX - MC not implemented */ - - nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (inet6_fill_ifla6_attrs(skb, idev) < 0) goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); - - nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (nla == NULL) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); nla_nest_end(skb, protoinfo); return nlmsg_end(skb, nlh); @@ -4025,11 +4058,11 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC); return; errout: if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err); } static inline size_t inet6_prefix_nlmsg_size(void) @@ -4629,6 +4662,12 @@ int unregister_inet6addr_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_inet6addr_notifier); +static struct rtnl_af_ops inet6_ops = { + .family = AF_INET6, + .fill_link_af = inet6_fill_link_af, + .get_link_af_size = inet6_get_link_af_size, +}; + /* * Init / cleanup code */ @@ -4680,6 +4719,10 @@ int __init addrconf_init(void) addrconf_verify(0); + err = rtnl_af_register(&inet6_ops); + if (err < 0) + goto errout_af; + err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo); if (err < 0) goto errout; @@ -4695,6 +4738,8 @@ int __init addrconf_init(void) return 0; errout: + rtnl_af_unregister(&inet6_ops); +errout_af: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); @@ -4715,6 +4760,8 @@ void addrconf_cleanup(void) rtnl_lock(); + __rtnl_af_unregister(&inet6_ops); + /* clean dev list */ for_each_netdev(&init_net, dev) { if (__in6_dev_get(dev) == NULL) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 54e8e42f7a88..059a3de647db 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -810,7 +810,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) } rcu_read_unlock(); - if (unlikely(IS_ERR(segs))) + if (IS_ERR(segs)) goto out; for (skb = segs; skb; skb = skb->next) { diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ee9b93bdd6a2..1b5c9825743b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -49,6 +49,8 @@ struct esp_skb_cb { #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) +static u32 esp6_get_mtu(struct xfrm_state *x, int mtu); + /* * Allocate an AEAD request structure with extra space for SG and IV. * @@ -140,6 +142,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int blksize; int clen; int alen; + int plen; + int tfclen; int nfrags; u8 *iv; u8 *tail; @@ -148,18 +152,26 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ err = -ENOMEM; - /* Round to block size */ - clen = skb->len; - aead = esp->aead; alen = crypto_aead_authsize(aead); + tfclen = 0; + if (x->tfcpad) { + struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); + u32 padto; + + padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached)); + if (skb->len < padto) + tfclen = padto - skb->len; + } blksize = ALIGN(crypto_aead_blocksize(aead), 4); - clen = ALIGN(clen + 2, blksize); + clen = ALIGN(skb->len + 2 + tfclen, blksize); if (esp->padlen) clen = ALIGN(clen, esp->padlen); + plen = clen - skb->len - tfclen; - if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0) + err = skb_cow_data(skb, tfclen + plen + alen, &trailer); + if (err < 0) goto error; nfrags = err; @@ -174,13 +186,17 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) /* Fill padding... */ tail = skb_tail_pointer(trailer); + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } do { int i; - for (i=0; i<clen-skb->len - 2; i++) + for (i = 0; i < plen - 2; i++) tail[i] = i + 1; } while (0); - tail[clen-skb->len - 2] = (clen - skb->len) - 2; - tail[clen - skb->len - 1] = *skb_mac_header(skb); + tail[plen - 2] = plen - 2; + tail[plen - 1] = *skb_mac_header(skb); pskb_put(skb, trailer, clen - skb->len + alen); skb_push(skb, -skb_network_offset(skb)); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a1628023bd1..e46305d1815a 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,24 +54,54 @@ int inet6_csk_bind_conflict(const struct sock *sk, EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); +struct dst_entry *inet6_csk_route_req(struct sock *sk, + const struct request_sock *req) +{ + struct inet6_request_sock *treq = inet6_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + struct in6_addr *final_p, final; + struct dst_entry *dst; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); + final_p = fl6_update_dst(&fl, np->opt, &final); + ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); + fl.oif = sk->sk_bound_dev_if; + fl.mark = sk->sk_mark; + fl.fl_ip_dport = inet_rsk(req)->rmt_port; + fl.fl_ip_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, &fl); + + if (ip6_dst_lookup(sk, &dst, &fl)) + return NULL; + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + return NULL; + + return dst; +} + /* * request_sock (formerly open request) hash tables. */ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, const u32 rnd, const u16 synq_hsize) { - u32 a = (__force u32)raddr->s6_addr32[0]; - u32 b = (__force u32)raddr->s6_addr32[1]; - u32 c = (__force u32)raddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += (__force u32)raddr->s6_addr32[3]; - b += (__force u32)rport; - __jhash_mix(a, b, c); + u32 c; + + c = jhash_3words((__force u32)raddr->s6_addr32[0], + (__force u32)raddr->s6_addr32[1], + (__force u32)raddr->s6_addr32[2], + rnd); + + c = jhash_2words((__force u32)raddr->s6_addr32[3], + (__force u32)rport, + c); return c & (synq_hsize - 1); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 99157b4cd56e..94b5bf132b2e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,7 +56,7 @@ #include <net/checksum.h> #include <linux/mroute6.h> -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); int __ip6_local_out(struct sk_buff *skb) { @@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb) return -EINVAL; } -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) -{ - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; - - return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); -} - static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || @@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2a59610c2a58..4f4483e697bd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -58,8 +58,6 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); -#define IPV6_TLV_TEL_DST_SIZE 8 - #ifdef IP6_TNL_DEBUG #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) #else @@ -1175,6 +1173,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) sizeof (struct ipv6hdr); dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu-=8; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1363,12 +1363,17 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { + struct ip6_tnl *t; + dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); + t = netdev_priv(dev); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu-=8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6f32ffce7022..9fab274019c0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1843,9 +1843,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, fl = (struct flowi) { .oif = vif->link, - .nl_u = { .ip6_u = - { .daddr = ipv6h->daddr, } - } + .fl6_dst = ipv6h->daddr, }; dst = ip6_route_output(net, NULL, &fl); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d1444b95ad7e..49f986d626a0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; /* Big mc list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_mc_lock); +static DEFINE_SPINLOCK(ipv6_sk_mc_lock); static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); @@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; * socket join on multicast group */ +#define for_each_pmc_rcu(np, pmc) \ + for (pmc = rcu_dereference(np->ipv6_mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next)) + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct net_device *dev = NULL; @@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - read_lock_bh(&ipv6_sk_mc_lock); - for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); return -EADDRINUSE; } } - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); @@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return err; } - write_lock_bh(&ipv6_sk_mc_lock); + spin_lock(&ipv6_sk_mc_lock); mc_lst->next = np->ipv6_mc_list; - np->ipv6_mc_list = mc_lst; - write_unlock_bh(&ipv6_sk_mc_lock); + rcu_assign_pointer(np->ipv6_mc_list, mc_lst); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_unlock(); return 0; } +static void ipv6_mc_socklist_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ipv6_mc_socklist, rcu)); +} /* * socket leave on multicast group */ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc_lst, **lnk; + struct ipv6_mc_socklist *mc_lst; + struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { + spin_lock(&ipv6_sk_mc_lock); + for (lnk = &np->ipv6_mc_list; + (mc_lst = rcu_dereference_protected(*lnk, + lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; + lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { struct net_device *dev; *lnk = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); return 0; } } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); return -EADDRNOTAVAIL; } @@ -257,7 +271,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, return NULL; idev = __in6_dev_get(dev); if (!idev) - return NULL;; + return NULL; read_lock_bh(&idev->lock); if (idev->dead) { read_unlock_bh(&idev->lock); @@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk) struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - while ((mc_lst = np->ipv6_mc_list) != NULL) { + spin_lock(&ipv6_sk_mc_lock); + while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, + lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - write_lock_bh(&ipv6_sk_mc_lock); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); + + spin_lock(&ipv6_sk_mc_lock); } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); } int ip6_mc_source(int add, int omode, struct sock *sk, @@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, err = -EADDRNOTAVAIL; - read_lock(&ipv6_sk_mc_lock); - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, done: if (pmclocked) write_unlock(&pmc->sflock); - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) dev = idev->dev; err = 0; - read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; goto done; } - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) write_unlock(&pmc->sflock); err = 0; done: - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * so reading the list is safe. */ - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) @@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, struct ip6_sf_socklist *psl; int rv = 1; - read_lock(&ipv6_sk_mc_lock); - for (mc = np->ipv6_mc_list; mc; mc = mc->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc) { if (ipv6_addr_equal(&mc->addr, mc_addr)) break; } if (!mc) { - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return 1; } read_lock(&mc->sflock); @@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, rv = 0; } read_unlock(&mc->sflock); - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return rv; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 998d6d27e7cf..2342545a5ee9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -141,18 +141,18 @@ struct neigh_table nd_tbl = { .proxy_redo = pndisc_redo, .id = "ndisc_cache", .parms = { - .tbl = &nd_tbl, - .base_reachable_time = 30 * HZ, - .retrans_time = 1 * HZ, - .gc_staletime = 60 * HZ, - .reachable_time = 30 * HZ, - .delay_probe_time = 5 * HZ, - .queue_len = 3, - .ucast_probes = 3, - .mcast_probes = 3, - .anycast_delay = 1 * HZ, - .proxy_delay = (8 * HZ) / 10, - .proxy_qlen = 64, + .tbl = &nd_tbl, + .base_reachable_time = ND_REACHABLE_TIME, + .retrans_time = ND_RETRANS_TIMER, + .gc_staletime = 60 * HZ, + .reachable_time = ND_REACHABLE_TIME, + .delay_probe_time = 5 * HZ, + .queue_len = 3, + .ucast_probes = 3, + .mcast_probes = 3, + .anycast_delay = 1 * HZ, + .proxy_delay = (8 * HZ) / 10, + .proxy_qlen = 64, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, @@ -1259,7 +1259,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) - rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } skip_defrtr: @@ -1377,7 +1378,7 @@ skip_linkparms: in6_dev->cnf.mtu6 = mtu; if (rt) - rt->dst.metrics[RTAX_MTU-1] = mtu; + dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 7155b2451d7c..35915e8617f0 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -18,10 +18,8 @@ int ip6_route_me_harder(struct sk_buff *skb) struct flowi fl = { .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .mark = skb->mark, - .nl_u = - { .ip6_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, } }, + .fl6_dst = iph->daddr, + .fl6_src = iph->saddr, }; dst = ip6_route_output(net, skb->sk, &fl); diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 0a432c9b0795..abfee91ce816 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -11,13 +11,13 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o # objects for l3 independent conntrack -nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o +nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o # defrag -nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o +nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # matches diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 455582384ece..0c9973a657fb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1093,6 +1093,7 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(AF_INET6, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1696,6 +1697,7 @@ translate_compat_table(struct net *net, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); + xt_compat_init_offsets(AF_INET6, number); /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 2933396e0281..bf998feac14e 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -124,7 +124,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); ip6h->version = 6; - ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); + ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index eb9f1c03de3f..66e003e1fcd5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -286,7 +286,7 @@ found: /* Check for overlap with preceding fragment. */ if (prev && - (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) + (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset) goto discard_fq; /* Look for overlap with succeeding segment. */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0f2766453759..07beeb06f752 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -104,26 +104,22 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, const struct in6_addr *daddr, u32 rnd) { - u32 a, b, c; - - a = (__force u32)saddr->s6_addr32[0]; - b = (__force u32)saddr->s6_addr32[1]; - c = (__force u32)saddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += (__force u32)saddr->s6_addr32[3]; - b += (__force u32)daddr->s6_addr32[0]; - c += (__force u32)daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += (__force u32)daddr->s6_addr32[2]; - b += (__force u32)daddr->s6_addr32[3]; - c += (__force u32)id; - __jhash_mix(a, b, c); + u32 c; + + c = jhash_3words((__force u32)saddr->s6_addr32[0], + (__force u32)saddr->s6_addr32[1], + (__force u32)saddr->s6_addr32[2], + rnd); + + c = jhash_3words((__force u32)saddr->s6_addr32[3], + (__force u32)daddr->s6_addr32[0], + (__force u32)daddr->s6_addr32[1], + c); + + c = jhash_3words((__force u32)daddr->s6_addr32[2], + (__force u32)daddr->s6_addr32[3], + (__force u32)id, + c); return c & (INETFRAGS_HASHSZ - 1); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fc328339be99..373bd0416f69 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -76,6 +76,8 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); +static unsigned int ip6_default_advmss(const struct dst_entry *dst); +static unsigned int ip6_default_mtu(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -103,6 +105,8 @@ static struct dst_ops ip6_dst_ops_template = { .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, + .default_advmss = ip6_default_advmss, + .default_mtu = ip6_default_mtu, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -129,7 +133,6 @@ static struct rt6_info ip6_null_entry_template = { .__use = 1, .obsolete = -1, .error = -ENETUNREACH, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, }, @@ -150,7 +153,6 @@ static struct rt6_info ip6_prohibit_entry_template = { .__use = 1, .obsolete = -1, .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, }, @@ -166,7 +168,6 @@ static struct rt6_info ip6_blk_hole_entry_template = { .__use = 1, .obsolete = -1, .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = dst_discard, .output = dst_discard, }, @@ -188,11 +189,29 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + struct inet_peer *peer = rt->rt6i_peer; if (idev != NULL) { rt->rt6i_idev = NULL; in6_dev_put(idev); } + if (peer) { + BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); + rt->rt6i_peer = NULL; + inet_putpeer(peer); + } +} + +void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer *peer; + + if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE))) + return; + + peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); + if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) + inet_putpeer(peer); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -558,11 +577,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, { struct flowi fl = { .oif = oif, - .nl_u = { - .ip6_u = { - .daddr = *daddr, - }, - }, + .fl6_dst = *daddr, }; struct dst_entry *dst; int flags = strict ? RT6_LOOKUP_F_IFACE : 0; @@ -778,13 +793,9 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - }, - }, + .fl6_dst = iph->daddr, + .fl6_src = iph->saddr, + .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, .mark = skb->mark, .proto = iph->nexthdr, }; @@ -834,7 +845,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(new, &ort->dst); new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); @@ -918,18 +929,22 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { + u32 features = dst_metric(dst, RTAX_FEATURES); mtu = IPV6_MIN_MTU; - dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(dst, RTAX_FEATURES, features); } - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } -static int ipv6_get_mtu(struct net_device *dev); - -static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) +static unsigned int ip6_default_advmss(const struct dst_entry *dst) { + struct net_device *dev = dst->dev; + unsigned int mtu = dst_mtu(dst); + struct net *net = dev_net(dev); + mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) @@ -946,6 +961,20 @@ static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) return mtu; } +static unsigned int ip6_default_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = IPV6_MIN_MTU; + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + static struct dst_entry *icmp6_dst_gc_list; static DEFINE_SPINLOCK(icmp6_dst_lock); @@ -979,9 +1008,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; atomic_set(&rt->dst.__refcnt, 1); - rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ @@ -1080,23 +1107,10 @@ out: Remove it only when all the things will work! */ -static int ipv6_get_mtu(struct net_device *dev) -{ - int mtu = IPV6_MIN_MTU; - struct inet6_dev *idev; - - rcu_read_lock(); - idev = __in6_dev_get(dev); - if (idev) - mtu = idev->cnf.mtu6; - rcu_read_unlock(); - return mtu; -} - int ip6_dst_hoplimit(struct dst_entry *dst) { - int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hoplimit < 0) { + int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + if (hoplimit == 0) { struct net_device *dev = dst->dev; struct inet6_dev *idev; @@ -1110,6 +1124,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) } return hoplimit; } +EXPORT_SYMBOL(ip6_dst_hoplimit); /* * @@ -1295,17 +1310,11 @@ install_route: goto out; } - rt->dst.metrics[type - 1] = nla_get_u32(nla); + dst_metric_set(&rt->dst, type, nla_get_u32(nla)); } } } - if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) - rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!dst_mtu(&rt->dst)) - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!dst_metric(&rt->dst, RTAX_ADVMSS)) - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1463,12 +1472,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = *dest, - .saddr = *src, - }, - }, + .fl6_dst = *dest, + .fl6_src = *src, }, }; @@ -1534,10 +1539,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); - /* Reset pmtu, it may be better */ - nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->dst)); if (ip6_ins_rt(nrt)) goto out; @@ -1565,11 +1566,16 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, { struct rt6_info *rt, *nrt; int allfrag = 0; - +again: rt = rt6_lookup(net, daddr, saddr, ifindex, 0); if (rt == NULL) return; + if (rt6_check_expired(rt)) { + ip6_del_rt(rt); + goto again; + } + if (pmtu >= dst_mtu(&rt->dst)) goto out; @@ -1596,9 +1602,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, would return automatically. */ if (rt->rt6i_flags & RTF_CACHE) { - rt->dst.metrics[RTAX_MTU-1] = pmtu; - if (allfrag) - rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&rt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_FEATURES, features); + } dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; @@ -1615,9 +1624,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_alloc_clone(rt, daddr); if (nrt) { - nrt->dst.metrics[RTAX_MTU-1] = pmtu; - if (allfrag) - nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_FEATURES, features); + } /* According to RFC 1981, detecting PMTU increase shouldn't be * happened within 5 mins, the recommended timer is 10 mins. @@ -1668,7 +1680,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; - memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->dst.dev = ort->dst.dev; if (rt->dst.dev) @@ -1945,8 +1957,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); struct neighbour *neigh; - if (rt == NULL) + if (rt == NULL) { + if (net_ratelimit()) + pr_warning("IPv6: Maximum number of routes reached," + " consider increasing route/max_size.\n"); return ERR_PTR(-ENOMEM); + } dev_hold(net->loopback_dev); in6_dev_hold(idev); @@ -1956,9 +1972,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); - rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; @@ -1995,11 +2009,11 @@ struct arg_dev_net { static int fib6_ifdown(struct rt6_info *rt, void *arg) { - struct net_device *dev = ((struct arg_dev_net *)arg)->dev; - struct net *net = ((struct arg_dev_net *)arg)->net; + const struct arg_dev_net *adn = arg; + const struct net_device *dev = adn->dev; - if (((void *)rt->rt6i_dev == dev || dev == NULL) && - rt != net->ipv6.ip6_null_entry) { + if ((rt->rt6i_dev == dev || dev == NULL) && + rt != adn->net->ipv6.ip6_null_entry) { RT6_TRACE("deleted by ifdown %p\n", rt); return -1; } @@ -2027,7 +2041,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; - struct net *net = dev_net(arg->dev); /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -2058,8 +2071,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) (dst_mtu(&rt->dst) >= arg->mtu || (dst_mtu(&rt->dst) < arg->mtu && dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - rt->dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); } return 0; } @@ -2285,7 +2297,7 @@ static int rt6_fill_node(struct net *net, NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; if (rt->dst.neighbour) @@ -2461,8 +2473,6 @@ static int ip6_route_dev_notify(struct notifier_block *this, #ifdef CONFIG_PROC_FS -#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) - struct rt6_proc_arg { char *buffer; @@ -2678,6 +2688,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2688,6 +2699,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2697,6 +2709,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); #endif net->ipv6.sysctl.flush_delay = 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d6bfaec3bbbf..8ce38f10a547 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -606,8 +606,9 @@ static int ipip6_rcv(struct sk_buff *skb) return 0; } - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + /* no tunnel matched, let upstream know, ipsec may handle it */ rcu_read_unlock(); + return 1; out: kfree_skb(skb); return 0; @@ -730,10 +731,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) } }, + struct flowi fl = { .fl4_dst = dst, + .fl4_src = tiph->saddr, + .fl4_tos = RT_TOS(tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { @@ -855,10 +855,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) } }, + struct flowi fl = { .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; struct rtable *rt; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7e41e2cbb85e..20aa95e37359 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -130,6 +130,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct rt6_info *rt; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -280,6 +281,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(sk, dst, NULL, NULL); + rt = (struct rt6_info *) dst; + if (tcp_death_row.sysctl_tw_recycle && + !tp->rx_opt.ts_recent_stamp && + ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { + struct inet_peer *peer = rt6_get_peer(rt); + /* + * VJ's idea. We save last timestamp seen from + * the destination in peer table, when entering state + * TIME-WAIT * and initialize rx_opt.ts_recent from it, + * when trying new connection. + */ + if (peer) { + inet_peer_refcheck(peer); + if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { + tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; + tp->rx_opt.ts_recent = peer->tcp_ts; + } + } + } + icsk->icsk_ext_hdr_len = 0; if (np->opt) icsk->icsk_ext_hdr_len = (np->opt->opt_flen + @@ -906,12 +927,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { }; #endif -static struct timewait_sock_ops tcp6_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, -}; - static void __tcp_v6_send_check(struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -1176,6 +1191,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; + struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1273,6 +1289,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, tcp_hdr(skb)); if (!isn) { + struct inet_peer *peer = NULL; + if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -1285,13 +1303,57 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!sk->sk_bound_dev_if && ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) treq->iif = inet6_iif(skb); - if (!want_cookie) { - isn = tcp_v6_init_sequence(skb); - } else { + + if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; + goto have_isn; } + + /* VJ's idea. We save last timestamp seen + * from the destination in peer table, when entering + * state TIME-WAIT, and check against it before + * accepting new connection request. + * + * If "isn" is not zero, this request hit alive + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. + */ + if (tmp_opt.saw_tstamp && + tcp_death_row.sysctl_tw_recycle && + (dst = inet6_csk_route_req(sk, req)) != NULL && + (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && + ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, + &treq->rmt_addr)) { + inet_peer_refcheck(peer); + if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && + (s32)(peer->tcp_ts - req->ts_recent) > + TCP_PAWS_WINDOW) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + goto drop_and_release; + } + } + /* Kill the following clause, if you dislike this way. */ + else if (!sysctl_tcp_syncookies && + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (sysctl_max_syn_backlog >> 2)) && + (!peer || !peer->tcp_ts_stamp) && + (!dst || !dst_metric(dst, RTAX_RTT))) { + /* Without syncookies last quarter of + * backlog is filled with destinations, + * proven to be alive. + * It means that we continue to communicate + * to destinations, already remembered + * to the moment of synflood. + */ + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", + &treq->rmt_addr, ntohs(tcp_hdr(skb)->source)); + goto drop_and_release; + } + + isn = tcp_v6_init_sequence(skb); } +have_isn: tcp_rsk(req)->snt_isn = isn; security_inet_conn_request(sk, skb, req); @@ -1304,6 +1366,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; +drop_and_release: + dst_release(dst); drop_and_free: reqsk_free(req); drop: @@ -1382,28 +1446,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (dst == NULL) { - struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - final_p = fl6_update_dst(&fl, opt, &final); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); - - if (ip6_dst_lookup(sk, &dst, &fl)) - goto out; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + if (!dst) { + dst = inet6_csk_route_req(sk, req); + if (!dst) goto out; } @@ -1476,7 +1521,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); - newtp->advmss = dst_metric(dst, RTAX_ADVMSS); + newtp->advmss = dst_metric_advmss(dst); tcp_initialize_rcv_mss(newsk); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; @@ -1818,19 +1863,51 @@ do_time_wait: goto discard_it; } -static int tcp_v6_remember_stamp(struct sock *sk) +static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) { - /* Alas, not yet... */ - return 0; + struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_peer *peer; + + if (!rt || + !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { + peer = inet_getpeer_v6(&np->daddr, 1); + *release_it = true; + } else { + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + peer = rt->rt6i_peer; + *release_it = false; + } + + return peer; } +static void *tcp_v6_tw_get_peer(struct sock *sk) +{ + struct inet6_timewait_sock *tw6 = inet6_twsk(sk); + struct inet_timewait_sock *tw = inet_twsk(sk); + + if (tw->tw_family == AF_INET) + return tcp_v4_tw_get_peer(sk); + + return inet_getpeer_v6(&tw6->tw_v6_daddr, 1); +} + +static struct timewait_sock_ops tcp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, + .twsk_getpeer = tcp_v6_tw_get_peer, +}; + static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .remember_stamp = tcp_v6_remember_stamp, + .get_peer = tcp_v6_get_peer, .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, @@ -1862,7 +1939,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .remember_stamp = tcp_v4_remember_stamp, + .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91def93bec85..9a009c66c8a3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,8 +54,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; - __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); + __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); @@ -227,7 +227,7 @@ begin: if (result) { exact_match: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, daddr, hnum, dif) < badness)) { @@ -294,7 +294,7 @@ begin: goto begin; if (result) { - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, hnum, saddr, sport, daddr, dport, dif) < badness)) { @@ -602,7 +602,7 @@ static void flush_stack(struct sock **stack, unsigned int count, sk = stack[i]; if (skb1) { - if (sk_rcvqueues_full(sk, skb)) { + if (sk_rcvqueues_full(sk, skb1)) { kfree_skb(skb1); goto drop; } @@ -1477,6 +1477,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5f48fadc27f7..986c4de5292e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -55,6 +55,7 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udplite6_protosw = { diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index b809812c8d30..645cb968d450 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -14,6 +14,7 @@ #include <net/dsfield.h> #include <net/dst.h> #include <net/inet_ecn.h> +#include <net/ip6_route.h> #include <net/ipv6.h> #include <net/xfrm.h> @@ -53,7 +54,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); - top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); + top_iph->hop_limit = ip6_dst_hoplimit(dst->child); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); return 0; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6434bd5ce088..8e688b3de9ab 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -17,6 +17,7 @@ #include <linux/netfilter_ipv6.h> #include <net/dst.h> #include <net/ipv6.h> +#include <net/ip6_route.h> #include <net/xfrm.h> int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, @@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb) return xfrm_output(skb); } +static int __xfrm6_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + if ((x && x->props.mode == XFRM_MODE_TUNNEL) && + ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + dst_allfrag(skb_dst(skb)))) { + return ip6_fragment(skb, xfrm6_output_finish); + } + return xfrm6_output_finish(skb); +} + int xfrm6_output(struct sk_buff *skb) { return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, - skb_dst(skb)->dev, xfrm6_output_finish); + skb_dst(skb)->dev, __xfrm6_output); } diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 7f097989cde2..c9890e25cd4c 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -45,7 +45,6 @@ #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> -#include <linux/smp_lock.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/slab.h> @@ -2281,6 +2280,16 @@ static int irda_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case IRLMP_ENUMDEVICES: + + /* Offset to first device entry */ + offset = sizeof(struct irda_device_list) - + sizeof(struct irda_device_info); + + if (len < offset) { + err = -EINVAL; + goto out; + } + /* Ask lmp for the current discovery log */ discoveries = irlmp_get_discoveries(&list.len, self->mask.word, self->nslots); @@ -2291,15 +2300,9 @@ static int irda_getsockopt(struct socket *sock, int level, int optname, } /* Write total list length back to client */ - if (copy_to_user(optval, &list, - sizeof(struct irda_device_list) - - sizeof(struct irda_device_info))) + if (copy_to_user(optval, &list, offset)) err = -EFAULT; - /* Offset to first device entry */ - offset = sizeof(struct irda_device_list) - - sizeof(struct irda_device_info); - /* Copy the list itself - watch for overflow */ if (list.len > 2048) { err = -EINVAL; diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile index 48689458c086..ab23b5ba7e33 100644 --- a/net/irda/ircomm/Makefile +++ b/net/irda/ircomm/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o -ircomm-objs := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o -ircomm-tty-objs := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o +ircomm-y := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o +ircomm-tty-y := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile index 77549bc8641b..94eefbc8e6b9 100644 --- a/net/irda/irlan/Makefile +++ b/net/irda/irlan/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IRLAN) += irlan.o -irlan-objs := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o +irlan-y := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile index b3ee01e0def3..61c365c8a2a0 100644 --- a/net/irda/irnet/Makefile +++ b/net/irda/irnet/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IRNET) += irnet.o -irnet-objs := irnet_ppp.o irnet_irda.o +irnet-y := irnet_ppp.o irnet_irda.o diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7fa86373de41..7c567b8aa89a 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -15,7 +15,6 @@ #include <linux/sched.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 285761e77d90..f6054f9ccbe3 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -550,22 +550,30 @@ EXPORT_SYMBOL(irttp_close_tsap); */ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb) { + int ret; + IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); IRDA_ASSERT(skb != NULL, return -1;); IRDA_DEBUG(4, "%s()\n", __func__); + /* Take shortcut on zero byte packets */ + if (skb->len == 0) { + ret = 0; + goto err; + } + /* Check that nothing bad happens */ - if ((skb->len == 0) || (!self->connected)) { - IRDA_DEBUG(1, "%s(), No data, or not connected\n", - __func__); + if (!self->connected) { + IRDA_WARNING("%s(), Not connected\n", __func__); + ret = -ENOTCONN; goto err; } if (skb->len > self->max_seg_size) { - IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n", - __func__); + IRDA_ERROR("%s(), UData is too large for IrLAP!\n", __func__); + ret = -EMSGSIZE; goto err; } @@ -576,7 +584,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb) err: dev_kfree_skb(skb); - return -1; + return ret; } EXPORT_SYMBOL(irttp_udata_request); @@ -599,9 +607,15 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb) IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__, skb_queue_len(&self->tx_queue)); + /* Take shortcut on zero byte packets */ + if (skb->len == 0) { + ret = 0; + goto err; + } + /* Check that nothing bad happens */ - if ((skb->len == 0) || (!self->connected)) { - IRDA_WARNING("%s: No data, or not connected\n", __func__); + if (!self->connected) { + IRDA_WARNING("%s: Not connected\n", __func__); ret = -ENOTCONN; goto err; } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index f7db676de77d..1ee5dab3cfae 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -36,6 +36,7 @@ #define KMSG_COMPONENT "iucv" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/kernel_stat.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/spinlock.h> @@ -1804,6 +1805,7 @@ static void iucv_external_interrupt(unsigned int ext_int_code, struct iucv_irq_data *p; struct iucv_irq_list *work; + kstat_cpu(smp_processor_id()).irqs[EXTINT_IUC]++; p = iucv_irq_data[smp_processor_id()]; if (p->ippathid >= iucv_max_pathid) { WARN_ON(p->ippathid >= iucv_max_pathid); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0bf6a59545ab..110efb704c9b 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -476,15 +476,13 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m { struct flowi fl = { .oif = sk->sk_bound_dev_if, - .nl_u = { .ip4_u = { - .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = { - .sport = inet->inet_sport, - .dport = inet->inet_dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport }; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times @@ -674,4 +672,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP over IP"); MODULE_VERSION("1.0"); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP); + +/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like + * enums + */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); diff --git a/net/lapb/Makefile b/net/lapb/Makefile index 53f7c90db163..fff797dfc88c 100644 --- a/net/lapb/Makefile +++ b/net/lapb/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_LAPB) += lapb.o -lapb-objs := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o +lapb-y := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 582612998211..dfd3a648a551 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -316,9 +316,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (unlikely(addr->sllc_family != AF_LLC)) goto out; rc = -ENODEV; - rtnl_lock(); + rcu_read_lock(); if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); if (llc->dev) { if (!addr->sllc_arphrd) addr->sllc_arphrd = llc->dev->type; @@ -329,14 +329,15 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) !llc_mac_match(addr->sllc_mac, llc->dev->dev_addr)) { rc = -EINVAL; - dev_put(llc->dev); llc->dev = NULL; } } } else - llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, + llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - rtnl_unlock(); + if (llc->dev) + dev_hold(llc->dev); + rcu_read_unlock(); if (!llc->dev) goto out; if (!addr->sllc_sap) { diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 4d6f8653ec88..9109262abd24 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,6 +6,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRC32 + select AVERAGE ---help--- This option enables the hardware independent IEEE 802.11 networking stack. @@ -92,7 +93,7 @@ config MAC80211_MESH config MAC80211_LEDS bool "Enable LED triggers" depends on MAC80211 - select NEW_LEDS + depends on LEDS_CLASS select LEDS_TRIGGERS ---help--- This option enables a few LED triggers for different diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index d2b03e0851ef..4bd6ef0be380 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -147,6 +147,5 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]) void ieee80211_aes_key_free(struct crypto_cipher *tfm) { - if (tfm) - crypto_free_cipher(tfm); + crypto_free_cipher(tfm); } diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index b4d66cca76d6..d502b2684a66 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -128,6 +128,5 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]) void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm) { - if (tfm) - crypto_free_cipher(tfm); + crypto_free_cipher(tfm); } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 720b7a84af59..f138b195d657 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -129,9 +129,7 @@ static void sta_rx_agg_reorder_timer_expired(unsigned long data) timer_to_tid[0]); rcu_read_lock(); - spin_lock(&sta->lock); ieee80211_release_reorder_timeout(sta, *ptid); - spin_unlock(&sta->lock); rcu_read_unlock(); } @@ -256,7 +254,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } /* prepare A-MPDU MLME for Rx aggregation */ - tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); + tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL); if (!tid_agg_rx) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) @@ -280,9 +278,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* prepare reordering buffer */ tid_agg_rx->reorder_buf = - kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); + kcalloc(buf_size, sizeof(struct sk_buff *), GFP_KERNEL); tid_agg_rx->reorder_time = - kcalloc(buf_size, sizeof(unsigned long), GFP_ATOMIC); + kcalloc(buf_size, sizeof(unsigned long), GFP_KERNEL); if (!tid_agg_rx->reorder_buf || !tid_agg_rx->reorder_time) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index d4679b265ba8..9cc472c6a6a5 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -342,10 +342,11 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, tid_tx->dialog_token, start_seq_num, - 0x40, 5000); + 0x40, tid_tx->timeout); } -int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) +int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, + u16 timeout) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -420,6 +421,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) skb_queue_head_init(&tid_tx->pending); __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); + tid_tx->timeout = timeout; + /* Tx timer */ tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired; tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid]; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 18bd0e550600..4bc8a9250cfd 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -19,9 +19,10 @@ #include "rate.h" #include "mesh.h" -static int ieee80211_add_iface(struct wiphy *wiphy, char *name, - enum nl80211_iftype type, u32 *flags, - struct vif_params *params) +static struct net_device *ieee80211_add_iface(struct wiphy *wiphy, char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; @@ -29,12 +30,15 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, int err; err = ieee80211_if_add(local, name, &dev, type, params); - if (err || type != NL80211_IFTYPE_MONITOR || !flags) - return err; + if (err) + return ERR_PTR(err); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata->u.mntr_flags = *flags; - return 0; + if (type == NL80211_IFTYPE_MONITOR && flags) { + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata->u.mntr_flags = *flags; + } + + return dev; } static int ieee80211_del_iface(struct wiphy *wiphy, struct net_device *dev) @@ -56,11 +60,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (ret) return ret; - if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len) - ieee80211_sdata_set_mesh_id(sdata, - params->mesh_id_len, - params->mesh_id); - if (type == NL80211_IFTYPE_AP_VLAN && params && params->use_4addr == 0) rcu_assign_pointer(sdata->u.vlan.sta, NULL); @@ -296,11 +295,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx) + u8 key_idx, bool uni, + bool multi) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ieee80211_set_default_key(sdata, key_idx); + ieee80211_set_default_key(sdata, key_idx, uni, multi); return 0; } @@ -343,8 +343,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; sinfo->signal = (s8)sta->last_signal; + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } sinfo->txrate.flags = 0; @@ -983,7 +984,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_get_mesh_params(struct wiphy *wiphy, +static int ieee80211_get_mesh_config(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf) { @@ -999,9 +1000,39 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) return (mask >> (parm-1)) & 0x1; } -static int ieee80211_set_mesh_params(struct wiphy *wiphy, - struct net_device *dev, - const struct mesh_config *nconf, u32 mask) +static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, + const struct mesh_setup *setup) +{ + u8 *new_ie; + const u8 *old_ie; + + /* first allocate the new vendor information element */ + new_ie = NULL; + old_ie = ifmsh->vendor_ie; + + ifmsh->vendor_ie_len = setup->vendor_ie_len; + if (setup->vendor_ie_len) { + new_ie = kmemdup(setup->vendor_ie, setup->vendor_ie_len, + GFP_KERNEL); + if (!new_ie) + return -ENOMEM; + } + + /* now copy the rest of the setup parameters */ + ifmsh->mesh_id_len = setup->mesh_id_len; + memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); + ifmsh->mesh_pp_id = setup->path_sel_proto; + ifmsh->mesh_pm_id = setup->path_metric; + ifmsh->vendor_ie = new_ie; + + kfree(old_ie); + + return 0; +} + +static int ieee80211_update_mesh_config(struct wiphy *wiphy, + struct net_device *dev, u32 mask, + const struct mesh_config *nconf) { struct mesh_config *conf; struct ieee80211_sub_if_data *sdata; @@ -1024,6 +1055,8 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, conf->dot11MeshMaxRetries = nconf->dot11MeshMaxRetries; if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask)) conf->dot11MeshTTL = nconf->dot11MeshTTL; + if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) + conf->dot11MeshTTL = nconf->element_ttl; if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) conf->auto_open_plinks = nconf->auto_open_plinks; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask)) @@ -1050,6 +1083,31 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, return 0; } +static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, + const struct mesh_config *conf, + const struct mesh_setup *setup) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + int err; + + memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config)); + err = copy_mesh_setup(ifmsh, setup); + if (err) + return err; + ieee80211_start_mesh(sdata); + + return 0; +} + +static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + ieee80211_stop_mesh(sdata); + + return 0; +} #endif static int ieee80211_change_bss(struct wiphy *wiphy, @@ -1108,6 +1166,12 @@ static int ieee80211_change_bss(struct wiphy *wiphy, sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; } + if (params->ht_opmode >= 0) { + sdata->vif.bss_conf.ht_operation_mode = + (u16) params->ht_opmode; + changed |= BSS_CHANGED_HT; + } + ieee80211_bss_info_change_notify(sdata, changed); return 0; @@ -1299,6 +1363,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) struct ieee80211_local *local = wiphy_priv(wiphy); int err; + if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { + err = drv_set_frag_threshold(local, wiphy->frag_threshold); + + if (err) + return err; + } + if (changed & WIPHY_PARAM_COVERAGE_CLASS) { err = drv_set_coverage_class(local, wiphy->coverage_class); @@ -1522,6 +1593,37 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return 0; } +static int ieee80211_remain_on_channel_hw(struct ieee80211_local *local, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type chantype, + unsigned int duration, u64 *cookie) +{ + int ret; + u32 random_cookie; + + lockdep_assert_held(&local->mtx); + + if (local->hw_roc_cookie) + return -EBUSY; + /* must be nonzero */ + random_cookie = random32() | 1; + + *cookie = random_cookie; + local->hw_roc_dev = dev; + local->hw_roc_cookie = random_cookie; + local->hw_roc_channel = chan; + local->hw_roc_channel_type = chantype; + local->hw_roc_duration = duration; + ret = drv_remain_on_channel(local, chan, chantype, duration); + if (ret) { + local->hw_roc_channel = NULL; + local->hw_roc_cookie = 0; + } + + return ret; +} + static int ieee80211_remain_on_channel(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, @@ -1530,41 +1632,121 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + if (local->ops->remain_on_channel) { + int ret; + + mutex_lock(&local->mtx); + ret = ieee80211_remain_on_channel_hw(local, dev, + chan, channel_type, + duration, cookie); + local->hw_roc_for_tx = false; + mutex_unlock(&local->mtx); + + return ret; + } return ieee80211_wk_remain_on_channel(sdata, chan, channel_type, duration, cookie); } +static int ieee80211_cancel_remain_on_channel_hw(struct ieee80211_local *local, + u64 cookie) +{ + int ret; + + lockdep_assert_held(&local->mtx); + + if (local->hw_roc_cookie != cookie) + return -ENOENT; + + ret = drv_cancel_remain_on_channel(local); + if (ret) + return ret; + + local->hw_roc_cookie = 0; + local->hw_roc_channel = NULL; + + ieee80211_recalc_idle(local); + + return 0; +} + static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, struct net_device *dev, u64 cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + if (local->ops->cancel_remain_on_channel) { + int ret; + + mutex_lock(&local->mtx); + ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); + mutex_unlock(&local->mtx); + + return ret; + } return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); } +static enum work_done_result +ieee80211_offchan_tx_done(struct ieee80211_work *wk, struct sk_buff *skb) +{ + /* + * Use the data embedded in the work struct for reporting + * here so if the driver mangled the SKB before dropping + * it (which is the only way we really should get here) + * then we don't report mangled data. + * + * If there was no wait time, then by the time we get here + * the driver will likely not have reported the status yet, + * so in that case userspace will have to deal with it. + */ + + if (wk->offchan_tx.wait && wk->offchan_tx.frame) + cfg80211_mgmt_tx_status(wk->sdata->dev, + (unsigned long) wk->offchan_tx.frame, + wk->ie, wk->ie_len, false, GFP_KERNEL); + + return WORK_DONE_DESTROY; +} + static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; + struct ieee80211_work *wk; const struct ieee80211_mgmt *mgmt = (void *)buf; u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | IEEE80211_TX_CTL_REQ_TX_STATUS; + bool is_offchan = false; /* Check that we are on the requested channel for transmission */ if (chan != local->tmp_channel && chan != local->oper_channel) - return -EBUSY; + is_offchan = true; if (channel_type_valid && (channel_type != local->tmp_channel_type && channel_type != local->_oper_channel_type)) + is_offchan = true; + + if (chan == local->hw_roc_channel) { + /* TODO: check channel type? */ + is_offchan = false; + flags |= IEEE80211_TX_CTL_TX_OFFCHAN; + } + + if (is_offchan && !offchan) return -EBUSY; switch (sdata->vif.type) { @@ -1572,6 +1754,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: if (!ieee80211_is_action(mgmt->frame_control) || mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) break; @@ -1598,12 +1781,128 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, IEEE80211_SKB_CB(skb)->flags = flags; skb->dev = sdata->dev; - ieee80211_tx_skb(sdata, skb); *cookie = (unsigned long) skb; + + if (is_offchan && local->ops->remain_on_channel) { + unsigned int duration; + int ret; + + mutex_lock(&local->mtx); + /* + * If the duration is zero, then the driver + * wouldn't actually do anything. Set it to + * 100 for now. + * + * TODO: cancel the off-channel operation + * when we get the SKB's TX status and + * the wait time was zero before. + */ + duration = 100; + if (wait) + duration = wait; + ret = ieee80211_remain_on_channel_hw(local, dev, chan, + channel_type, + duration, cookie); + if (ret) { + kfree_skb(skb); + mutex_unlock(&local->mtx); + return ret; + } + + local->hw_roc_for_tx = true; + local->hw_roc_duration = wait; + + /* + * queue up frame for transmission after + * ieee80211_ready_on_channel call + */ + + /* modify cookie to prevent API mismatches */ + *cookie ^= 2; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; + local->hw_roc_skb = skb; + mutex_unlock(&local->mtx); + + return 0; + } + + /* + * Can transmit right away if the channel was the + * right one and there's no wait involved... If a + * wait is involved, we might otherwise not be on + * the right channel for long enough! + */ + if (!is_offchan && !wait && !sdata->vif.bss_conf.idle) { + ieee80211_tx_skb(sdata, skb); + return 0; + } + + wk = kzalloc(sizeof(*wk) + len, GFP_KERNEL); + if (!wk) { + kfree_skb(skb); + return -ENOMEM; + } + + wk->type = IEEE80211_WORK_OFFCHANNEL_TX; + wk->chan = chan; + wk->sdata = sdata; + wk->done = ieee80211_offchan_tx_done; + wk->offchan_tx.frame = skb; + wk->offchan_tx.wait = wait; + wk->ie_len = len; + memcpy(wk->ie, buf, len); + + ieee80211_add_work(wk); return 0; } +static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_work *wk; + int ret = -ENOENT; + + mutex_lock(&local->mtx); + + if (local->ops->cancel_remain_on_channel) { + cookie ^= 2; + ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); + + if (ret == 0) { + kfree_skb(local->hw_roc_skb); + local->hw_roc_skb = NULL; + } + + mutex_unlock(&local->mtx); + + return ret; + } + + list_for_each_entry(wk, &local->work_list, list) { + if (wk->sdata != sdata) + continue; + + if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX) + continue; + + if (cookie != (unsigned long) wk->offchan_tx.frame) + continue; + + wk->timeout = jiffies; + + ieee80211_queue_work(&local->hw, &local->work_work); + ret = 0; + break; + } + mutex_unlock(&local->mtx); + + return ret; +} + static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, struct net_device *dev, u16 frame_type, bool reg) @@ -1621,6 +1920,23 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, ieee80211_queue_work(&local->hw, &local->reconfig_filter); } +static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (local->started) + return -EOPNOTSUPP; + + return drv_set_antenna(local, tx_ant, rx_ant); +} + +static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + return drv_get_antenna(local, tx_ant, rx_ant); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1645,8 +1961,10 @@ struct cfg80211_ops mac80211_config_ops = { .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, - .set_mesh_params = ieee80211_set_mesh_params, - .get_mesh_params = ieee80211_get_mesh_params, + .update_mesh_config = ieee80211_update_mesh_config, + .get_mesh_config = ieee80211_get_mesh_config, + .join_mesh = ieee80211_join_mesh, + .leave_mesh = ieee80211_leave_mesh, #endif .change_bss = ieee80211_change_bss, .set_txq_params = ieee80211_set_txq_params, @@ -1671,6 +1989,9 @@ struct cfg80211_ops mac80211_config_ops = { .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, .mgmt_tx = ieee80211_mgmt_tx, + .mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, .mgmt_frame_register = ieee80211_mgmt_frame_register, + .set_antenna = ieee80211_set_antenna, + .get_antenna = ieee80211_get_antenna, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 18260aa99c56..1f02e599a318 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -21,16 +21,30 @@ int mac80211_open_file_generic(struct inode *inode, struct file *file) return 0; } -#define DEBUGFS_READONLY_FILE(name, buflen, fmt, value...) \ +#define DEBUGFS_FORMAT_BUFFER_SIZE 100 + +int mac80211_format_buffer(char __user *userbuf, size_t count, + loff_t *ppos, char *fmt, ...) +{ + va_list args; + char buf[DEBUGFS_FORMAT_BUFFER_SIZE]; + int res; + + va_start(args, fmt); + res = vscnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + return simple_read_from_buffer(userbuf, count, ppos, buf, res); +} + +#define DEBUGFS_READONLY_FILE(name, fmt, value...) \ static ssize_t name## _read(struct file *file, char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ struct ieee80211_local *local = file->private_data; \ - char buf[buflen]; \ - int res; \ \ - res = scnprintf(buf, buflen, fmt "\n", ##value); \ - return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ + return mac80211_format_buffer(userbuf, count, ppos, \ + fmt "\n", ##value); \ } \ \ static const struct file_operations name## _ops = { \ @@ -46,13 +60,13 @@ static const struct file_operations name## _ops = { \ debugfs_create_file(#name, mode, phyd, local, &name## _ops); -DEBUGFS_READONLY_FILE(frequency, 20, "%d", +DEBUGFS_READONLY_FILE(frequency, "%d", local->hw.conf.channel->center_freq); -DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", +DEBUGFS_READONLY_FILE(total_ps_buffered, "%d", local->total_ps_buffered); -DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x", +DEBUGFS_READONLY_FILE(wep_iv, "%#08x", local->wep_iv & 0xffffff); -DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", +DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); static ssize_t tsf_read(struct file *file, char __user *user_buf, @@ -60,13 +74,11 @@ static ssize_t tsf_read(struct file *file, char __user *user_buf, { struct ieee80211_local *local = file->private_data; u64 tsf; - char buf[100]; tsf = drv_get_tsf(local); - snprintf(buf, sizeof(buf), "0x%016llx\n", (unsigned long long) tsf); - - return simple_read_from_buffer(user_buf, count, ppos, buf, 19); + return mac80211_format_buffer(user_buf, count, ppos, "0x%016llx\n", + (unsigned long long) tsf); } static ssize_t tsf_write(struct file *file, @@ -131,12 +143,9 @@ static ssize_t noack_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - int res; - char buf[10]; - res = scnprintf(buf, sizeof(buf), "%d\n", local->wifi_wme_noack_test); - - return simple_read_from_buffer(user_buf, count, ppos, buf, res); + return mac80211_format_buffer(user_buf, count, ppos, "%d\n", + local->wifi_wme_noack_test); } static ssize_t noack_write(struct file *file, @@ -168,12 +177,8 @@ static ssize_t uapsd_queues_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - int res; - char buf[10]; - - res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_queues); - - return simple_read_from_buffer(user_buf, count, ppos, buf, res); + return mac80211_format_buffer(user_buf, count, ppos, "0x%x\n", + local->uapsd_queues); } static ssize_t uapsd_queues_write(struct file *file, @@ -215,12 +220,9 @@ static ssize_t uapsd_max_sp_len_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - int res; - char buf[10]; - res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_max_sp_len); - - return simple_read_from_buffer(user_buf, count, ppos, buf, res); + return mac80211_format_buffer(user_buf, count, ppos, "0x%x\n", + local->uapsd_max_sp_len); } static ssize_t uapsd_max_sp_len_write(struct file *file, diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h index 09cc9be34796..7c87529630f5 100644 --- a/net/mac80211/debugfs.h +++ b/net/mac80211/debugfs.h @@ -4,6 +4,8 @@ #ifdef CONFIG_MAC80211_DEBUGFS extern void debugfs_hw_add(struct ieee80211_local *local); extern int mac80211_open_file_generic(struct inode *inode, struct file *file); +extern int mac80211_format_buffer(char __user *userbuf, size_t count, + loff_t *ppos, char *fmt, ...); #else static inline void debugfs_hw_add(struct ieee80211_local *local) { diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 1243d1db5c59..f7ef3477c24a 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -15,18 +15,17 @@ #include "debugfs.h" #include "debugfs_key.h" -#define KEY_READ(name, prop, buflen, format_string) \ +#define KEY_READ(name, prop, format_string) \ static ssize_t key_##name##_read(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - char buf[buflen]; \ struct ieee80211_key *key = file->private_data; \ - int res = scnprintf(buf, buflen, format_string, key->prop); \ - return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ + return mac80211_format_buffer(userbuf, count, ppos, \ + format_string, key->prop); \ } -#define KEY_READ_D(name) KEY_READ(name, name, 20, "%d\n") -#define KEY_READ_X(name) KEY_READ(name, name, 20, "0x%x\n") +#define KEY_READ_D(name) KEY_READ(name, name, "%d\n") +#define KEY_READ_X(name) KEY_READ(name, name, "0x%x\n") #define KEY_OPS(name) \ static const struct file_operations key_ ##name## _ops = { \ @@ -39,9 +38,9 @@ static const struct file_operations key_ ##name## _ops = { \ KEY_READ_##format(name) \ KEY_OPS(name) -#define KEY_CONF_READ(name, buflen, format_string) \ - KEY_READ(conf_##name, conf.name, buflen, format_string) -#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, 20, "%d\n") +#define KEY_CONF_READ(name, format_string) \ + KEY_READ(conf_##name, conf.name, format_string) +#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, "%d\n") #define KEY_CONF_OPS(name) \ static const struct file_operations key_ ##name## _ops = { \ @@ -59,7 +58,7 @@ KEY_CONF_FILE(keyidx, D); KEY_CONF_FILE(hw_key_idx, D); KEY_FILE(flags, X); KEY_FILE(tx_rx_count, D); -KEY_READ(ifindex, sdata->name, IFNAMSIZ + 2, "%s\n"); +KEY_READ(ifindex, sdata->name, "%s\n"); KEY_OPS(ifindex); static ssize_t key_algorithm_read(struct file *file, @@ -275,7 +274,8 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key) debugfs_remove_recursive(key->debugfs.dir); key->debugfs.dir = NULL; } -void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) + +void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) { char buf[50]; struct ieee80211_key *key; @@ -283,25 +283,29 @@ void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) if (!sdata->debugfs.dir) return; - /* this is running under the key lock */ + lockdep_assert_held(&sdata->local->key_mtx); - key = sdata->default_key; - if (key) { + if (sdata->default_unicast_key) { + key = sdata->default_unicast_key; sprintf(buf, "../keys/%d", key->debugfs.cnt); - sdata->debugfs.default_key = - debugfs_create_symlink("default_key", + sdata->debugfs.default_unicast_key = + debugfs_create_symlink("default_unicast_key", sdata->debugfs.dir, buf); - } else - ieee80211_debugfs_key_remove_default(sdata); -} - -void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata) -{ - if (!sdata) - return; + } else { + debugfs_remove(sdata->debugfs.default_unicast_key); + sdata->debugfs.default_unicast_key = NULL; + } - debugfs_remove(sdata->debugfs.default_key); - sdata->debugfs.default_key = NULL; + if (sdata->default_multicast_key) { + key = sdata->default_multicast_key; + sprintf(buf, "../keys/%d", key->debugfs.cnt); + sdata->debugfs.default_multicast_key = + debugfs_create_symlink("default_multicast_key", + sdata->debugfs.dir, buf); + } else { + debugfs_remove(sdata->debugfs.default_multicast_key); + sdata->debugfs.default_multicast_key = NULL; + } } void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h index 54717b4e1371..32adc77e9c77 100644 --- a/net/mac80211/debugfs_key.h +++ b/net/mac80211/debugfs_key.h @@ -4,8 +4,7 @@ #ifdef CONFIG_MAC80211_DEBUGFS void ieee80211_debugfs_key_add(struct ieee80211_key *key); void ieee80211_debugfs_key_remove(struct ieee80211_key *key); -void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata); -void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata); +void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_key_add_mgmt_default( struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_key_remove_mgmt_default( @@ -17,10 +16,7 @@ static inline void ieee80211_debugfs_key_add(struct ieee80211_key *key) {} static inline void ieee80211_debugfs_key_remove(struct ieee80211_key *key) {} -static inline void ieee80211_debugfs_key_add_default( - struct ieee80211_sub_if_data *sdata) -{} -static inline void ieee80211_debugfs_key_remove_default( +static inline void ieee80211_debugfs_key_update_default( struct ieee80211_sub_if_data *sdata) {} static inline void ieee80211_debugfs_key_add_mgmt_default( diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cbdf36d7841c..2dabdf7680d0 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -251,6 +251,7 @@ IEEE80211_IF_FILE(dot11MeshConfirmTimeout, IEEE80211_IF_FILE(dot11MeshHoldingTimeout, u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC); IEEE80211_IF_FILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC); +IEEE80211_IF_FILE(element_ttl, u.mesh.mshcfg.element_ttl, DEC); IEEE80211_IF_FILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC); IEEE80211_IF_FILE(dot11MeshMaxPeerLinks, u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC); @@ -355,6 +356,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshConfirmTimeout); MESHPARAMS_ADD(dot11MeshHoldingTimeout); MESHPARAMS_ADD(dot11MeshTTL); + MESHPARAMS_ADD(element_ttl); MESHPARAMS_ADD(auto_open_plinks); MESHPARAMS_ADD(dot11MeshMaxPeerLinks); MESHPARAMS_ADD(dot11MeshHWMPactivePathTimeout); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 4601fea1784d..c04a1396cf8d 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -17,20 +17,18 @@ /* sta attributtes */ -#define STA_READ(name, buflen, field, format_string) \ +#define STA_READ(name, field, format_string) \ static ssize_t sta_ ##name## _read(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - int res; \ struct sta_info *sta = file->private_data; \ - char buf[buflen]; \ - res = scnprintf(buf, buflen, format_string, sta->field); \ - return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ + return mac80211_format_buffer(userbuf, count, ppos, \ + format_string, sta->field); \ } -#define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n") -#define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n") -#define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n") +#define STA_READ_D(name, field) STA_READ(name, field, "%d\n") +#define STA_READ_U(name, field) STA_READ(name, field, "%u\n") +#define STA_READ_S(name, field) STA_READ(name, field, "%s\n") #define STA_OPS(name) \ static const struct file_operations sta_ ##name## _ops = { \ @@ -79,22 +77,18 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[20]; struct sta_info *sta = file->private_data; - int res = scnprintf(buf, sizeof(buf), "%u\n", - skb_queue_len(&sta->ps_tx_buf)); - return simple_read_from_buffer(userbuf, count, ppos, buf, res); + return mac80211_format_buffer(userbuf, count, ppos, "%u\n", + skb_queue_len(&sta->ps_tx_buf)); } STA_OPS(num_ps_buf_frames); static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[20]; struct sta_info *sta = file->private_data; - int res = scnprintf(buf, sizeof(buf), "%d\n", - jiffies_to_msecs(jiffies - sta->last_rx)); - return simple_read_from_buffer(userbuf, count, ppos, buf, res); + return mac80211_format_buffer(userbuf, count, ppos, "%d\n", + jiffies_to_msecs(jiffies - sta->last_rx)); } STA_OPS(inactive_ms); @@ -118,34 +112,35 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, char buf[71 + STA_TID_NUM * 40], *p = buf; int i; struct sta_info *sta = file->private_data; + struct tid_ampdu_rx *tid_rx; + struct tid_ampdu_tx *tid_tx; + + rcu_read_lock(); - spin_lock_bh(&sta->lock); p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); p += scnprintf(p, sizeof(buf) + buf - p, "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); + for (i = 0; i < STA_TID_NUM; i++) { + tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]); + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); + p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); - p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - !!sta->ampdu_mlme.tid_rx[i]); + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_rx[i] ? - sta->ampdu_mlme.tid_rx[i]->dialog_token : 0); + tid_rx ? tid_rx->dialog_token : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_rx[i] ? - sta->ampdu_mlme.tid_rx[i]->ssn : 0); + tid_rx ? tid_rx->ssn : 0); - p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - !!sta->ampdu_mlme.tid_tx[i]); + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_tx); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_tx[i] ? - sta->ampdu_mlme.tid_tx[i]->dialog_token : 0); + tid_tx ? tid_tx->dialog_token : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d", - sta->ampdu_mlme.tid_tx[i] ? - skb_queue_len(&sta->ampdu_mlme.tid_tx[i]->pending) : 0); + tid_tx ? skb_queue_len(&tid_tx->pending) : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\n"); } - spin_unlock_bh(&sta->lock); + rcu_read_unlock(); return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } @@ -194,7 +189,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (tx) { if (start) - ret = ieee80211_start_tx_ba_session(&sta->sta, tid); + ret = ieee80211_start_tx_ba_session(&sta->sta, tid, 5000); else ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); } else { diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 16983825f8e8..98d589960a49 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -233,6 +233,20 @@ static inline void drv_get_tkip_seq(struct ieee80211_local *local, trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16); } +static inline int drv_set_frag_threshold(struct ieee80211_local *local, + u32 value) +{ + int ret = 0; + + might_sleep(); + + trace_drv_set_frag_threshold(local, value); + if (local->ops->set_frag_threshold) + ret = local->ops->set_frag_threshold(&local->hw, value); + trace_drv_return_int(local, ret); + return ret; +} + static inline int drv_set_rts_threshold(struct ieee80211_local *local, u32 value) { @@ -353,7 +367,7 @@ static inline void drv_reset_tsf(struct ieee80211_local *local) static inline int drv_tx_last_beacon(struct ieee80211_local *local) { - int ret = 1; + int ret = 0; /* default unsuported op for less congestion */ might_sleep(); @@ -428,4 +442,57 @@ static inline void drv_channel_switch(struct ieee80211_local *local, trace_drv_return_void(local); } + +static inline int drv_set_antenna(struct ieee80211_local *local, + u32 tx_ant, u32 rx_ant) +{ + int ret = -EOPNOTSUPP; + might_sleep(); + if (local->ops->set_antenna) + ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant); + trace_drv_set_antenna(local, tx_ant, rx_ant, ret); + return ret; +} + +static inline int drv_get_antenna(struct ieee80211_local *local, + u32 *tx_ant, u32 *rx_ant) +{ + int ret = -EOPNOTSUPP; + might_sleep(); + if (local->ops->get_antenna) + ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant); + trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret); + return ret; +} + +static inline int drv_remain_on_channel(struct ieee80211_local *local, + struct ieee80211_channel *chan, + enum nl80211_channel_type chantype, + unsigned int duration) +{ + int ret; + + might_sleep(); + + trace_drv_remain_on_channel(local, chan, chantype, duration); + ret = local->ops->remain_on_channel(&local->hw, chan, chantype, + duration); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local) +{ + int ret; + + might_sleep(); + + trace_drv_cancel_remain_on_channel(local); + ret = local->ops->cancel_remain_on_channel(&local->hw); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6831fb1641c8..49c84218b2f4 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -531,6 +531,27 @@ TRACE_EVENT(drv_get_tkip_seq, ) ); +TRACE_EVENT(drv_set_frag_threshold, + TP_PROTO(struct ieee80211_local *local, u32 value), + + TP_ARGS(local, value), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, value) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->value = value; + ), + + TP_printk( + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value + ) +); + TRACE_EVENT(drv_set_rts_threshold, TP_PROTO(struct ieee80211_local *local, u32 value), @@ -862,6 +883,100 @@ TRACE_EVENT(drv_channel_switch, ) ); +TRACE_EVENT(drv_set_antenna, + TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), + + TP_ARGS(local, tx_ant, rx_ant, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx_ant) + __field(u32, rx_ant) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx_ant = tx_ant; + __entry->rx_ant = rx_ant; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", + LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret + ) +); + +TRACE_EVENT(drv_get_antenna, + TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), + + TP_ARGS(local, tx_ant, rx_ant, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx_ant) + __field(u32, rx_ant) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx_ant = tx_ant; + __entry->rx_ant = rx_ant; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", + LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret + ) +); + +TRACE_EVENT(drv_remain_on_channel, + TP_PROTO(struct ieee80211_local *local, struct ieee80211_channel *chan, + enum nl80211_channel_type chantype, unsigned int duration), + + TP_ARGS(local, chan, chantype, duration), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, center_freq) + __field(int, channel_type) + __field(unsigned int, duration) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->center_freq = chan->center_freq; + __entry->channel_type = chantype; + __entry->duration = duration; + ), + + TP_printk( + LOCAL_PR_FMT " freq:%dMHz duration:%dms", + LOCAL_PR_ARG, __entry->center_freq, __entry->duration + ) +); + +TRACE_EVENT(drv_cancel_remain_on_channel, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT, LOCAL_PR_ARG + ) +); + /* * Tracing for API calls that drivers call. */ @@ -1099,6 +1214,42 @@ TRACE_EVENT(api_chswitch_done, ) ); +TRACE_EVENT(api_ready_on_channel, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT, LOCAL_PR_ARG + ) +); + +TRACE_EVENT(api_remain_on_channel_expired, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT, LOCAL_PR_ARG + ) +); + /* * Tracing for internal functions * (which may also be called in response to driver calls) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 239c4836a946..53c7077ffd4f 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -780,6 +780,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->u.ibss.mtx); + if (!sdata->u.ibss.ssid_len) + goto mgmt_out; /* not ready to merge yet */ + switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); @@ -797,6 +800,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; } + mgmt_out: mutex_unlock(&sdata->u.ibss.mtx); } @@ -915,6 +919,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.privacy = params->privacy; sdata->u.ibss.basic_rates = params->basic_rates; + memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate, + sizeof(params->mcast_rate)); sdata->vif.bss_conf.beacon_int = params->beacon_interval; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b80c38689927..c47d7c0e48a4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -23,6 +23,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/etherdevice.h> +#include <linux/leds.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> #include <net/mac80211.h> @@ -167,6 +168,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result; * @IEEE80211_RX_FRAGMENTED: fragmented frame * @IEEE80211_RX_AMSDU: a-MSDU packet * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed + * @IEEE80211_RX_DEFERRED_RELEASE: frame was subjected to receive reordering * * These are per-frame flags that are attached to a frame in the * @rx_flags field of &struct ieee80211_rx_status. @@ -177,6 +179,7 @@ enum ieee80211_packet_rx_flags { IEEE80211_RX_FRAGMENTED = BIT(2), IEEE80211_RX_AMSDU = BIT(3), IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4), + IEEE80211_RX_DEFERRED_RELEASE = BIT(5), }; /** @@ -260,6 +263,7 @@ enum ieee80211_work_type { IEEE80211_WORK_ASSOC_BEACON_WAIT, IEEE80211_WORK_ASSOC, IEEE80211_WORK_REMAIN_ON_CHANNEL, + IEEE80211_WORK_OFFCHANNEL_TX, }; /** @@ -320,6 +324,10 @@ struct ieee80211_work { struct { u32 duration; } remain; + struct { + struct sk_buff *frame; + u32 wait; + } offchan_tx; }; int ie_len; @@ -349,8 +357,10 @@ struct ieee80211_if_managed { struct work_struct chswitch_work; struct work_struct beacon_connection_loss_work; + unsigned long beacon_timeout; unsigned long probe_timeout; int probe_send_count; + bool nullfunc_failed; struct mutex mtx; struct cfg80211_bss *associated; @@ -477,6 +487,8 @@ struct ieee80211_if_mesh { struct mesh_config mshcfg; u32 mesh_seqnum; bool accepting_plinks; + const u8 *vendor_ie; + u8 vendor_ie_len; }; #ifdef CONFIG_MAC80211_MESH @@ -550,7 +562,7 @@ struct ieee80211_sub_if_data { unsigned int fragment_next; struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; - struct ieee80211_key *default_key; + struct ieee80211_key *default_unicast_key, *default_multicast_key; struct ieee80211_key *default_mgmt_key; u16 sequence_number; @@ -578,9 +590,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_vlan vlan; struct ieee80211_if_managed mgd; struct ieee80211_if_ibss ibss; -#ifdef CONFIG_MAC80211_MESH struct ieee80211_if_mesh mesh; -#endif u32 mntr_flags; } u; @@ -588,7 +598,8 @@ struct ieee80211_sub_if_data { struct { struct dentry *dir; struct dentry *subdir_stations; - struct dentry *default_key; + struct dentry *default_unicast_key; + struct dentry *default_multicast_key; struct dentry *default_mgmt_key; } debugfs; #endif @@ -602,19 +613,6 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } -static inline void -ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata, - u8 mesh_id_len, u8 *mesh_id) -{ -#ifdef CONFIG_MAC80211_MESH - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - ifmsh->mesh_id_len = mesh_id_len; - memcpy(ifmsh->mesh_id, mesh_id, mesh_id_len); -#else - WARN_ON(1); -#endif -} - enum sdata_queue_type { IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, IEEE80211_SDATA_QUEUE_AGG_START = 1, @@ -635,6 +633,20 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_SKB_ADD, }; +#ifdef CONFIG_MAC80211_LEDS +struct tpt_led_trigger { + struct led_trigger trig; + char name[32]; + const struct ieee80211_tpt_blink *blink_table; + unsigned int blink_table_len; + struct timer_list timer; + unsigned long prev_traffic; + unsigned long tx_bytes, rx_bytes; + unsigned int active, want; + bool running; +}; +#endif + /** * mac80211 scan flags - currently active scan mode * @@ -764,6 +776,15 @@ struct ieee80211_local { struct sk_buff_head skb_queue; struct sk_buff_head skb_queue_unreliable; + /* + * Internal FIFO queue which is shared between multiple rx path + * stages. Its main task is to provide a serialization mechanism, + * so all rx handlers can enjoy having exclusive access to their + * private data structures. + */ + struct sk_buff_head rx_skb_queue; + bool running_rx_handler; /* protected by rx_skb_queue.lock */ + /* Station data */ /* * The mutex only protects the list and counter, @@ -843,6 +864,7 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_LEDS int tx_led_counter, rx_led_counter; struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led; + struct tpt_led_trigger *tpt_led_trigger; char tx_led_name[32], rx_led_name[32], assoc_led_name[32], radio_led_name[32]; #endif @@ -929,6 +951,15 @@ struct ieee80211_local { } debugfs; #endif + struct ieee80211_channel *hw_roc_channel; + struct net_device *hw_roc_dev; + struct sk_buff *hw_roc_skb; + struct work_struct hw_roc_start, hw_roc_done; + enum nl80211_channel_type hw_roc_channel_type; + unsigned int hw_roc_duration; + u32 hw_roc_cookie; + bool hw_roc_for_tx; + /* dummy netdev for use w/ NAPI */ struct net_device napi_dev; @@ -1120,6 +1151,7 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local); void ieee80211_offchannel_stop_station(struct ieee80211_local *local); void ieee80211_offchannel_return(struct ieee80211_local *local, bool enable_beaconing); +void ieee80211_hw_roc_setup(struct ieee80211_local *local); /* interface handling */ int ieee80211_iface_init(void); @@ -1264,6 +1296,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, int powersave); void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr); +void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr, bool ack); void ieee80211_beacon_connection_loss_work(struct work_struct *work); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, @@ -1278,6 +1312,9 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); int ieee80211_add_pending_skbs(struct ieee80211_local *local, struct sk_buff_head *skbs); +int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, + struct sk_buff_head *skbs, + void (*fn)(void *data), void *data); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, @@ -1287,6 +1324,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len, enum ieee80211_band band, u32 rate_mask, u8 channel); +struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, + u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f9163b12c7f1..8acba456744e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -197,11 +197,6 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) sdata->bss = &sdata->u.ap; break; case NL80211_IFTYPE_MESH_POINT: - if (!ieee80211_vif_is_mesh(&sdata->vif)) - break; - /* mesh ifaces must set allmulti to forward mcast traffic */ - atomic_inc(&local->iff_allmultis); - break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_ADHOC: @@ -225,6 +220,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) /* we're brought up, everything changes */ hw_reconf_flags = ~0; ieee80211_led_radio(local, true); + ieee80211_mod_tpt_led_trig(local, + IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); } /* @@ -273,12 +270,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) goto err_stop; } - if (ieee80211_vif_is_mesh(&sdata->vif)) { - local->fif_other_bss++; - ieee80211_configure_filter(local); - - ieee80211_start_mesh(sdata); - } else if (sdata->vif.type == NL80211_IFTYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; local->fif_probe_req++; @@ -391,6 +383,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, u32 hw_reconf_flags = 0; int i; + if (local->scan_sdata == sdata) + ieee80211_scan_cancel(local); + clear_bit(SDATA_STATE_RUNNING, &sdata->state); /* @@ -500,18 +495,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); ieee80211_configure_filter(local); break; - case NL80211_IFTYPE_MESH_POINT: - if (ieee80211_vif_is_mesh(&sdata->vif)) { - /* other_bss and allmulti are always set on mesh - * ifaces */ - local->fif_other_bss--; - atomic_dec(&local->iff_allmultis); - - ieee80211_configure_filter(local); - - ieee80211_stop_mesh(sdata); - } - /* fall through */ default: flush_work(&sdata->work); /* @@ -523,9 +506,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); - if (local->scan_sdata == sdata) - ieee80211_scan_cancel(local); - /* * Disable beaconing here for mesh only, AP and IBSS * are already taken care of. @@ -1204,12 +1184,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (ret) goto fail; - if (ieee80211_vif_is_mesh(&sdata->vif) && - params && params->mesh_id_len) - ieee80211_sdata_set_mesh_id(sdata, - params->mesh_id_len, - params->mesh_id); - mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); @@ -1290,8 +1264,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int count = 0; - bool working = false, scanning = false; + bool working = false, scanning = false, hw_roc = false; struct ieee80211_work *wk; + unsigned int led_trig_start = 0, led_trig_stop = 0; #ifdef CONFIG_PROVE_LOCKING WARN_ON(debug_locks && !lockdep_rtnl_is_held() && @@ -1333,6 +1308,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) local->scan_sdata->vif.bss_conf.idle = false; } + if (local->hw_roc_channel) + hw_roc = true; + list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->old_idle == sdata->vif.bss_conf.idle) continue; @@ -1341,6 +1319,20 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); } + if (working || scanning || hw_roc) + led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; + else + led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; + + if (count) + led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; + else + led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; + + ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); + + if (hw_roc) + return ieee80211_idle_off(local, "hw remain-on-channel"); if (working) return ieee80211_idle_off(local, "working"); if (scanning) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ccd676b2f599..8c02469b7176 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -30,19 +30,20 @@ * keys and per-station keys. Since each station belongs to an interface, * each station key also belongs to that interface. * - * Hardware acceleration is done on a best-effort basis, for each key - * that is eligible the hardware is asked to enable that key but if - * it cannot do that they key is simply kept for software encryption. - * There is currently no way of knowing this except by looking into - * debugfs. + * Hardware acceleration is done on a best-effort basis for algorithms + * that are implemented in software, for each key the hardware is asked + * to enable that key for offloading but if it cannot do that the key is + * simply kept for software encryption (unless it is for an algorithm + * that isn't implemented in software). + * There is currently no way of knowing whether a key is handled in SW + * or HW except by looking into debugfs. * - * All key operations are protected internally. - * - * Within mac80211, key references are, just as STA structure references, - * protected by RCU. Note, however, that some things are unprotected, - * namely the key->sta dereferences within the hardware acceleration - * functions. This means that sta_info_destroy() must remove the key - * which waits for an RCU grace period. + * All key management is internally protected by a mutex. Within all + * other parts of mac80211, key references are, just as STA structure + * references, protected by RCU. Note, however, that some things are + * unprotected, namely the key->sta dereferences within the hardware + * acceleration functions. This means that sta_info_destroy() must + * remove the key which waits for an RCU grace period. */ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; @@ -84,10 +85,17 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) goto out_unsupported; sdata = key->sdata; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + /* + * The driver doesn't know anything about VLAN interfaces. + * Hence, don't send GTKs for VLAN interfaces to the driver. + */ + if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) + goto out_unsupported; sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); + } ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); @@ -171,7 +179,7 @@ void ieee80211_key_removed(struct ieee80211_key_conf *key_conf) EXPORT_SYMBOL_GPL(ieee80211_key_removed); static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, - int idx) + int idx, bool uni, bool multi) { struct ieee80211_key *key = NULL; @@ -180,18 +188,19 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = sdata->keys[idx]; - rcu_assign_pointer(sdata->default_key, key); + if (uni) + rcu_assign_pointer(sdata->default_unicast_key, key); + if (multi) + rcu_assign_pointer(sdata->default_multicast_key, key); - if (key) { - ieee80211_debugfs_key_remove_default(key->sdata); - ieee80211_debugfs_key_add_default(key->sdata); - } + ieee80211_debugfs_key_update_default(sdata); } -void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) +void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, + bool uni, bool multi) { mutex_lock(&sdata->local->key_mtx); - __ieee80211_set_default_key(sdata, idx); + __ieee80211_set_default_key(sdata, idx, uni, multi); mutex_unlock(&sdata->local->key_mtx); } @@ -208,10 +217,7 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) rcu_assign_pointer(sdata->default_mgmt_key, key); - if (key) { - ieee80211_debugfs_key_remove_mgmt_default(key->sdata); - ieee80211_debugfs_key_add_mgmt_default(key->sdata); - } + ieee80211_debugfs_key_update_default(sdata); } void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, @@ -229,7 +235,8 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, struct ieee80211_key *old, struct ieee80211_key *new) { - int idx, defkey, defmgmtkey; + int idx; + bool defunikey, defmultikey, defmgmtkey; if (new) list_add(&new->list, &sdata->key_list); @@ -250,29 +257,31 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, else idx = new->conf.keyidx; - defkey = old && sdata->default_key == old; + defunikey = old && sdata->default_unicast_key == old; + defmultikey = old && sdata->default_multicast_key == old; defmgmtkey = old && sdata->default_mgmt_key == old; - if (defkey && !new) - __ieee80211_set_default_key(sdata, -1); + if (defunikey && !new) + __ieee80211_set_default_key(sdata, -1, true, false); + if (defmultikey && !new) + __ieee80211_set_default_key(sdata, -1, false, true); if (defmgmtkey && !new) __ieee80211_set_default_mgmt_key(sdata, -1); rcu_assign_pointer(sdata->keys[idx], new); - if (defkey && new) - __ieee80211_set_default_key(sdata, new->conf.keyidx); + if (defunikey && new) + __ieee80211_set_default_key(sdata, new->conf.keyidx, + true, false); + if (defmultikey && new) + __ieee80211_set_default_key(sdata, new->conf.keyidx, + false, true); if (defmgmtkey && new) __ieee80211_set_default_mgmt_key(sdata, new->conf.keyidx); } - if (old) { - /* - * We'll use an empty list to indicate that the key - * has already been removed. - */ - list_del_init(&old->list); - } + if (old) + list_del(&old->list); } struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, @@ -366,6 +375,12 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) if (!key) return; + /* + * Synchronize so the TX path can no longer be using + * this key before we free/remove it. + */ + synchronize_rcu(); + if (key->local) ieee80211_key_disable_hw_accel(key); @@ -407,8 +422,8 @@ int ieee80211_key_link(struct ieee80211_key *key, struct sta_info *ap; /* - * We're getting a sta pointer in, - * so must be under RCU read lock. + * We're getting a sta pointer in, so must be under + * appropriate locking for sta_info_get(). */ /* same here, the AP could be using QoS */ @@ -502,11 +517,12 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) mutex_lock(&sdata->local->key_mtx); - ieee80211_debugfs_key_remove_default(sdata); ieee80211_debugfs_key_remove_mgmt_default(sdata); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) __ieee80211_key_free(key); + ieee80211_debugfs_key_update_default(sdata); + mutex_unlock(&sdata->local->key_mtx); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 0db1c0f5f697..8106aa1b7466 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -138,7 +138,8 @@ int __must_check ieee80211_key_link(struct ieee80211_key *key, struct sta_info *sta); void ieee80211_key_free(struct ieee80211_local *local, struct ieee80211_key *key); -void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); +void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, + bool uni, bool multi); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 063aad944246..14590332c81c 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -54,12 +54,22 @@ void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) led_trigger_event(local->radio_led, LED_OFF); } +void ieee80211_led_names(struct ieee80211_local *local) +{ + snprintf(local->rx_led_name, sizeof(local->rx_led_name), + "%srx", wiphy_name(local->hw.wiphy)); + snprintf(local->tx_led_name, sizeof(local->tx_led_name), + "%stx", wiphy_name(local->hw.wiphy)); + snprintf(local->assoc_led_name, sizeof(local->assoc_led_name), + "%sassoc", wiphy_name(local->hw.wiphy)); + snprintf(local->radio_led_name, sizeof(local->radio_led_name), + "%sradio", wiphy_name(local->hw.wiphy)); +} + void ieee80211_led_init(struct ieee80211_local *local) { local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); if (local->rx_led) { - snprintf(local->rx_led_name, sizeof(local->rx_led_name), - "%srx", wiphy_name(local->hw.wiphy)); local->rx_led->name = local->rx_led_name; if (led_trigger_register(local->rx_led)) { kfree(local->rx_led); @@ -69,8 +79,6 @@ void ieee80211_led_init(struct ieee80211_local *local) local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); if (local->tx_led) { - snprintf(local->tx_led_name, sizeof(local->tx_led_name), - "%stx", wiphy_name(local->hw.wiphy)); local->tx_led->name = local->tx_led_name; if (led_trigger_register(local->tx_led)) { kfree(local->tx_led); @@ -80,8 +88,6 @@ void ieee80211_led_init(struct ieee80211_local *local) local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); if (local->assoc_led) { - snprintf(local->assoc_led_name, sizeof(local->assoc_led_name), - "%sassoc", wiphy_name(local->hw.wiphy)); local->assoc_led->name = local->assoc_led_name; if (led_trigger_register(local->assoc_led)) { kfree(local->assoc_led); @@ -91,14 +97,19 @@ void ieee80211_led_init(struct ieee80211_local *local) local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); if (local->radio_led) { - snprintf(local->radio_led_name, sizeof(local->radio_led_name), - "%sradio", wiphy_name(local->hw.wiphy)); local->radio_led->name = local->radio_led_name; if (led_trigger_register(local->radio_led)) { kfree(local->radio_led); local->radio_led = NULL; } } + + if (local->tpt_led_trigger) { + if (led_trigger_register(&local->tpt_led_trigger->trig)) { + kfree(local->tpt_led_trigger); + local->tpt_led_trigger = NULL; + } + } } void ieee80211_led_exit(struct ieee80211_local *local) @@ -119,15 +130,18 @@ void ieee80211_led_exit(struct ieee80211_local *local) led_trigger_unregister(local->rx_led); kfree(local->rx_led); } + + if (local->tpt_led_trigger) { + led_trigger_unregister(&local->tpt_led_trigger->trig); + kfree(local->tpt_led_trigger); + } } char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - if (local->radio_led) - return local->radio_led_name; - return NULL; + return local->radio_led_name; } EXPORT_SYMBOL(__ieee80211_get_radio_led_name); @@ -135,9 +149,7 @@ char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - if (local->assoc_led) - return local->assoc_led_name; - return NULL; + return local->assoc_led_name; } EXPORT_SYMBOL(__ieee80211_get_assoc_led_name); @@ -145,9 +157,7 @@ char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - if (local->tx_led) - return local->tx_led_name; - return NULL; + return local->tx_led_name; } EXPORT_SYMBOL(__ieee80211_get_tx_led_name); @@ -155,8 +165,144 @@ char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - if (local->rx_led) - return local->rx_led_name; - return NULL; + return local->rx_led_name; } EXPORT_SYMBOL(__ieee80211_get_rx_led_name); + +static unsigned long tpt_trig_traffic(struct ieee80211_local *local, + struct tpt_led_trigger *tpt_trig) +{ + unsigned long traffic, delta; + + traffic = tpt_trig->tx_bytes + tpt_trig->rx_bytes; + + delta = traffic - tpt_trig->prev_traffic; + tpt_trig->prev_traffic = traffic; + return DIV_ROUND_UP(delta, 1024 / 8); +} + +static void tpt_trig_timer(unsigned long data) +{ + struct ieee80211_local *local = (void *)data; + struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + struct led_classdev *led_cdev; + unsigned long on, off, tpt; + int i; + + if (!tpt_trig->running) + return; + + mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); + + tpt = tpt_trig_traffic(local, tpt_trig); + + /* default to just solid on */ + on = 1; + off = 0; + + for (i = tpt_trig->blink_table_len - 1; i >= 0; i--) { + if (tpt_trig->blink_table[i].throughput < 0 || + tpt > tpt_trig->blink_table[i].throughput) { + off = tpt_trig->blink_table[i].blink_time / 2; + on = tpt_trig->blink_table[i].blink_time - off; + break; + } + } + + read_lock(&tpt_trig->trig.leddev_list_lock); + list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) + led_blink_set(led_cdev, &on, &off); + read_unlock(&tpt_trig->trig.leddev_list_lock); +} + +char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + unsigned int flags, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct tpt_led_trigger *tpt_trig; + + if (WARN_ON(local->tpt_led_trigger)) + return NULL; + + tpt_trig = kzalloc(sizeof(struct tpt_led_trigger), GFP_KERNEL); + if (!tpt_trig) + return NULL; + + snprintf(tpt_trig->name, sizeof(tpt_trig->name), + "%stpt", wiphy_name(local->hw.wiphy)); + + tpt_trig->trig.name = tpt_trig->name; + + tpt_trig->blink_table = blink_table; + tpt_trig->blink_table_len = blink_table_len; + tpt_trig->want = flags; + + setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local); + + local->tpt_led_trigger = tpt_trig; + + return tpt_trig->name; +} +EXPORT_SYMBOL(__ieee80211_create_tpt_led_trigger); + +static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) +{ + struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + + if (tpt_trig->running) + return; + + /* reset traffic */ + tpt_trig_traffic(local, tpt_trig); + tpt_trig->running = true; + + tpt_trig_timer((unsigned long)local); + mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); +} + +static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) +{ + struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + struct led_classdev *led_cdev; + + if (!tpt_trig->running) + return; + + tpt_trig->running = false; + del_timer_sync(&tpt_trig->timer); + + read_lock(&tpt_trig->trig.leddev_list_lock); + list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) + led_brightness_set(led_cdev, LED_OFF); + read_unlock(&tpt_trig->trig.leddev_list_lock); +} + +void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, + unsigned int types_on, unsigned int types_off) +{ + struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + bool allowed; + + WARN_ON(types_on & types_off); + + if (!tpt_trig) + return; + + tpt_trig->active &= ~types_off; + tpt_trig->active |= types_on; + + /* + * Regardless of wanted state, we shouldn't blink when + * the radio is disabled -- this can happen due to some + * code ordering issues with __ieee80211_recalc_idle() + * being called before the radio is started. + */ + allowed = tpt_trig->active & IEEE80211_TPT_LEDTRIG_FL_RADIO; + + if (!allowed || !(tpt_trig->active & tpt_trig->want)) + ieee80211_stop_tpt_led_trig(local); + else + ieee80211_start_tpt_led_trig(local); +} diff --git a/net/mac80211/led.h b/net/mac80211/led.h index 77b1e1ba6039..e0275d9befa8 100644 --- a/net/mac80211/led.h +++ b/net/mac80211/led.h @@ -12,14 +12,17 @@ #include "ieee80211_i.h" #ifdef CONFIG_MAC80211_LEDS -extern void ieee80211_led_rx(struct ieee80211_local *local); -extern void ieee80211_led_tx(struct ieee80211_local *local, int q); -extern void ieee80211_led_assoc(struct ieee80211_local *local, - bool associated); -extern void ieee80211_led_radio(struct ieee80211_local *local, - bool enabled); -extern void ieee80211_led_init(struct ieee80211_local *local); -extern void ieee80211_led_exit(struct ieee80211_local *local); +void ieee80211_led_rx(struct ieee80211_local *local); +void ieee80211_led_tx(struct ieee80211_local *local, int q); +void ieee80211_led_assoc(struct ieee80211_local *local, + bool associated); +void ieee80211_led_radio(struct ieee80211_local *local, + bool enabled); +void ieee80211_led_names(struct ieee80211_local *local); +void ieee80211_led_init(struct ieee80211_local *local); +void ieee80211_led_exit(struct ieee80211_local *local); +void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, + unsigned int types_on, unsigned int types_off); #else static inline void ieee80211_led_rx(struct ieee80211_local *local) { @@ -35,10 +38,36 @@ static inline void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { } +static inline void ieee80211_led_names(struct ieee80211_local *local) +{ +} static inline void ieee80211_led_init(struct ieee80211_local *local) { } static inline void ieee80211_led_exit(struct ieee80211_local *local) { } +static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, + unsigned int types_on, + unsigned int types_off) +{ +} +#endif + +static inline void +ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes) +{ +#ifdef CONFIG_MAC80211_LEDS + if (local->tpt_led_trigger && ieee80211_is_data(fc)) + local->tpt_led_trigger->tx_bytes += bytes; +#endif +} + +static inline void +ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes) +{ +#ifdef CONFIG_MAC80211_LEDS + if (local->tpt_led_trigger && ieee80211_is_data(fc)) + local->tpt_led_trigger->rx_bytes += bytes; #endif +} diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 107a0cbe52ac..485d36bc9a46 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -245,9 +245,12 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.enable_beacon = !!sdata->u.ibss.presp; break; +#ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - sdata->vif.bss_conf.enable_beacon = true; + sdata->vif.bss_conf.enable_beacon = + !!sdata->u.mesh.mesh_id_len; break; +#endif default: /* not reached */ WARN_ON(1); @@ -481,6 +484,10 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { BIT(IEEE80211_STYPE_DEAUTH >> 4) | BIT(IEEE80211_STYPE_ACTION >> 4), }, + [NL80211_IFTYPE_MESH_POINT] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4), + }, }; struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, @@ -514,10 +521,15 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes; + wiphy->privid = mac80211_wiphy_privid; + wiphy->flags |= WIPHY_FLAG_NETNS_OK | WIPHY_FLAG_4ADDR_AP | - WIPHY_FLAG_4ADDR_STATION; - wiphy->privid = mac80211_wiphy_privid; + WIPHY_FLAG_4ADDR_STATION | + WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS; + + if (!ops->set_key) + wiphy->flags |= WIPHY_FLAG_IBSS_RSN; wiphy->bss_priv_size = sizeof(struct ieee80211_bss); @@ -557,6 +569,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, spin_lock_init(&local->filter_lock); spin_lock_init(&local->queue_stop_reason_lock); + skb_queue_head_init(&local->rx_skb_queue); + INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); ieee80211_work_init(local); @@ -593,6 +607,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, /* init dummy netdev for use w/ NAPI */ init_dummy_netdev(&local->napi_dev); + ieee80211_led_names(local); + + ieee80211_hw_roc_setup(local); + return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); @@ -737,6 +755,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } + if (!local->ops->remain_on_channel) + local->hw.wiphy->max_remain_on_channel_duration = 5000; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; @@ -898,6 +919,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_warn(local->hw.wiphy, "skb_queue not empty\n"); skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); + skb_queue_purge(&local->rx_skb_queue); destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c8a4f19ed13b..ca3af4685b0a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -124,15 +124,6 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) ieee80211_mesh_housekeeping_timer((unsigned long) sdata); } -void mesh_ids_set_default(struct ieee80211_if_mesh *sta) -{ - sta->mesh_pp_id = 0; /* HWMP */ - sta->mesh_pm_id = 0; /* Airtime */ - sta->mesh_cc_id = 0; /* Disabled */ - sta->mesh_sp_id = 0; /* Neighbor Offset */ - sta->mesh_auth_id = 0; /* Disabled */ -} - int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) { int i; @@ -287,6 +278,13 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) *pos++ |= sdata->u.mesh.accepting_plinks ? MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; + + if (sdata->u.mesh.vendor_ie) { + int len = sdata->u.mesh.vendor_ie_len; + const u8 *data = sdata->u.mesh.vendor_ie; + if (skb_tailroom(skb) > len) + memcpy(skb_put(skb, len), data, len); + } } u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) @@ -412,39 +410,33 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, * ieee80211_new_mesh_header - create a new mesh header * @meshhdr: uninitialized mesh header * @sdata: mesh interface to be used - * @addr4: addr4 of the mesh frame (1st in ae header) - * may be NULL - * @addr5: addr5 of the mesh frame (1st or 2nd in ae header) - * may be NULL unless addr6 is present - * @addr6: addr6 of the mesh frame (2nd or 3rd in ae header) - * may be NULL unless addr5 is present + * @addr4or5: 1st address in the ae header, which may correspond to address 4 + * (if addr6 is NULL) or address 5 (if addr6 is present). It may + * be NULL. + * @addr6: 2nd address in the ae header, which corresponds to addr6 of the + * mesh frame * * Return the header length. */ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, - struct ieee80211_sub_if_data *sdata, char *addr4, - char *addr5, char *addr6) + struct ieee80211_sub_if_data *sdata, char *addr4or5, + char *addr6) { int aelen = 0; + BUG_ON(!addr4or5 && addr6); memset(meshhdr, 0, sizeof(*meshhdr)); meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); sdata->u.mesh.mesh_seqnum++; - if (addr4) { + if (addr4or5 && !addr6) { meshhdr->flags |= MESH_FLAGS_AE_A4; aelen += ETH_ALEN; - memcpy(meshhdr->eaddr1, addr4, ETH_ALEN); - } - if (addr5 && addr6) { + memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); + } else if (addr4or5 && addr6) { meshhdr->flags |= MESH_FLAGS_AE_A5_A6; aelen += 2 * ETH_ALEN; - if (!addr4) { - memcpy(meshhdr->eaddr1, addr5, ETH_ALEN); - memcpy(meshhdr->eaddr2, addr6, ETH_ALEN); - } else { - memcpy(meshhdr->eaddr2, addr5, ETH_ALEN); - memcpy(meshhdr->eaddr3, addr6, ETH_ALEN); - } + memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); + memcpy(meshhdr->eaddr2, addr6, ETH_ALEN); } return 6 + aelen; } @@ -513,6 +505,14 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; + local->fif_other_bss++; + /* mesh ifaces must set allmulti to forward mcast traffic */ + atomic_inc(&local->iff_allmultis); + ieee80211_configure_filter(local); + + ifmsh->mesh_cc_id = 0; /* Disabled */ + ifmsh->mesh_sp_id = 0; /* Neighbor Offset */ + ifmsh->mesh_auth_id = 0; /* Disabled */ set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); ieee80211_mesh_root_setup(ifmsh); ieee80211_queue_work(&local->hw, &sdata->work); @@ -524,6 +524,13 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + ifmsh->mesh_id_len = 0; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + sta_info_flush(local, NULL); + del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); /* @@ -534,6 +541,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) * it no longer is. */ cancel_work_sync(&sdata->work); + + local->fif_other_bss--; + atomic_dec(&local->iff_allmultis); + ieee80211_configure_filter(local); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, @@ -663,26 +674,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_mesh_housekeeping_timer, (unsigned long) sdata); - ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; - ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; - ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T; - ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR; - ifmsh->mshcfg.dot11MeshTTL = MESH_TTL; - ifmsh->mshcfg.auto_open_plinks = true; - ifmsh->mshcfg.dot11MeshMaxPeerLinks = - MESH_MAX_ESTAB_PLINKS; - ifmsh->mshcfg.dot11MeshHWMPactivePathTimeout = - MESH_PATH_TIMEOUT; - ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval = - MESH_PREQ_MIN_INT; - ifmsh->mshcfg.dot11MeshHWMPnetDiameterTraversalTime = - MESH_DIAM_TRAVERSAL_TIME; - ifmsh->mshcfg.dot11MeshHWMPmaxPREQretries = - MESH_MAX_PREQ_RETRIES; - ifmsh->mshcfg.path_refresh_time = - MESH_PATH_REFRESH_TIME; - ifmsh->mshcfg.min_discovery_timeout = - MESH_MIN_DISCOVERY_TIMEOUT; ifmsh->accepting_plinks = true; ifmsh->preq_id = 0; ifmsh->sn = 0; @@ -692,7 +683,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); - mesh_ids_set_default(ifmsh); setup_timer(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, (unsigned long) sdata); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 58e741128968..b99e230fe31c 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -164,44 +164,10 @@ struct mesh_rmc { }; -/* - * MESH_CFG_COMP_LEN Includes: - * - Active path selection protocol ID. - * - Active path selection metric ID. - * - Congestion control mode identifier. - * - Channel precedence. - * Does not include mesh capabilities, which may vary across nodes in the same - * mesh - */ -#define MESH_CFG_CMP_LEN (IEEE80211_MESH_CONFIG_LEN - 2) - -/* Default values, timeouts in ms */ -#define MESH_TTL 31 -#define MESH_MAX_RETR 3 -#define MESH_RET_T 100 -#define MESH_CONF_T 100 -#define MESH_HOLD_T 100 - -#define MESH_PATH_TIMEOUT 5000 -/* Minimum interval between two consecutive PREQs originated by the same - * interface - */ -#define MESH_PREQ_MIN_INT 10 -#define MESH_DIAM_TRAVERSAL_TIME 50 -/* A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds before - * timing out. This way it will remain ACTIVE and no data frames will be - * unnecesarily held in the pending queue. - */ -#define MESH_PATH_REFRESH_TIME 1000 -#define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME) #define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units */ -#define MESH_MAX_PREQ_RETRIES 4 #define MESH_PATH_EXPIRE (600 * HZ) -/* Default maximum number of established plinks per interface */ -#define MESH_MAX_ESTAB_PLINKS 32 - /* Default maximum number of plinks per interface */ #define MESH_MAX_PLINKS 256 @@ -221,8 +187,8 @@ struct mesh_rmc { int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, const u8 *da, const u8 *sa); int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, - struct ieee80211_sub_if_data *sdata, char *addr4, - char *addr5, char *addr6); + struct ieee80211_sub_if_data *sdata, char *addr4or5, + char *addr6); int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr, struct ieee80211_sub_if_data *sdata); bool mesh_matches_local(struct ieee802_11_elems *ie, @@ -318,6 +284,11 @@ static inline void mesh_path_activate(struct mesh_path *mpath) mpath->flags |= MESH_PATH_ACTIVE | MESH_PATH_RESOLVED; } +static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) +{ + return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP; +} + #define for_each_mesh_entry(x, p, node, i) \ for (i = 0; i <= x->hash_mask; i++) \ hlist_for_each_entry_rcu(node, p, &x->hash_buckets[i], list) @@ -338,6 +309,8 @@ static inline void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) {} static inline void mesh_plink_quiesce(struct sta_info *sta) {} static inline void mesh_plink_restart(struct sta_info *sta) {} +static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) +{ return false; } #endif #endif /* IEEE80211S_H */ diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 829e08a657d0..5bf64d7112b3 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -232,7 +232,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, *pos++ = WLAN_EID_PERR; *pos++ = ie_len; /* ttl */ - *pos++ = MESH_TTL; + *pos++ = ttl; /* number of destinations */ *pos++ = 1; /* @@ -522,7 +522,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, if (reply) { lifetime = PREQ_IE_LIFETIME(preq_elem); - ttl = ifmsh->mshcfg.dot11MeshTTL; + ttl = ifmsh->mshcfg.element_ttl; if (ttl != 0) { mhwmp_dbg("replying to the PREQ\n"); mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr, @@ -877,7 +877,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) sdata->u.mesh.last_sn_update = jiffies; } lifetime = default_lifetime(sdata); - ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + ttl = sdata->u.mesh.mshcfg.element_ttl; if (ttl == 0) { sdata->u.mesh.mshstats.dropped_frames_ttl++; spin_unlock_bh(&mpath->state_lock); @@ -1013,5 +1013,6 @@ mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->vif.addr, cpu_to_le32(++ifmsh->sn), 0, NULL, 0, broadcast_addr, - 0, MESH_TTL, 0, 0, 0, sdata); + 0, sdata->u.mesh.mshcfg.element_ttl, + 0, 0, 0, sdata); } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 349e466cf08b..8d65b47d9837 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -467,8 +467,8 @@ void mesh_plink_broken(struct sta_info *sta) mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); - mesh_path_error_tx(MESH_TTL, mpath->dst, - cpu_to_le32(mpath->sn), + mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, + mpath->dst, cpu_to_le32(mpath->sn), cpu_to_le16(PERR_RCODE_DEST_UNREACH), bcast, sdata); } else @@ -614,7 +614,8 @@ void mesh_path_discard_frame(struct sk_buff *skb, mpath = mesh_path_lookup(da, sdata); if (mpath) sn = ++mpath->sn; - mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(sn), + mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, skb->data, + cpu_to_le32(sn), cpu_to_le16(PERR_RCODE_NO_ROUTE), ra, sdata); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 1c91f0f3c307..44b53931ba5e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -160,7 +160,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid, __le16 reason) { struct ieee80211_local *local = sdata->local; - struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); + struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + + sdata->u.mesh.vendor_ie_len); struct ieee80211_mgmt *mgmt; bool include_plid = false; static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a3a9421555af..45fbb9e33746 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -28,13 +28,19 @@ #include "rate.h" #include "led.h" +#define IEEE80211_MAX_NULLFUNC_TRIES 2 #define IEEE80211_MAX_PROBE_TRIES 5 /* - * beacon loss detection timeout - * XXX: should depend on beacon interval + * Beacon loss timeout is calculated as N frames times the + * advertised beacon interval. This may need to be somewhat + * higher than what hardware might detect to account for + * delays in the host processing frames. But since we also + * probe on beacon miss before declaring the connection lost + * default to what we want. */ -#define IEEE80211_BEACON_LOSS_TIME (2 * HZ) +#define IEEE80211_BEACON_LOSS_COUNT 7 + /* * Time the connection can be idle before we probe * it to see if we can still talk to the AP. @@ -121,7 +127,7 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) return; mod_timer(&sdata->u.mgd.bcn_mon_timer, - round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME)); + round_jiffies_up(jiffies + sdata->u.mgd.beacon_timeout)); } void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) @@ -619,11 +625,12 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) /* * Go to full PSM if the user configures a very low * latency requirement. - * The 2 second value is there for compatibility until - * the PM_QOS_NETWORK_LATENCY is configured with real - * values. + * The 2000 second value is there for compatibility + * until the PM_QOS_NETWORK_LATENCY is configured + * with real values. */ - if (latency > 1900000000 && latency != 2000000000) + if (latency > (1900 * USEC_PER_MSEC) && + latency != (2000 * USEC_PER_SEC)) timeout = 0; else timeout = 100; @@ -871,6 +878,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= ieee80211_handle_bss_capability(sdata, cbss->capability, bss->has_erp_value, bss->erp_value); + sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec( + IEEE80211_BEACON_LOSS_COUNT * bss_conf->beacon_int)); + sdata->u.mgd.associated = cbss; memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); @@ -1026,6 +1036,54 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, ieee80211_sta_reset_conn_monitor(sdata); } +static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + if (!(ifmgd->flags & (IEEE80211_STA_BEACON_POLL | + IEEE80211_STA_CONNECTION_POLL))) + return; + + ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | + IEEE80211_STA_BEACON_POLL); + mutex_lock(&sdata->local->iflist_mtx); + ieee80211_recalc_ps(sdata->local, -1); + mutex_unlock(&sdata->local->iflist_mtx); + + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + return; + + /* + * We've received a probe response, but are not sure whether + * we have or will be receiving any beacons or data, so let's + * schedule the timers again, just in case. + */ + ieee80211_sta_reset_beacon_monitor(sdata); + + mod_timer(&ifmgd->conn_mon_timer, + round_jiffies_up(jiffies + + IEEE80211_CONNECTION_IDLE_TIME)); +} + +void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr, bool ack) +{ + if (!ieee80211_is_data(hdr->frame_control)) + return; + + if (ack) + ieee80211_sta_reset_conn_monitor(sdata); + + if (ieee80211_is_nullfunc(hdr->frame_control) && + sdata->u.mgd.probe_send_count > 0) { + if (ack) + sdata->u.mgd.probe_send_count = 0; + else + sdata->u.mgd.nullfunc_failed = true; + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + } +} + static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1041,8 +1099,20 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) if (ifmgd->probe_send_count >= unicast_limit) dst = NULL; - ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); - ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0); + /* + * When the hardware reports an accurate Tx ACK status, it's + * better to send a nullfunc frame instead of a probe request, + * as it will kick us off the AP quickly if we aren't associated + * anymore. The timeout will be reset if the frame is ACKed by + * the AP. + */ + if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + ifmgd->nullfunc_failed = false; + ieee80211_send_nullfunc(sdata->local, sdata, 0); + } else { + ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); + ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0); + } ifmgd->probe_send_count++; ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; @@ -1108,6 +1178,30 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); } +struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct sk_buff *skb; + const u8 *ssid; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + return NULL; + + ASSERT_MGD_MTX(ifmgd); + + if (!ifmgd->associated) + return NULL; + + ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); + skb = ieee80211_build_probe_req(sdata, ifmgd->associated->bssid, + ssid + 2, ssid[1], NULL, 0); + + return skb; +} +EXPORT_SYMBOL(ieee80211_ap_probereq_get); + static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1485,29 +1579,8 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); if (ifmgd->associated && - memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0 && - ifmgd->flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL)) { - ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | - IEEE80211_STA_BEACON_POLL); - mutex_lock(&sdata->local->iflist_mtx); - ieee80211_recalc_ps(sdata->local, -1); - mutex_unlock(&sdata->local->iflist_mtx); - - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) - return; - - /* - * We've received a probe response, but are not sure whether - * we have or will be receiving any beacons or data, so let's - * schedule the timers again, just in case. - */ - ieee80211_sta_reset_beacon_monitor(sdata); - - mod_timer(&ifmgd->conn_mon_timer, - round_jiffies_up(jiffies + - IEEE80211_CONNECTION_IDLE_TIME)); - } + memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0) + ieee80211_reset_ap_probe(sdata); } /* @@ -1845,6 +1918,31 @@ static void ieee80211_sta_timer(unsigned long data) ieee80211_queue_work(&local->hw, &sdata->work); } +static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, + u8 *bssid) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | + IEEE80211_STA_BEACON_POLL); + + ieee80211_set_disassoc(sdata, true, true); + mutex_unlock(&ifmgd->mtx); + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); + /* + * must be outside lock due to cfg80211, + * but that's not a problem. + */ + ieee80211_send_deauth_disassoc(sdata, bssid, + IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, + NULL, true); + mutex_lock(&ifmgd->mtx); +} + void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -1857,12 +1955,49 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL) && ifmgd->associated) { u8 bssid[ETH_ALEN]; + int max_tries; memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - if (time_is_after_jiffies(ifmgd->probe_timeout)) - run_again(ifmgd, ifmgd->probe_timeout); - else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) { + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + max_tries = IEEE80211_MAX_NULLFUNC_TRIES; + else + max_tries = IEEE80211_MAX_PROBE_TRIES; + + /* ACK received for nullfunc probing frame */ + if (!ifmgd->probe_send_count) + ieee80211_reset_ap_probe(sdata); + else if (ifmgd->nullfunc_failed) { + if (ifmgd->probe_send_count < max_tries) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + wiphy_debug(local->hw.wiphy, + "%s: No ack for nullfunc frame to" + " AP %pM, try %d\n", + sdata->name, bssid, + ifmgd->probe_send_count); +#endif + ieee80211_mgd_probe_ap_send(sdata); + } else { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + wiphy_debug(local->hw.wiphy, + "%s: No ack for nullfunc frame to" + " AP %pM, disconnecting.\n", + sdata->name, bssid); +#endif + ieee80211_sta_connection_lost(sdata, bssid); + } + } else if (time_is_after_jiffies(ifmgd->probe_timeout)) + run_again(ifmgd, ifmgd->probe_timeout); + else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + wiphy_debug(local->hw.wiphy, + "%s: Failed to send nullfunc to AP %pM" + " after %dms, disconnecting.\n", + sdata->name, + bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); +#endif + ieee80211_sta_connection_lost(sdata, bssid); + } else if (ifmgd->probe_send_count < max_tries) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, "%s: No probe response from AP %pM" @@ -1877,27 +2012,13 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) * We actually lost the connection ... or did we? * Let's make sure! */ - ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | - IEEE80211_STA_BEACON_POLL); wiphy_debug(local->hw.wiphy, "%s: No probe response from AP %pM" " after %dms, disconnecting.\n", sdata->name, bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); - ieee80211_set_disassoc(sdata, true, true); - mutex_unlock(&ifmgd->mtx); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. - */ - ieee80211_send_deauth_disassoc(sdata, bssid, - IEEE80211_STYPE_DEAUTH, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - NULL, true); - mutex_lock(&ifmgd->mtx); + + ieee80211_sta_connection_lost(sdata, bssid); } } @@ -1988,6 +2109,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) add_timer(&ifmgd->timer); if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running)) add_timer(&ifmgd->chswitch_timer); + ieee80211_sta_reset_beacon_monitor(sdata); + ieee80211_restart_sta_timer(sdata); } #endif diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 4b564091e51d..b4e52676f3fb 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -14,6 +14,7 @@ */ #include <net/mac80211.h> #include "ieee80211_i.h" +#include "driver-trace.h" /* * inform AP that we will go to sleep so that it will buffer the frames @@ -190,3 +191,87 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, } mutex_unlock(&local->iflist_mtx); } + +static void ieee80211_hw_roc_start(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, hw_roc_start); + struct ieee80211_sub_if_data *sdata; + + mutex_lock(&local->mtx); + + if (!local->hw_roc_channel) { + mutex_unlock(&local->mtx); + return; + } + + ieee80211_recalc_idle(local); + + if (local->hw_roc_skb) { + sdata = IEEE80211_DEV_TO_SUB_IF(local->hw_roc_dev); + ieee80211_tx_skb(sdata, local->hw_roc_skb); + local->hw_roc_skb = NULL; + } else { + cfg80211_ready_on_channel(local->hw_roc_dev, + local->hw_roc_cookie, + local->hw_roc_channel, + local->hw_roc_channel_type, + local->hw_roc_duration, + GFP_KERNEL); + } + + mutex_unlock(&local->mtx); +} + +void ieee80211_ready_on_channel(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_ready_on_channel(local); + + ieee80211_queue_work(hw, &local->hw_roc_start); +} +EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel); + +static void ieee80211_hw_roc_done(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, hw_roc_done); + + mutex_lock(&local->mtx); + + if (!local->hw_roc_channel) { + mutex_unlock(&local->mtx); + return; + } + + if (!local->hw_roc_for_tx) + cfg80211_remain_on_channel_expired(local->hw_roc_dev, + local->hw_roc_cookie, + local->hw_roc_channel, + local->hw_roc_channel_type, + GFP_KERNEL); + + local->hw_roc_channel = NULL; + local->hw_roc_cookie = 0; + + ieee80211_recalc_idle(local); + + mutex_unlock(&local->mtx); +} + +void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_remain_on_channel_expired(local); + + ieee80211_queue_work(hw, &local->hw_roc_done); +} +EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); + +void ieee80211_hw_roc_setup(struct ieee80211_local *local) +{ + INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); + INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done); +} diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 33f76993da08..3d5a2cb835c4 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -211,7 +211,8 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc); } -static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) +static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, + struct ieee80211_supported_band *sband) { u8 i; @@ -222,7 +223,7 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) if (basic_rates & (1 << *idx)) return; /* selected rate is a basic rate */ - for (i = *idx + 1; i <= max_rate_idx; i++) { + for (i = *idx + 1; i <= sband->n_bitrates; i++) { if (basic_rates & (1 << i)) { *idx = i; return; @@ -237,16 +238,25 @@ bool rate_control_send_low(struct ieee80211_sta *sta, struct ieee80211_tx_rate_control *txrc) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); + struct ieee80211_supported_band *sband = txrc->sband; + int mcast_rate; if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) { info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta); info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : txrc->hw->max_rate_tries; - if (!sta && txrc->ap) + if (!sta && txrc->bss) { + mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; + if (mcast_rate > 0) { + info->control.rates[0].idx = mcast_rate - 1; + return true; + } + rc_send_low_broadcast(&info->control.rates[0].idx, txrc->bss_conf->basic_rates, - txrc->sband->n_bitrates); + sband); + } return true; } return false; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 2a18d6602d4a..165a4518bb48 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -371,7 +371,10 @@ minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, stru if (likely(sta->ampdu_mlme.tid_tx[tid])) return; - ieee80211_start_tx_ba_session(pubsta, tid); + if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO) + return; + + ieee80211_start_tx_ba_session(pubsta, tid, 5000); } static void @@ -407,8 +410,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->ampdu_len += info->status.ampdu_len; if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { - mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); - mi->sample_tries = 3; + mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); + mi->sample_tries = 2; mi->sample_count--; } @@ -506,7 +509,9 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (!mr->retry_updated) minstrel_calc_retransmit(mp, mi, index); - if (mr->probability < MINSTREL_FRAC(20, 100)) + if (sample) + rate->count = 1; + else if (mr->probability < MINSTREL_FRAC(20, 100)) rate->count = 2; else if (rtscts) rate->count = mr->retry_count_rtscts; @@ -562,7 +567,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) */ if (minstrel_get_duration(sample_idx) > minstrel_get_duration(mi->max_tp_rate)) { - if (mr->sample_skipped < 10) + if (mr->sample_skipped < 20) goto next; if (mi->sample_slow++ > 2) @@ -586,6 +591,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_priv *mp = priv; int sample_idx; + bool sample = false; if (rate_control_send_low(sta, priv_sta, txrc)) return; @@ -596,10 +602,11 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, info->flags |= mi->tx_flags; sample_idx = minstrel_get_sample_rate(mp, mi); if (sample_idx >= 0) { + sample = true; minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, txrc, true, false); minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, - txrc, false, true); + txrc, false, false); info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } else { minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, @@ -607,7 +614,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, txrc, false, true); } - minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true); + minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, !sample); ar[3].count = 0; ar[3].idx = -1; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 902b03ee8f60..a6701ed87f0d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -533,10 +533,13 @@ static inline u16 seq_sub(u16 sq1, u16 sq2) static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, - int index, - struct sk_buff_head *frames) + int index) { + struct ieee80211_local *local = hw_to_local(hw); struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; + struct ieee80211_rx_status *status; + + lockdep_assert_held(&tid_agg_rx->reorder_lock); if (!skb) goto no_frame; @@ -544,7 +547,9 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw, /* release the frame from the reorder ring buffer */ tid_agg_rx->stored_mpdu_num--; tid_agg_rx->reorder_buf[index] = NULL; - __skb_queue_tail(frames, skb); + status = IEEE80211_SKB_RXCB(skb); + status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE; + skb_queue_tail(&local->rx_skb_queue, skb); no_frame: tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); @@ -552,15 +557,16 @@ no_frame: static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, - u16 head_seq_num, - struct sk_buff_head *frames) + u16 head_seq_num) { int index; + lockdep_assert_held(&tid_agg_rx->reorder_lock); + while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; - ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames); + ieee80211_release_reorder_frame(hw, tid_agg_rx, index); } } @@ -576,11 +582,12 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw, #define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw, - struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff_head *frames) + struct tid_ampdu_rx *tid_agg_rx) { int index, j; + lockdep_assert_held(&tid_agg_rx->reorder_lock); + /* release the buffer until next missing frame */ index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; @@ -606,8 +613,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw, wiphy_debug(hw->wiphy, "release an RX reorder frame due to timeout on earlier frames\n"); #endif - ieee80211_release_reorder_frame(hw, tid_agg_rx, - j, frames); + ieee80211_release_reorder_frame(hw, tid_agg_rx, j); /* * Increment the head seq# also for the skipped slots. @@ -617,31 +623,11 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw, skipped = 0; } } else while (tid_agg_rx->reorder_buf[index]) { - ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames); + ieee80211_release_reorder_frame(hw, tid_agg_rx, index); index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; } - /* - * Disable the reorder release timer for now. - * - * The current implementation lacks a proper locking scheme - * which would protect vital statistic and debug counters - * from being updated by two different but concurrent BHs. - * - * More information about the topic is available from: - * - thread: http://marc.info/?t=128635927000001 - * - * What was wrong: - * => http://marc.info/?l=linux-wireless&m=128636170811964 - * "Basically the thing is that until your patch, the data - * in the struct didn't actually need locking because it - * was accessed by the RX path only which is not concurrent." - * - * List of what needs to be fixed: - * => http://marc.info/?l=linux-wireless&m=128656352920957 - * - if (tid_agg_rx->stored_mpdu_num) { j = index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; @@ -660,10 +646,6 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw, } else { del_timer(&tid_agg_rx->reorder_timer); } - */ - -set_release_timer: - return; } /* @@ -673,8 +655,7 @@ set_release_timer: */ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff *skb, - struct sk_buff_head *frames) + struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 sc = le16_to_cpu(hdr->seq_ctrl); @@ -683,10 +664,11 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, int index; bool ret = true; + spin_lock(&tid_agg_rx->reorder_lock); + buf_size = tid_agg_rx->buf_size; head_seq_num = tid_agg_rx->head_seq_num; - spin_lock(&tid_agg_rx->reorder_lock); /* frame with out of date sequence number */ if (seq_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); @@ -700,8 +682,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) { head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); /* release stored frames up to new head to stack */ - ieee80211_release_reorder_frames(hw, tid_agg_rx, head_seq_num, - frames); + ieee80211_release_reorder_frames(hw, tid_agg_rx, head_seq_num); } /* Now the new frame is always in the range of the reordering buffer */ @@ -729,7 +710,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, tid_agg_rx->reorder_buf[index] = skb; tid_agg_rx->reorder_time[index] = jiffies; tid_agg_rx->stored_mpdu_num++; - ieee80211_sta_reorder_release(hw, tid_agg_rx, frames); + ieee80211_sta_reorder_release(hw, tid_agg_rx); out: spin_unlock(&tid_agg_rx->reorder_lock); @@ -740,8 +721,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, * Reorder MPDUs from A-MPDUs, keeping them on a buffer. Returns * true if the MPDU was buffered, false if it should be processed. */ -static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, - struct sk_buff_head *frames) +static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) { struct sk_buff *skb = rx->skb; struct ieee80211_local *local = rx->local; @@ -796,11 +776,11 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, * sure that we cannot get to it any more before doing * anything with it. */ - if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) + if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb)) return; dont_reorder: - __skb_queue_tail(frames, skb); + skb_queue_tail(&local->rx_skb_queue, skb); } static ieee80211_rx_result debug_noinline @@ -948,12 +928,31 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * have been expected. */ struct ieee80211_key *key = NULL; + struct ieee80211_sub_if_data *sdata = rx->sdata; + int i; + if (ieee80211_is_mgmt(fc) && is_multicast_ether_addr(hdr->addr1) && (key = rcu_dereference(rx->sdata->default_mgmt_key))) rx->key = key; - else if ((key = rcu_dereference(rx->sdata->default_key))) - rx->key = key; + else { + if (rx->sta) { + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + key = rcu_dereference(rx->sta->gtk[i]); + if (key) + break; + } + } + if (!key) { + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + key = rcu_dereference(sdata->keys[i]); + if (key) + break; + } + } + if (key) + rx->key = key; + } return RX_CONTINUE; } else { u8 keyid; @@ -1102,8 +1101,6 @@ static void ap_sta_ps_end(struct sta_info *sta) atomic_dec(&sdata->bss->num_sta_ps); - clear_sta_flags(sta, WLAN_STA_PS_STA); - #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n", sdata->name, sta->sta.addr, sta->sta.aid); @@ -1158,12 +1155,14 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; + ewma_add(&sta->avg_signal, -status->signal); /* * Change STA power saving mode only at the end of a frame * exchange sequence. */ if (!ieee80211_has_morefrags(hdr->frame_control) && + !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { if (test_sta_flags(sta, WLAN_STA_PS_STA)) { @@ -1515,12 +1514,30 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) { if (unlikely(!ieee80211_has_protected(fc) && ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && - rx->key)) + rx->key)) { + if (ieee80211_is_deauth(fc)) + cfg80211_send_unprot_deauth(rx->sdata->dev, + rx->skb->data, + rx->skb->len); + else if (ieee80211_is_disassoc(fc)) + cfg80211_send_unprot_disassoc(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; + } /* BIP does not use Protected field, so need to check MMIE */ if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && - ieee80211_get_mmie_keyidx(rx->skb) < 0)) + ieee80211_get_mmie_keyidx(rx->skb) < 0)) { + if (ieee80211_is_deauth(fc)) + cfg80211_send_unprot_deauth(rx->sdata->dev, + rx->skb->data, + rx->skb->len); + else if (ieee80211_is_disassoc(fc)) + cfg80211_send_unprot_disassoc(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; + } /* * When using MFP, Action frames are not allowed prior to * having configured keys. @@ -1791,6 +1808,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!fwd_skb && net_ratelimit()) printk(KERN_DEBUG "%s: failed to clone mesh frame\n", sdata->name); + if (!fwd_skb) + goto out; fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); @@ -1828,6 +1847,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } } + out: if (is_multicast_ether_addr(hdr->addr1) || sdata->dev->flags & IFF_PROMISC) return RX_CONTINUE; @@ -1872,9 +1892,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) dev->stats.rx_packets++; dev->stats.rx_bytes += rx->skb->len; - if (ieee80211_is_data(hdr->frame_control) && - !is_multicast_ether_addr(hdr->addr1) && - local->hw.conf.dynamic_ps_timeout > 0 && local->ps_sdata) { + if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && + !is_multicast_ether_addr(((struct ethhdr *)rx->skb->data)->h_dest)) { mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } @@ -1885,7 +1904,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) } static ieee80211_rx_result debug_noinline -ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) +ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_hw *hw = &local->hw; @@ -1923,9 +1942,11 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(tid_agg_rx->timeout)); + spin_lock(&tid_agg_rx->reorder_lock); /* release stored frames up to start of BAR */ - ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num, - frames); + ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num); + spin_unlock(&tid_agg_rx->reorder_lock); + kfree_skb(skb); return RX_QUEUED; } @@ -2116,10 +2137,13 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_MESH_PLINK: - case WLAN_CATEGORY_MESH_PATH_SEL: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; goto queue; + case WLAN_CATEGORY_MESH_PATH_SEL: + if (!mesh_path_sel_is_hwmp(sdata)) + break; + goto queue; } return RX_CONTINUE; @@ -2247,6 +2271,10 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) break; case cpu_to_le16(IEEE80211_STYPE_DEAUTH): case cpu_to_le16(IEEE80211_STYPE_DISASSOC): + if (is_multicast_ether_addr(mgmt->da) && + !is_broadcast_ether_addr(mgmt->da)) + return RX_DROP_MONITOR; + /* process only for station */ if (sdata->vif.type != NL80211_IFTYPE_STATION) return RX_DROP_MONITOR; @@ -2433,8 +2461,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, } } -static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, - struct sk_buff_head *frames) +static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) { ieee80211_rx_result res = RX_DROP_MONITOR; struct sk_buff *skb; @@ -2446,7 +2473,15 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, goto rxh_next; \ } while (0); - while ((skb = __skb_dequeue(frames))) { + spin_lock(&rx->local->rx_skb_queue.lock); + if (rx->local->running_rx_handler) + goto unlock; + + rx->local->running_rx_handler = true; + + while ((skb = __skb_dequeue(&rx->local->rx_skb_queue))) { + spin_unlock(&rx->local->rx_skb_queue.lock); + /* * all the other fields are valid across frames * that belong to an aMPDU since they are on the @@ -2469,12 +2504,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, CALL_RXH(ieee80211_rx_h_mesh_fwding); #endif CALL_RXH(ieee80211_rx_h_data) - - /* special treatment -- needs the queue */ - res = ieee80211_rx_h_ctrl(rx, frames); - if (res != RX_CONTINUE) - goto rxh_next; - + CALL_RXH(ieee80211_rx_h_ctrl); CALL_RXH(ieee80211_rx_h_mgmt_check) CALL_RXH(ieee80211_rx_h_action) CALL_RXH(ieee80211_rx_h_userspace_mgmt) @@ -2483,18 +2513,20 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, rxh_next: ieee80211_rx_handlers_result(rx, res); - + spin_lock(&rx->local->rx_skb_queue.lock); #undef CALL_RXH } + + rx->local->running_rx_handler = false; + + unlock: + spin_unlock(&rx->local->rx_skb_queue.lock); } static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) { - struct sk_buff_head reorder_release; ieee80211_rx_result res = RX_DROP_MONITOR; - __skb_queue_head_init(&reorder_release); - #define CALL_RXH(rxh) \ do { \ res = rxh(rx); \ @@ -2505,9 +2537,9 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) CALL_RXH(ieee80211_rx_h_passive_scan) CALL_RXH(ieee80211_rx_h_check) - ieee80211_rx_reorder_ampdu(rx, &reorder_release); + ieee80211_rx_reorder_ampdu(rx); - ieee80211_rx_handlers(rx, &reorder_release); + ieee80211_rx_handlers(rx); return; rxh_next: @@ -2517,13 +2549,11 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) } /* - * This function makes calls into the RX path. Therefore the - * caller must hold the sta_info->lock and everything has to - * be under rcu_read_lock protection as well. + * This function makes calls into the RX path, therefore + * it has to be invoked under RCU read lock. */ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) { - struct sk_buff_head frames; struct ieee80211_rx_data rx = { .sta = sta, .sdata = sta->sdata, @@ -2536,13 +2566,11 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) if (!tid_agg_rx) return; - __skb_queue_head_init(&frames); - spin_lock(&tid_agg_rx->reorder_lock); - ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx, &frames); + ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx); spin_unlock(&tid_agg_rx->reorder_lock); - ieee80211_rx_handlers(&rx, &frames); + ieee80211_rx_handlers(&rx); } /* main receive path */ @@ -2741,6 +2769,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) return; + goto out; } } @@ -2780,6 +2809,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, return; } + out: dev_kfree_skb(skb); } @@ -2875,6 +2905,9 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) return; } + ieee80211_tpt_led_trig_rx(local, + ((struct ieee80211_hdr *)skb->data)->frame_control, + skb->len); __ieee80211_rx_handle_packet(hw, skb); rcu_read_unlock(); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 6d8f897d8763..c426504ed1cf 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -199,8 +199,11 @@ static void sta_unblock(struct work_struct *wk) if (!test_sta_flags(sta, WLAN_STA_PS_STA)) ieee80211_sta_ps_deliver_wakeup(sta); - else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) + else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) { + clear_sta_flags(sta, WLAN_STA_PS_DRIVER); ieee80211_sta_ps_deliver_poll_response(sta); + } else + clear_sta_flags(sta, WLAN_STA_PS_DRIVER); } static int sta_prepare_rate_control(struct ieee80211_local *local, @@ -241,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->local = local; sta->sdata = sdata; + ewma_init(&sta->avg_signal, 1024, 8); + if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; @@ -880,6 +885,13 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, } EXPORT_SYMBOL(ieee80211_find_sta); +static void clear_sta_ps_flags(void *_sta) +{ + struct sta_info *sta = _sta; + + clear_sta_flags(sta, WLAN_STA_PS_DRIVER | WLAN_STA_PS_STA); +} + /* powersave support code */ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { @@ -894,7 +906,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) /* Send all buffered frames to the station */ sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); - buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf); + buffered = ieee80211_add_pending_skbs_fn(local, &sta->ps_tx_buf, + clear_sta_ps_flags, sta); sent += buffered; local->total_ps_buffered -= buffered; @@ -973,7 +986,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, if (block) set_sta_flags(sta, WLAN_STA_PS_DRIVER); - else + else if (test_sta_flags(sta, WLAN_STA_PS_DRIVER)) ieee80211_queue_work(hw, &sta->drv_unblock_wk); } EXPORT_SYMBOL(ieee80211_sta_block_awake); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9265acadef32..bbdd2a86a94b 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/if_ether.h> #include <linux/workqueue.h> +#include <linux/average.h> #include "key.h" /** @@ -77,23 +78,26 @@ enum ieee80211_sta_info_flags { * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect * @dialog_token: dialog token for aggregation session + * @timeout: session timeout value to be filled in ADDBA requests * @state: session state (see above) * @stop_initiator: initiator of a session stop * @tx_stop: TX DelBA frame when stopping * - * This structure is protected by RCU and the per-station - * spinlock. Assignments to the array holding it must hold - * the spinlock, only the TX path can access it under RCU - * lock-free if, and only if, the state has the flag - * %HT_AGG_STATE_OPERATIONAL set. Otherwise, the TX path - * must also acquire the spinlock and re-check the state, - * see comments in the tx code touching it. + * This structure's lifetime is managed by RCU, assignments to + * the array holding it must hold the aggregation mutex. + * + * The TX path can access it under RCU lock-free if, and + * only if, the state has the flag %HT_AGG_STATE_OPERATIONAL + * set. Otherwise, the TX path must also acquire the spinlock + * and re-check the state, see comments in the tx code + * touching it. */ struct tid_ampdu_tx { struct rcu_head rcu_head; struct timer_list addba_resp_timer; struct sk_buff_head pending; unsigned long state; + u16 timeout; u8 dialog_token; u8 stop_initiator; bool tx_stop; @@ -115,15 +119,13 @@ struct tid_ampdu_tx { * @rcu_head: RCU head used for freeing this struct * @reorder_lock: serializes access to reorder buffer, see below. * - * This structure is protected by RCU and the per-station - * spinlock. Assignments to the array holding it must hold - * the spinlock. + * This structure's lifetime is managed by RCU, assignments to + * the array holding it must hold the aggregation mutex. * - * The @reorder_lock is used to protect the variables and - * arrays such as @reorder_buf, @reorder_time, @head_seq_num, - * @stored_mpdu_num and @reorder_time from being corrupted by - * concurrent access of the RX path and the expired frame - * release timer. + * The @reorder_lock is used to protect the members of this + * struct, except for @timeout, @buf_size and @dialog_token, + * which are constant across the lifetime of the struct (the + * dialog token being used only for debugging). */ struct tid_ampdu_rx { struct rcu_head rcu_head; @@ -224,6 +226,7 @@ enum plink_state { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA + * @avg_signal: moving average of signal of received frames from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -248,6 +251,7 @@ enum plink_state { * @sta: station information we share with the driver * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver + * @lost_packets: number of consecutive lost packets */ struct sta_info { /* General information, mostly static */ @@ -291,6 +295,7 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; + struct ewma avg_signal; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ @@ -335,6 +340,8 @@ struct sta_info { } debugfs; #endif + unsigned int lost_packets; + /* keep last! */ struct ieee80211_sta sta; }; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 3153c19893b8..38a797217a91 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -157,6 +157,15 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) } } +/* + * Use a static threshold for now, best value to be determined + * by testing ... + * Should it depend on: + * - on # of retransmissions + * - current throughput (higher value for higher tpt)? + */ +#define STA_LOST_PKT_THRESHOLD 50 + void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; @@ -173,6 +182,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) int retry_count = -1, i; int rates_idx = -1; bool send_to_cooked; + bool acked; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { /* the HW cannot have attempted that rate */ @@ -198,8 +208,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (memcmp(hdr->addr2, sta->sdata->vif.addr, ETH_ALEN)) continue; - if (!(info->flags & IEEE80211_TX_STAT_ACK) && - test_sta_flags(sta, WLAN_STA_PS_STA)) { + acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + if (!acked && test_sta_flags(sta, WLAN_STA_PS_STA)) { /* * The STA is in power save mode, so assume * that this TX packet failed because of that. @@ -231,7 +241,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_unlock(); return; } else { - if (!(info->flags & IEEE80211_TX_STAT_ACK)) + if (!acked) sta->tx_retry_failed++; sta->tx_retry_count += retry_count; } @@ -240,9 +250,25 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (ieee80211_vif_is_mesh(&sta->sdata->vif)) ieee80211s_update_metric(local, sta, skb); - if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && - (info->flags & IEEE80211_TX_STAT_ACK)) + if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked) ieee80211_frame_acked(sta, skb); + + if ((sta->sdata->vif.type == NL80211_IFTYPE_STATION) && + (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) + ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data, acked); + + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + if (info->flags & IEEE80211_TX_STAT_ACK) { + if (sta->lost_packets) + sta->lost_packets = 0; + } else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) { + cfg80211_cqm_pktloss_notify(sta->sdata->dev, + sta->sta.addr, + sta->lost_packets, + GFP_ATOMIC); + sta->lost_packets = 0; + } + } } rcu_read_unlock(); @@ -295,10 +321,23 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) msecs_to_jiffies(10)); } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) + if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { + struct ieee80211_work *wk; + + rcu_read_lock(); + list_for_each_entry_rcu(wk, &local->work_list, list) { + if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX) + continue; + if (wk->offchan_tx.frame != skb) + continue; + wk->offchan_tx.frame = NULL; + break; + } + rcu_read_unlock(); cfg80211_mgmt_tx_status( skb->dev, (unsigned long) skb, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); + } /* this was a transmitted frame, but now we want to reuse it */ skb_orphan(skb); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 96c594309506..5950e3abead9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -539,7 +539,11 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) ieee80211_is_robust_mgmt_frame(hdr) && (key = rcu_dereference(tx->sdata->default_mgmt_key))) tx->key = key; - else if ((key = rcu_dereference(tx->sdata->default_key))) + else if (is_multicast_ether_addr(hdr->addr1) && + (key = rcu_dereference(tx->sdata->default_multicast_key))) + tx->key = key; + else if (!is_multicast_ether_addr(hdr->addr1) && + (key = rcu_dereference(tx->sdata->default_unicast_key))) tx->key = key; else if (tx->sdata->drop_unencrypted && (tx->skb->protocol != tx->sdata->control_port_protocol) && @@ -622,7 +626,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; + txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || + tx->sdata->vif.type == NL80211_IFTYPE_ADHOC); /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { @@ -665,10 +670,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (unlikely(info->control.rates[0].idx < 0)) return TX_DROP; - if (txrc.reported_rate.idx < 0) + if (txrc.reported_rate.idx < 0) { txrc.reported_rate = info->control.rates[0]; - - if (tx->sta) + if (tx->sta && ieee80211_is_data(hdr->frame_control)) + tx->sta->last_tx_rate = txrc.reported_rate; + } else if (tx->sta) tx->sta->last_tx_rate = txrc.reported_rate; if (unlikely(!info->control.rates[0].count)) @@ -1033,6 +1039,7 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, struct ieee80211_radiotap_header *rthdr = (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; + bool hw_frag; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, NULL); @@ -1042,6 +1049,9 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; tx->flags &= ~IEEE80211_TX_FRAGMENTED; + /* packet is fragmented in HW if we have a non-NULL driver callback */ + hw_frag = (tx->local->ops->set_frag_threshold != NULL); + /* * for every radiotap entry that is present * (ieee80211_radiotap_iterator_next returns -ENOENT when no more @@ -1078,7 +1088,8 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) + if ((*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) && + !hw_frag) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -1181,8 +1192,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, /* * Set this flag (used below to indicate "automatic fragmentation"), * it will be cleared/left by radiotap as desired. + * Only valid when fragmentation is done by the stack. */ - tx->flags |= IEEE80211_TX_FRAGMENTED; + if (!local->ops->set_frag_threshold) + tx->flags |= IEEE80211_TX_FRAGMENTED; /* process and remove the injection radiotap header */ if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { @@ -1284,6 +1297,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, while (skb) { int q = skb_get_queue_mapping(skb); + __le16 fc; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); ret = IEEE80211_TX_OK; @@ -1326,6 +1340,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, else info->control.sta = NULL; + fc = ((struct ieee80211_hdr *)skb->data)->frame_control; ret = drv_tx(local, skb); if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) { dev_kfree_skb(skb); @@ -1336,6 +1351,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, return IEEE80211_TX_AGAIN; } + ieee80211_tpt_led_trig_tx(local, fc, len); *skbp = skb = next; ieee80211_led_tx(local, 1); fragm = true; @@ -1533,8 +1549,10 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, if (skb_header_cloned(skb)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); - else + else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); + else + return 0; if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) { wiphy_debug(local->hw.wiphy, @@ -1587,7 +1605,12 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, list) { if (!ieee80211_sdata_running(tmp_sdata)) continue; - if (tmp_sdata->vif.type != NL80211_IFTYPE_AP) + if (tmp_sdata->vif.type == + NL80211_IFTYPE_MONITOR || + tmp_sdata->vif.type == + NL80211_IFTYPE_AP_VLAN || + tmp_sdata->vif.type == + NL80211_IFTYPE_WDS) continue; if (compare_ether_addr(tmp_sdata->vif.addr, hdr->addr2) == 0) { @@ -1721,26 +1744,25 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; int ret = NETDEV_TX_BUSY, head_need; u16 ethertype, hdrlen, meshhdrlen = 0; __le16 fc; struct ieee80211_hdr hdr; struct ieee80211s_hdr mesh_hdr __maybe_unused; + struct mesh_path *mppath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; int nh_pos, h_pos; struct sta_info *sta = NULL; u32 sta_flags = 0; + struct sk_buff *tmp_skb; if (unlikely(skb->len < ETH_HLEN)) { ret = NETDEV_TX_OK; goto fail; } - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - /* convert Ethernet header to proper 802.11 header (based on * operation mode) */ ethertype = (skb->data[12] << 8) | skb->data[13]; @@ -1789,16 +1811,23 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, ret = NETDEV_TX_OK; goto fail; } + if (!is_multicast_ether_addr(skb->data)) + mppath = mpp_path_lookup(skb->data, sdata); + /* + * Do not use address extension, if it is a packet from + * the same interface and the destination is not being + * proxied by any other mest point. + */ if (compare_ether_addr(sdata->vif.addr, - skb->data + ETH_ALEN) == 0) { + skb->data + ETH_ALEN) == 0 && + (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) { hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, skb->data, skb->data + ETH_ALEN); meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, - sdata, NULL, NULL, NULL); + sdata, NULL, NULL); } else { /* packet from other interface */ - struct mesh_path *mppath; int is_mesh_mcast = 1; const u8 *mesh_da; @@ -1809,8 +1838,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, else { static const u8 bcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - - mppath = mpp_path_lookup(skb->data, sdata); if (mppath) { /* RA TA mDA mSA AE:DA SA */ mesh_da = mppath->mpp; @@ -1828,13 +1855,11 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, ieee80211_new_mesh_header(&mesh_hdr, sdata, skb->data + ETH_ALEN, - NULL, NULL); else meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, - NULL, skb->data, skb->data + ETH_ALEN); @@ -1913,6 +1938,20 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } + /* + * If the skb is shared we need to obtain our own copy. + */ + if (skb_shared(skb)) { + tmp_skb = skb; + skb = skb_clone(skb, GFP_ATOMIC); + kfree_skb(tmp_skb); + + if (!skb) { + ret = NETDEV_TX_OK; + goto fail; + } + } + hdr.frame_control = fc; hdr.duration_id = 0; hdr.seq_ctrl = 0; @@ -1931,6 +1970,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, encaps_len = 0; } + nh_pos = skb_network_header(skb) - skb->data; + h_pos = skb_transport_header(skb) - skb->data; + skb_pull(skb, skip_header_bytes); nh_pos -= skip_header_bytes; h_pos -= skip_header_bytes; @@ -1997,6 +2039,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, skb_set_network_header(skb, nh_pos); skb_set_transport_header(skb, h_pos); + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); dev->trans_start = jiffies; @@ -2257,7 +2300,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, u8 *pos; /* headroom, head length, tail length and maximum TIM length */ - skb = dev_alloc_skb(local->tx_headroom + 400); + skb = dev_alloc_skb(local->tx_headroom + 400 + + sdata->u.mesh.vendor_ie_len); if (!skb) goto out; @@ -2301,7 +2345,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - txrc.ap = true; + txrc.bss = true; rate_control_get_rate(sdata, NULL, &txrc); info->control.vif = vif; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0b6fc92bc0d7..cf68700abffa 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -368,8 +368,9 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } -int ieee80211_add_pending_skbs(struct ieee80211_local *local, - struct sk_buff_head *skbs) +int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, + struct sk_buff_head *skbs, + void (*fn)(void *data), void *data) { struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb; @@ -394,6 +395,9 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local, __skb_queue_tail(&local->pending[queue], skb); } + if (fn) + fn(data); + for (i = 0; i < hw->queues; i++) __ieee80211_wake_queue(hw, i, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); @@ -402,6 +406,12 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local, return ret; } +int ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs) +{ + return ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); +} + void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason) { @@ -1011,9 +1021,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, return pos - buffer; } -void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len) +struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, + u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -1027,7 +1038,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, if (!buf) { printk(KERN_DEBUG "%s: failed to allocate temporary IE " "buffer\n", sdata->name); - return; + return NULL; } chan = ieee80211_frequency_to_channel( @@ -1050,8 +1061,20 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - ieee80211_tx_skb(sdata, skb); kfree(buf); + + return skb; +} + +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) +{ + struct sk_buff *skb; + + skb = ieee80211_build_probe_req(sdata, dst, ssid, ssid_len, ie, ie_len); + if (skb) + ieee80211_tx_skb(sdata, skb); } u32 ieee80211_sta_get_rates(struct ieee80211_local *local, @@ -1093,6 +1116,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local, void ieee80211_stop_device(struct ieee80211_local *local) { ieee80211_led_radio(local, false); + ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO); cancel_work_sync(&local->reconfig_filter); @@ -1127,6 +1151,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) } ieee80211_led_radio(local, true); + ieee80211_mod_tpt_led_trig(local, + IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); } /* add interfaces */ @@ -1152,6 +1178,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) } mutex_unlock(&local->sta_mtx); + /* setup fragmentation threshold */ + drv_set_frag_threshold(local, hw->wiphy->frag_threshold); + /* setup RTS threshold */ drv_set_rts_threshold(local, hw->wiphy->rts_threshold); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 34e6d02da779..28bc084dbfb9 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -21,7 +21,16 @@ /* Default mapping in classifier to work with default * queue setup. */ -const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 }; +const int ieee802_1d_to_ac[8] = { + IEEE80211_AC_BE, + IEEE80211_AC_BK, + IEEE80211_AC_BK, + IEEE80211_AC_BE, + IEEE80211_AC_VI, + IEEE80211_AC_VI, + IEEE80211_AC_VO, + IEEE80211_AC_VO +}; static int wme_downgrade_ac(struct sk_buff *skb) { @@ -50,26 +59,22 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; - u32 sta_flags = 0; const u8 *ra = NULL; bool qos = false; if (local->hw.queues < 4 || skb->len < 6) { skb->priority = 0; /* required for correct WPA/11i MIC */ - return min_t(u16, local->hw.queues - 1, - ieee802_1d_to_ac[skb->priority]); + return min_t(u16, local->hw.queues - 1, IEEE80211_AC_BE); } rcu_read_lock(); switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: - rcu_read_lock(); sta = rcu_dereference(sdata->u.vlan.sta); - if (sta) - sta_flags = get_sta_flags(sta); - rcu_read_unlock(); - if (sta) + if (sta) { + qos = get_sta_flags(sta) & WLAN_STA_WME; break; + } case NL80211_IFTYPE_AP: ra = skb->data; break; @@ -98,17 +103,13 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, if (!sta && ra && !is_multicast_ether_addr(ra)) { sta = sta_info_get(sdata, ra); if (sta) - sta_flags = get_sta_flags(sta); + qos = get_sta_flags(sta) & WLAN_STA_WME; } - - if (sta_flags & WLAN_STA_WME) - qos = true; - rcu_read_unlock(); if (!qos) { skb->priority = 0; /* required for correct WPA/11i MIC */ - return ieee802_1d_to_ac[skb->priority]; + return IEEE80211_AC_BE; } /* use the data classifier to determine what 802.1d tag the diff --git a/net/mac80211/work.c b/net/mac80211/work.c index ae344d1ba056..36305e0d06ef 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -458,8 +458,9 @@ ieee80211_direct_probe(struct ieee80211_work *wk) return WORK_ACT_TIMEOUT; } - printk(KERN_DEBUG "%s: direct probe to %pM (try %d)\n", - sdata->name, wk->filter_ta, wk->probe_auth.tries); + printk(KERN_DEBUG "%s: direct probe to %pM (try %d/%i)\n", + sdata->name, wk->filter_ta, wk->probe_auth.tries, + IEEE80211_AUTH_MAX_TRIES); /* * Direct probe is sent to broadcast address as some APs @@ -561,6 +562,25 @@ ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) } static enum work_action __must_check +ieee80211_offchannel_tx(struct ieee80211_work *wk) +{ + if (!wk->started) { + wk->timeout = jiffies + msecs_to_jiffies(wk->offchan_tx.wait); + + /* + * After this, offchan_tx.frame remains but now is no + * longer a valid pointer -- we still need it as the + * cookie for canceling this work. + */ + ieee80211_tx_skb(wk->sdata, wk->offchan_tx.frame); + + return WORK_ACT_NONE; + } + + return WORK_ACT_TIMEOUT; +} + +static enum work_action __must_check ieee80211_assoc_beacon_wait(struct ieee80211_work *wk) { if (wk->started) @@ -955,6 +975,9 @@ static void ieee80211_work_work(struct work_struct *work) case IEEE80211_WORK_REMAIN_ON_CHANNEL: rma = ieee80211_remain_on_channel_timeout(wk); break; + case IEEE80211_WORK_OFFCHANNEL_TX: + rma = ieee80211_offchannel_tx(wk); + break; case IEEE80211_WORK_ASSOC_BEACON_WAIT: rma = ieee80211_assoc_beacon_wait(wk); break; @@ -1051,11 +1074,13 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_work *wk; + bool cleanup = false; mutex_lock(&local->mtx); list_for_each_entry(wk, &local->work_list, list) { if (wk->sdata != sdata) continue; + cleanup = true; wk->type = IEEE80211_WORK_ABORT; wk->started = true; wk->timeout = jiffies; @@ -1063,7 +1088,8 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->mtx); /* run cleanups etc. */ - ieee80211_work_work(&local->work_work); + if (cleanup) + ieee80211_work_work(&local->work_work); mutex_lock(&local->mtx); list_for_each_entry(wk, &local->work_list, list) { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5faec4fd8193..e69d537362c7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -173,9 +173,11 @@ next_hook: outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; - } else if (verdict == NF_DROP) { + } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { kfree_skb(skb); - ret = -EPERM; + ret = -(verdict >> NF_VERDICT_BITS); + if (ret == 0) + ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index a22dac227055..70bd1d0774c6 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -4,6 +4,7 @@ menuconfig IP_VS tristate "IP virtual server support" depends on NET && INET && NETFILTER + depends on (NF_CONNTRACK || NF_CONNTRACK=n) ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index a475edee0912..5c48ffb60c28 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); EXPORT_SYMBOL(register_ip_vs_app_inc); -/* ipvs application list head */ -static LIST_HEAD(ip_vs_app_list); -static DEFINE_MUTEX(__ip_vs_app_mutex); - - /* * Get an ip_vs_app object */ @@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) * Allocate/initialize app incarnation and register it in proto apps. */ static int -ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) +ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, + __u16 port) { struct ip_vs_protocol *pp; struct ip_vs_app *inc; @@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) } } - ret = pp->register_app(inc); + ret = pp->register_app(net, inc); if (ret) goto out; @@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) * Release app incarnation */ static void -ip_vs_app_inc_release(struct ip_vs_app *inc) +ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; @@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc) return; if (pp->unregister_app) - pp->unregister_app(inc); + pp->unregister_app(net, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); @@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) * Register an application incarnation in protocol applications */ int -register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) +register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, + __u16 port) { + struct netns_ipvs *ipvs = net_ipvs(net); int result; - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); - result = ip_vs_app_inc_new(app, proto, port); + result = ip_vs_app_inc_new(net, app, proto, port); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); return result; } @@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) /* * ip_vs_app registration routine */ -int register_ip_vs_app(struct ip_vs_app *app) +int register_ip_vs_app(struct net *net, struct ip_vs_app *app) { + struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); - list_add(&app->a_list, &ip_vs_app_list); + list_add(&app->a_list, &ipvs->app_list); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); return 0; } @@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app) * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services */ -void unregister_ip_vs_app(struct ip_vs_app *app) +void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *inc, *nxt; - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { - ip_vs_app_inc_release(inc); + ip_vs_app_inc_release(net, inc); } list_del(&app->a_list); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); @@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app) /* * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) */ -int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) +int ip_vs_bind_app(struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) { return pp->app_conn_bind(cp); } @@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) * /proc/net/ip_vs_app entry function */ -static struct ip_vs_app *ip_vs_app_idx(loff_t pos) +static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) { struct ip_vs_app *app, *inc; - list_for_each_entry(app, &ip_vs_app_list, a_list) { + list_for_each_entry(app, &ipvs->app_list, a_list) { list_for_each_entry(inc, &app->incs_list, a_list) { if (pos-- == 0) return inc; @@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos) static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) { - mutex_lock(&__ip_vs_app_mutex); + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); - return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; + mutex_lock(&ipvs->app_mutex); + + return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_app *inc, *app; struct list_head *e; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); ++*pos; if (v == SEQ_START_TOKEN) - return ip_vs_app_idx(0); + return ip_vs_app_idx(ipvs, 0); inc = v; app = inc->app; @@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) return list_entry(e, struct ip_vs_app, a_list); /* go on to next application */ - for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { + for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { app = list_entry(e, struct ip_vs_app, a_list); list_for_each_entry(inc, &app->incs_list, a_list) { return inc; @@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) { - mutex_unlock(&__ip_vs_app_mutex); + struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); + + mutex_unlock(&ipvs->app_mutex); } static int ip_vs_app_seq_show(struct seq_file *seq, void *v) @@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = { static int ip_vs_app_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_app_seq_ops); + return seq_open_net(inode, file, &ip_vs_app_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ip_vs_app_fops = { @@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -int __init ip_vs_app_init(void) +static int __net_init __ip_vs_app_init(struct net *net) { - /* we will replace it with proc_net_ipvs_create() soon */ - proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); + struct netns_ipvs *ipvs = net_ipvs(net); + + INIT_LIST_HEAD(&ipvs->app_list); + __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); + proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } +static void __net_exit __ip_vs_app_cleanup(struct net *net) +{ + proc_net_remove(net, "ip_vs_app"); +} + +static struct pernet_operations ip_vs_app_ops = { + .init = __ip_vs_app_init, + .exit = __ip_vs_app_cleanup, +}; + +int __init ip_vs_app_init(void) +{ + int rv; + + rv = register_pernet_subsys(&ip_vs_app_ops); + return rv; +} + void ip_vs_app_cleanup(void) { - proc_net_remove(&init_net, "ip_vs_app"); + unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 66e4662925d5..83233fe24a08 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -64,9 +64,6 @@ static struct list_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; -/* counter for current IPVS connections */ -static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); - /* counter for no client port connections */ static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); @@ -76,7 +73,7 @@ static unsigned int ip_vs_conn_rnd __read_mostly; /* * Fine locking granularity for big connection hash table */ -#define CT_LOCKARRAY_BITS 4 +#define CT_LOCKARRAY_BITS 5 #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) @@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, +static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto, const union nf_inet_addr *addr, __be16 port) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), - (__force u32)port, proto, ip_vs_conn_rnd) - & ip_vs_conn_tab_mask; + return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), + (__force u32)port, proto, ip_vs_conn_rnd) ^ + ((size_t)net>>8)) & ip_vs_conn_tab_mask; #endif - return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, - ip_vs_conn_rnd) - & ip_vs_conn_tab_mask; + return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, + ip_vs_conn_rnd) ^ + ((size_t)net>>8)) & ip_vs_conn_tab_mask; } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, @@ -166,15 +163,15 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, port = p->vport; } - return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); } static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, - NULL, 0, &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, + &cp->caddr, cp->cport, NULL, 0, &p); if (cp->pe) { p.pe = cp->pe; @@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) } /* - * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. + * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. * returns bool success. */ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) @@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && + p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; + struct net *net = skb_net(skb); pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) return 1; if (likely(!inverse)) - ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], - &iph->daddr, pptr[1], p); + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, + pptr[0], &iph->daddr, pptr[1], p); else - ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], - &iph->saddr, pptr[0], p); + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, + pptr[1], &iph->saddr, pptr[0], p); return 0; } struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { @@ -353,6 +351,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; if (p->pe_data && p->pe->ct_match) { if (p->pe == cp->pe && p->pe->ct_match(p, cp)) goto out; @@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && + p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && - p->vport == cp->cport && p->cport == cp->dport && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { /* HIT */ atomic_inc(&cp->refcnt); ret = cp; @@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { @@ -611,8 +611,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, - &cp->vaddr, cp->vport, + dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, + cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark); ip_vs_bind_dest(cp, dest); return dest; @@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) int ip_vs_check_template(struct ip_vs_conn *ct) { struct ip_vs_dest *dest = ct->dest; + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); /* * Checking the dest server status. */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - (sysctl_ip_vs_expire_quiescent_template && + (ipvs->sysctl_expire_quiescent_template && (atomic_read(&dest->weight) == 0))) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " @@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); cp->timeout = 60*HZ; @@ -772,7 +774,7 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); - atomic_dec(&ip_vs_conn_count); + atomic_dec(&ipvs->conn_count); kmem_cache_free(ip_vs_conn_cachep, cp); return; @@ -806,7 +808,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); + struct netns_ipvs *ipvs = net_ipvs(p->net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, + p->protocol); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { @@ -816,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); + ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); @@ -846,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); - atomic_inc(&ip_vs_conn_count); + atomic_inc(&ipvs->conn_count); if (flags & IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); @@ -865,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, #endif ip_vs_bind_xmit(cp); - if (unlikely(pp && atomic_read(&pp->appcnt))) - ip_vs_bind_app(cp, pp); + if (unlikely(pd && atomic_read(&pd->appcnt))) + ip_vs_bind_app(cp, pd->pp); /* * Allow conntrack to be preserved. By default, conntrack @@ -875,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, * IP_VS_CONN_F_ONE_PACKET too. */ - if (ip_vs_conntrack_enabled()) + if (ip_vs_conntrack_enabled(ipvs)) cp->flags |= IP_VS_CONN_F_NFCT; /* Hash it in the ip_vs_conn_tab finally */ @@ -888,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, * /proc/net/ip_vs_conn entries */ #ifdef CONFIG_PROC_FS +struct ip_vs_iter_state { + struct seq_net_private p; + struct list_head *l; +}; static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) { int idx; struct ip_vs_conn *cp; + struct ip_vs_iter_state *iter = seq->private; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { - seq->private = &ip_vs_conn_tab[idx]; + iter->l = &ip_vs_conn_tab[idx]; return cp; } } @@ -910,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) { - seq->private = NULL; + struct ip_vs_iter_state *iter = seq->private; + + iter->l = NULL; return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; } static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; - struct list_head *e, *l = seq->private; + struct ip_vs_iter_state *iter = seq->private; + struct list_head *e, *l = iter->l; int idx; ++*pos; @@ -934,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { - seq->private = &ip_vs_conn_tab[idx]; + iter->l = &ip_vs_conn_tab[idx]; return cp; } ct_read_unlock_bh(idx); } - seq->private = NULL; + iter->l = NULL; return NULL; } static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) { - struct list_head *l = seq->private; + struct ip_vs_iter_state *iter = seq->private; + struct list_head *l = iter->l; if (l) ct_read_unlock_bh(l - ip_vs_conn_tab); @@ -959,9 +973,12 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); else { const struct ip_vs_conn *cp = v; + struct net *net = seq_file_net(seq); char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; + if (!ip_vs_conn_net_eq(cp, net)) + return 0; if (cp->pe_data) { pe_data[0] = ' '; len = strlen(cp->pe->name); @@ -1006,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = { static int ip_vs_conn_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_conn_seq_ops); + return seq_open_net(inode, file, &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state)); } static const struct file_operations ip_vs_conn_fops = { @@ -1033,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); else { const struct ip_vs_conn *cp = v; + struct net *net = seq_file_net(seq); + + if (!ip_vs_conn_net_eq(cp, net)) + return 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -1069,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_conn_sync_seq_ops); + return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state)); } static const struct file_operations ip_vs_conn_sync_fops = { @@ -1115,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) } /* Called from keventd and must protect itself from softirqs */ -void ip_vs_random_dropentry(void) +void ip_vs_random_dropentry(struct net *net) { int idx; struct ip_vs_conn *cp; @@ -1135,7 +1158,8 @@ void ip_vs_random_dropentry(void) if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; - + if (!ip_vs_conn_net_eq(cp, net)) + continue; if (cp->protocol == IPPROTO_TCP) { switch(cp->state) { case IP_VS_TCP_S_SYN_RECV: @@ -1170,12 +1194,13 @@ void ip_vs_random_dropentry(void) /* * Flush all the connection entries in the ip_vs_conn_tab */ -static void ip_vs_conn_flush(void) +static void ip_vs_conn_flush(struct net *net) { int idx; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs = net_ipvs(net); - flush_again: +flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { /* * Lock is actually needed in this loop. @@ -1183,7 +1208,8 @@ static void ip_vs_conn_flush(void) ct_write_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { - + if (!ip_vs_conn_net_eq(cp, net)) + continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); if (cp->control) { @@ -1196,16 +1222,41 @@ static void ip_vs_conn_flush(void) /* the counter may be not NULL, because maybe some conn entries are run by slow timer handler or unhashed but still referred */ - if (atomic_read(&ip_vs_conn_count) != 0) { + if (atomic_read(&ipvs->conn_count) != 0) { schedule(); goto flush_again; } } +/* + * per netns init and exit + */ +int __net_init __ip_vs_conn_init(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + atomic_set(&ipvs->conn_count, 0); + + proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); + proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + return 0; +} +static void __net_exit __ip_vs_conn_cleanup(struct net *net) +{ + /* flush all the connection entries first */ + ip_vs_conn_flush(net); + proc_net_remove(net, "ip_vs_conn"); + proc_net_remove(net, "ip_vs_conn_sync"); +} +static struct pernet_operations ipvs_conn_ops = { + .init = __ip_vs_conn_init, + .exit = __ip_vs_conn_cleanup, +}; int __init ip_vs_conn_init(void) { int idx; + int retc; /* Compute size and mask */ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; @@ -1243,24 +1294,18 @@ int __init ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); - proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + retc = register_pernet_subsys(&ipvs_conn_ops); /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return 0; + return retc; } - void ip_vs_conn_cleanup(void) { - /* flush all the connection entries first */ - ip_vs_conn_flush(); - + unregister_pernet_subsys(&ipvs_conn_ops); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - proc_net_remove(&init_net, "ip_vs_conn"); - proc_net_remove(&init_net, "ip_vs_conn_sync"); vfree(ip_vs_conn_tab); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5287771d0647..f36a84f33efb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -41,6 +41,7 @@ #include <net/icmp.h> /* for icmp_send */ #include <net/route.h> #include <net/ip6_checksum.h> +#include <net/netns/generic.h> /* net_generic() */ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put); EXPORT_SYMBOL(ip_vs_get_debug_level); #endif +int ip_vs_net_id __read_mostly; +#ifdef IP_VS_GENERIC_NETNS +EXPORT_SYMBOL(ip_vs_net_id); +#endif +/* netns cnt used for uniqueness */ +static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); /* ID used in ICMP lookups */ #define icmp_id(icmph) (((icmph)->un).echo.id) @@ -108,21 +115,28 @@ static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { - spin_lock(&dest->stats.lock); - dest->stats.ustats.inpkts++; - dest->stats.ustats.inbytes += skb->len; - spin_unlock(&dest->stats.lock); - - spin_lock(&dest->svc->stats.lock); - dest->svc->stats.ustats.inpkts++; - dest->svc->stats.ustats.inbytes += skb->len; - spin_unlock(&dest->svc->stats.lock); - - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.inpkts++; - ip_vs_stats.ustats.inbytes += skb->len; - spin_unlock(&ip_vs_stats.lock); + struct ip_vs_cpu_stats *s; + + s = this_cpu_ptr(dest->stats.cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(dest->svc->stats.cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); } } @@ -131,21 +145,28 @@ static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { - spin_lock(&dest->stats.lock); - dest->stats.ustats.outpkts++; - dest->stats.ustats.outbytes += skb->len; - spin_unlock(&dest->stats.lock); - - spin_lock(&dest->svc->stats.lock); - dest->svc->stats.ustats.outpkts++; - dest->svc->stats.ustats.outbytes += skb->len; - spin_unlock(&dest->svc->stats.lock); - - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.outpkts++; - ip_vs_stats.ustats.outbytes += skb->len; - spin_unlock(&ip_vs_stats.lock); + struct ip_vs_cpu_stats *s; + + s = this_cpu_ptr(dest->stats.cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(dest->svc->stats.cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); } } @@ -153,28 +174,28 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { - spin_lock(&cp->dest->stats.lock); - cp->dest->stats.ustats.conns++; - spin_unlock(&cp->dest->stats.lock); + struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_cpu_stats *s; - spin_lock(&svc->stats.lock); - svc->stats.ustats.conns++; - spin_unlock(&svc->stats.lock); + s = this_cpu_ptr(cp->dest->stats.cpustats); + s->ustats.conns++; - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.conns++; - spin_unlock(&ip_vs_stats.lock); + s = this_cpu_ptr(svc->stats.cpustats); + s->ustats.conns++; + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.conns++; } static inline int ip_vs_set_state(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { - if (unlikely(!pp->state_transition)) + if (unlikely(!pd->pp->state_transition)) return 0; - return pp->state_transition(cp, direction, skb, pp); + return pd->pp->state_transition(cp, direction, skb, pd); } static inline int @@ -184,7 +205,8 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { - ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); + ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, + vport, p); p->pe = svc->pe; if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); @@ -327,8 +349,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port, - &iph.daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, + src_port, &iph.daddr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { @@ -371,8 +393,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp, int *ignored) + struct ip_vs_proto_data *pd, int *ignored) { + struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; @@ -401,7 +424,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Do not schedule replies from local real server. */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { + (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); @@ -442,8 +465,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, - pptr[0], &iph.daddr, pptr[1], &p); + + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, + &iph.saddr, pptr[0], &iph.daddr, pptr[1], + &p); cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); @@ -472,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * no destination is available for a new connection. */ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { + struct net *net; + struct netns_ipvs *ipvs; __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; int unicast; + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -484,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_service_put(svc); return NF_DROP; } + net = skb_net(skb); #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; else #endif - unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); + unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ - if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { + ipvs = net_ipvs(net); + if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && @@ -509,7 +539,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->af, iph.protocol, + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &daddr, 0, @@ -523,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_in_stats(cp, skb); /* set state */ - cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); + cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ - ret = cp->packet_xmit(skb, cp, pp); + ret = cp->packet_xmit(skb, cp, pd->pp); /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); @@ -707,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl) { + struct netns_ipvs *ipvs; unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { @@ -728,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (!skb_make_writable(skb, offset)) goto out; + ipvs = net_ipvs(skb_net(skb)); + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); @@ -737,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) goto out; } else #endif - if ((sysctl_ip_vs_snat_reroute || + if ((ipvs->sysctl_snat_reroute || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) goto out; @@ -833,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -910,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -949,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) * Used for NAT and local client. */ static unsigned int -handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { + struct ip_vs_protocol *pp = pd->pp; + struct netns_ipvs *ipvs; + IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); if (!skb_make_writable(skb, ihl)) @@ -986,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * if it came from this machine itself. So re-compute * the routing information. */ + ipvs = net_ipvs(skb_net(skb)); + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) goto drop; } else #endif - if ((sysctl_ip_vs_snat_reroute || + if ((ipvs->sysctl_snat_reroute || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; @@ -1000,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); - ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) ip_vs_notrack(skb); @@ -1024,9 +1062,12 @@ drop: static unsigned int ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) { + struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs; EnterFunction(11); @@ -1047,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely(!skb_dst(skb))) return NF_ACCEPT; + net = skb_net(skb); ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1070,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - pp = ip_vs_proto_get(iph.protocol); - if (unlikely(!pp)) + pd = ip_vs_proto_data_get(net, iph.protocol); + if (unlikely(!pd)) return NF_ACCEPT; + pp = pd->pp; /* reassemble IP fragments */ #ifdef CONFIG_IP_VS_IPV6 @@ -1098,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); + cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); + ipvs = net_ipvs(net); if (likely(cp)) - return handle_response(af, skb, pp, cp, iph.len); - if (sysctl_ip_vs_nat_icmp_send && + return handle_response(af, skb, pd, cp, iph.len); + if (ipvs->sysctl_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { @@ -1112,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(af, iph.protocol, + if (ip_vs_lookup_real_service(net, af, iph.protocol, &iph.saddr, pptr[0])) { /* @@ -1227,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, static int ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) { + struct net *net = NULL; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; unsigned int offset, ihl, verdict; union nf_inet_addr snet; @@ -1274,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(cih->protocol); - if (!pp) + net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); + if (!pd) return NF_ACCEPT; + pp = pd->pp; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && @@ -1290,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); if (!cp) { /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); if (cp) { snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, @@ -1337,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) { + struct net *net = NULL; struct ipv6hdr *iph; struct icmp6hdr _icmph, *ic; struct ipv6hdr _ciph, *cih; /* The ip header contained @@ -1344,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; unsigned int offset, verdict; union nf_inet_addr snet; struct rt6_info *rt; @@ -1386,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(cih->nexthdr); - if (!pp) + net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->nexthdr); + if (!pd) return NF_ACCEPT; + pp = pd->pp; /* Is the embedded protocol header present? */ /* TODO: we don't support fragmentation at the moment anyways */ @@ -1402,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) { /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); if (cp) { ipv6_addr_copy(&snet.in6, &iph->saddr); return handle_response_icmp(AF_INET6, skb, &snet, @@ -1448,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) static unsigned int ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) { + struct net *net; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, restart, pkts; + struct netns_ipvs *ipvs; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -1505,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } + net = skb_net(skb); /* Protocol supported? */ - pp = ip_vs_proto_get(iph.protocol); - if (unlikely(!pp)) + pd = ip_vs_proto_data_get(net, iph.protocol); + if (unlikely(!pd)) return NF_ACCEPT; - + pp = pd->pp; /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); + cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); if (unlikely(!cp)) { int v; - if (!pp->conn_schedule(af, skb, pp, &v, &cp)) + if (!pp->conn_schedule(af, skb, pd, &v, &cp)) return v; } @@ -1530,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - + net = skb_net(skb); + ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (sysctl_ip_vs_expire_nodest_conn) { + if (ipvs->sysctl_expire_nodest_conn) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } @@ -1546,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } ip_vs_in_stats(cp, skb); - restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); + restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) ret = cp->packet_xmit(skb, cp, pp); /* do not touch skb anymore */ @@ -1563,37 +1620,38 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * * For ONE_PKT let ip_vs_sync_conn() do the filter work. */ + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) - pkts = sysctl_ip_vs_sync_threshold[0]; + pkts = ipvs->sysctl_sync_threshold[0]; else pkts = atomic_add_return(1, &cp->in_pkts); - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && + if ((ipvs->sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ipvs->sysctl_sync_threshold[1] + == ipvs->sysctl_sync_threshold[0])) || (cp->old_state != cp->state && ((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { - ip_vs_sync_conn(cp); + ip_vs_sync_conn(net, cp); goto out; } } /* Keep this block last: TCP and others with pp->num_states <= 1 */ - else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && + else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ipvs->sysctl_sync_threshold[1] + == ipvs->sysctl_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_TIME_WAIT))))) - ip_vs_sync_conn(cp); + ip_vs_sync_conn(net, cp); out: cp->old_state = cp->state; @@ -1812,7 +1870,41 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { }, #endif }; +/* + * Initialize IP Virtual Server netns mem. + */ +static int __net_init __ip_vs_init(struct net *net) +{ + struct netns_ipvs *ipvs; + + ipvs = net_generic(net, ip_vs_net_id); + if (ipvs == NULL) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + ipvs->net = net; + /* Counters used for creating unique names */ + ipvs->gen = atomic_read(&ipvs_netns_cnt); + atomic_inc(&ipvs_netns_cnt); + net->ipvs = ipvs; + printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n", + sizeof(struct netns_ipvs), ipvs->gen); + return 0; +} + +static void __net_exit __ip_vs_cleanup(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen); +} +static struct pernet_operations ipvs_core_ops = { + .init = __ip_vs_init, + .exit = __ip_vs_cleanup, + .id = &ip_vs_net_id, + .size = sizeof(struct netns_ipvs), +}; /* * Initialize IP Virtual Server @@ -1821,8 +1913,11 @@ static int __init ip_vs_init(void) { int ret; - ip_vs_estimator_init(); + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ + if (ret < 0) + return ret; + ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { pr_err("can't setup control.\n"); @@ -1843,15 +1938,23 @@ static int __init ip_vs_init(void) goto cleanup_app; } + ret = ip_vs_sync_init(); + if (ret < 0) { + pr_err("can't setup sync data.\n"); + goto cleanup_conn; + } + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { pr_err("can't register hooks.\n"); - goto cleanup_conn; + goto cleanup_sync; } pr_info("ipvs loaded.\n"); return ret; +cleanup_sync: + ip_vs_sync_cleanup(); cleanup_conn: ip_vs_conn_cleanup(); cleanup_app: @@ -1861,17 +1964,20 @@ static int __init ip_vs_init(void) ip_vs_control_cleanup(); cleanup_estimator: ip_vs_estimator_cleanup(); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ return ret; } static void __exit ip_vs_cleanup(void) { nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d12a13c497ba..09ca2ce2f2b7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -38,6 +38,7 @@ #include <linux/mutex.h> #include <net/net_namespace.h> +#include <linux/nsproxy.h> #include <net/ip.h> #ifdef CONFIG_IP_VS_IPV6 #include <net/ipv6.h> @@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex); /* lock for service table */ static DEFINE_RWLOCK(__ip_vs_svc_lock); -/* lock for table with the real services */ -static DEFINE_RWLOCK(__ip_vs_rs_lock); - -/* lock for state and timeout tables */ -static DEFINE_SPINLOCK(ip_vs_securetcp_lock); - -/* lock for drop entry handling */ -static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); - -/* lock for drop packet handling */ -static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); - -/* 1/rate drop and drop-entry variables */ -int ip_vs_drop_rate = 0; -int ip_vs_drop_counter = 0; -static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); - -/* number of virtual services */ -static int ip_vs_num_services = 0; - /* sysctl variables */ -static int sysctl_ip_vs_drop_entry = 0; -static int sysctl_ip_vs_drop_packet = 0; -static int sysctl_ip_vs_secure_tcp = 0; -static int sysctl_ip_vs_amemthresh = 1024; -static int sysctl_ip_vs_am_droprate = 10; -int sysctl_ip_vs_cache_bypass = 0; -int sysctl_ip_vs_expire_nodest_conn = 0; -int sysctl_ip_vs_expire_quiescent_template = 0; -int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; -int sysctl_ip_vs_nat_icmp_send = 0; -#ifdef CONFIG_IP_VS_NFCT -int sysctl_ip_vs_conntrack; -#endif -int sysctl_ip_vs_snat_reroute = 1; -int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */ #ifdef CONFIG_IP_VS_DEBUG static int sysctl_ip_vs_debug_level = 0; @@ -105,18 +71,17 @@ int ip_vs_get_debug_level(void) #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ -static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) +static int __ip_vs_addr_is_local_v6(struct net *net, + const struct in6_addr *addr) { struct rt6_info *rt; struct flowi fl = { .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = *addr, - .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + .fl6_dst = *addr, + .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, }; - rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl); if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) return 1; @@ -127,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs */ -static void update_defense_level(void) +static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; static int old_secure_tcp = 0; @@ -143,73 +108,73 @@ static void update_defense_level(void) /* si_swapinfo(&i); */ /* availmem = availmem - (i.totalswap - i.freeswap); */ - nomem = (availmem < sysctl_ip_vs_amemthresh); + nomem = (availmem < ipvs->sysctl_amemthresh); local_bh_disable(); /* drop_entry */ - spin_lock(&__ip_vs_dropentry_lock); - switch (sysctl_ip_vs_drop_entry) { + spin_lock(&ipvs->dropentry_lock); + switch (ipvs->sysctl_drop_entry) { case 0: - atomic_set(&ip_vs_dropentry, 0); + atomic_set(&ipvs->dropentry, 0); break; case 1: if (nomem) { - atomic_set(&ip_vs_dropentry, 1); - sysctl_ip_vs_drop_entry = 2; + atomic_set(&ipvs->dropentry, 1); + ipvs->sysctl_drop_entry = 2; } else { - atomic_set(&ip_vs_dropentry, 0); + atomic_set(&ipvs->dropentry, 0); } break; case 2: if (nomem) { - atomic_set(&ip_vs_dropentry, 1); + atomic_set(&ipvs->dropentry, 1); } else { - atomic_set(&ip_vs_dropentry, 0); - sysctl_ip_vs_drop_entry = 1; + atomic_set(&ipvs->dropentry, 0); + ipvs->sysctl_drop_entry = 1; }; break; case 3: - atomic_set(&ip_vs_dropentry, 1); + atomic_set(&ipvs->dropentry, 1); break; } - spin_unlock(&__ip_vs_dropentry_lock); + spin_unlock(&ipvs->dropentry_lock); /* drop_packet */ - spin_lock(&__ip_vs_droppacket_lock); - switch (sysctl_ip_vs_drop_packet) { + spin_lock(&ipvs->droppacket_lock); + switch (ipvs->sysctl_drop_packet) { case 0: - ip_vs_drop_rate = 0; + ipvs->drop_rate = 0; break; case 1: if (nomem) { - ip_vs_drop_rate = ip_vs_drop_counter - = sysctl_ip_vs_amemthresh / - (sysctl_ip_vs_amemthresh-availmem); - sysctl_ip_vs_drop_packet = 2; + ipvs->drop_rate = ipvs->drop_counter + = ipvs->sysctl_amemthresh / + (ipvs->sysctl_amemthresh-availmem); + ipvs->sysctl_drop_packet = 2; } else { - ip_vs_drop_rate = 0; + ipvs->drop_rate = 0; } break; case 2: if (nomem) { - ip_vs_drop_rate = ip_vs_drop_counter - = sysctl_ip_vs_amemthresh / - (sysctl_ip_vs_amemthresh-availmem); + ipvs->drop_rate = ipvs->drop_counter + = ipvs->sysctl_amemthresh / + (ipvs->sysctl_amemthresh-availmem); } else { - ip_vs_drop_rate = 0; - sysctl_ip_vs_drop_packet = 1; + ipvs->drop_rate = 0; + ipvs->sysctl_drop_packet = 1; } break; case 3: - ip_vs_drop_rate = sysctl_ip_vs_am_droprate; + ipvs->drop_rate = ipvs->sysctl_am_droprate; break; } - spin_unlock(&__ip_vs_droppacket_lock); + spin_unlock(&ipvs->droppacket_lock); /* secure_tcp */ - spin_lock(&ip_vs_securetcp_lock); - switch (sysctl_ip_vs_secure_tcp) { + spin_lock(&ipvs->securetcp_lock); + switch (ipvs->sysctl_secure_tcp) { case 0: if (old_secure_tcp >= 2) to_change = 0; @@ -218,7 +183,7 @@ static void update_defense_level(void) if (nomem) { if (old_secure_tcp < 2) to_change = 1; - sysctl_ip_vs_secure_tcp = 2; + ipvs->sysctl_secure_tcp = 2; } else { if (old_secure_tcp >= 2) to_change = 0; @@ -231,7 +196,7 @@ static void update_defense_level(void) } else { if (old_secure_tcp >= 2) to_change = 0; - sysctl_ip_vs_secure_tcp = 1; + ipvs->sysctl_secure_tcp = 1; } break; case 3: @@ -239,10 +204,11 @@ static void update_defense_level(void) to_change = 1; break; } - old_secure_tcp = sysctl_ip_vs_secure_tcp; + old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) - ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); - spin_unlock(&ip_vs_securetcp_lock); + ip_vs_protocol_timeout_change(ipvs, + ipvs->sysctl_secure_tcp > 1); + spin_unlock(&ipvs->securetcp_lock); local_bh_enable(); } @@ -252,16 +218,16 @@ static void update_defense_level(void) * Timer for checking the defense */ #define DEFENSE_TIMER_PERIOD 1*HZ -static void defense_work_handler(struct work_struct *work); -static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); static void defense_work_handler(struct work_struct *work) { - update_defense_level(); - if (atomic_read(&ip_vs_dropentry)) - ip_vs_random_dropentry(); + struct netns_ipvs *ipvs = + container_of(work, struct netns_ipvs, defense_work.work); - schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); + update_defense_level(ipvs); + if (atomic_read(&ipvs->dropentry)) + ip_vs_random_dropentry(ipvs->net); + schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } int @@ -289,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; -/* - * Hash table: for real service lookups - */ -#define IP_VS_RTAB_BITS 4 -#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) -#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) - -static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; - -/* - * Trash for destinations - */ -static LIST_HEAD(ip_vs_dest_trash); - -/* - * FTP & NULL virtual service counters - */ -static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); -static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); - /* * Returns hash value for virtual service */ -static __inline__ unsigned -ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, - __be16 port) +static inline unsigned +ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, + const union nf_inet_addr *addr, __be16 port) { register unsigned porth = ntohs(port); __be32 addr_fold = addr->ip; @@ -325,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif + addr_fold ^= ((size_t)net>>8); return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) & IP_VS_SVC_TAB_MASK; @@ -333,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, /* * Returns hash value of fwmark for virtual service lookup */ -static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) +static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) { - return fwmark & IP_VS_SVC_TAB_MASK; + return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; } /* - * Hashes a service in the ip_vs_svc_table by <proto,addr,port> + * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> * or in the ip_vs_svc_fwm_table by fwmark. * Should be called with locked tables. */ @@ -355,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* - * Hash it by <protocol,addr,port> in ip_vs_svc_table + * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, - svc->port); + hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, + &svc->addr, svc->port); list_add(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* - * Hash it by fwmark in ip_vs_svc_fwm_table + * Hash it by fwmark in svc_fwm_table */ - hash = ip_vs_svc_fwm_hashkey(svc->fwmark); + hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } @@ -376,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* - * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. + * Unhashes a service from svc_table / svc_fwm_table. * Should be called with locked tables. */ static int ip_vs_svc_unhash(struct ip_vs_service *svc) @@ -388,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) } if (svc->fwmark == 0) { - /* Remove it from the ip_vs_svc_table table */ + /* Remove it from the svc_table table */ list_del(&svc->s_list); } else { - /* Remove it from the ip_vs_svc_fwm_table table */ + /* Remove it from the svc_fwm_table table */ list_del(&svc->f_list); } @@ -402,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) /* - * Get service by {proto,addr,port} in the service table. + * Get service by {netns, proto,addr,port} in the service table. */ static inline struct ip_vs_service * -__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, - __be16 vport) +__ip_vs_service_find(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { unsigned hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) - && (svc->protocol == protocol)) { + && (svc->protocol == protocol) + && net_eq(svc->net, net)) { /* HIT */ return svc; } @@ -432,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, * Get service by {fwmark} in the service table. */ static inline struct ip_vs_service * -__ip_vs_svc_fwm_find(int af, __u32 fwmark) +__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) { unsigned hash; struct ip_vs_service *svc; /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(fwmark); + hash = ip_vs_svc_fwm_hashkey(net, fwmark); list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark && svc->af == af) { + if (svc->fwmark == fwmark && svc->af == af + && net_eq(svc->net, net)) { /* HIT */ return svc; } @@ -451,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, +ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; + struct netns_ipvs *ipvs = net_ipvs(net); read_lock(&__ip_vs_svc_lock); /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) + svc = __ip_vs_svc_fwm_find(net, af, fwmark); + if (fwmark && svc) goto out; /* * Check the table hashed by <protocol,addr,port> * for "full" addressed entries */ - svc = __ip_vs_service_find(af, protocol, vaddr, vport); + svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP - && atomic_read(&ip_vs_ftpsvc_counter) + && atomic_read(&ipvs->ftpsvc_counter) && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { /* * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); + svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); } if (svc == NULL - && atomic_read(&ip_vs_nullsvc_counter)) { + && atomic_read(&ipvs->nullsvc_counter)) { /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_find(af, protocol, vaddr, 0); + svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); } out: @@ -521,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port), atomic_read(&svc->usecnt)); + free_percpu(svc->stats.cpustats); kfree(svc); } } @@ -547,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af, } /* - * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. + * Hashes ip_vs_dest in rs_table by <proto,addr,port>. * should be called with locked tables. */ -static int ip_vs_rs_hash(struct ip_vs_dest *dest) +static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned hash; @@ -564,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ip_vs_rtable[hash]); + list_add(&dest->d_list, &ipvs->rs_table[hash]); return 1; } /* - * UNhashes ip_vs_dest from ip_vs_rtable. + * UNhashes ip_vs_dest from rs_table. * should be called with locked tables. */ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* - * Remove it from the ip_vs_rtable table. + * Remove it from the rs_table table. */ if (!list_empty(&dest->d_list)) { list_del(&dest->d_list); @@ -590,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by <proto,addr,port> in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(int af, __u16 protocol, +ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport) { + struct netns_ipvs *ipvs = net_ipvs(net); unsigned hash; struct ip_vs_dest *dest; @@ -603,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol, */ hash = ip_vs_rs_hashkey(af, daddr, dport); - read_lock(&__ip_vs_rs_lock); - list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { + read_lock(&ipvs->rs_lock); + list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { if ((dest->af == af) && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->port == dport) && ((dest->protocol == protocol) || dest->vfwmark)) { /* HIT */ - read_unlock(&__ip_vs_rs_lock); + read_unlock(&ipvs->rs_lock); return dest; } } - read_unlock(&__ip_vs_rs_lock); + read_unlock(&ipvs->rs_lock); return NULL; } @@ -654,7 +607,8 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * ip_vs_lookup_real_service() looked promissing, but * seems not working as expected. */ -struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, +struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, + const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, __u32 fwmark) @@ -662,7 +616,7 @@ struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, struct ip_vs_dest *dest; struct ip_vs_service *svc; - svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); @@ -687,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __be16 dport) { struct ip_vs_dest *dest, *nxt; + struct netns_ipvs *ipvs = net_ipvs(svc->net); /* * Find the destination in trash */ - list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, @@ -722,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); kfree(dest); } } @@ -739,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * are expired, and the refcnt of each destination in the trash must * be 1, so we simply release them here. */ -static void ip_vs_trash_cleanup(void) +static void ip_vs_trash_cleanup(struct net *net) { struct ip_vs_dest *dest, *nxt; + struct netns_ipvs *ipvs = net_ipvs(net); - list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); kfree(dest); } } @@ -770,6 +728,7 @@ static void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { + struct netns_ipvs *ipvs = net_ipvs(svc->net); int conn_flags; /* set the weight and the flags */ @@ -782,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, conn_flags |= IP_VS_CONN_F_NOOUTPUT; } else { /* - * Put the real service in ip_vs_rtable if not present. + * Put the real service in rs_table if not present. * For now only for NAT! */ - write_lock_bh(&__ip_vs_rs_lock); - ip_vs_rs_hash(dest); - write_unlock_bh(&__ip_vs_rs_lock); + write_lock_bh(&ipvs->rs_lock); + ip_vs_rs_hash(ipvs, dest); + write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -815,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock(&dest->dst_lock); if (add) - ip_vs_new_estimator(&dest->stats); + ip_vs_new_estimator(svc->net, &dest->stats); write_lock_bh(&__ip_vs_svc_lock); @@ -852,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && - !__ip_vs_addr_is_local_v6(&udest->addr.in6)) + !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) return -EINVAL; } else #endif { - atype = inet_addr_type(&init_net, udest->addr.ip); + atype = inet_addr_type(svc->net, udest->addr.ip); if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; } @@ -867,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!dest->stats.cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto err_alloc; + } dest->af = svc->af; dest->protocol = svc->protocol; @@ -890,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, LeaveFunction(2); return 0; + +err_alloc: + kfree(dest); + return -ENOMEM; } @@ -1008,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) { - ip_vs_kill_estimator(&dest->stats); + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_kill_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. */ - write_lock_bh(&__ip_vs_rs_lock); + write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&__ip_vs_rs_lock); + write_unlock_bh(&ipvs->rs_lock); /* * Decrease the refcnt of the dest, and free the dest @@ -1036,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) and only one user context can update virtual service at a time, so the operation here is OK */ atomic_dec(&dest->svc->refcnt); + free_percpu(dest->stats.cpustats); kfree(dest); } else { IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " @@ -1043,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - list_add(&dest->n_list, &ip_vs_dest_trash); + list_add(&dest->n_list, &ipvs->dest_trash); atomic_inc(&dest->refcnt); } } @@ -1107,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(dest); + __ip_vs_del_dest(svc->net, dest); LeaveFunction(2); @@ -1119,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) * Add a service into the service hash table */ static int -ip_vs_add_service(struct ip_vs_service_user_kern *u, +ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { int ret = 0; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; + struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); @@ -1161,6 +1133,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, ret = -ENOMEM; goto out_err; } + svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!svc->stats.cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto out_err; + } /* I'm the first user of the service */ atomic_set(&svc->usecnt, 0); @@ -1174,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, svc->flags = u->flags; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; + svc->net = net; INIT_LIST_HEAD(&svc->destinations); rwlock_init(&svc->sched_lock); @@ -1191,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, /* Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_inc(&ip_vs_ftpsvc_counter); + atomic_inc(&ipvs->ftpsvc_counter); else if (svc->port == 0) - atomic_inc(&ip_vs_nullsvc_counter); + atomic_inc(&ipvs->nullsvc_counter); - ip_vs_new_estimator(&svc->stats); + ip_vs_new_estimator(net, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) - ip_vs_num_services++; + ipvs->num_services++; /* Hash the service into the service table */ write_lock_bh(&__ip_vs_svc_lock); @@ -1209,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, *svc_p = svc; return 0; + out_err: if (svc != NULL) { ip_vs_unbind_scheduler(svc); @@ -1217,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, ip_vs_app_inc_put(svc->inc); local_bh_enable(); } + if (svc->stats.cpustats) + free_percpu(svc->stats.cpustats); kfree(svc); } ip_vs_scheduler_put(sched); @@ -1336,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; + struct netns_ipvs *ipvs = net_ipvs(svc->net); pr_info("%s: enter\n", __func__); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) - ip_vs_num_services--; + ipvs->num_services--; - ip_vs_kill_estimator(&svc->stats); + ip_vs_kill_estimator(svc->net, &svc->stats); /* Unbind scheduler */ old_sched = svc->scheduler; @@ -1366,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(dest); + __ip_vs_del_dest(svc->net, dest); } /* * Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_dec(&ip_vs_ftpsvc_counter); + atomic_dec(&ipvs->ftpsvc_counter); else if (svc->port == 0) - atomic_dec(&ip_vs_nullsvc_counter); + atomic_dec(&ipvs->nullsvc_counter); /* * Free the service if nobody refers to it @@ -1385,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port), atomic_read(&svc->usecnt)); + free_percpu(svc->stats.cpustats); kfree(svc); } @@ -1430,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(void) +static int ip_vs_flush(struct net *net) { int idx; struct ip_vs_service *svc, *nxt; /* - * Flush the service table hashed by <protocol,addr,port> + * Flush the service table hashed by <netns,protocol,addr,port> */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { - ip_vs_unlink_service(svc); + list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], + s_list) { + if (net_eq(svc->net, net)) + ip_vs_unlink_service(svc); } } @@ -1450,7 +1435,8 @@ static int ip_vs_flush(void) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry_safe(svc, nxt, &ip_vs_svc_fwm_table[idx], f_list) { - ip_vs_unlink_service(svc); + if (net_eq(svc->net, net)) + ip_vs_unlink_service(svc); } } @@ -1474,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) return 0; } -static int ip_vs_zero_all(void) +static int ip_vs_zero_all(struct net *net) { int idx; struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - ip_vs_zero_service(svc); + if (net_eq(svc->net, net)) + ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - ip_vs_zero_service(svc); + if (net_eq(svc->net, net)) + ip_vs_zero_service(svc); } } - ip_vs_zero_stats(&ip_vs_stats); + ip_vs_zero_stats(net_ipvs(net)->tot_stats); return 0; } @@ -1500,6 +1488,7 @@ static int proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = current->nsproxy->net_ns; int *valp = table->data; int val = *valp; int rc; @@ -1510,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - update_defense_level(); + update_defense_level(net_ipvs(net)); } } return rc; @@ -1550,7 +1539,8 @@ proc_do_sync_mode(ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - ip_vs_sync_switch_mode(val); + struct net *net = current->nsproxy->net_ns; + ip_vs_sync_switch_mode(net, val); } } return rc; @@ -1558,42 +1548,31 @@ proc_do_sync_mode(ctl_table *table, int write, /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) + * Do not change order or insert new entries without + * align with netns init in __ip_vs_control_init() */ static struct ctl_table vs_vars[] = { { .procname = "amemthresh", - .data = &sysctl_ip_vs_amemthresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#ifdef CONFIG_IP_VS_DEBUG - { - .procname = "debug_level", - .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, -#endif { .procname = "am_droprate", - .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "drop_entry", - .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, }, { .procname = "drop_packet", - .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, @@ -1601,7 +1580,6 @@ static struct ctl_table vs_vars[] = { #ifdef CONFIG_IP_VS_NFCT { .procname = "conntrack", - .data = &sysctl_ip_vs_conntrack, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -1609,25 +1587,62 @@ static struct ctl_table vs_vars[] = { #endif { .procname = "secure_tcp", - .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, }, { .procname = "snat_reroute", - .data = &sysctl_ip_vs_snat_reroute, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, { .procname = "sync_version", - .data = &sysctl_ip_vs_sync_ver, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_do_sync_mode, }, + { + .procname = "cache_bypass", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "expire_nodest_conn", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "expire_quiescent_template", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sync_threshold", + .maxlen = + sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), + .mode = 0644, + .proc_handler = proc_do_sync_threshold, + }, + { + .procname = "nat_icmp_send", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_IP_VS_DEBUG + { + .procname = "debug_level", + .data = &sysctl_ip_vs_debug_level, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #if 0 { .procname = "timeout_established", @@ -1714,41 +1729,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec_jiffies, }, #endif - { - .procname = "cache_bypass", - .data = &sysctl_ip_vs_cache_bypass, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "expire_nodest_conn", - .data = &sysctl_ip_vs_expire_nodest_conn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "expire_quiescent_template", - .data = &sysctl_ip_vs_expire_quiescent_template, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "sync_threshold", - .data = &sysctl_ip_vs_sync_threshold, - .maxlen = sizeof(sysctl_ip_vs_sync_threshold), - .mode = 0644, - .proc_handler = proc_do_sync_threshold, - }, - { - .procname = "nat_icmp_send", - .data = &sysctl_ip_vs_nat_icmp_send, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { } }; @@ -1760,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = { }; EXPORT_SYMBOL_GPL(net_vs_ctl_path); -static struct ctl_table_header * sysctl_header; - #ifdef CONFIG_PROC_FS struct ip_vs_iter { + struct seq_net_private p; /* Do not move this, netns depends upon it*/ struct list_head *table; int bucket; }; @@ -1791,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags) /* Get the Nth entry in the two lists */ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { + struct net *net = seq_file_net(seq); struct ip_vs_iter *iter = seq->private; int idx; struct ip_vs_service *svc; @@ -1798,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (pos-- == 0){ + if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; return svc; @@ -1809,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (pos-- == 0) { + if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; return svc; @@ -1963,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = { static int ip_vs_info_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ip_vs_info_seq_ops, + return seq_open_net(inode, file, &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)); } @@ -1977,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = { #endif -struct ip_vs_stats ip_vs_stats = { - .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), -}; - #ifdef CONFIG_PROC_FS static int ip_vs_stats_show(struct seq_file *seq, void *v) { + struct net *net = seq_file_single_net(seq); + struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -1991,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) seq_printf(seq, " Conns Packets Packets Bytes Bytes\n"); - spin_lock_bh(&ip_vs_stats.lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, - ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, - (unsigned long long) ip_vs_stats.ustats.inbytes, - (unsigned long long) ip_vs_stats.ustats.outbytes); + spin_lock_bh(&tot_stats->lock); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, + tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, + (unsigned long long) tot_stats->ustats.inbytes, + (unsigned long long) tot_stats->ustats.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq,"%8X %8X %8X %16X %16X\n", - ip_vs_stats.ustats.cps, - ip_vs_stats.ustats.inpps, - ip_vs_stats.ustats.outpps, - ip_vs_stats.ustats.inbps, - ip_vs_stats.ustats.outbps); - spin_unlock_bh(&ip_vs_stats.lock); + tot_stats->ustats.cps, + tot_stats->ustats.inpps, + tot_stats->ustats.outpps, + tot_stats->ustats.inbps, + tot_stats->ustats.outbps); + spin_unlock_bh(&tot_stats->lock); return 0; } static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) { - return single_open(file, ip_vs_stats_show, NULL); + return single_open_net(inode, file, ip_vs_stats_show); } static const struct file_operations ip_vs_stats_fops = { @@ -2024,13 +2002,68 @@ static const struct file_operations ip_vs_stats_fops = { .release = single_release, }; +static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) +{ + struct net *net = seq_file_single_net(seq); + struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + int i; + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_puts(seq, + " Total Incoming Outgoing Incoming Outgoing\n"); + seq_printf(seq, + "CPU Conns Packets Packets Bytes Bytes\n"); + + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); + seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", + i, u->ustats.conns, u->ustats.inpkts, + u->ustats.outpkts, (__u64)u->ustats.inbytes, + (__u64)u->ustats.outbytes); + } + + spin_lock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", + tot_stats->ustats.conns, tot_stats->ustats.inpkts, + tot_stats->ustats.outpkts, + (unsigned long long) tot_stats->ustats.inbytes, + (unsigned long long) tot_stats->ustats.outbytes); + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_puts(seq, + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, " %8X %8X %8X %16X %16X\n", + tot_stats->ustats.cps, + tot_stats->ustats.inpps, + tot_stats->ustats.outpps, + tot_stats->ustats.inbps, + tot_stats->ustats.outbps); + spin_unlock_bh(&tot_stats->lock); + + return 0; +} + +static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, ip_vs_stats_percpu_show); +} + +static const struct file_operations ip_vs_stats_percpu_fops = { + .owner = THIS_MODULE, + .open = ip_vs_stats_percpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* * Set timeout values for tcp tcpfin udp in the timeout_table. */ -static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) +static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) { + struct ip_vs_proto_data *pd; + IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", u->tcp_timeout, u->tcp_fin_timeout, @@ -2038,19 +2071,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] = u->tcp_timeout * HZ; } if (u->tcp_fin_timeout) { - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] = u->tcp_fin_timeout * HZ; } #endif #ifdef CONFIG_IP_VS_PROTO_UDP if (u->udp_timeout) { - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd->timeout_table[IP_VS_UDP_S_NORMAL] = u->udp_timeout * HZ; } #endif @@ -2115,6 +2151,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, static int do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { + struct net *net = sock_net(sk); int ret; unsigned char arg[MAX_ARG_LEN]; struct ip_vs_service_user *usvc_compat; @@ -2149,19 +2186,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(); + ret = ip_vs_flush(net); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ - ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); + ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); goto out_unlock; } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); + ret = start_sync_thread(net, dm->state, dm->mcast_ifn, + dm->syncid); goto out_unlock; } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = stop_sync_thread(dm->state); + ret = stop_sync_thread(net, dm->state); goto out_unlock; } @@ -2176,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { - ret = ip_vs_zero_all(); + ret = ip_vs_zero_all(net); goto out_unlock; } } @@ -2193,10 +2231,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Lookup the exact service by <protocol, addr, port> or fwmark */ if (usvc.fwmark == 0) - svc = __ip_vs_service_find(usvc.af, usvc.protocol, + svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); + svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2209,7 +2247,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(&usvc, &svc); + ret = ip_vs_add_service(net, &usvc, &svc); break; case IP_VS_SO_SET_EDIT: ret = ip_vs_edit_service(svc, &usvc); @@ -2269,7 +2307,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) } static inline int -__ip_vs_get_service_entries(const struct ip_vs_get_services *get, +__ip_vs_get_service_entries(struct net *net, + const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { int idx, count=0; @@ -2280,7 +2319,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) + if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; if (count >= get->num_services) @@ -2299,7 +2338,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) + if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; if (count >= get->num_services) @@ -2319,7 +2358,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, } static inline int -__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, +__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; @@ -2327,9 +2366,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, int ret = 0; if (get->fwmark) - svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); + svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else - svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, + svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); if (svc) { @@ -2364,17 +2403,19 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, } static inline void -__ip_vs_get_timeouts(struct ip_vs_timeout_user *u) +__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) { + struct ip_vs_proto_data *pd; + #ifdef CONFIG_IP_VS_PROTO_TCP - u->tcp_timeout = - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; - u->tcp_fin_timeout = - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; + u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; #endif #ifdef CONFIG_IP_VS_PROTO_UDP + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); u->udp_timeout = - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; + pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; #endif } @@ -2403,7 +2444,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) unsigned char arg[128]; int ret = 0; unsigned int copylen; + struct net *net = sock_net(sk); + struct netns_ipvs *ipvs = net_ipvs(net); + BUG_ON(!net); if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2446,7 +2490,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_getinfo info; info.version = IP_VS_VERSION_CODE; info.size = ip_vs_conn_tab_size; - info.num_services = ip_vs_num_services; + info.num_services = ipvs->num_services; if (copy_to_user(user, &info, sizeof(info)) != 0) ret = -EFAULT; } @@ -2465,7 +2509,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_service_entries(get, user); + ret = __ip_vs_get_service_entries(net, get, user); } break; @@ -2478,10 +2522,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; if (entry->fwmark) - svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); + svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else - svc = __ip_vs_service_find(AF_INET, entry->protocol, - &addr, entry->port); + svc = __ip_vs_service_find(net, AF_INET, + entry->protocol, &addr, + entry->port); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2504,7 +2549,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_dest_entries(get, user); + ret = __ip_vs_get_dest_entries(net, get, user); } break; @@ -2512,7 +2557,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; } @@ -2523,15 +2568,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_daemon_user d[2]; memset(&d, 0, sizeof(d)); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) { + if (ipvs->sync_state & IP_VS_STATE_MASTER) { d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); - d[0].syncid = ip_vs_master_syncid; + strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, + sizeof(d[0].mcast_ifn)); + d[0].syncid = ipvs->master_syncid; } - if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { + if (ipvs->sync_state & IP_VS_STATE_BACKUP) { d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); - d[1].syncid = ip_vs_backup_syncid; + strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, + sizeof(d[1].mcast_ifn)); + d[1].syncid = ipvs->backup_syncid; } if (copy_to_user(user, &d, sizeof(d)) != 0) ret = -EFAULT; @@ -2570,6 +2617,7 @@ static struct genl_family ip_vs_genl_family = { .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, .maxattr = IPVS_CMD_MAX, + .netnsok = true, /* Make ipvsadm to work on netns */ }; /* Policy used for first-level command attributes */ @@ -2724,11 +2772,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, int idx = 0, i; int start = cb->args[0]; struct ip_vs_service *svc; + struct net *net = skb_sknet(skb); mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start) + if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -2739,7 +2788,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start) + if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -2755,7 +2804,8 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, +static int ip_vs_genl_parse_service(struct net *net, + struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry, struct ip_vs_service **ret_svc) { @@ -2798,9 +2848,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, } if (usvc->fwmark) - svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); + svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else - svc = __ip_vs_service_find(usvc->af, usvc->protocol, + svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); *ret_svc = svc; @@ -2837,13 +2887,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, return 0; } -static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) +static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, + struct nlattr *nla) { struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); + ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); return ret ? ERR_PTR(ret) : svc; } @@ -2911,6 +2962,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; + struct net *net = skb_sknet(skb); mutex_lock(&__ip_vs_mutex); @@ -2919,7 +2971,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) goto out_err; - svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); + + svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3033,20 +3086,23 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb_net(skb); + struct netns_ipvs *ipvs = net_ipvs(net); + mutex_lock(&__ip_vs_mutex); - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { + if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, - ip_vs_master_mcast_ifn, - ip_vs_master_syncid, cb) < 0) + ipvs->master_mcast_ifn, + ipvs->master_syncid, cb) < 0) goto nla_put_failure; cb->args[0] = 1; } - if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { + if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, - ip_vs_backup_mcast_ifn, - ip_vs_backup_syncid, cb) < 0) + ipvs->backup_mcast_ifn, + ipvs->backup_syncid, cb) < 0) goto nla_put_failure; cb->args[1] = 1; @@ -3058,31 +3114,33 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_new_daemon(struct nlattr **attrs) +static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) { if (!(attrs[IPVS_DAEMON_ATTR_STATE] && attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && attrs[IPVS_DAEMON_ATTR_SYNC_ID])) return -EINVAL; - return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), + return start_sync_thread(net, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); } -static int ip_vs_genl_del_daemon(struct nlattr **attrs) +static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) { if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; - return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + return stop_sync_thread(net, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); } -static int ip_vs_genl_set_config(struct nlattr **attrs) +static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); @@ -3094,7 +3152,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs) if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); - return ip_vs_set_timeout(&t); + return ip_vs_set_timeout(net, &t); } static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) @@ -3104,16 +3162,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) struct ip_vs_dest_user_kern udest; int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; + struct net *net; + struct netns_ipvs *ipvs; + net = skb_sknet(skb); + ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(); + ret = ip_vs_flush(net); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(info->attrs); + ret = ip_vs_genl_set_config(net, info->attrs); goto out; } else if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { @@ -3129,13 +3191,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) } if (cmd == IPVS_CMD_NEW_DAEMON) - ret = ip_vs_genl_new_daemon(daemon_attrs); + ret = ip_vs_genl_new_daemon(net, daemon_attrs); else - ret = ip_vs_genl_del_daemon(daemon_attrs); + ret = ip_vs_genl_del_daemon(net, daemon_attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { - ret = ip_vs_zero_all(); + ret = ip_vs_zero_all(net); goto out; } @@ -3145,7 +3207,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = 1; - ret = ip_vs_genl_parse_service(&usvc, + ret = ip_vs_genl_parse_service(net, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); if (ret) @@ -3175,7 +3237,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) switch (cmd) { case IPVS_CMD_NEW_SERVICE: if (svc == NULL) - ret = ip_vs_add_service(&usvc, &svc); + ret = ip_vs_add_service(net, &usvc, &svc); else ret = -EEXIST; break; @@ -3213,7 +3275,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; void *reply; int ret, cmd, reply_cmd; + struct net *net; + struct netns_ipvs *ipvs; + net = skb_sknet(skb); + ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3242,7 +3308,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_service *svc; - svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); + svc = ip_vs_genl_find_service(net, + info->attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc)) { ret = PTR_ERR(svc); goto out_err; @@ -3262,7 +3329,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); #ifdef CONFIG_IP_VS_PROTO_TCP NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, @@ -3408,62 +3475,172 @@ static void ip_vs_genl_unregister(void) /* End of Generic Netlink interface definitions */ +/* + * per netns intit/exit func. + */ +int __net_init __ip_vs_control_init(struct net *net) +{ + int idx; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ctl_table *tbl; + + atomic_set(&ipvs->dropentry, 0); + spin_lock_init(&ipvs->dropentry_lock); + spin_lock_init(&ipvs->droppacket_lock); + spin_lock_init(&ipvs->securetcp_lock); + ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + + /* Initialize rs_table */ + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + + INIT_LIST_HEAD(&ipvs->dest_trash); + atomic_set(&ipvs->ftpsvc_counter, 0); + atomic_set(&ipvs->nullsvc_counter, 0); + + /* procfs stats */ + ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); + if (ipvs->tot_stats == NULL) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!ipvs->cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto err_alloc; + } + spin_lock_init(&ipvs->tot_stats->lock); + + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); + proc_net_fops_create(net, "ip_vs_stats_percpu", 0, + &ip_vs_stats_percpu_fops); + + if (!net_eq(net, &init_net)) { + tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + } else + tbl = vs_vars; + /* Initialize sysctl defaults */ + idx = 0; + ipvs->sysctl_amemthresh = 1024; + tbl[idx++].data = &ipvs->sysctl_amemthresh; + ipvs->sysctl_am_droprate = 10; + tbl[idx++].data = &ipvs->sysctl_am_droprate; + tbl[idx++].data = &ipvs->sysctl_drop_entry; + tbl[idx++].data = &ipvs->sysctl_drop_packet; +#ifdef CONFIG_IP_VS_NFCT + tbl[idx++].data = &ipvs->sysctl_conntrack; +#endif + tbl[idx++].data = &ipvs->sysctl_secure_tcp; + ipvs->sysctl_snat_reroute = 1; + tbl[idx++].data = &ipvs->sysctl_snat_reroute; + ipvs->sysctl_sync_ver = 1; + tbl[idx++].data = &ipvs->sysctl_sync_ver; + tbl[idx++].data = &ipvs->sysctl_cache_bypass; + tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; + tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; + ipvs->sysctl_sync_threshold[0] = 3; + ipvs->sysctl_sync_threshold[1] = 50; + tbl[idx].data = &ipvs->sysctl_sync_threshold; + tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); + tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; + + + ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, + vs_vars); + if (ipvs->sysctl_hdr == NULL) + goto err_reg; + ip_vs_new_estimator(net, ipvs->tot_stats); + ipvs->sysctl_tbl = tbl; + /* Schedule defense work */ + INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); + schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(tbl); +err_dup: + free_percpu(ipvs->cpustats); +err_alloc: + kfree(ipvs->tot_stats); + return -ENOMEM; +} + +static void __net_exit __ip_vs_control_cleanup(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_trash_cleanup(net); + ip_vs_kill_estimator(net, ipvs->tot_stats); + cancel_delayed_work_sync(&ipvs->defense_work); + cancel_work_sync(&ipvs->defense_work.work); + unregister_net_sysctl_table(ipvs->sysctl_hdr); + proc_net_remove(net, "ip_vs_stats_percpu"); + proc_net_remove(net, "ip_vs_stats"); + proc_net_remove(net, "ip_vs"); + free_percpu(ipvs->cpustats); + kfree(ipvs->tot_stats); +} + +static struct pernet_operations ipvs_control_ops = { + .init = __ip_vs_control_init, + .exit = __ip_vs_control_cleanup, +}; int __init ip_vs_control_init(void) { - int ret; int idx; + int ret; EnterFunction(2); - /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ + /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { INIT_LIST_HEAD(&ip_vs_svc_table[idx]); INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_rtable[idx]); + + ret = register_pernet_subsys(&ipvs_control_ops); + if (ret) { + pr_err("cannot register namespace.\n"); + goto err; } - smp_wmb(); + + smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - return ret; + goto err_net; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); nf_unregister_sockopt(&ip_vs_sockopts); - return ret; + goto err_net; } - proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); - - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); - - ip_vs_new_estimator(&ip_vs_stats); - - /* Hook the defense timer */ - schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); - LeaveFunction(2); return 0; + +err_net: + unregister_pernet_subsys(&ipvs_control_ops); +err: + return ret; } void ip_vs_control_cleanup(void) { EnterFunction(2); - ip_vs_trash_cleanup(); - cancel_rearming_delayed_work(&defense_work); - cancel_work_sync(&defense_work.work); - ip_vs_kill_estimator(&ip_vs_stats); - unregister_sysctl_table(sysctl_header); - proc_net_remove(&init_net, "ip_vs_stats"); - proc_net_remove(&init_net, "ip_vs"); + unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index ff28801962e0..f560a05c965a 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -8,8 +8,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: - * + * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> + * Network name space (netns) aware. + * Global data moved to netns i.e struct netns_ipvs + * Affected data: est_list and est_lock. + * estimation_timer() runs with timer per netns. + * get_stats()) do the per cpu summing. */ #define KMSG_COMPONENT "IPVS" @@ -48,11 +52,42 @@ */ -static void estimation_timer(unsigned long arg); +/* + * Make a summary from each cpu + */ +static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, + struct ip_vs_cpu_stats *stats) +{ + int i; + + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); + unsigned int start; + __u64 inbytes, outbytes; + if (i) { + sum->conns += s->ustats.conns; + sum->inpkts += s->ustats.inpkts; + sum->outpkts += s->ustats.outpkts; + do { + start = u64_stats_fetch_begin_bh(&s->syncp); + inbytes = s->ustats.inbytes; + outbytes = s->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + sum->inbytes += inbytes; + sum->outbytes += outbytes; + } else { + sum->conns = s->ustats.conns; + sum->inpkts = s->ustats.inpkts; + sum->outpkts = s->ustats.outpkts; + do { + start = u64_stats_fetch_begin_bh(&s->syncp); + sum->inbytes = s->ustats.inbytes; + sum->outbytes = s->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } + } +} -static LIST_HEAD(est_list); -static DEFINE_SPINLOCK(est_lock); -static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); static void estimation_timer(unsigned long arg) { @@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg) u32 n_inpkts, n_outpkts; u64 n_inbytes, n_outbytes; u32 rate; + struct net *net = (struct net *)arg; + struct netns_ipvs *ipvs; - spin_lock(&est_lock); - list_for_each_entry(e, &est_list, list) { + ipvs = net_ipvs(net); + ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); + spin_lock(&ipvs->est_lock); + list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); + ip_vs_read_cpu_stats(&s->ustats, s->cpustats); spin_lock(&s->lock); n_conns = s->ustats.conns; n_inpkts = s->ustats.inpkts; @@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg) n_outbytes = s->ustats.outbytes; /* scaled by 2^10, but divided 2 seconds */ - rate = (n_conns - e->last_conns)<<9; + rate = (n_conns - e->last_conns) << 9; e->last_conns = n_conns; - e->cps += ((long)rate - (long)e->cps)>>2; - s->ustats.cps = (e->cps+0x1FF)>>10; + e->cps += ((long)rate - (long)e->cps) >> 2; + s->ustats.cps = (e->cps + 0x1FF) >> 10; - rate = (n_inpkts - e->last_inpkts)<<9; + rate = (n_inpkts - e->last_inpkts) << 9; e->last_inpkts = n_inpkts; - e->inpps += ((long)rate - (long)e->inpps)>>2; - s->ustats.inpps = (e->inpps+0x1FF)>>10; + e->inpps += ((long)rate - (long)e->inpps) >> 2; + s->ustats.inpps = (e->inpps + 0x1FF) >> 10; - rate = (n_outpkts - e->last_outpkts)<<9; + rate = (n_outpkts - e->last_outpkts) << 9; e->last_outpkts = n_outpkts; - e->outpps += ((long)rate - (long)e->outpps)>>2; - s->ustats.outpps = (e->outpps+0x1FF)>>10; + e->outpps += ((long)rate - (long)e->outpps) >> 2; + s->ustats.outpps = (e->outpps + 0x1FF) >> 10; - rate = (n_inbytes - e->last_inbytes)<<4; + rate = (n_inbytes - e->last_inbytes) << 4; e->last_inbytes = n_inbytes; - e->inbps += ((long)rate - (long)e->inbps)>>2; - s->ustats.inbps = (e->inbps+0xF)>>5; + e->inbps += ((long)rate - (long)e->inbps) >> 2; + s->ustats.inbps = (e->inbps + 0xF) >> 5; - rate = (n_outbytes - e->last_outbytes)<<4; + rate = (n_outbytes - e->last_outbytes) << 4; e->last_outbytes = n_outbytes; - e->outbps += ((long)rate - (long)e->outbps)>>2; - s->ustats.outbps = (e->outbps+0xF)>>5; + e->outbps += ((long)rate - (long)e->outbps) >> 2; + s->ustats.outbps = (e->outbps + 0xF) >> 5; spin_unlock(&s->lock); } - spin_unlock(&est_lock); - mod_timer(&est_timer, jiffies + 2*HZ); + spin_unlock(&ipvs->est_lock); + mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_new_estimator(struct ip_vs_stats *stats) +void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; INIT_LIST_HEAD(&est->list); @@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) est->last_outbytes = stats->ustats.outbytes; est->outbps = stats->ustats.outbps<<5; - spin_lock_bh(&est_lock); - list_add(&est->list, &est_list); - spin_unlock_bh(&est_lock); + spin_lock_bh(&ipvs->est_lock); + list_add(&est->list, &ipvs->est_list); + spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_kill_estimator(struct ip_vs_stats *stats) +void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; - spin_lock_bh(&est_lock); + spin_lock_bh(&ipvs->est_lock); list_del(&est->list); - spin_unlock_bh(&est_lock); + spin_unlock_bh(&ipvs->est_lock); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) @@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) est->outbps = 0; } -int __init ip_vs_estimator_init(void) +static int __net_init __ip_vs_estimator_init(struct net *net) { - mod_timer(&est_timer, jiffies + 2 * HZ); + struct netns_ipvs *ipvs = net_ipvs(net); + + INIT_LIST_HEAD(&ipvs->est_list); + spin_lock_init(&ipvs->est_lock); + setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); + mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); return 0; } +static void __net_exit __ip_vs_estimator_exit(struct net *net) +{ + del_timer_sync(&net_ipvs(net)->est_timer); +} +static struct pernet_operations ip_vs_app_ops = { + .init = __ip_vs_estimator_init, + .exit = __ip_vs_estimator_exit, +}; + +int __init ip_vs_estimator_init(void) +{ + int rv; + + rv = register_pernet_subsys(&ip_vs_app_ops); + return rv; +} + void ip_vs_estimator_cleanup(void) { - del_timer_sync(&est_timer); + unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 84aef65b37d1..6b5dd6ddaae9 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, int ret = 0; enum ip_conntrack_info ctinfo; struct nf_conn *ct; + struct net *net; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -197,13 +198,15 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, iph->protocol, - &from, port, &cp->caddr, 0, &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + iph->protocol, &from, port, + &cp->caddr, 0, &p); n_cp = ip_vs_conn_out_get(&p); } if (!n_cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, + ip_vs_conn_fill_param(ip_vs_conn_net(cp), + AF_INET, IPPROTO_TCP, &cp->caddr, 0, &cp->vaddr, port, &p); n_cp = ip_vs_conn_new(&p, &from, port, IP_VS_CONN_F_NO_CPORT | @@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * would be adjusted twice. */ + net = skb_net(skb); cp->app_data = NULL; - ip_vs_tcp_conn_listen(n_cp); + ip_vs_tcp_conn_listen(net, n_cp); ip_vs_conn_put(n_cp); return ret; } @@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct net *net; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -358,9 +363,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, - &cp->vaddr, htons(ntohs(cp->vport)-1), - &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + iph->protocol, &to, port, &cp->vaddr, + htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); if (!n_cp) { n_cp = ip_vs_conn_new(&p, &cp->daddr, @@ -378,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Move tunnel to listen state */ - ip_vs_tcp_conn_listen(n_cp); + net = skb_net(skb); + ip_vs_tcp_conn_listen(net, n_cp); ip_vs_conn_put(n_cp); return 1; @@ -399,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = { .pkt_in = ip_vs_ftp_in, }; - /* - * ip_vs_ftp initialization + * per netns ip_vs_ftp initialization */ -static int __init ip_vs_ftp_init(void) +static int __net_init __ip_vs_ftp_init(struct net *net) { int i, ret; struct ip_vs_app *app = &ip_vs_ftp; - ret = register_ip_vs_app(app); + ret = register_ip_vs_app(net, app); if (ret) return ret; for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { if (!ports[i]) continue; - ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); + ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); if (ret) break; pr_info("%s: loaded support on port[%d] = %d\n", @@ -423,18 +428,39 @@ static int __init ip_vs_ftp_init(void) } if (ret) - unregister_ip_vs_app(app); + unregister_ip_vs_app(net, app); return ret; } +/* + * netns exit + */ +static void __ip_vs_ftp_exit(struct net *net) +{ + struct ip_vs_app *app = &ip_vs_ftp; + + unregister_ip_vs_app(net, app); +} + +static struct pernet_operations ip_vs_ftp_ops = { + .init = __ip_vs_ftp_init, + .exit = __ip_vs_ftp_exit, +}; +int __init ip_vs_ftp_init(void) +{ + int rv; + + rv = register_pernet_subsys(&ip_vs_ftp_ops); + return rv; +} /* * ip_vs_ftp finish. */ static void __exit ip_vs_ftp_exit(void) { - unregister_ip_vs_app(&ip_vs_ftp); + unregister_pernet_subsys(&ip_vs_ftp_ops); } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 9323f8944199..d5bec3371871 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -70,7 +70,6 @@ * entries that haven't been touched for a day. */ #define COUNT_FOR_FULL_EXPIRATION 30 -static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; /* @@ -117,7 +116,7 @@ struct ip_vs_lblc_table { static ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", - .data = &sysctl_ip_vs_lblc_expiration, + .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = { { } }; -static struct ctl_table_header * sysctl_header; - static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); @@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; + struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; @@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, - en->lastuse + sysctl_ip_vs_lblc_expiration)) + en->lastuse + + ipvs->sysctl_lblc_expiration)) continue; ip_vs_lblc_free(en); @@ -543,23 +542,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .schedule = ip_vs_lblc_schedule, }; +/* + * per netns init. + */ +static int __net_init __ip_vs_lblc_init(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!net_eq(net, &init_net)) { + ipvs->lblc_ctl_table = kmemdup(vs_vars_table, + sizeof(vs_vars_table), + GFP_KERNEL); + if (ipvs->lblc_ctl_table == NULL) + goto err_dup; + } else + ipvs->lblc_ctl_table = vs_vars_table; + ipvs->sysctl_lblc_expiration = 24*60*60*HZ; + ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; + + ipvs->lblc_ctl_header = + register_net_sysctl_table(net, net_vs_ctl_path, + ipvs->lblc_ctl_table); + if (!ipvs->lblc_ctl_header) + goto err_reg; + + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(ipvs->lblc_ctl_table); + +err_dup: + return -ENOMEM; +} + +static void __net_exit __ip_vs_lblc_exit(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + unregister_net_sysctl_table(ipvs->lblc_ctl_header); + + if (!net_eq(net, &init_net)) + kfree(ipvs->lblc_ctl_table); +} + +static struct pernet_operations ip_vs_lblc_ops = { + .init = __ip_vs_lblc_init, + .exit = __ip_vs_lblc_exit, +}; static int __init ip_vs_lblc_init(void) { int ret; - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); + ret = register_pernet_subsys(&ip_vs_lblc_ops); + if (ret) + return ret; + ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); if (ret) - unregister_sysctl_table(sysctl_header); + unregister_pernet_subsys(&ip_vs_lblc_ops); return ret; } - static void __exit ip_vs_lblc_cleanup(void) { - unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); + unregister_pernet_subsys(&ip_vs_lblc_ops); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index dbeed8ea421a..61ae8cfcf0b4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -70,8 +70,6 @@ * entries that haven't been touched for a day. */ #define COUNT_FOR_FULL_EXPIRATION 30 -static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; - /* * for IPVS lblcr entry hash table @@ -296,7 +294,7 @@ struct ip_vs_lblcr_table { static ctl_table vs_vars_table[] = { { .procname = "lblcr_expiration", - .data = &sysctl_ip_vs_lblcr_expiration, + .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = { { } }; -static struct ctl_table_header * sysctl_header; - static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { list_del(&en->list); @@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; + struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { - if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, - now)) + if (time_after(en->lastuse + + ipvs->sysctl_lblcr_expiration, now)) continue; ip_vs_lblcr_free(en); @@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { + struct netns_ipvs *ipvs = net_ipvs(svc->net); /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && time_after(jiffies, en->set.lastmod + - sysctl_ip_vs_lblcr_expiration)) { + ipvs->sysctl_lblcr_expiration)) { struct ip_vs_dest *m; write_lock(&en->set.lock); @@ -744,23 +742,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .schedule = ip_vs_lblcr_schedule, }; +/* + * per netns init. + */ +static int __net_init __ip_vs_lblcr_init(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!net_eq(net, &init_net)) { + ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, + sizeof(vs_vars_table), + GFP_KERNEL); + if (ipvs->lblcr_ctl_table == NULL) + goto err_dup; + } else + ipvs->lblcr_ctl_table = vs_vars_table; + ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; + ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; + + ipvs->lblcr_ctl_header = + register_net_sysctl_table(net, net_vs_ctl_path, + ipvs->lblcr_ctl_table); + if (!ipvs->lblcr_ctl_header) + goto err_reg; + + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(ipvs->lblcr_ctl_table); + +err_dup: + return -ENOMEM; +} + +static void __net_exit __ip_vs_lblcr_exit(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + unregister_net_sysctl_table(ipvs->lblcr_ctl_header); + + if (!net_eq(net, &init_net)) + kfree(ipvs->lblcr_ctl_table); +} + +static struct pernet_operations ip_vs_lblcr_ops = { + .init = __ip_vs_lblcr_init, + .exit = __ip_vs_lblcr_exit, +}; static int __init ip_vs_lblcr_init(void) { int ret; - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); + ret = register_pernet_subsys(&ip_vs_lblcr_ops); + if (ret) + return ret; + ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); if (ret) - unregister_sysctl_table(sysctl_header); + unregister_pernet_subsys(&ip_vs_lblcr_ops); return ret; } - static void __exit ip_vs_lblcr_cleanup(void) { - unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); + unregister_pernet_subsys(&ip_vs_lblcr_ops); } diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 4680647cd450..f454c80df0a7 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, struct nf_conntrack_tuple *orig, new_reply; struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = nf_ct_net(ct); if (exp->tuple.src.l3num != PF_INET) return; @@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, /* RS->CLIENT */ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, + ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, &orig->src.u3, orig->src.u.tcp.port, &orig->dst.u3, orig->dst.u.tcp.port, &p); cp = ip_vs_conn_out_get(&p); @@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); + h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, + &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); /* Show what happens instead of calling nf_ct_kill() */ diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index c53998390877..6ac986cdcff3 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) return 0; } +/* + * register an ipvs protocols netns related data + */ +static int +register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + unsigned hash = IP_VS_PROTO_HASH(pp->protocol); + struct ip_vs_proto_data *pd = + kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); + + if (!pd) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + pd->pp = pp; /* For speed issues */ + pd->next = ipvs->proto_data_table[hash]; + ipvs->proto_data_table[hash] = pd; + atomic_set(&pd->appcnt, 0); /* Init app counter */ + + if (pp->init_netns != NULL) + pp->init_netns(net, pd); + + return 0; +} /* * unregister an ipvs protocol @@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) return -ESRCH; } +/* + * unregister an ipvs protocols netns data + */ +static int +unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data **pd_p; + unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol); + + pd_p = &ipvs->proto_data_table[hash]; + for (; *pd_p; pd_p = &(*pd_p)->next) { + if (*pd_p == pd) { + *pd_p = pd->next; + if (pd->pp->exit_netns != NULL) + pd->pp->exit_netns(net, pd); + kfree(pd); + return 0; + } + } + + return -ESRCH; +} /* * get ip_vs_protocol object by its proto. @@ -100,19 +148,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) } EXPORT_SYMBOL(ip_vs_proto_get); +/* + * get ip_vs_protocol object data by netns and proto + */ +struct ip_vs_proto_data * +__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) +{ + struct ip_vs_proto_data *pd; + unsigned hash = IP_VS_PROTO_HASH(proto); + + for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { + if (pd->pp->protocol == proto) + return pd; + } + + return NULL; +} + +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct net *net, unsigned short proto) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + return __ipvs_proto_data_get(ipvs, proto); +} +EXPORT_SYMBOL(ip_vs_proto_data_get); /* * Propagate event for state change to all protocols */ -void ip_vs_protocol_timeout_change(int flags) +void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) { - struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; int i; for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { - for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { - if (pp->timeout_change) - pp->timeout_change(pp, flags); + for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { + if (pd->pp->timeout_change) + pd->pp->timeout_change(pd, flags); } } } @@ -236,6 +309,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); } +/* + * per network name-space init + */ +static int __net_init __ip_vs_protocol_init(struct net *net) +{ +#ifdef CONFIG_IP_VS_PROTO_TCP + register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); +#endif +#ifdef CONFIG_IP_VS_PROTO_UDP + register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); +#endif +#ifdef CONFIG_IP_VS_PROTO_SCTP + register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); +#endif +#ifdef CONFIG_IP_VS_PROTO_AH + register_ip_vs_proto_netns(net, &ip_vs_protocol_ah); +#endif +#ifdef CONFIG_IP_VS_PROTO_ESP + register_ip_vs_proto_netns(net, &ip_vs_protocol_esp); +#endif + return 0; +} + +static void __net_exit __ip_vs_protocol_cleanup(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd; + int i; + + /* unregister all the ipvs proto data for this netns */ + for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { + while ((pd = ipvs->proto_data_table[i]) != NULL) + unregister_ip_vs_proto_netns(net, pd); + } +} + +static struct pernet_operations ipvs_proto_ops = { + .init = __ip_vs_protocol_init, + .exit = __ip_vs_protocol_cleanup, +}; int __init ip_vs_protocol_init(void) { @@ -265,6 +378,7 @@ int __init ip_vs_protocol_init(void) REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif pr_info("Registered protocols (%s)\n", &protocols[2]); + return register_pernet_subsys(&ipvs_proto_ops); return 0; } @@ -275,6 +389,7 @@ void ip_vs_protocol_cleanup(void) struct ip_vs_protocol *pp; int i; + unregister_pernet_subsys(&ipvs_proto_ops); /* unregister all the ipvs protocols */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pp = ip_vs_proto_table[i]) != NULL) diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 3a0461117d3f..5b8eb8b12c3e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -41,28 +41,30 @@ struct isakmp_hdr { #define PORT_ISAKMP 500 static void -ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, - int inverse, struct ip_vs_conn_param *p) +ah_esp_conn_fill_param_proto(struct net *net, int af, + const struct ip_vs_iphdr *iph, int inverse, + struct ip_vs_conn_param *p) { if (likely(!inverse)) - ip_vs_conn_fill_param(af, IPPROTO_UDP, + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->saddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP), p); else - ip_vs_conn_fill_param(af, IPPROTO_UDP, + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->daddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP), p); } static struct ip_vs_conn * -ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, +ah_esp_conn_in_get(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); cp = ip_vs_conn_in_get(&p); if (!cp) { /* @@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " "%s%s %s->%s\n", inverse ? "ICMP+" : "", - pp->name, + ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); } @@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, static struct ip_vs_conn * ah_esp_conn_out_get(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", inverse ? "ICMP+" : "", - pp->name, + ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); } @@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int -ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { /* @@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; } -static void ah_esp_init(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -static void ah_esp_exit(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - #ifdef CONFIG_IP_VS_PROTO_AH struct ip_vs_protocol ip_vs_protocol_ah = { .name = "AH", .protocol = IPPROTO_AH, .num_states = 1, .dont_defrag = 1, - .init = ah_esp_init, - .exit = ah_esp_exit, + .init = NULL, + .exit = NULL, .conn_schedule = ah_esp_conn_schedule, .conn_in_get = ah_esp_conn_in_get, .conn_out_get = ah_esp_conn_out_get, @@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = { .app_conn_bind = NULL, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, /* ISAKMP */ - .set_state_timeout = NULL, }; #endif @@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = { .protocol = IPPROTO_ESP, .num_states = 1, .dont_defrag = 1, - .init = ah_esp_init, - .exit = ah_esp_exit, + .init = NULL, + .exit = NULL, .conn_schedule = ah_esp_conn_schedule, .conn_in_get = ah_esp_conn_in_get, .conn_out_get = ah_esp_conn_out_get, diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a315159983ad..fb2d04ac5d4e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,9 +9,10 @@ #include <net/ip_vs.h> static int -sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; @@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, sizeof(_schunkh), &_schunkh); if (sch == NULL) return 0; - + net = skb_net(skb); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, sh->dest))) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -46,10 +47,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -861,7 +862,7 @@ static struct ipvs_sctp_nextstate /* * Timeout table[state] */ -static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { +static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { [IP_VS_SCTP_S_NONE] = 2 * HZ, [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, @@ -905,20 +906,8 @@ static const char *sctp_state_name(int state) return "?"; } -static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) -{ -} - -static int -sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - -return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, - sctp_state_name_table, sname, to); -} - static inline int -set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, +set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) { sctp_chunkhdr_t _sctpch, *sch; @@ -976,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, IP_VS_DBG_BUF(8, "%s %s %s:%d->" "%s:%d state: %s->%s conn->refcnt:%d\n", - pp->name, + pd->pp->name, ((direction == IP_VS_DIR_OUTPUT) ? "output " : "input "), IP_VS_DBG_ADDR(cp->af, &cp->daddr), @@ -1000,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } } + if (likely(pd)) + cp->timeout = pd->timeout_table[cp->state = next_state]; + else /* What to do ? */ + cp->timeout = sctp_timeouts[cp->state = next_state]; - cp->timeout = pp->timeout_table[cp->state = next_state]; - - return 1; + return 1; } static int sctp_state_transition(struct ip_vs_conn *cp, int direction, - const struct sk_buff *skb, struct ip_vs_protocol *pp) + const struct sk_buff *skb, struct ip_vs_proto_data *pd) { int ret = 0; spin_lock(&cp->lock); - ret = set_sctp_state(pp, cp, direction, skb); + ret = set_sctp_state(pd, cp, direction, skb); spin_unlock(&cp->lock); return ret; } -/* - * Hash table for SCTP application incarnations - */ -#define SCTP_APP_TAB_BITS 4 -#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) -#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) - -static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(sctp_app_lock); - static inline __u16 sctp_app_hashkey(__be16 port) { return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) & SCTP_APP_TAB_MASK; } -static int sctp_register_app(struct ip_vs_app *inc) +static int sctp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); hash = sctp_app_hashkey(port); - spin_lock_bh(&sctp_app_lock); - list_for_each_entry(i, &sctp_apps[hash], p_list) { + spin_lock_bh(&ipvs->sctp_app_lock); + list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &sctp_apps[hash]); - atomic_inc(&ip_vs_protocol_sctp.appcnt); + list_add(&inc->p_list, &ipvs->sctp_apps[hash]); + atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&sctp_app_lock); + spin_unlock_bh(&ipvs->sctp_app_lock); return ret; } -static void sctp_unregister_app(struct ip_vs_app *inc) +static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - spin_lock_bh(&sctp_app_lock); - atomic_dec(&ip_vs_protocol_sctp.appcnt); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); + + spin_lock_bh(&ipvs->sctp_app_lock); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&sctp_app_lock); + spin_unlock_bh(&ipvs->sctp_app_lock); } static int sctp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; @@ -1079,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - spin_lock(&sctp_app_lock); - list_for_each_entry(inc, &sctp_apps[hash], p_list) { + spin_lock(&ipvs->sctp_app_lock); + list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&sctp_app_lock); + spin_unlock(&ipvs->sctp_app_lock); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -1100,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&sctp_app_lock); + spin_unlock(&ipvs->sctp_app_lock); out: return result; } -static void ip_vs_sctp_init(struct ip_vs_protocol *pp) +/* --------------------------------------------- + * timeouts is netns related now. + * --------------------------------------------- + */ +static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(sctp_apps); - pp->timeout_table = sctp_timeouts; -} + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); + spin_lock_init(&ipvs->tcp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, + sizeof(sctp_timeouts)); +} -static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) +static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) { - + kfree(pd->timeout_table); } struct ip_vs_protocol ip_vs_protocol_sctp = { - .name = "SCTP", - .protocol = IPPROTO_SCTP, - .num_states = IP_VS_SCTP_S_LAST, - .dont_defrag = 0, - .appcnt = ATOMIC_INIT(0), - .init = ip_vs_sctp_init, - .exit = ip_vs_sctp_exit, - .register_app = sctp_register_app, + .name = "SCTP", + .protocol = IPPROTO_SCTP, + .num_states = IP_VS_SCTP_S_LAST, + .dont_defrag = 0, + .init = NULL, + .exit = NULL, + .init_netns = __ip_vs_sctp_init, + .exit_netns = __ip_vs_sctp_exit, + .register_app = sctp_register_app, .unregister_app = sctp_unregister_app, - .conn_schedule = sctp_conn_schedule, - .conn_in_get = ip_vs_conn_in_get_proto, - .conn_out_get = ip_vs_conn_out_get_proto, - .snat_handler = sctp_snat_handler, - .dnat_handler = sctp_dnat_handler, - .csum_check = sctp_csum_check, - .state_name = sctp_state_name, + .conn_schedule = sctp_conn_schedule, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, + .snat_handler = sctp_snat_handler, + .dnat_handler = sctp_dnat_handler, + .csum_check = sctp_csum_check, + .state_name = sctp_state_name, .state_transition = sctp_state_transition, - .app_conn_bind = sctp_app_conn_bind, - .debug_packet = ip_vs_tcpudp_debug_packet, - .timeout_change = sctp_timeout_change, - .set_state_timeout = sctp_set_state_timeout, + .app_conn_bind = sctp_app_conn_bind, + .debug_packet = ip_vs_tcpudp_debug_packet, + .timeout_change = NULL, }; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 1cdab12abfef..c0cc341b840d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -9,8 +9,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: + * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> * + * Network name space (netns) aware. + * Global data moved to netns i.e struct netns_ipvs + * tcp_timeouts table has copy per netns in a hash table per + * protocol ip_vs_proto_data and is handled by netns */ #define KMSG_COMPONENT "IPVS" @@ -28,9 +32,10 @@ #include <net/ip_vs.h> static int -tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; struct ip_vs_iphdr iph; @@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, *verdict = NF_DROP; return 0; } - + net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ if (th->syn && - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, - th->dest))) { + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, + &iph.daddr, th->dest))) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -63,10 +68,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -344,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = { /* * Timeout table[state] */ -static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { +static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_NONE] = 2*HZ, [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, @@ -443,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = { /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; -static struct tcp_states_t *tcp_state_table = tcp_states; - - -static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) +static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags) { int on = (flags & 1); /* secure_tcp */ @@ -456,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) ** for most if not for all of the applications. Something ** like "capabilities" (flags) for each object. */ - tcp_state_table = (on? tcp_states_dos : tcp_states); -} - -static int -tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, - tcp_state_name_table, sname, to); + pd->tcp_state_table = (on ? tcp_states_dos : tcp_states); } static inline int tcp_state_idx(struct tcphdr *th) @@ -480,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th) } static inline void -set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, +set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, struct tcphdr *th) { int state_idx; @@ -503,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, goto tcp_state_out; } - new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; + new_state = + pd->tcp_state_table[state_off+state_idx].next_state[cp->state]; tcp_state_out: if (new_state != cp->state) { @@ -511,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" "%s:%d state: %s->%s conn->refcnt:%d\n", - pp->name, + pd->pp->name, ((state_off == TCP_DIR_OUTPUT) ? "output " : "input "), th->syn ? 'S' : '.', @@ -541,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } - cp->timeout = pp->timeout_table[cp->state = new_state]; + if (likely(pd)) + cp->timeout = pd->timeout_table[cp->state = new_state]; + else /* What to do ? */ + cp->timeout = tcp_timeouts[cp->state = new_state]; } - /* * Handle state transitions */ static int tcp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { struct tcphdr _tcph, *th; @@ -566,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, return 0; spin_lock(&cp->lock); - set_tcp_state(pp, cp, direction, th); + set_tcp_state(pd, cp, direction, th); spin_unlock(&cp->lock); return 1; } - -/* - * Hash table for TCP application incarnations - */ -#define TCP_APP_TAB_BITS 4 -#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) -#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) - -static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(tcp_app_lock); - static inline __u16 tcp_app_hashkey(__be16 port) { return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) @@ -590,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port) } -static int tcp_register_app(struct ip_vs_app *inc) +static int tcp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); hash = tcp_app_hashkey(port); - spin_lock_bh(&tcp_app_lock); - list_for_each_entry(i, &tcp_apps[hash], p_list) { + spin_lock_bh(&ipvs->tcp_app_lock); + list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &tcp_apps[hash]); - atomic_inc(&ip_vs_protocol_tcp.appcnt); + list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&tcp_app_lock); + spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } static void -tcp_unregister_app(struct ip_vs_app *inc) +tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - spin_lock_bh(&tcp_app_lock); - atomic_dec(&ip_vs_protocol_tcp.appcnt); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + + spin_lock_bh(&ipvs->tcp_app_lock); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&tcp_app_lock); + spin_unlock_bh(&ipvs->tcp_app_lock); } static int tcp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; @@ -639,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&tcp_app_lock); - list_for_each_entry(inc, &tcp_apps[hash], p_list) { + spin_lock(&ipvs->tcp_app_lock); + list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&tcp_app_lock); + spin_unlock(&ipvs->tcp_app_lock); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -661,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&tcp_app_lock); + spin_unlock(&ipvs->tcp_app_lock); out: return result; @@ -671,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* * Set LISTEN timeout. (ip_vs_conn_put will setup timer) */ -void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) +void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) { + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + spin_lock(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; - cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; + cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] + : tcp_timeouts[IP_VS_TCP_S_LISTEN]); spin_unlock(&cp->lock); } - -static void ip_vs_tcp_init(struct ip_vs_protocol *pp) +/* --------------------------------------------- + * timeouts is netns related now. + * --------------------------------------------- + */ +static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(tcp_apps); - pp->timeout_table = tcp_timeouts; -} + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); + spin_lock_init(&ipvs->tcp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, + sizeof(tcp_timeouts)); + pd->tcp_state_table = tcp_states; +} -static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) +static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) { + kfree(pd->timeout_table); } @@ -697,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .protocol = IPPROTO_TCP, .num_states = IP_VS_TCP_S_LAST, .dont_defrag = 0, - .appcnt = ATOMIC_INIT(0), - .init = ip_vs_tcp_init, - .exit = ip_vs_tcp_exit, + .init = NULL, + .exit = NULL, + .init_netns = __ip_vs_tcp_init, + .exit_netns = __ip_vs_tcp_exit, .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, @@ -713,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .app_conn_bind = tcp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = tcp_timeout_change, - .set_state_timeout = tcp_set_state_timeout, }; diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index cd398de010cc..f1282cbe6fe3 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -9,7 +9,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: + * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> + * Network name space (netns) aware. * */ @@ -28,9 +29,10 @@ #include <net/ip6_checksum.h> static int -udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; struct ip_vs_iphdr iph; @@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, *verdict = NF_DROP; return 0; } - - svc = ip_vs_service_get(af, skb->mark, iph.protocol, + net = skb_net(skb); + svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, uh->dest); if (svc) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -62,10 +64,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -344,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) return 1; } - -/* - * Note: the caller guarantees that only one of register_app, - * unregister_app or app_conn_bind is called each time. - */ - -#define UDP_APP_TAB_BITS 4 -#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) -#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) - -static struct list_head udp_apps[UDP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(udp_app_lock); - static inline __u16 udp_app_hashkey(__be16 port) { return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) @@ -364,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port) } -static int udp_register_app(struct ip_vs_app *inc) +static int udp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); hash = udp_app_hashkey(port); - spin_lock_bh(&udp_app_lock); - list_for_each_entry(i, &udp_apps[hash], p_list) { + spin_lock_bh(&ipvs->udp_app_lock); + list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &udp_apps[hash]); - atomic_inc(&ip_vs_protocol_udp.appcnt); + list_add(&inc->p_list, &ipvs->udp_apps[hash]); + atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&udp_app_lock); + spin_unlock_bh(&ipvs->udp_app_lock); return ret; } static void -udp_unregister_app(struct ip_vs_app *inc) +udp_unregister_app(struct net *net, struct ip_vs_app *inc) { - spin_lock_bh(&udp_app_lock); - atomic_dec(&ip_vs_protocol_udp.appcnt); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(net); + + spin_lock_bh(&ipvs->udp_app_lock); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&udp_app_lock); + spin_unlock_bh(&ipvs->udp_app_lock); } static int udp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; @@ -413,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - spin_lock(&udp_app_lock); - list_for_each_entry(inc, &udp_apps[hash], p_list) { + spin_lock(&ipvs->udp_app_lock); + list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&udp_app_lock); + spin_unlock(&ipvs->udp_app_lock); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -435,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&udp_app_lock); + spin_unlock(&ipvs->udp_app_lock); out: return result; } -static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { +static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = 5*60*HZ, [IP_VS_UDP_S_LAST] = 2*HZ, }; @@ -452,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_LAST] = "BUG!", }; - -static int -udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, - udp_state_name_table, sname, to); -} - static const char * udp_state_name(int state) { if (state >= IP_VS_UDP_S_LAST) @@ -470,20 +457,30 @@ static const char * udp_state_name(int state) static int udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { - cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; + if (unlikely(!pd)) { + pr_err("UDP no ns data\n"); + return 0; + } + + cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; return 1; } -static void udp_init(struct ip_vs_protocol *pp) +static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(udp_apps); - pp->timeout_table = udp_timeouts; + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); + spin_lock_init(&ipvs->udp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, + sizeof(udp_timeouts)); } -static void udp_exit(struct ip_vs_protocol *pp) +static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) { + kfree(pd->timeout_table); } @@ -492,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .protocol = IPPROTO_UDP, .num_states = IP_VS_UDP_S_LAST, .dont_defrag = 0, - .init = udp_init, - .exit = udp_exit, + .init = NULL, + .exit = NULL, + .init_netns = __udp_init, + .exit_netns = __udp_exit, .conn_schedule = udp_conn_schedule, .conn_in_get = ip_vs_conn_in_get_proto, .conn_out_get = ip_vs_conn_out_get_proto, @@ -507,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .app_conn_bind = udp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, - .set_state_timeout = udp_set_state_timeout, }; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c1c167ab73ee..d1adf988eb08 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -192,6 +192,7 @@ union ip_vs_sync_conn { #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct ip_vs_sync_thread_data { + struct net *net; struct socket *sock; char *buf; }; @@ -259,10 +260,6 @@ struct ip_vs_sync_mesg { /* ip_vs_sync_conn entries start here */ }; -/* the maximum length of sync (sending/receiving) message */ -static int sync_send_mesg_maxlen; -static int sync_recv_mesg_maxlen; - struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; @@ -273,28 +270,6 @@ struct ip_vs_sync_buff { unsigned char *end; }; - -/* the sync_buff list head and the lock */ -static LIST_HEAD(ip_vs_sync_queue); -static DEFINE_SPINLOCK(ip_vs_sync_lock); - -/* current sync_buff for accepting new conn entries */ -static struct ip_vs_sync_buff *curr_sb = NULL; -static DEFINE_SPINLOCK(curr_sb_lock); - -/* ipvs sync daemon state */ -volatile int ip_vs_sync_state = IP_VS_STATE_NONE; -volatile int ip_vs_master_syncid = 0; -volatile int ip_vs_backup_syncid = 0; - -/* multicast interface name */ -char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; -char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; - -/* sync daemon tasks */ -static struct task_struct *sync_master_thread; -static struct task_struct *sync_backup_thread; - /* multicast addr */ static struct sockaddr_in mcast_addr = { .sin_family = AF_INET, @@ -324,20 +299,20 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) put_unaligned_be32(ho->previous_delta, &no->previous_delta); } -static inline struct ip_vs_sync_buff *sb_dequeue(void) +static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; - spin_lock_bh(&ip_vs_sync_lock); - if (list_empty(&ip_vs_sync_queue)) { + spin_lock_bh(&ipvs->sync_lock); + if (list_empty(&ipvs->sync_queue)) { sb = NULL; } else { - sb = list_entry(ip_vs_sync_queue.next, + sb = list_entry(ipvs->sync_queue.next, struct ip_vs_sync_buff, list); list_del(&sb->list); } - spin_unlock_bh(&ip_vs_sync_lock); + spin_unlock_bh(&ipvs->sync_lock); return sb; } @@ -345,25 +320,27 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void) /* * Create a new sync buffer for Version 1 proto. */ -static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) +static inline struct ip_vs_sync_buff * +ip_vs_sync_buff_create(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { + sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + if (!sb->mesg) { kfree(sb); return NULL; } sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ sb->mesg->version = SYNC_PROTO_VER; - sb->mesg->syncid = ip_vs_master_syncid; + sb->mesg->syncid = ipvs->master_syncid; sb->mesg->size = sizeof(struct ip_vs_sync_mesg); sb->mesg->nr_conns = 0; sb->mesg->spare = 0; sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); - sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; + sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; sb->firstuse = jiffies; return sb; @@ -375,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) kfree(sb); } -static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) +static inline void sb_queue_tail(struct netns_ipvs *ipvs) { - spin_lock(&ip_vs_sync_lock); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) - list_add_tail(&sb->list, &ip_vs_sync_queue); + struct ip_vs_sync_buff *sb = ipvs->sync_buff; + + spin_lock(&ipvs->sync_lock); + if (ipvs->sync_state & IP_VS_STATE_MASTER) + list_add_tail(&sb->list, &ipvs->sync_queue); else ip_vs_sync_buff_release(sb); - spin_unlock(&ip_vs_sync_lock); + spin_unlock(&ipvs->sync_lock); } /* @@ -390,18 +369,18 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) * than the specified time or the specified time is zero. */ static inline struct ip_vs_sync_buff * -get_curr_sync_buff(unsigned long time) +get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) { struct ip_vs_sync_buff *sb; - spin_lock_bh(&curr_sb_lock); - if (curr_sb && (time == 0 || - time_before(jiffies - curr_sb->firstuse, time))) { - sb = curr_sb; - curr_sb = NULL; + spin_lock_bh(&ipvs->sync_buff_lock); + if (ipvs->sync_buff && (time == 0 || + time_before(jiffies - ipvs->sync_buff->firstuse, time))) { + sb = ipvs->sync_buff; + ipvs->sync_buff = NULL; } else sb = NULL; - spin_unlock_bh(&curr_sb_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return sb; } @@ -409,33 +388,37 @@ get_curr_sync_buff(unsigned long time) * Switch mode from sending version 0 or 1 * - must handle sync_buf */ -void ip_vs_sync_switch_mode(int mode) { +void ip_vs_sync_switch_mode(struct net *net, int mode) +{ + struct netns_ipvs *ipvs = net_ipvs(net); - if (!ip_vs_sync_state & IP_VS_STATE_MASTER) + if (!ipvs->sync_state & IP_VS_STATE_MASTER) return; - if (mode == sysctl_ip_vs_sync_ver || !curr_sb) + if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) return; - spin_lock_bh(&curr_sb_lock); + spin_lock_bh(&ipvs->sync_buff_lock); /* Buffer empty ? then let buf_create do the job */ - if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { - kfree(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { + kfree(ipvs->sync_buff); + ipvs->sync_buff = NULL; } else { - spin_lock_bh(&ip_vs_sync_lock); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) - list_add_tail(&curr_sb->list, &ip_vs_sync_queue); + spin_lock_bh(&ipvs->sync_lock); + if (ipvs->sync_state & IP_VS_STATE_MASTER) + list_add_tail(&ipvs->sync_buff->list, + &ipvs->sync_queue); else - ip_vs_sync_buff_release(curr_sb); - spin_unlock_bh(&ip_vs_sync_lock); + ip_vs_sync_buff_release(ipvs->sync_buff); + spin_unlock_bh(&ipvs->sync_lock); } - spin_unlock_bh(&curr_sb_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); } /* * Create a new sync buffer for Version 0 proto. */ -static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) +static inline struct ip_vs_sync_buff * +ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; struct ip_vs_sync_mesg_v0 *mesg; @@ -443,16 +426,17 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { + sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + if (!sb->mesg) { kfree(sb); return NULL; } mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg->nr_conns = 0; - mesg->syncid = ip_vs_master_syncid; - mesg->size = 4; - sb->head = (unsigned char *)mesg + 4; - sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen; + mesg->syncid = ipvs->master_syncid; + mesg->size = sizeof(struct ip_vs_sync_mesg_v0); + sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); + sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; sb->firstuse = jiffies; return sb; } @@ -461,8 +445,9 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) * Version 0 , could be switched in by sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ -void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) +void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; int len; @@ -473,10 +458,12 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return; - spin_lock(&curr_sb_lock); - if (!curr_sb) { - if (!(curr_sb=ip_vs_sync_buff_create_v0())) { - spin_unlock(&curr_sb_lock); + spin_lock(&ipvs->sync_buff_lock); + if (!ipvs->sync_buff) { + ipvs->sync_buff = + ip_vs_sync_buff_create_v0(ipvs); + if (!ipvs->sync_buff) { + spin_unlock(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -484,8 +471,8 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : SIMPLE_CONN_SIZE; - m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg; - s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; + m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; + s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; /* copy members */ s->reserved = 0; @@ -506,18 +493,18 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) m->nr_conns++; m->size += len; - curr_sb->head += len; + ipvs->sync_buff->head += len; /* check if there is a space for next one */ - if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) { - sb_queue_tail(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { + sb_queue_tail(ipvs); + ipvs->sync_buff = NULL; } - spin_unlock(&curr_sb_lock); + spin_unlock(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ if (cp->control) - ip_vs_sync_conn(cp->control); + ip_vs_sync_conn(net, cp->control); } /* @@ -525,16 +512,17 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) * Called by ip_vs_in. * Sending Version 1 messages */ -void ip_vs_sync_conn(struct ip_vs_conn *cp) +void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; __u8 *p; unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ - if (sysctl_ip_vs_sync_ver == 0) { - ip_vs_sync_conn_v0(cp); + if (ipvs->sysctl_sync_ver == 0) { + ip_vs_sync_conn_v0(net, cp); return; } /* Do not sync ONE PACKET */ @@ -551,7 +539,7 @@ sloop: pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); } - spin_lock(&curr_sb_lock); + spin_lock(&ipvs->sync_buff_lock); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -570,26 +558,27 @@ sloop: /* check if there is a space for this one */ pad = 0; - if (curr_sb) { - pad = (4 - (size_t)curr_sb->head) & 3; - if (curr_sb->head + len + pad > curr_sb->end) { - sb_queue_tail(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff) { + pad = (4 - (size_t)ipvs->sync_buff->head) & 3; + if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { + sb_queue_tail(ipvs); + ipvs->sync_buff = NULL; pad = 0; } } - if (!curr_sb) { - if (!(curr_sb=ip_vs_sync_buff_create())) { - spin_unlock(&curr_sb_lock); + if (!ipvs->sync_buff) { + ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); + if (!ipvs->sync_buff) { + spin_unlock(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } } - m = curr_sb->mesg; - p = curr_sb->head; - curr_sb->head += pad + len; + m = ipvs->sync_buff->mesg; + p = ipvs->sync_buff->head; + ipvs->sync_buff->head += pad + len; m->size += pad + len; /* Add ev. padding from prev. sync_conn */ while (pad--) @@ -647,7 +636,7 @@ sloop: } } - spin_unlock(&curr_sb_lock); + spin_unlock(&ipvs->sync_buff_lock); control: /* synchronize its controller if it has */ @@ -661,7 +650,7 @@ control: if (cp->flags & IP_VS_CONN_F_TEMPLATE) { int pkts = atomic_add_return(1, &cp->in_pkts); - if (pkts % sysctl_ip_vs_sync_threshold[1] != 1) + if (pkts % ipvs->sysctl_sync_threshold[1] != 1) return; } goto sloop; @@ -671,21 +660,21 @@ control: * fill_param used by version 1 */ static inline int -ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, +ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - ip_vs_conn_fill_param(af, sc->v6.protocol, + ip_vs_conn_fill_param(net, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif - ip_vs_conn_fill_param(af, sc->v4.protocol, + ip_vs_conn_fill_param(net, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, @@ -699,7 +688,8 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, buff[pe_name_len]=0; p->pe = __ip_vs_pe_getbyname(buff); if (!p->pe) { - IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff); + IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", + buff); return 1; } } else { @@ -725,16 +715,16 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, * Param: ... * timeout is in sec. */ -static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, - unsigned state, unsigned protocol, unsigned type, +static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, + unsigned int flags, unsigned int state, + unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, unsigned long timeout, __u32 fwmark, - struct ip_vs_sync_conn_options *opt, - struct ip_vs_protocol *pp) + struct ip_vs_sync_conn_options *opt) { struct ip_vs_dest *dest; struct ip_vs_conn *cp; - + struct netns_ipvs *ipvs = net_ipvs(net); if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(param); @@ -749,7 +739,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, * If it is not found the connection will remain unbound * but still handled. */ - dest = ip_vs_find_dest(type, daddr, dport, param->vaddr, + dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark); /* Set the approprite ativity flag */ @@ -805,7 +795,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); cp->state = state; cp->old_state = cp->state; /* @@ -821,17 +811,23 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) timeout = MAX_SCHEDULE_TIMEOUT / HZ; cp->timeout = timeout*HZ; - } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) - cp->timeout = pp->timeout_table[state]; - else - cp->timeout = (3*60*HZ); + } else { + struct ip_vs_proto_data *pd; + + pd = ip_vs_proto_data_get(net, protocol); + if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) + cp->timeout = pd->timeout_table[state]; + else + cp->timeout = (3*60*HZ); + } ip_vs_conn_put(cp); } /* * Process received multicast message for Version 0 */ -static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) +static void ip_vs_process_message_v0(struct net *net, const char *buffer, + const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; struct ip_vs_sync_conn_v0 *s; @@ -879,7 +875,6 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) } } else { /* protocol in templates is not used for state/timeout */ - pp = NULL; if (state > 0) { IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", state); @@ -887,16 +882,16 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) } } - ip_vs_conn_fill_param(AF_INET, s->protocol, + ip_vs_conn_fill_param(net, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, s->vport, ¶m); /* Send timeout as Zero */ - ip_vs_proc_conn(¶m, flags, state, s->protocol, AF_INET, + ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, - 0, 0, opt, pp); + 0, 0, opt); } } @@ -945,7 +940,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, /* * Process a Version 1 sync. connection */ -static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) +static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; @@ -1043,33 +1038,31 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) } } else { /* protocol in templates is not used for state/timeout */ - pp = NULL; if (state > 0) { IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", state); state = 0; } } - if (ip_vs_conn_fill_param_sync(af, s, ¶m, - pe_data, pe_data_len, - pe_name, pe_name_len)) { + if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, + pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) - ip_vs_proc_conn(¶m, flags, state, s->v4.protocol, af, + ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), - (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), - pp); + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) + ); #ifdef CONFIG_IP_VS_IPV6 else - ip_vs_proc_conn(¶m, flags, state, s->v6.protocol, af, + ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), - (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), - pp); + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) + ); #endif return 0; /* Error exit */ @@ -1083,8 +1076,10 @@ out: * ip_vs_conn entries. * Handles Version 0 & 1 */ -static void ip_vs_process_message(__u8 *buffer, const size_t buflen) +static void ip_vs_process_message(struct net *net, __u8 *buffer, + const size_t buflen) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; int i, nr_conns; @@ -1101,7 +1096,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) return; } /* SyncID sanity check */ - if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) { + if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); return; } @@ -1136,7 +1131,8 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) return; } /* Process a single sync_conn */ - if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) { + retc = ip_vs_proc_sync_conn(net, p, msg_end); + if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); return; @@ -1146,7 +1142,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) } } else { /* Old type of message */ - ip_vs_process_message_v0(buffer, buflen); + ip_vs_process_message_v0(net, buffer, buflen); return; } } @@ -1185,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname) { struct net_device *dev; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) @@ -1205,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname) * Set the maximum length of sync message according to the * specified interface's MTU. */ -static int set_sync_mesg_maxlen(int sync_state) +static int set_sync_mesg_maxlen(struct net *net, int sync_state) { + struct netns_ipvs *ipvs = net_ipvs(net); struct net_device *dev; int num; if (sync_state == IP_VS_STATE_MASTER) { - if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) + dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); + if (!dev) return -ENODEV; num = (dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr) - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + + ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); IP_VS_DBG(7, "setting the maximum length of sync sending " - "message %d.\n", sync_send_mesg_maxlen); + "message %d.\n", ipvs->send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { - if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) + dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); + if (!dev) return -ENODEV; - sync_recv_mesg_maxlen = dev->mtu - + ipvs->recv_mesg_maxlen = dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr); IP_VS_DBG(7, "setting the maximum length of sync receiving " - "message %d.\n", sync_recv_mesg_maxlen); + "message %d.\n", ipvs->recv_mesg_maxlen); } return 0; @@ -1243,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state) static int join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) { + struct net *net = sock_net(sk); struct ip_mreqn mreq; struct net_device *dev; int ret; @@ -1250,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1267,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) static int bind_mcastif_addr(struct socket *sock, char *ifname) { + struct net *net = sock_net(sock->sk); struct net_device *dev; __be32 addr; struct sockaddr_in sin; - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); @@ -1293,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) /* * Set up sending multicast socket over UDP */ -static struct socket * make_send_sock(void) +static struct socket *make_send_sock(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); struct socket *sock; int result; @@ -1305,7 +1311,7 @@ static struct socket * make_send_sock(void) return ERR_PTR(result); } - result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); + result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; @@ -1314,7 +1320,7 @@ static struct socket * make_send_sock(void) set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, 1); - result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); + result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error binding address of the mcast interface\n"); goto error; @@ -1338,8 +1344,9 @@ static struct socket * make_send_sock(void) /* * Set up receiving multicast socket over UDP */ -static struct socket * make_receive_sock(void) +static struct socket *make_receive_sock(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); struct socket *sock; int result; @@ -1363,7 +1370,7 @@ static struct socket * make_receive_sock(void) /* join the multicast group */ result = join_mcast_group(sock->sk, (struct in_addr *) &mcast_addr.sin_addr, - ip_vs_backup_mcast_ifn); + ipvs->backup_mcast_ifn); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; @@ -1434,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; + struct netns_ipvs *ipvs = net_ipvs(tinfo->net); struct ip_vs_sync_buff *sb; pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d\n", - ip_vs_master_mcast_ifn, ip_vs_master_syncid); + ipvs->master_mcast_ifn, ipvs->master_syncid); while (!kthread_should_stop()) { - while ((sb = sb_dequeue())) { + while ((sb = sb_dequeue(ipvs))) { ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); } - /* check if entries stay in curr_sb for 2 seconds */ - sb = get_curr_sync_buff(2 * HZ); + /* check if entries stay in ipvs->sync_buff for 2 seconds */ + sb = get_curr_sync_buff(ipvs, 2 * HZ); if (sb) { ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); @@ -1457,14 +1465,13 @@ static int sync_thread_master(void *data) } /* clean up the sync_buff queue */ - while ((sb=sb_dequeue())) { + while ((sb = sb_dequeue(ipvs))) ip_vs_sync_buff_release(sb); - } /* clean up the current sync_buff */ - if ((sb = get_curr_sync_buff(0))) { + sb = get_curr_sync_buff(ipvs, 0); + if (sb) ip_vs_sync_buff_release(sb); - } /* release the sending multicast socket */ sock_release(tinfo->sock); @@ -1477,11 +1484,12 @@ static int sync_thread_master(void *data) static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; + struct netns_ipvs *ipvs = net_ipvs(tinfo->net); int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d\n", - ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); + ipvs->backup_mcast_ifn, ipvs->backup_syncid); while (!kthread_should_stop()) { wait_event_interruptible(*sk_sleep(tinfo->sock->sk), @@ -1491,7 +1499,7 @@ static int sync_thread_backup(void *data) /* do we have data now? */ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { len = ip_vs_receive(tinfo->sock, tinfo->buf, - sync_recv_mesg_maxlen); + ipvs->recv_mesg_maxlen); if (len <= 0) { pr_err("receiving message error\n"); break; @@ -1500,7 +1508,7 @@ static int sync_thread_backup(void *data) /* disable bottom half, because it accesses the data shared by softirq while getting/creating conns */ local_bh_disable(); - ip_vs_process_message(tinfo->buf, len); + ip_vs_process_message(tinfo->net, tinfo->buf, len); local_bh_enable(); } } @@ -1514,11 +1522,12 @@ static int sync_thread_backup(void *data) } -int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) +int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) { struct ip_vs_sync_thread_data *tinfo; struct task_struct **realtask, *task; struct socket *sock; + struct netns_ipvs *ipvs = net_ipvs(net); char *name, *buf = NULL; int (*threadfn)(void *data); int result = -ENOMEM; @@ -1528,27 +1537,27 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) sizeof(struct ip_vs_sync_conn_v0)); if (state == IP_VS_STATE_MASTER) { - if (sync_master_thread) + if (ipvs->master_thread) return -EEXIST; - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, - sizeof(ip_vs_master_mcast_ifn)); - ip_vs_master_syncid = syncid; - realtask = &sync_master_thread; - name = "ipvs_syncmaster"; + strlcpy(ipvs->master_mcast_ifn, mcast_ifn, + sizeof(ipvs->master_mcast_ifn)); + ipvs->master_syncid = syncid; + realtask = &ipvs->master_thread; + name = "ipvs_master:%d"; threadfn = sync_thread_master; - sock = make_send_sock(); + sock = make_send_sock(net); } else if (state == IP_VS_STATE_BACKUP) { - if (sync_backup_thread) + if (ipvs->backup_thread) return -EEXIST; - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, - sizeof(ip_vs_backup_mcast_ifn)); - ip_vs_backup_syncid = syncid; - realtask = &sync_backup_thread; - name = "ipvs_syncbackup"; + strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, + sizeof(ipvs->backup_mcast_ifn)); + ipvs->backup_syncid = syncid; + realtask = &ipvs->backup_thread; + name = "ipvs_backup:%d"; threadfn = sync_thread_backup; - sock = make_receive_sock(); + sock = make_receive_sock(net); } else { return -EINVAL; } @@ -1558,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) goto out; } - set_sync_mesg_maxlen(state); + set_sync_mesg_maxlen(net, state); if (state == IP_VS_STATE_BACKUP) { - buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); + buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); if (!buf) goto outsocket; } @@ -1569,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if (!tinfo) goto outbuf; + tinfo->net = net; tinfo->sock = sock; tinfo->buf = buf; - task = kthread_run(threadfn, tinfo, name); + task = kthread_run(threadfn, tinfo, name, ipvs->gen); if (IS_ERR(task)) { result = PTR_ERR(task); goto outtinfo; @@ -1580,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) /* mark as active */ *realtask = task; - ip_vs_sync_state |= state; + ipvs->sync_state |= state; /* increase the module use count */ ip_vs_use_count_inc(); @@ -1598,16 +1608,18 @@ out: } -int stop_sync_thread(int state) +int stop_sync_thread(struct net *net, int state) { + struct netns_ipvs *ipvs = net_ipvs(net); + IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); if (state == IP_VS_STATE_MASTER) { - if (!sync_master_thread) + if (!ipvs->master_thread) return -ESRCH; pr_info("stopping master sync thread %d ...\n", - task_pid_nr(sync_master_thread)); + task_pid_nr(ipvs->master_thread)); /* * The lock synchronizes with sb_queue_tail(), so that we don't @@ -1615,21 +1627,21 @@ int stop_sync_thread(int state) * progress of stopping the master sync daemon. */ - spin_lock_bh(&ip_vs_sync_lock); - ip_vs_sync_state &= ~IP_VS_STATE_MASTER; - spin_unlock_bh(&ip_vs_sync_lock); - kthread_stop(sync_master_thread); - sync_master_thread = NULL; + spin_lock_bh(&ipvs->sync_lock); + ipvs->sync_state &= ~IP_VS_STATE_MASTER; + spin_unlock_bh(&ipvs->sync_lock); + kthread_stop(ipvs->master_thread); + ipvs->master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { - if (!sync_backup_thread) + if (!ipvs->backup_thread) return -ESRCH; pr_info("stopping backup sync thread %d ...\n", - task_pid_nr(sync_backup_thread)); + task_pid_nr(ipvs->backup_thread)); - ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; - kthread_stop(sync_backup_thread); - sync_backup_thread = NULL; + ipvs->sync_state &= ~IP_VS_STATE_BACKUP; + kthread_stop(ipvs->backup_thread); + ipvs->backup_thread = NULL; } else { return -EINVAL; } @@ -1639,3 +1651,42 @@ int stop_sync_thread(int state) return 0; } + +/* + * Initialize data struct for each netns + */ +static int __net_init __ip_vs_sync_init(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + INIT_LIST_HEAD(&ipvs->sync_queue); + spin_lock_init(&ipvs->sync_lock); + spin_lock_init(&ipvs->sync_buff_lock); + + ipvs->sync_mcast_addr.sin_family = AF_INET; + ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT); + ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); + return 0; +} + +static void __ip_vs_sync_cleanup(struct net *net) +{ + stop_sync_thread(net, IP_VS_STATE_MASTER); + stop_sync_thread(net, IP_VS_STATE_BACKUP); +} + +static struct pernet_operations ipvs_sync_ops = { + .init = __ip_vs_sync_init, + .exit = __ip_vs_sync_cleanup, +}; + + +int __init ip_vs_sync_init(void) +{ + return register_pernet_subsys(&ipvs_sync_ops); +} + +void __exit ip_vs_sync_cleanup(void) +{ + unregister_pernet_subsys(&ipvs_sync_ops); +} diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index fb2a445ddc59..1f2a4e35fb11 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -96,12 +96,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = dest->addr.ip, - .saddr = 0, - .tos = rtos, } }, + .fl4_dst = dest->addr.ip, + .fl4_tos = rtos, }; if (ip_route_output_key(net, &rt, &fl)) { @@ -118,12 +114,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_unlock(&dest->dst_lock); } else { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = daddr, - .saddr = 0, - .tos = rtos, } }, + .fl4_dst = daddr, + .fl4_tos = rtos, }; if (ip_route_output_key(net, &rt, &fl)) { @@ -178,14 +170,9 @@ __ip_vs_reroute_locally(struct sk_buff *skb) refdst_drop(orefdst); } else { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos), - } - }, + .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .mark = skb->mark, }; @@ -215,12 +202,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, { struct dst_entry *dst; struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = *daddr, - }, - }, + .fl6_dst = *daddr, }; dst = ip6_route_output(net, NULL, &fl); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0ba7d4801daf..e95ac42ef673 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); -static unsigned int nf_conntrack_hash_rnd __read_mostly; +unsigned int nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) { @@ -596,6 +596,21 @@ static noinline int early_drop(struct net *net, unsigned int hash) return dropped; } +void init_nf_conntrack_hash_rnd(void) +{ + unsigned int rand; + + /* + * Why not initialize nf_conntrack_rnd in a "init()" function ? + * Because there isn't enough entropy when system initializing, + * and we initialize it as late as possible. + */ + do { + get_random_bytes(&rand, sizeof(rand)); + } while (!rand); + cmpxchg(&nf_conntrack_hash_rnd, 0, rand); +} + static struct nf_conn * __nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, @@ -605,18 +620,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, struct nf_conn *ct; if (unlikely(!nf_conntrack_hash_rnd)) { - unsigned int rand; - - /* - * Why not initialize nf_conntrack_rnd in a "init()" function ? - * Because there isn't enough entropy when system initializing, - * and we initialize it as late as possible. - */ - do { - get_random_bytes(&rand, sizeof(rand)); - } while (!rand); - cmpxchg(&nf_conntrack_hash_rnd, 0, rand); - + init_nf_conntrack_hash_rnd(); /* recompute the hash as nf_conntrack_hash_rnd is initialized */ hash = hash_conntrack_raw(orig, zone); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 774f32ba2ac9..4a9ed23180df 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -32,9 +32,7 @@ unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); -static unsigned int nf_ct_expect_hash_rnd __read_mostly; unsigned int nf_ct_expect_max __read_mostly; -static int nf_ct_expect_hash_rnd_initted __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; @@ -77,15 +75,13 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple { unsigned int hash; - if (unlikely(!nf_ct_expect_hash_rnd_initted)) { - get_random_bytes(&nf_ct_expect_hash_rnd, - sizeof(nf_ct_expect_hash_rnd)); - nf_ct_expect_hash_rnd_initted = 1; + if (unlikely(!nf_conntrack_hash_rnd)) { + init_nf_conntrack_hash_rnd(); } hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | - (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd); + (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd); return ((u64)hash * nf_ct_expect_hsize) >> 32; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7f59be82449f..9eabaa6f28a8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -254,7 +254,7 @@ ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) ret = security_secid_to_secctx(ct->secmark, &secctx, &len); if (ret) - return ret; + return 0; ret = -1; nest_secctx = nla_nest_start(skb, CTA_SECCTX | NLA_F_NESTED); @@ -453,16 +453,22 @@ ctnetlink_counters_size(const struct nf_conn *ct) ; } -#ifdef CONFIG_NF_CONNTRACK_SECMARK -static int ctnetlink_nlmsg_secctx_size(const struct nf_conn *ct) +static inline int +ctnetlink_secctx_size(const struct nf_conn *ct) { - int len; +#ifdef CONFIG_NF_CONNTRACK_SECMARK + int len, ret; - security_secid_to_secctx(ct->secmark, NULL, &len); + ret = security_secid_to_secctx(ct->secmark, NULL, &len); + if (ret) + return 0; - return sizeof(char) * len; -} + return nla_total_size(0) /* CTA_SECCTX */ + + nla_total_size(sizeof(char) * len); /* CTA_SECCTX_NAME */ +#else + return 0; #endif +} static inline size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) @@ -479,10 +485,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_PROTOINFO */ + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ -#ifdef CONFIG_NF_CONNTRACK_SECMARK - + nla_total_size(0) /* CTA_SECCTX */ - + nla_total_size(ctnetlink_nlmsg_secctx_size(ct)) /* CTA_SECCTX_NAME */ -#endif + + ctnetlink_secctx_size(ct) #ifdef CONFIG_NF_NAT_NEEDED + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 328f1d2a51f8..8257bf643593 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -121,7 +121,7 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) ret = security_secid_to_secctx(ct->secmark, &secctx, &len); if (ret) - return ret; + return 0; ret = seq_printf(s, "secctx=%s ", secctx); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 80463507420e..ee5de3af510a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -38,9 +38,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) struct compat_delta { - struct compat_delta *next; - unsigned int offset; - int delta; + unsigned int offset; /* offset in kernel */ + int delta; /* delta in 32bit user land */ }; struct xt_af { @@ -49,7 +48,9 @@ struct xt_af { struct list_head target; #ifdef CONFIG_COMPAT struct mutex compat_mutex; - struct compat_delta *compat_offsets; + struct compat_delta *compat_tab; + unsigned int number; /* number of slots in compat_tab[] */ + unsigned int cur; /* number of used slots in compat_tab[] */ #endif }; @@ -414,54 +415,67 @@ int xt_check_match(struct xt_mtchk_param *par, EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) +int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { - struct compat_delta *tmp; + struct xt_af *xp = &xt[af]; - tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); - if (!tmp) - return -ENOMEM; + if (!xp->compat_tab) { + if (!xp->number) + return -EINVAL; + xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number); + if (!xp->compat_tab) + return -ENOMEM; + xp->cur = 0; + } - tmp->offset = offset; - tmp->delta = delta; + if (xp->cur >= xp->number) + return -EINVAL; - if (xt[af].compat_offsets) { - tmp->next = xt[af].compat_offsets->next; - xt[af].compat_offsets->next = tmp; - } else { - xt[af].compat_offsets = tmp; - tmp->next = NULL; - } + if (xp->cur) + delta += xp->compat_tab[xp->cur - 1].delta; + xp->compat_tab[xp->cur].offset = offset; + xp->compat_tab[xp->cur].delta = delta; + xp->cur++; return 0; } EXPORT_SYMBOL_GPL(xt_compat_add_offset); void xt_compat_flush_offsets(u_int8_t af) { - struct compat_delta *tmp, *next; - - if (xt[af].compat_offsets) { - for (tmp = xt[af].compat_offsets; tmp; tmp = next) { - next = tmp->next; - kfree(tmp); - } - xt[af].compat_offsets = NULL; + if (xt[af].compat_tab) { + vfree(xt[af].compat_tab); + xt[af].compat_tab = NULL; + xt[af].number = 0; } } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); int xt_compat_calc_jump(u_int8_t af, unsigned int offset) { - struct compat_delta *tmp; - int delta; - - for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) - if (tmp->offset < offset) - delta += tmp->delta; - return delta; + struct compat_delta *tmp = xt[af].compat_tab; + int mid, left = 0, right = xt[af].cur - 1; + + while (left <= right) { + mid = (left + right) >> 1; + if (offset > tmp[mid].offset) + left = mid + 1; + else if (offset < tmp[mid].offset) + right = mid - 1; + else + return mid ? tmp[mid - 1].delta : 0; + } + WARN_ON_ONCE(1); + return 0; } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); +void xt_compat_init_offsets(u_int8_t af, unsigned int number) +{ + xt[af].number = number; + xt[af].cur = 0; +} +EXPORT_SYMBOL(xt_compat_init_offsets); + int xt_compat_match_offset(const struct xt_match *match) { u_int16_t csize = match->compatsize ? : match->matchsize; @@ -1337,7 +1351,7 @@ static int __init xt_init(void) mutex_init(&xt[i].mutex); #ifdef CONFIG_COMPAT mutex_init(&xt[i].compat_mutex); - xt[i].compat_offsets = NULL; + xt[i].compat_tab = NULL; #endif INIT_LIST_HEAD(&xt[i].target); INIT_LIST_HEAD(&xt[i].match); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 22a2d421e7eb..5128a6c4cb2c 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -70,9 +70,9 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; fl.oif = info->priv->oif; } - fl.nl_u.ip4_u.daddr = info->gw.ip; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE; + fl.fl4_dst = info->gw.ip; + fl.fl4_tos = RT_TOS(iph->tos); + fl.fl4_scope = RT_SCOPE_UNIVERSE; if (ip_route_output_key(net, &rt, &fl) != 0) return false; @@ -150,9 +150,9 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; fl.oif = info->priv->oif; } - fl.nl_u.ip6_u.daddr = info->gw.in6; - fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | - (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; + fl.fl6_dst = info->gw.in6; + fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return false; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index e536710ad916..4ef1b63ad73f 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, return true; } +static inline bool +port_match(u16 min, u16 max, u16 port, bool invert) +{ + return (port >= min && port <= max) ^ invert; +} + +static inline bool +ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info, + const struct nf_conn *ct) +{ + const struct nf_conntrack_tuple *tuple; + + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + if ((info->match_flags & XT_CONNTRACK_PROTO) && + (nf_ct_protonum(ct) == info->l4proto) ^ + !(info->invert_flags & XT_CONNTRACK_PROTO)) + return false; + + /* Shortcut to match all recognized protocols by using ->src.all. */ + if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) && + !port_match(info->origsrc_port, info->origsrc_port_high, + ntohs(tuple->src.u.all), + info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT)) + return false; + + if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) && + !port_match(info->origdst_port, info->origdst_port_high, + ntohs(tuple->dst.u.all), + info->invert_flags & XT_CONNTRACK_ORIGDST_PORT)) + return false; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) && + !port_match(info->replsrc_port, info->replsrc_port_high, + ntohs(tuple->src.u.all), + info->invert_flags & XT_CONNTRACK_REPLSRC_PORT)) + return false; + + if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) && + !port_match(info->repldst_port, info->repldst_port_high, + ntohs(tuple->dst.u.all), + info->invert_flags & XT_CONNTRACK_REPLDST_PORT)) + return false; + + return true; +} + static bool conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 state_mask, u16 status_mask) @@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; - if (!ct_proto_port_check(info, ct)) - return false; + if (par->match->revision != 3) { + if (!ct_proto_port_check(info, ct)) + return false; + } else { + if (!ct_proto_port_check_v3(par->matchinfo, ct)) + return false; + } if ((info->match_flags & XT_CONNTRACK_STATUS) && (!!(status_mask & ct->status) ^ @@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) return conntrack_mt(skb, par, info->state_mask, info->status_mask); } +static bool +conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_conntrack_mtinfo3 *info = par->matchinfo; + + return conntrack_mt(skb, par, info->state_mask, info->status_mask); +} + static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; @@ -244,6 +305,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .destroy = conntrack_mt_destroy, .me = THIS_MODULE, }, + { + .name = "conntrack", + .revision = 3, + .family = NFPROTO_UNSPEC, + .matchsize = sizeof(struct xt_conntrack_mtinfo3), + .match = conntrack_mt_v3, + .checkentry = conntrack_mt_check, + .destroy = conntrack_mt_destroy, + .me = THIS_MODULE, + }, }; static int __init conntrack_mt_init(void) diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 9127a3d8aa35..bb10b0717f1b 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); + cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); if (unlikely(cp == NULL)) { match = false; goto out; diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index c8a4079261f0..af7f3355103e 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -107,7 +107,6 @@ enum { NLBL_CIPSOV4_C_LISTALL, __NLBL_CIPSOV4_C_MAX, }; -#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1) /* NetLabel CIPSOv4 attributes */ enum { diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 05d96431f819..0a25838bcf45 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -173,7 +173,6 @@ enum { NLBL_MGMT_C_VERSION, __NLBL_MGMT_C_MAX, }; -#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1) /* NetLabel Management attributes */ enum { diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 7aba63595137..0bc8dc3f9e3c 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -180,7 +180,6 @@ enum { NLBL_UNLABEL_C_STATICLISTDEF, __NLBL_UNLABEL_C_MAX, }; -#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) /* NetLabel Unlabeled attributes */ enum { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3616f27b9d46..91cb1d71f018 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -61,6 +61,7 @@ #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> @@ -163,8 +164,13 @@ struct packet_mreq_max { static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing, int tx_ring); +#define PGV_FROM_VMALLOC 1 +struct pgv { + char *buffer; +}; + struct packet_ring_buffer { - char **pg_vec; + struct pgv *pg_vec; unsigned int head; unsigned int frames_per_block; unsigned int frame_size; @@ -217,6 +223,13 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) +static inline __pure struct page *pgv_to_page(void *addr) +{ + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + return virt_to_page(addr); +} + static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union { @@ -229,11 +242,11 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) switch (po->tp_version) { case TPACKET_V1: h.h1->tp_status = status; - flush_dcache_page(virt_to_page(&h.h1->tp_status)); + flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: h.h2->tp_status = status; - flush_dcache_page(virt_to_page(&h.h2->tp_status)); + flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; default: pr_err("TPACKET version not supported\n"); @@ -256,10 +269,10 @@ static int __packet_get_status(struct packet_sock *po, void *frame) h.raw = frame; switch (po->tp_version) { case TPACKET_V1: - flush_dcache_page(virt_to_page(&h.h1->tp_status)); + flush_dcache_page(pgv_to_page(&h.h1->tp_status)); return h.h1->tp_status; case TPACKET_V2: - flush_dcache_page(virt_to_page(&h.h2->tp_status)); + flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; default: pr_err("TPACKET version not supported\n"); @@ -283,7 +296,8 @@ static void *packet_lookup_frame(struct packet_sock *po, pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; - h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size); + h.raw = rb->pg_vec[pg_vec_pos].buffer + + (frame_offset * rb->frame_size); if (status != __packet_get_status(po, h.raw)) return NULL; @@ -503,7 +517,8 @@ out_free: return err; } -static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, +static inline unsigned int run_filter(const struct sk_buff *skb, + const struct sock *sk, unsigned int res) { struct sk_filter *filter; @@ -511,22 +526,22 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, rcu_read_lock_bh(); filter = rcu_dereference_bh(sk->sk_filter); if (filter != NULL) - res = sk_run_filter(skb, filter->insns, filter->len); + res = sk_run_filter(skb, filter->insns); rcu_read_unlock_bh(); return res; } /* - This function makes lazy skb cloning in hope that most of packets - are discarded by BPF. - - Note tricky part: we DO mangle shared skb! skb->data, skb->len - and skb->cb are mangled. It works because (and until) packets - falling here are owned by current CPU. Output packets are cloned - by dev_queue_xmit_nit(), input packets are processed by net_bh - sequencially, so that if we return skb to original state on exit, - we will not harm anyone. + * This function makes lazy skb cloning in hope that most of packets + * are discarded by BPF. + * + * Note tricky part: we DO mangle shared skb! skb->data, skb->len + * and skb->cb are mangled. It works because (and until) packets + * falling here are owned by current CPU. Output packets are cloned + * by dev_queue_xmit_nit(), input packets are processed by net_bh + * sequencially, so that if we return skb to original state on exit, + * we will not harm anyone. */ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, @@ -552,11 +567,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, if (dev->header_ops) { /* The device has an explicit notion of ll header, - exported to higher levels. - - Otherwise, the device hides datails of it frame - structure, so that corresponding packet head - never delivered to user. + * exported to higher levels. + * + * Otherwise, the device hides details of its frame + * structure, so that corresponding packet head is + * never delivered to user. */ if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); @@ -791,17 +806,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, __packet_set_status(po, h.raw, status); smp_mb(); +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 { - struct page *p_start, *p_end; - u8 *h_end = h.raw + macoff + snaplen - 1; - - p_start = virt_to_page(h.raw); - p_end = virt_to_page(h_end); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } + u8 *start, *end; + + end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); } +#endif sk->sk_data_ready(sk, 0); @@ -907,7 +920,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, } err = -EFAULT; - page = virt_to_page(data); offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); @@ -926,11 +938,11 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return -EFAULT; } + page = pgv_to_page(data); + data += len; flush_dcache_page(page); get_page(page); - skb_fill_page_desc(skb, - nr_frags, - page++, offset, len); + skb_fill_page_desc(skb, nr_frags, page, offset, len); to_write -= len; offset = 0; len_max = PAGE_SIZE; @@ -1610,9 +1622,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, err = -EINVAL; vnet_hdr_len = sizeof(vnet_hdr); - if ((len -= vnet_hdr_len) < 0) + if (len < vnet_hdr_len) goto out_free; + len -= vnet_hdr_len; + if (skb_is_gso(skb)) { struct skb_shared_info *sinfo = skb_shinfo(skb); @@ -1636,8 +1650,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb->ip_summed == CHECKSUM_PARTIAL) { vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet_hdr.csum_start = skb->csum_start - - skb_headroom(skb); + vnet_hdr.csum_start = skb_checksum_start_offset(skb); vnet_hdr.csum_offset = skb->csum_offset; } /* else everything is zero */ @@ -1719,7 +1732,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); if (dev) - strlcpy(uaddr->sa_data, dev->name, 15); + strncpy(uaddr->sa_data, dev->name, 14); else memset(uaddr->sa_data, 0, 14); rcu_read_unlock(); @@ -1742,6 +1755,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; + sll->sll_pkttype = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); if (dev) { @@ -2322,37 +2336,70 @@ static const struct vm_operations_struct packet_mmap_ops = { .close = packet_mm_close, }; -static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) +static void free_pg_vec(struct pgv *pg_vec, unsigned int order, + unsigned int len) { int i; for (i = 0; i < len; i++) { - if (likely(pg_vec[i])) - free_pages((unsigned long) pg_vec[i], order); + if (likely(pg_vec[i].buffer)) { + if (is_vmalloc_addr(pg_vec[i].buffer)) + vfree(pg_vec[i].buffer); + else + free_pages((unsigned long)pg_vec[i].buffer, + order); + pg_vec[i].buffer = NULL; + } } kfree(pg_vec); } static inline char *alloc_one_pg_vec_page(unsigned long order) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN; + char *buffer = NULL; + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | + __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; + + buffer = (char *) __get_free_pages(gfp_flags, order); + + if (buffer) + return buffer; + + /* + * __get_free_pages failed, fall back to vmalloc + */ + buffer = vzalloc((1 << order) * PAGE_SIZE); - return (char *) __get_free_pages(gfp_flags, order); + if (buffer) + return buffer; + + /* + * vmalloc failed, lets dig into swap here + */ + gfp_flags &= ~__GFP_NORETRY; + buffer = (char *)__get_free_pages(gfp_flags, order); + if (buffer) + return buffer; + + /* + * complete and utter failure + */ + return NULL; } -static char **alloc_pg_vec(struct tpacket_req *req, int order) +static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; - char **pg_vec; + struct pgv *pg_vec; int i; - pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL); + pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); if (unlikely(!pg_vec)) goto out; for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (unlikely(!pg_vec[i])) + pg_vec[i].buffer = alloc_one_pg_vec_page(order); + if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } @@ -2368,7 +2415,7 @@ out_free_pgvec: static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing, int tx_ring) { - char **pg_vec = NULL; + struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); int was_running, order = 0; struct packet_ring_buffer *rb; @@ -2453,22 +2500,20 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, mutex_lock(&po->pg_vec_lock); if (closing || atomic_read(&po->mapped) == 0) { err = 0; -#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) spin_lock_bh(&rb_queue->lock); - pg_vec = XC(rb->pg_vec, pg_vec); + swap(rb->pg_vec, pg_vec); rb->frame_max = (req->tp_frame_nr - 1); rb->head = 0; rb->frame_size = req->tp_frame_size; spin_unlock_bh(&rb_queue->lock); - order = XC(rb->pg_vec_order, order); - req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr); + swap(rb->pg_vec_order, order); + swap(rb->pg_vec_len, req->tp_block_nr); rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; po->prot_hook.func = (po->rx_ring.pg_vec) ? tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); -#undef XC if (atomic_read(&po->mapped)) pr_err("packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped)); @@ -2530,15 +2575,17 @@ static int packet_mmap(struct file *file, struct socket *sock, continue; for (i = 0; i < rb->pg_vec_len; i++) { - struct page *page = virt_to_page(rb->pg_vec[i]); + struct page *page; + void *kaddr = rb->pg_vec[i].buffer; int pg_num; - for (pg_num = 0; pg_num < rb->pg_vec_pages; - pg_num++, page++) { + for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { + page = pgv_to_page(kaddr); err = vm_insert_page(vma, start, page); if (unlikely(err)) goto out; start += PAGE_SIZE; + kaddr += PAGE_SIZE; } } } diff --git a/net/phonet/Makefile b/net/phonet/Makefile index d62bbba649b3..e10b1b182ce3 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_PHONET) += phonet.o pn_pep.o -phonet-objs := \ +phonet-y := \ pn_dev.o \ pn_netlink.o \ socket.o \ @@ -8,4 +8,4 @@ phonet-objs := \ sysctl.o \ af_phonet.o -pn_pep-objs := pep.o pep-gprs.o +pn_pep-y := pep.o pep-gprs.o diff --git a/net/rds/Makefile b/net/rds/Makefile index b46eca109688..56d3f6023ced 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -4,7 +4,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ loop.o page.o rdma.o obj-$(CONFIG_RDS_RDMA) += rds_rdma.o -rds_rdma-objs := rdma_transport.o \ +rds_rdma-y := rdma_transport.o \ ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ ib_sysctl.o ib_rdma.o \ iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \ @@ -12,10 +12,8 @@ rds_rdma-objs := rdma_transport.o \ obj-$(CONFIG_RDS_TCP) += rds_tcp.o -rds_tcp-objs := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ +rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ tcp_send.o tcp_stats.o -ifeq ($(CONFIG_RDS_DEBUG), y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_RDS_DEBUG) := -DDEBUG diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 8920f2a83327..4e37c1cbe8b2 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -567,7 +567,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, goto out; } - if (args->nr_local > (u64)UINT_MAX) { + if (args->nr_local > UIO_MAXIOV) { ret = -EMSGSIZE; goto out; } diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 04f599089e6d..0198191b756d 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -149,20 +149,6 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) rfkill_led_trigger_event(rfkill); } -const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) -{ - return rfkill->led_trigger.name; -} -EXPORT_SYMBOL(rfkill_get_led_trigger_name); - -void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) -{ - BUG_ON(!rfkill); - - rfkill->ledtrigname = name; -} -EXPORT_SYMBOL(rfkill_set_led_trigger_name); - static int rfkill_led_trigger_register(struct rfkill *rfkill) { rfkill->led_trigger.name = rfkill->ledtrigname diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index c46867c61c98..d1c3429b69ed 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -2,7 +2,7 @@ # Makefile for Linux kernel RxRPC # -af-rxrpc-objs := \ +af-rxrpc-y := \ af_rxrpc.o \ ar-accept.o \ ar-ack.o \ @@ -21,7 +21,7 @@ af-rxrpc-objs := \ ar-transport.o ifeq ($(CONFIG_PROC_FS),y) -af-rxrpc-objs += ar-proc.o +af-rxrpc-y += ar-proc.o endif obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 9f1729bd60de..a53fb25a64ed 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -47,12 +47,12 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) case AF_INET: fl.oif = 0; fl.proto = IPPROTO_UDP, - fl.nl_u.ip4_u.saddr = 0; - fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr; - fl.nl_u.ip4_u.tos = 0; + fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr; + fl.fl4_src = 0; + fl.fl4_tos = 0; /* assume AFS.CM talking to AFS.FS */ - fl.uli_u.ports.sport = htons(7001); - fl.uli_u.ports.dport = htons(7000); + fl.fl_ip_sport = htons(7001); + fl.fl_ip_dport = htons(7000); break; default: BUG(); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 4dfecb0cba37..aa4d6337e43c 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -54,8 +54,6 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) /* queue full, remove one skb to fulfill the limit */ skb_head = qdisc_dequeue_head(sch); - sch->bstats.bytes -= qdisc_pkt_len(skb_head); - sch->bstats.packets--; sch->qstats.drops++; kfree_skb(skb_head); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5dbb3cd96e59..34dc598440a2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) { + if (!netif_tx_queue_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; } else @@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, spin_unlock(root_lock); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + if (!netif_tx_queue_frozen_or_stopped(txq)) ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); @@ -144,8 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, ret = dev_requeue_skb(skb, q); } - if (ret && (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq))) + if (ret && netif_tx_queue_frozen_or_stopped(txq)) ret = 0; return ret; @@ -555,7 +553,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, size = QDISC_ALIGN(sizeof(*sch)); size += ops->priv_size + (QDISC_ALIGNTO - 1); - p = kzalloc(size, GFP_KERNEL); + p = kzalloc_node(size, GFP_KERNEL, + netdev_queue_numa_node_read(dev_queue)); + if (!p) goto errout; sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); @@ -810,20 +810,35 @@ static bool some_qdisc_is_busy(struct net_device *dev) return false; } -void dev_deactivate(struct net_device *dev) +void dev_deactivate_many(struct list_head *head) { - netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); - if (dev_ingress_queue(dev)) - dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc); + struct net_device *dev; + + list_for_each_entry(dev, head, unreg_list) { + netdev_for_each_tx_queue(dev, dev_deactivate_queue, + &noop_qdisc); + if (dev_ingress_queue(dev)) + dev_deactivate_queue(dev, dev_ingress_queue(dev), + &noop_qdisc); - dev_watchdog_down(dev); + dev_watchdog_down(dev); + } /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ synchronize_rcu(); /* Wait for outstanding qdisc_run calls. */ - while (some_qdisc_is_busy(dev)) - yield(); + list_for_each_entry(dev, head, unreg_list) + while (some_qdisc_is_busy(dev)) + yield(); +} + +void dev_deactivate(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + dev_deactivate_many(&single); } static void dev_init_scheduler_queue(struct net_device *dev, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 8d42bb3ba540..a67ba3c5a0cc 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -239,6 +239,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) .Scell_log = q->parms.Scell_log, }; + sch->qstats.backlog = q->qdisc->qstats.backlog; opts = nla_nest_start(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3cf478d012dd..d54ac94066c2 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -67,27 +67,47 @@ IMPLEMENTATION: This implementation limits maximal queue length to 128; - maximal mtu to 2^15-1; number of hash buckets to 1024. + max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024. The only goal of this restrictions was that all data - fit into one 4K page :-). Struct sfq_sched_data is - organized in anti-cache manner: all the data for a bucket - are scattered over different locations. This is not good, - but it allowed me to put it into 4K. + fit into one 4K page on 32bit arches. It is easy to increase these values, but not in flight. */ -#define SFQ_DEPTH 128 +#define SFQ_DEPTH 128 /* max number of packets per flow */ +#define SFQ_SLOTS 128 /* max number of flows */ +#define SFQ_EMPTY_SLOT 255 #define SFQ_HASH_DIVISOR 1024 +/* We use 16 bits to store allot, and want to handle packets up to 64K + * Scale allot by 8 (1<<3) so that no overflow occurs. + */ +#define SFQ_ALLOT_SHIFT 3 +#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) -/* This type should contain at least SFQ_DEPTH*2 values */ +/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */ typedef unsigned char sfq_index; +/* + * We dont use pointers to save space. + * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array + * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] + * are 'pointers' to dep[] array + */ struct sfq_head { sfq_index next; sfq_index prev; }; +struct sfq_slot { + struct sk_buff *skblist_next; + struct sk_buff *skblist_prev; + sfq_index qlen; /* number of skbs in skblist */ + sfq_index next; /* next slot in sfq chain */ + struct sfq_head dep; /* anchor in dep[] chains */ + unsigned short hash; /* hash value (index in ht[]) */ + short allot; /* credit for this slot */ +}; + struct sfq_sched_data { /* Parameters */ @@ -99,17 +119,24 @@ struct sfq_sched_data struct tcf_proto *filter_list; struct timer_list perturb_timer; u32 perturbation; - sfq_index tail; /* Index of current slot in round */ - sfq_index max_depth; /* Maximal depth */ - + sfq_index cur_depth; /* depth of longest slot */ + unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ + struct sfq_slot *tail; /* current slot in round */ sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ - sfq_index next[SFQ_DEPTH]; /* Active slots link */ - short allot[SFQ_DEPTH]; /* Current allotment per slot */ - unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */ - struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */ - struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */ + struct sfq_slot slots[SFQ_SLOTS]; + struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ }; +/* + * sfq_head are either in a sfq_slot or in dep[] array + */ +static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) +{ + if (val < SFQ_SLOTS) + return &q->slots[val].dep; + return &q->dep[val - SFQ_SLOTS]; +} + static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); @@ -200,30 +227,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return 0; } +/* + * x : slot number [0 .. SFQ_SLOTS - 1] + */ static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; - int d = q->qs[x].qlen + SFQ_DEPTH; + int qlen = q->slots[x].qlen; + + p = qlen + SFQ_SLOTS; + n = q->dep[qlen].next; - p = d; - n = q->dep[d].next; - q->dep[x].next = n; - q->dep[x].prev = p; - q->dep[p].next = q->dep[n].prev = x; + q->slots[x].dep.next = n; + q->slots[x].dep.prev = p; + + q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ + sfq_dep_head(q, n)->prev = x; } +#define sfq_unlink(q, x, n, p) \ + n = q->slots[x].dep.next; \ + p = q->slots[x].dep.prev; \ + sfq_dep_head(q, p)->next = n; \ + sfq_dep_head(q, n)->prev = p + + static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; + int d; - n = q->dep[x].next; - p = q->dep[x].prev; - q->dep[p].next = n; - q->dep[n].prev = p; - - if (n == p && q->max_depth == q->qs[x].qlen + 1) - q->max_depth--; + sfq_unlink(q, x, n, p); + d = q->slots[x].qlen--; + if (n == p && q->cur_depth == d) + q->cur_depth--; sfq_link(q, x); } @@ -232,34 +270,74 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) sfq_index p, n; int d; - n = q->dep[x].next; - p = q->dep[x].prev; - q->dep[p].next = n; - q->dep[n].prev = p; - d = q->qs[x].qlen; - if (q->max_depth < d) - q->max_depth = d; + sfq_unlink(q, x, n, p); + d = ++q->slots[x].qlen; + if (q->cur_depth < d) + q->cur_depth = d; sfq_link(q, x); } +/* helper functions : might be changed when/if skb use a standard list_head */ + +/* remove one skb from tail of slot queue */ +static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot) +{ + struct sk_buff *skb = slot->skblist_prev; + + slot->skblist_prev = skb->prev; + skb->prev->next = (struct sk_buff *)slot; + skb->next = skb->prev = NULL; + return skb; +} + +/* remove one skb from head of slot queue */ +static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) +{ + struct sk_buff *skb = slot->skblist_next; + + slot->skblist_next = skb->next; + skb->next->prev = (struct sk_buff *)slot; + skb->next = skb->prev = NULL; + return skb; +} + +static inline void slot_queue_init(struct sfq_slot *slot) +{ + slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; +} + +/* add skb to slot queue (tail add) */ +static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb) +{ + skb->prev = slot->skblist_prev; + skb->next = (struct sk_buff *)slot; + slot->skblist_prev->next = skb; + slot->skblist_prev = skb; +} + +#define slot_queue_walk(slot, skb) \ + for (skb = slot->skblist_next; \ + skb != (struct sk_buff *)slot; \ + skb = skb->next) + static unsigned int sfq_drop(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index d = q->max_depth; + sfq_index x, d = q->cur_depth; struct sk_buff *skb; unsigned int len; + struct sfq_slot *slot; - /* Queue is full! Find the longest slot and - drop a packet from it */ - + /* Queue is full! Find the longest slot and drop tail packet from it */ if (d > 1) { - sfq_index x = q->dep[d + SFQ_DEPTH].next; - skb = q->qs[x].prev; + x = q->dep[d].next; + slot = &q->slots[x]; +drop: + skb = slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); - __skb_unlink(skb, &q->qs[x]); - kfree_skb(skb); sfq_dec(q, x); + kfree_skb(skb); sch->q.qlen--; sch->qstats.drops++; sch->qstats.backlog -= len; @@ -268,19 +346,11 @@ static unsigned int sfq_drop(struct Qdisc *sch) if (d == 1) { /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ - d = q->next[q->tail]; - q->next[q->tail] = q->next[d]; - q->allot[q->next[d]] += q->quantum; - skb = q->qs[d].prev; - len = qdisc_pkt_len(skb); - __skb_unlink(skb, &q->qs[d]); - kfree_skb(skb); - sfq_dec(q, d); - sch->q.qlen--; - q->ht[q->hash[d]] = SFQ_DEPTH; - sch->qstats.drops++; - sch->qstats.backlog -= len; - return len; + x = q->tail->next; + slot = &q->slots[x]; + q->tail->next = slot->next; + q->ht[slot->hash] = SFQ_EMPTY_SLOT; + goto drop; } return 0; @@ -292,6 +362,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash; sfq_index x; + struct sfq_slot *slot; int uninitialized_var(ret); hash = sfq_classify(skb, sch, &ret); @@ -304,31 +375,32 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) hash--; x = q->ht[hash]; - if (x == SFQ_DEPTH) { - q->ht[hash] = x = q->dep[SFQ_DEPTH].next; - q->hash[x] = hash; + slot = &q->slots[x]; + if (x == SFQ_EMPTY_SLOT) { + x = q->dep[0].next; /* get a free slot */ + q->ht[hash] = x; + slot = &q->slots[x]; + slot->hash = hash; } - /* If selected queue has length q->limit, this means that - * all another queues are empty and that we do simple tail drop, + /* If selected queue has length q->limit, do simple tail drop, * i.e. drop _this_ packet. */ - if (q->qs[x].qlen >= q->limit) + if (slot->qlen >= q->limit) return qdisc_drop(skb, sch); sch->qstats.backlog += qdisc_pkt_len(skb); - __skb_queue_tail(&q->qs[x], skb); + slot_queue_add(slot, skb); sfq_inc(q, x); - if (q->qs[x].qlen == 1) { /* The flow is new */ - if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->tail = x; - q->next[x] = x; - q->allot[x] = q->quantum; + if (slot->qlen == 1) { /* The flow is new */ + if (q->tail == NULL) { /* It is the first flow */ + slot->next = x; } else { - q->next[x] = q->next[q->tail]; - q->next[q->tail] = x; - q->tail = x; + slot->next = q->tail->next; + q->tail->next = x; } + q->tail = slot; + slot->allot = q->scaled_quantum; } if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += qdisc_pkt_len(skb); @@ -344,14 +416,12 @@ static struct sk_buff * sfq_peek(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index a; /* No active slots */ - if (q->tail == SFQ_DEPTH) + if (q->tail == NULL) return NULL; - a = q->next[q->tail]; - return skb_peek(&q->qs[a]); + return q->slots[q->tail->next].skblist_next; } static struct sk_buff * @@ -359,34 +429,37 @@ sfq_dequeue(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - sfq_index a, old_a; + sfq_index a, next_a; + struct sfq_slot *slot; /* No active slots */ - if (q->tail == SFQ_DEPTH) + if (q->tail == NULL) return NULL; - a = old_a = q->next[q->tail]; - - /* Grab packet */ - skb = __skb_dequeue(&q->qs[a]); +next_slot: + a = q->tail->next; + slot = &q->slots[a]; + if (slot->allot <= 0) { + q->tail = slot; + slot->allot += q->scaled_quantum; + goto next_slot; + } + skb = slot_dequeue_head(slot); sfq_dec(q, a); sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ - if (q->qs[a].qlen == 0) { - q->ht[q->hash[a]] = SFQ_DEPTH; - a = q->next[a]; - if (a == old_a) { - q->tail = SFQ_DEPTH; + if (slot->qlen == 0) { + q->ht[slot->hash] = SFQ_EMPTY_SLOT; + next_a = slot->next; + if (a == next_a) { + q->tail = NULL; /* no more active slots */ return skb; } - q->next[q->tail] = a; - q->allot[a] += q->quantum; - } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { - q->tail = a; - a = q->next[a]; - q->allot[a] += q->quantum; + q->tail->next = next_a; + } else { + slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); } return skb; } @@ -422,6 +495,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); + q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); @@ -450,19 +524,19 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) init_timer_deferrable(&q->perturb_timer); for (i = 0; i < SFQ_HASH_DIVISOR; i++) - q->ht[i] = SFQ_DEPTH; + q->ht[i] = SFQ_EMPTY_SLOT; for (i = 0; i < SFQ_DEPTH; i++) { - skb_queue_head_init(&q->qs[i]); - q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH; - q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH; + q->dep[i].next = i + SFQ_SLOTS; + q->dep[i].prev = i + SFQ_SLOTS; } q->limit = SFQ_DEPTH - 1; - q->max_depth = 0; - q->tail = SFQ_DEPTH; + q->cur_depth = 0; + q->tail = NULL; if (opt == NULL) { q->quantum = psched_mtu(qdisc_dev(sch)); + q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; q->perturbation = net_random(); } else { @@ -471,8 +545,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) return err; } - for (i = 0; i < SFQ_DEPTH; i++) + for (i = 0; i < SFQ_SLOTS; i++) { + slot_queue_init(&q->slots[i]); sfq_link(q, i); + } return 0; } @@ -547,10 +623,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index idx = q->ht[cl-1]; - struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen }; - struct tc_sfq_xstats xstats = { .allot = q->allot[idx] }; + sfq_index idx = q->ht[cl - 1]; + struct gnet_stats_queue qs = { 0 }; + struct tc_sfq_xstats xstats = { 0 }; + struct sk_buff *skb; + + if (idx != SFQ_EMPTY_SLOT) { + const struct sfq_slot *slot = &q->slots[idx]; + xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; + qs.qlen = slot->qlen; + slot_queue_walk(slot, skb) + qs.backlog += qdisc_pkt_len(skb); + } if (gnet_stats_copy_queue(d, &qs) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); @@ -565,7 +650,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < SFQ_HASH_DIVISOR; i++) { - if (q->ht[i] == SFQ_DEPTH || + if (q->ht[i] == SFQ_EMPTY_SLOT || arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 401af9596709..106479a7c94a 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -309,8 +309,7 @@ restart: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); - if (!netif_tx_queue_stopped(slave_txq) && - !netif_tx_queue_frozen(slave_txq) && + if (!netif_tx_queue_frozen_or_stopped(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1ef29c74d85e..e58f9476f29c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -92,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specific; struct kmem_cache *sctp_chunk_cachep __read_mostly; struct kmem_cache *sctp_bucket_cachep __read_mostly; -int sysctl_sctp_mem[3]; +long sysctl_sctp_mem[3]; int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e34ca9cc1167..a09b0dd25f50 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -111,12 +111,12 @@ static void sctp_sock_migrate(struct sock *, struct sock *, static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; extern struct kmem_cache *sctp_bucket_cachep; -extern int sysctl_sctp_mem[3]; +extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; static int sctp_memory_pressure; -static atomic_t sctp_memory_allocated; +static atomic_long_t sctp_memory_allocated; struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) @@ -2932,6 +2932,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; struct sctp_chunk *chunk; + struct sctp_af *af; int err; sp = sctp_sk(sk); @@ -2959,6 +2960,13 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva if (!sctp_state(asoc, ESTABLISHED)) return -ENOTCONN; + af = sctp_get_af_specific(prim.sspp_addr.ss_family); + if (!af) + return -EINVAL; + + if (!af->addr_valid((union sctp_addr *)&prim.sspp_addr, sp, NULL)) + return -EADDRNOTAVAIL; + if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr)) return -EADDRNOTAVAIL; @@ -5045,7 +5053,7 @@ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len, if (copy_to_user(optval, &val, len)) return -EFAULT; - return -ENOTSUPP; + return 0; } /* @@ -6047,7 +6055,7 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, * will suddenly eat the receive_queue. * * Look at current nfs client by the way... - * However, this function was corrent in any case. 8) + * However, this function was correct in any case. 8) */ if (flags & MSG_PEEK) { spin_lock_bh(&sk->sk_receive_queue.lock); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 832590bbe0c0..50cb57f0919e 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -54,7 +54,7 @@ static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ static int rwnd_scale_max = 16; -extern int sysctl_sctp_mem[3]; +extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; @@ -203,7 +203,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax }, { .procname = "sctp_rmem", diff --git a/net/socket.c b/net/socket.c index 3ca2fd9e3720..ccc576a6a508 100644 --- a/net/socket.c +++ b/net/socket.c @@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = { */ static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO] __read_mostly; +static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists @@ -262,6 +262,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } + static void wq_free_rcu(struct rcu_head *head) { struct socket_wq *wq = container_of(head, struct socket_wq, rcu); @@ -360,14 +361,14 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; } path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; + d_set_d_op(path.dentry, &sockfs_dentry_operations); d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; @@ -732,6 +733,21 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, return ret; } +/** + * kernel_recvmsg - Receive a message from a socket (kernel space) + * @sock: The socket to receive the message from + * @msg: Received message + * @vec: Input s/g array for message data + * @num: Size of input s/g array + * @size: Number of bytes to read + * @flags: Message flags (MSG_DONTWAIT, etc...) + * + * On return the msg structure contains the scatter/gather array passed in the + * vec argument. The array is modified so that it consists of the unfilled + * portion of the original array. + * + * The returned value is the total number of bytes received, or an error. + */ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1200,7 +1216,7 @@ int __sock_create(struct net *net, int family, int type, int protocol, * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ - if (net_families[family] == NULL) + if (rcu_access_pointer(net_families[family]) == NULL) request_module("net-pf-%d", family); #endif @@ -2332,10 +2348,11 @@ int sock_register(const struct net_proto_family *ops) } spin_lock(&net_family_lock); - if (net_families[ops->family]) + if (rcu_dereference_protected(net_families[ops->family], + lockdep_is_held(&net_family_lock))) err = -EEXIST; else { - net_families[ops->family] = ops; + rcu_assign_pointer(net_families[ops->family], ops); err = 0; } spin_unlock(&net_family_lock); @@ -2363,7 +2380,7 @@ void sock_unregister(int family) BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); - net_families[family] = NULL; + rcu_assign_pointer(net_families[family], NULL); spin_unlock(&net_family_lock); synchronize_rcu(); @@ -2374,6 +2391,8 @@ EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { + int err; + /* * Initialize sock SLAB cache. */ @@ -2390,8 +2409,15 @@ static int __init sock_init(void) */ init_inodecache(); - register_filesystem(&sock_fs_type); + + err = register_filesystem(&sock_fs_type); + if (err) + goto out_fs; sock_mnt = kern_mount(&sock_fs_type); + if (IS_ERR(sock_mnt)) { + err = PTR_ERR(sock_mnt); + goto out_mount; + } /* The real protocol initialization is performed in later initcalls. */ @@ -2404,7 +2430,13 @@ static int __init sock_init(void) skb_timestamping_init(); #endif - return 0; +out: + return err; + +out_mount: + unregister_filesystem(&sock_fs_type); +out_fs: + goto out; } core_initcall(sock_init); /* early initcall */ diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 7350d86a32ee..9e4cb59ef9f0 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -4,10 +4,10 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o -auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ +auth_rpcgss-y := auth_gss.o gss_generic_token.o \ gss_mech_switch.o svcauth_gss.o obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o -rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ +rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9dab9573be41..92ce94f5146b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -989,20 +989,26 @@ call_refreshresult(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_allocate; - if (status >= 0 && rpcauth_uptodatecred(task)) - return; + task->tk_action = call_refresh; switch (status) { - case -EACCES: - rpc_exit(task, -EACCES); - return; - case -ENOMEM: - rpc_exit(task, -ENOMEM); + case 0: + if (rpcauth_uptodatecred(task)) + task->tk_action = call_allocate; return; case -ETIMEDOUT: rpc_delay(task, 3*HZ); + case -EAGAIN: + status = -EACCES; + if (!task->tk_cred_retry) + break; + task->tk_cred_retry--; + dprintk("RPC: %5u %s: retry refresh creds\n", + task->tk_pid, __func__); + return; } - task->tk_action = call_refresh; + dprintk("RPC: %5u %s: refresh creds failed with error %d\n", + task->tk_pid, __func__, status); + rpc_exit(task, status); } /* diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 10a17a37ec4e..09f01f41e55a 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -162,11 +162,19 @@ rpc_alloc_inode(struct super_block *sb) } static void -rpc_destroy_inode(struct inode *inode) +rpc_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } +static void +rpc_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, rpc_i_callback); +} + static int rpc_pipe_open(struct inode *inode, struct file *filp) { @@ -430,7 +438,7 @@ void rpc_put_mount(void) } EXPORT_SYMBOL_GPL(rpc_put_mount); -static int rpc_delete_dentry(struct dentry *dentry) +static int rpc_delete_dentry(const struct dentry *dentry) { return 1; } @@ -583,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent, } } if (!dentry->d_inode) - dentry->d_op = &rpc_dentry_operations; + d_set_d_op(dentry, &rpc_dentry_operations); out_err: return dentry; } diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index f71a73107ae9..80df89d957ba 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -115,9 +115,7 @@ EXPORT_SYMBOL_GPL(svc_seq_show); */ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { - struct rpc_iostats *new; - new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); - return new; + return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); } EXPORT_SYMBOL_GPL(rpc_alloc_iostats); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c82fe739fbdc..3f2c5559ca1a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -5,7 +5,6 @@ */ #include <linux/sched.h> -#include <linux/smp_lock.h> #include <linux/errno.h> #include <linux/freezer.h> #include <linux/kthread.h> @@ -213,6 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, spin_lock(&svc_xprt_class_lock); list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { struct svc_xprt *newxprt; + unsigned short newport; if (strcmp(xprt_name, xcl->xcl_name)) continue; @@ -231,8 +231,9 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, spin_lock_bh(&serv->sv_lock); list_add(&newxprt->xpt_list, &serv->sv_permsocks); spin_unlock_bh(&serv->sv_lock); + newport = svc_xprt_local_port(newxprt); clear_bit(XPT_BUSY, &newxprt->xpt_flags); - return svc_xprt_local_port(newxprt); + return newport; } err: spin_unlock(&svc_xprt_class_lock); @@ -426,8 +427,13 @@ void svc_xprt_received(struct svc_xprt *xprt) { BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); xprt->xpt_pool = NULL; + /* As soon as we clear busy, the xprt could be closed and + * 'put', so we need a reference to call svc_xprt_enqueue with: + */ + svc_xprt_get(xprt); clear_bit(XPT_BUSY, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + svc_xprt_put(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_received); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dfcab5ac65af..96549df836ee 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -770,7 +770,7 @@ static void xs_destroy(struct rpc_xprt *xprt) dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_rearming_delayed_work(&transport->connect_worker); + cancel_delayed_work_sync(&transport->connect_worker); xs_close(xprt); xs_free_peer_addresses(xprt); diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index b74f78d0c033..0436927369f3 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -29,28 +29,6 @@ config TIPC_ADVANCED Saying Y here will open some advanced configuration for TIPC. Most users do not need to bother; if unsure, just say N. -config TIPC_ZONES - int "Maximum number of zones in a network" - depends on TIPC_ADVANCED - range 1 255 - default "3" - help - Specifies how many zones can be supported in a TIPC network. - Can range from 1 to 255 zones; default is 3. - - Setting this to a smaller value saves some memory; - setting it to a higher value allows for more zones. - -config TIPC_CLUSTERS - int "Maximum number of clusters in a zone" - depends on TIPC_ADVANCED - range 1 1 - default "1" - help - Specifies how many clusters can be supported in a TIPC zone. - - *** Currently TIPC only supports a single cluster per zone. *** - config TIPC_NODES int "Maximum number of nodes in a cluster" depends on TIPC_ADVANCED @@ -72,7 +50,7 @@ config TIPC_PORTS Specifies how many ports can be supported by a node. Can range from 127 to 65535 ports; default is 8191. - Setting this to a smaller value saves some memory, + Setting this to a smaller value saves some memory, setting it to higher allows for more ports. config TIPC_LOG @@ -89,12 +67,15 @@ config TIPC_LOG managed remotely via TIPC. config TIPC_DEBUG - bool "Enable debug messages" + bool "Enable debugging support" default n help - This enables debugging of TIPC. + Saying Y here enables TIPC debugging capabilities used by developers. + Most users do not need to bother; if unsure, just say N. - Only say Y here if you are having trouble with TIPC. It will - enable the display of detailed information about what is going on. + Enabling debugging support causes TIPC to display data about its + internal state when certain abnormal conditions occur. It also + makes it easy for developers to capture additional information of + interest using the dbg() or msg_dbg() macros. endif # TIPC diff --git a/net/tipc/Makefile b/net/tipc/Makefile index dceb7027946c..521d24d04ab2 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -4,10 +4,10 @@ obj-$(CONFIG_TIPC) := tipc.o -tipc-y += addr.o bcast.o bearer.o config.o cluster.o \ +tipc-y += addr.o bcast.o bearer.o config.o \ core.o handler.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ - socket.o user_reg.o zone.o dbg.o eth_media.o + socket.o log.o eth_media.o # End of file diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 8a2e89bffde5..88463d9a6f12 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -35,11 +35,7 @@ */ #include "core.h" -#include "dbg.h" #include "addr.h" -#include "zone.h" -#include "cluster.h" -#include "net.h" /** * tipc_addr_domain_valid - validates a network domain address @@ -57,14 +53,8 @@ int tipc_addr_domain_valid(u32 addr) u32 z = tipc_zone(addr); u32 max_nodes = tipc_max_nodes; - if (is_slave(addr)) - max_nodes = LOWEST_SLAVE + tipc_max_slaves; if (n > max_nodes) return 0; - if (c > tipc_max_clusters) - return 0; - if (z > tipc_max_zones) - return 0; if (n && (!z || !c)) return 0; diff --git a/net/tipc/addr.h b/net/tipc/addr.h index c1cc5724d8cc..2490fadd0caf 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,36 +37,11 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H -static inline u32 own_node(void) -{ - return tipc_node(tipc_own_addr); -} - -static inline u32 own_cluster(void) -{ - return tipc_cluster(tipc_own_addr); -} - -static inline u32 own_zone(void) -{ - return tipc_zone(tipc_own_addr); -} - static inline int in_own_cluster(u32 addr) { return !((addr ^ tipc_own_addr) >> 12); } -static inline int is_slave(u32 addr) -{ - return addr & 0x800; -} - -static inline int may_route(u32 addr) -{ - return(addr ^ tipc_own_addr) >> 11; -} - /** * addr_domain - convert 2-bit scope value to equivalent message lookup domain * diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 22a60fc98392..70ab5ef48766 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -36,25 +36,14 @@ */ #include "core.h" -#include "msg.h" -#include "dbg.h" #include "link.h" -#include "net.h" -#include "node.h" #include "port.h" -#include "addr.h" -#include "node_subscr.h" -#include "name_distr.h" -#include "bearer.h" -#include "name_table.h" #include "bcast.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ -#define BCLINK_LOG_BUF_SIZE 0 - /* * Loss rate for incoming broadcast frames; used to test retransmission code. * Set to N to cause every N'th frame to be discarded; 0 => don't discard any. @@ -114,11 +103,14 @@ struct bclink { }; -static struct bcbearer *bcbearer = NULL; -static struct bclink *bclink = NULL; -static struct link *bcl = NULL; +static struct bcbearer *bcbearer; +static struct bclink *bclink; +static struct link *bcl; static DEFINE_SPINLOCK(bc_lock); +/* broadcast-capable node map */ +struct tipc_node_map tipc_bcast_nmap; + const char tipc_bclink_name[] = "broadcast-link"; static void tipc_nmap_diff(struct tipc_node_map *nm_a, @@ -204,9 +196,8 @@ static void bclink_retransmit_pkt(u32 after, u32 to) struct sk_buff *buf; buf = bcl->first_out; - while (buf && less_eq(buf_seqno(buf), after)) { + while (buf && less_eq(buf_seqno(buf), after)) buf = buf->next; - } tipc_link_retransmit(bcl, buf, mod(to - after)); } @@ -232,9 +223,8 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) /* Skip over packets that node has previously acknowledged */ crs = bcl->first_out; - while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked)) { + while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked)) crs = crs->next; - } /* Update packets that node is now acknowledging */ @@ -433,16 +423,14 @@ int tipc_bclink_send_msg(struct sk_buff *buf) void tipc_bclink_recv_pkt(struct sk_buff *buf) { #if (TIPC_BCAST_LOSS_RATE) - static int rx_count = 0; + static int rx_count; #endif struct tipc_msg *msg = buf_msg(buf); - struct tipc_node* node = tipc_node_find(msg_prevnode(msg)); + struct tipc_node *node = tipc_node_find(msg_prevnode(msg)); u32 next_in; u32 seqno; struct sk_buff *deferred; - msg_dbg(msg, "<BC<<<"); - if (unlikely(!node || !tipc_node_is_up(node) || !node->bclink.supported || (msg_mc_netid(msg) != tipc_net_id))) { buf_discard(buf); @@ -450,7 +438,6 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) } if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { - msg_dbg(msg, "<BCNACK<<<"); if (msg_destnode(msg) == tipc_own_addr) { tipc_node_lock(node); tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); @@ -574,8 +561,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (likely(!msg_non_seq(buf_msg(buf)))) { struct tipc_msg *msg; - assert(tipc_cltr_bcast_nodes.count != 0); - bcbuf_set_acks(buf, tipc_cltr_bcast_nodes.count); + assert(tipc_bcast_nmap.count != 0); + bcbuf_set_acks(buf, tipc_bcast_nmap.count); msg = buf_msg(buf); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); @@ -584,7 +571,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, /* Send buffer over bearers until all targets reached */ - bcbearer->remains = tipc_cltr_bcast_nodes; + bcbearer->remains = tipc_bcast_nmap; for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct bearer *p = bcbearer->bpairs[bp_index].primary; @@ -782,7 +769,6 @@ int tipc_bclink_init(void) bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); if (!bcbearer || !bclink) { - nomem: warn("Multicast link creation failed, no memory\n"); kfree(bcbearer); bcbearer = NULL; @@ -807,14 +793,6 @@ int tipc_bclink_init(void) bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); - if (BCLINK_LOG_BUF_SIZE) { - char *pb = kmalloc(BCLINK_LOG_BUF_SIZE, GFP_ATOMIC); - - if (!pb) - goto nomem; - tipc_printbuf_init(&bcl->print_buf, pb, BCLINK_LOG_BUF_SIZE); - } - return 0; } @@ -823,8 +801,6 @@ void tipc_bclink_stop(void) spin_lock_bh(&bc_lock); if (bcbearer) { tipc_link_stop(bcl); - if (BCLINK_LOG_BUF_SIZE) - kfree(bcl->print_buf.buf); bcl = NULL; kfree(bclink); bclink = NULL; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 011c03f0a4ab..51f8c5326ce6 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -51,6 +51,7 @@ struct tipc_node_map { u32 map[MAX_NODES / WSIZE]; }; +extern struct tipc_node_map tipc_bcast_nmap; #define PLSIZE 32 diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 9927d1d56c4f..837b7a467885 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -36,17 +36,13 @@ #include "core.h" #include "config.h" -#include "dbg.h" #include "bearer.h" -#include "link.h" -#include "port.h" #include "discover.h" -#include "bcast.h" #define MAX_ADDR_STR 32 static struct media media_list[MAX_MEDIA]; -static u32 media_count = 0; +static u32 media_count; struct bearer tipc_bearers[MAX_BEARERS]; @@ -167,7 +163,6 @@ int tipc_register_media(u32 media_type, m_ptr->priority = bearer_priority; m_ptr->tolerance = link_tolerance; m_ptr->window = send_window_limit; - dbg("Media <%s> registered\n", name); res = 0; exit: write_unlock_bh(&tipc_net_lock); @@ -199,9 +194,8 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a) unchar *addr = (unchar *)&a->dev_addr; tipc_printf(pb, "UNKNOWN(%u)", media_type); - for (i = 0; i < (sizeof(*a) - sizeof(a->type)); i++) { + for (i = 0; i < (sizeof(*a) - sizeof(a->type)); i++) tipc_printf(pb, "-%02x", addr[i]); - } } } @@ -256,7 +250,8 @@ static int bearer_name_validate(const char *name, /* ensure all component parts of bearer name are present */ media_name = name_copy; - if ((if_name = strchr(media_name, ':')) == NULL) + if_name = strchr(media_name, ':'); + if (if_name == NULL) return 0; *(if_name++) = 0; media_len = if_name - media_name; @@ -625,7 +620,7 @@ int tipc_block_bearer(const char *name) * Note: This routine assumes caller holds tipc_net_lock. */ -static int bearer_disable(struct bearer *b_ptr) +static void bearer_disable(struct bearer *b_ptr) { struct link *l_ptr; struct link *temp_l_ptr; @@ -641,7 +636,6 @@ static int bearer_disable(struct bearer *b_ptr) } spin_unlock_bh(&b_ptr->publ.lock); memset(b_ptr, 0, sizeof(struct bearer)); - return 0; } int tipc_disable_bearer(const char *name) @@ -654,8 +648,10 @@ int tipc_disable_bearer(const char *name) if (b_ptr == NULL) { warn("Attempt to disable unknown bearer <%s>\n", name); res = -EINVAL; - } else - res = bearer_disable(b_ptr); + } else { + bearer_disable(b_ptr); + res = 0; + } write_unlock_bh(&tipc_net_lock); return res; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index a850b389663e..85f451d5aacf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -37,12 +37,50 @@ #ifndef _TIPC_BEARER_H #define _TIPC_BEARER_H -#include "core.h" #include "bcast.h" #define MAX_BEARERS 8 #define MAX_MEDIA 4 +/* + * Identifiers of supported TIPC media types + */ +#define TIPC_MEDIA_TYPE_ETH 1 + +/* + * Destination address structure used by TIPC bearers when sending messages + * + * IMPORTANT: The fields of this structure MUST be stored using the specified + * byte order indicated below, as the structure is exchanged between nodes + * as part of a link setup process. + */ +struct tipc_media_addr { + __be32 type; /* bearer type (network byte order) */ + union { + __u8 eth_addr[6]; /* 48 bit Ethernet addr (byte array) */ + } dev_addr; +}; + +/** + * struct tipc_bearer - TIPC bearer info available to media code + * @usr_handle: pointer to additional media-specific information about bearer + * @mtu: max packet size bearer can support + * @blocked: non-zero if bearer is blocked + * @lock: spinlock for controlling access to bearer + * @addr: media-specific address associated with bearer + * @name: bearer name (format = media:interface) + * + * Note: TIPC initializes "name" and "lock" fields; media code is responsible + * for initialization all other fields when a bearer is enabled. + */ +struct tipc_bearer { + void *usr_handle; + u32 mtu; + int blocked; + spinlock_t lock; + struct tipc_media_addr addr; + char name[TIPC_MAX_BEARER_NAME]; +}; /** * struct media - TIPC media information available to internal users @@ -55,7 +93,7 @@ * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion - * @type_id: TIPC media identifier [defined in tipc_bearer.h] + * @type_id: TIPC media identifier * @name: media name */ @@ -116,6 +154,34 @@ struct link; extern struct bearer tipc_bearers[]; +/* + * TIPC routines available to supported media types + */ +int tipc_register_media(u32 media_type, + char *media_name, int (*enable)(struct tipc_bearer *), + void (*disable)(struct tipc_bearer *), + int (*send_msg)(struct sk_buff *, + struct tipc_bearer *, struct tipc_media_addr *), + char *(*addr2str)(struct tipc_media_addr *a, + char *str_buf, int str_size), + struct tipc_media_addr *bcast_addr, const u32 bearer_priority, + const u32 link_tolerance, /* [ms] */ + const u32 send_window_limit); + +void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); + +int tipc_block_bearer(const char *name); +void tipc_continue(struct tipc_bearer *tb_ptr); + +int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); +int tipc_disable_bearer(const char *name); + +/* + * Routines made available to TIPC by supported media types + */ +int tipc_eth_media_start(void); +void tipc_eth_media_stop(void); + void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); @@ -126,7 +192,6 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr); struct bearer *tipc_bearer_find_interface(const char *if_name); int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr); int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr); -int tipc_bearer_init(void); void tipc_bearer_stop(void); void tipc_bearer_lock_push(struct bearer *b_ptr); diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c deleted file mode 100644 index 7fea14b98b97..000000000000 --- a/net/tipc/cluster.c +++ /dev/null @@ -1,557 +0,0 @@ -/* - * net/tipc/cluster.c: TIPC cluster management routines - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" -#include "cluster.h" -#include "addr.h" -#include "node_subscr.h" -#include "link.h" -#include "node.h" -#include "net.h" -#include "msg.h" -#include "bearer.h" - -static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf, - u32 lower, u32 upper); -static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest); - -struct tipc_node **tipc_local_nodes = NULL; -struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}}; -u32 tipc_highest_allowed_slave = 0; - -struct cluster *tipc_cltr_create(u32 addr) -{ - struct _zone *z_ptr; - struct cluster *c_ptr; - int max_nodes; - - c_ptr = kzalloc(sizeof(*c_ptr), GFP_ATOMIC); - if (c_ptr == NULL) { - warn("Cluster creation failure, no memory\n"); - return NULL; - } - - c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0); - if (in_own_cluster(addr)) - max_nodes = LOWEST_SLAVE + tipc_max_slaves; - else - max_nodes = tipc_max_nodes + 1; - - c_ptr->nodes = kcalloc(max_nodes + 1, sizeof(void*), GFP_ATOMIC); - if (c_ptr->nodes == NULL) { - warn("Cluster creation failure, no memory for node area\n"); - kfree(c_ptr); - return NULL; - } - - if (in_own_cluster(addr)) - tipc_local_nodes = c_ptr->nodes; - c_ptr->highest_slave = LOWEST_SLAVE - 1; - c_ptr->highest_node = 0; - - z_ptr = tipc_zone_find(tipc_zone(addr)); - if (!z_ptr) { - z_ptr = tipc_zone_create(addr); - } - if (!z_ptr) { - kfree(c_ptr->nodes); - kfree(c_ptr); - return NULL; - } - - tipc_zone_attach_cluster(z_ptr, c_ptr); - c_ptr->owner = z_ptr; - return c_ptr; -} - -void tipc_cltr_delete(struct cluster *c_ptr) -{ - u32 n_num; - - if (!c_ptr) - return; - for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) { - tipc_node_delete(c_ptr->nodes[n_num]); - } - for (n_num = LOWEST_SLAVE; n_num <= c_ptr->highest_slave; n_num++) { - tipc_node_delete(c_ptr->nodes[n_num]); - } - kfree(c_ptr->nodes); - kfree(c_ptr); -} - - -void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr) -{ - u32 n_num = tipc_node(n_ptr->addr); - u32 max_n_num = tipc_max_nodes; - - if (in_own_cluster(n_ptr->addr)) - max_n_num = tipc_highest_allowed_slave; - assert(n_num > 0); - assert(n_num <= max_n_num); - assert(c_ptr->nodes[n_num] == NULL); - c_ptr->nodes[n_num] = n_ptr; - if (n_num > c_ptr->highest_node) - c_ptr->highest_node = n_num; -} - -/** - * tipc_cltr_select_router - select router to a cluster - * - * Uses deterministic and fair algorithm. - */ - -u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref) -{ - u32 n_num; - u32 ulim = c_ptr->highest_node; - u32 mask; - u32 tstart; - - assert(!in_own_cluster(c_ptr->addr)); - if (!ulim) - return 0; - - /* Start entry must be random */ - mask = tipc_max_nodes; - while (mask > ulim) - mask >>= 1; - tstart = ref & mask; - n_num = tstart; - - /* Lookup upwards with wrap-around */ - do { - if (tipc_node_is_up(c_ptr->nodes[n_num])) - break; - } while (++n_num <= ulim); - if (n_num > ulim) { - n_num = 1; - do { - if (tipc_node_is_up(c_ptr->nodes[n_num])) - break; - } while (++n_num < tstart); - if (n_num == tstart) - return 0; - } - assert(n_num <= ulim); - return tipc_node_select_router(c_ptr->nodes[n_num], ref); -} - -/** - * tipc_cltr_select_node - select destination node within a remote cluster - * - * Uses deterministic and fair algorithm. - */ - -struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector) -{ - u32 n_num; - u32 mask = tipc_max_nodes; - u32 start_entry; - - assert(!in_own_cluster(c_ptr->addr)); - if (!c_ptr->highest_node) - return NULL; - - /* Start entry must be random */ - while (mask > c_ptr->highest_node) { - mask >>= 1; - } - start_entry = (selector & mask) ? selector & mask : 1u; - assert(start_entry <= c_ptr->highest_node); - - /* Lookup upwards with wrap-around */ - for (n_num = start_entry; n_num <= c_ptr->highest_node; n_num++) { - if (tipc_node_has_active_links(c_ptr->nodes[n_num])) - return c_ptr->nodes[n_num]; - } - for (n_num = 1; n_num < start_entry; n_num++) { - if (tipc_node_has_active_links(c_ptr->nodes[n_num])) - return c_ptr->nodes[n_num]; - } - return NULL; -} - -/* - * Routing table management: See description in node.c - */ - -static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest) -{ - u32 size = INT_H_SIZE + data_size; - struct sk_buff *buf = tipc_buf_acquire(size); - struct tipc_msg *msg; - - if (buf) { - msg = buf_msg(buf); - memset((char *)msg, 0, size); - tipc_msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest); - } - return buf; -} - -void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, - u32 lower, u32 upper) -{ - struct sk_buff *buf = tipc_cltr_prepare_routing_msg(0, c_ptr->addr); - struct tipc_msg *msg; - - if (buf) { - msg = buf_msg(buf); - msg_set_remote_node(msg, dest); - msg_set_type(msg, ROUTE_ADDITION); - tipc_cltr_multicast(c_ptr, buf, lower, upper); - } else { - warn("Memory squeeze: broadcast of new route failed\n"); - } -} - -void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, - u32 lower, u32 upper) -{ - struct sk_buff *buf = tipc_cltr_prepare_routing_msg(0, c_ptr->addr); - struct tipc_msg *msg; - - if (buf) { - msg = buf_msg(buf); - msg_set_remote_node(msg, dest); - msg_set_type(msg, ROUTE_REMOVAL); - tipc_cltr_multicast(c_ptr, buf, lower, upper); - } else { - warn("Memory squeeze: broadcast of lost route failed\n"); - } -} - -void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest) -{ - struct sk_buff *buf; - struct tipc_msg *msg; - u32 highest = c_ptr->highest_slave; - u32 n_num; - int send = 0; - - assert(!is_slave(dest)); - assert(in_own_cluster(dest)); - assert(in_own_cluster(c_ptr->addr)); - if (highest <= LOWEST_SLAVE) - return; - buf = tipc_cltr_prepare_routing_msg(highest - LOWEST_SLAVE + 1, - c_ptr->addr); - if (buf) { - msg = buf_msg(buf); - msg_set_remote_node(msg, c_ptr->addr); - msg_set_type(msg, SLAVE_ROUTING_TABLE); - for (n_num = LOWEST_SLAVE; n_num <= highest; n_num++) { - if (c_ptr->nodes[n_num] && - tipc_node_has_active_links(c_ptr->nodes[n_num])) { - send = 1; - msg_set_dataoctet(msg, n_num); - } - } - if (send) - tipc_link_send(buf, dest, dest); - else - buf_discard(buf); - } else { - warn("Memory squeeze: broadcast of lost route failed\n"); - } -} - -void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest) -{ - struct sk_buff *buf; - struct tipc_msg *msg; - u32 highest = c_ptr->highest_node; - u32 n_num; - int send = 0; - - if (in_own_cluster(c_ptr->addr)) - return; - assert(!is_slave(dest)); - assert(in_own_cluster(dest)); - highest = c_ptr->highest_node; - buf = tipc_cltr_prepare_routing_msg(highest + 1, c_ptr->addr); - if (buf) { - msg = buf_msg(buf); - msg_set_remote_node(msg, c_ptr->addr); - msg_set_type(msg, EXT_ROUTING_TABLE); - for (n_num = 1; n_num <= highest; n_num++) { - if (c_ptr->nodes[n_num] && - tipc_node_has_active_links(c_ptr->nodes[n_num])) { - send = 1; - msg_set_dataoctet(msg, n_num); - } - } - if (send) - tipc_link_send(buf, dest, dest); - else - buf_discard(buf); - } else { - warn("Memory squeeze: broadcast of external route failed\n"); - } -} - -void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest) -{ - struct sk_buff *buf; - struct tipc_msg *msg; - u32 highest = c_ptr->highest_node; - u32 n_num; - int send = 0; - - assert(is_slave(dest)); - assert(in_own_cluster(c_ptr->addr)); - buf = tipc_cltr_prepare_routing_msg(highest, c_ptr->addr); - if (buf) { - msg = buf_msg(buf); - msg_set_remote_node(msg, c_ptr->addr); - msg_set_type(msg, LOCAL_ROUTING_TABLE); - for (n_num = 1; n_num <= highest; n_num++) { - if (c_ptr->nodes[n_num] && - tipc_node_has_active_links(c_ptr->nodes[n_num])) { - send = 1; - msg_set_dataoctet(msg, n_num); - } - } - if (send) - tipc_link_send(buf, dest, dest); - else - buf_discard(buf); - } else { - warn("Memory squeeze: broadcast of local route failed\n"); - } -} - -void tipc_cltr_recv_routing_table(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct cluster *c_ptr; - struct tipc_node *n_ptr; - unchar *node_table; - u32 table_size; - u32 router; - u32 rem_node = msg_remote_node(msg); - u32 z_num; - u32 c_num; - u32 n_num; - - c_ptr = tipc_cltr_find(rem_node); - if (!c_ptr) { - c_ptr = tipc_cltr_create(rem_node); - if (!c_ptr) { - buf_discard(buf); - return; - } - } - - node_table = buf->data + msg_hdr_sz(msg); - table_size = msg_size(msg) - msg_hdr_sz(msg); - router = msg_prevnode(msg); - z_num = tipc_zone(rem_node); - c_num = tipc_cluster(rem_node); - - switch (msg_type(msg)) { - case LOCAL_ROUTING_TABLE: - assert(is_slave(tipc_own_addr)); - case EXT_ROUTING_TABLE: - for (n_num = 1; n_num < table_size; n_num++) { - if (node_table[n_num]) { - u32 addr = tipc_addr(z_num, c_num, n_num); - n_ptr = c_ptr->nodes[n_num]; - if (!n_ptr) { - n_ptr = tipc_node_create(addr); - } - if (n_ptr) - tipc_node_add_router(n_ptr, router); - } - } - break; - case SLAVE_ROUTING_TABLE: - assert(!is_slave(tipc_own_addr)); - assert(in_own_cluster(c_ptr->addr)); - for (n_num = 1; n_num < table_size; n_num++) { - if (node_table[n_num]) { - u32 slave_num = n_num + LOWEST_SLAVE; - u32 addr = tipc_addr(z_num, c_num, slave_num); - n_ptr = c_ptr->nodes[slave_num]; - if (!n_ptr) { - n_ptr = tipc_node_create(addr); - } - if (n_ptr) - tipc_node_add_router(n_ptr, router); - } - } - break; - case ROUTE_ADDITION: - if (!is_slave(tipc_own_addr)) { - assert(!in_own_cluster(c_ptr->addr) || - is_slave(rem_node)); - } else { - assert(in_own_cluster(c_ptr->addr) && - !is_slave(rem_node)); - } - n_ptr = c_ptr->nodes[tipc_node(rem_node)]; - if (!n_ptr) - n_ptr = tipc_node_create(rem_node); - if (n_ptr) - tipc_node_add_router(n_ptr, router); - break; - case ROUTE_REMOVAL: - if (!is_slave(tipc_own_addr)) { - assert(!in_own_cluster(c_ptr->addr) || - is_slave(rem_node)); - } else { - assert(in_own_cluster(c_ptr->addr) && - !is_slave(rem_node)); - } - n_ptr = c_ptr->nodes[tipc_node(rem_node)]; - if (n_ptr) - tipc_node_remove_router(n_ptr, router); - break; - default: - assert(!"Illegal routing manager message received\n"); - } - buf_discard(buf); -} - -void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router) -{ - u32 start_entry; - u32 tstop; - u32 n_num; - - if (is_slave(router)) - return; /* Slave nodes can not be routers */ - - if (in_own_cluster(c_ptr->addr)) { - start_entry = LOWEST_SLAVE; - tstop = c_ptr->highest_slave; - } else { - start_entry = 1; - tstop = c_ptr->highest_node; - } - - for (n_num = start_entry; n_num <= tstop; n_num++) { - if (c_ptr->nodes[n_num]) { - tipc_node_remove_router(c_ptr->nodes[n_num], router); - } - } -} - -/** - * tipc_cltr_multicast - multicast message to local nodes - */ - -static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf, - u32 lower, u32 upper) -{ - struct sk_buff *buf_copy; - struct tipc_node *n_ptr; - u32 n_num; - u32 tstop; - - assert(lower <= upper); - assert(((lower >= 1) && (lower <= tipc_max_nodes)) || - ((lower >= LOWEST_SLAVE) && (lower <= tipc_highest_allowed_slave))); - assert(((upper >= 1) && (upper <= tipc_max_nodes)) || - ((upper >= LOWEST_SLAVE) && (upper <= tipc_highest_allowed_slave))); - assert(in_own_cluster(c_ptr->addr)); - - tstop = is_slave(upper) ? c_ptr->highest_slave : c_ptr->highest_node; - if (tstop > upper) - tstop = upper; - for (n_num = lower; n_num <= tstop; n_num++) { - n_ptr = c_ptr->nodes[n_num]; - if (n_ptr && tipc_node_has_active_links(n_ptr)) { - buf_copy = skb_copy(buf, GFP_ATOMIC); - if (buf_copy == NULL) - break; - msg_set_destnode(buf_msg(buf_copy), n_ptr->addr); - tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr); - } - } - buf_discard(buf); -} - -/** - * tipc_cltr_broadcast - broadcast message to all nodes within cluster - */ - -void tipc_cltr_broadcast(struct sk_buff *buf) -{ - struct sk_buff *buf_copy; - struct cluster *c_ptr; - struct tipc_node *n_ptr; - u32 n_num; - u32 tstart; - u32 tstop; - u32 node_type; - - if (tipc_mode == TIPC_NET_MODE) { - c_ptr = tipc_cltr_find(tipc_own_addr); - assert(in_own_cluster(c_ptr->addr)); /* For now */ - - /* Send to standard nodes, then repeat loop sending to slaves */ - tstart = 1; - tstop = c_ptr->highest_node; - for (node_type = 1; node_type <= 2; node_type++) { - for (n_num = tstart; n_num <= tstop; n_num++) { - n_ptr = c_ptr->nodes[n_num]; - if (n_ptr && tipc_node_has_active_links(n_ptr)) { - buf_copy = skb_copy(buf, GFP_ATOMIC); - if (buf_copy == NULL) - goto exit; - msg_set_destnode(buf_msg(buf_copy), - n_ptr->addr); - tipc_link_send(buf_copy, n_ptr->addr, - n_ptr->addr); - } - } - tstart = LOWEST_SLAVE; - tstop = c_ptr->highest_slave; - } - } -exit: - buf_discard(buf); -} - -int tipc_cltr_init(void) -{ - tipc_highest_allowed_slave = LOWEST_SLAVE + tipc_max_slaves; - return tipc_cltr_create(tipc_own_addr) ? 0 : -ENOMEM; -} - diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h deleted file mode 100644 index 32636d98c9c6..000000000000 --- a/net/tipc/cluster.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * net/tipc/cluster.h: Include file for TIPC cluster management routines - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _TIPC_CLUSTER_H -#define _TIPC_CLUSTER_H - -#include "addr.h" -#include "zone.h" - -#define LOWEST_SLAVE 2048u - -/** - * struct cluster - TIPC cluster structure - * @addr: network address of cluster - * @owner: pointer to zone that cluster belongs to - * @nodes: array of pointers to all nodes within cluster - * @highest_node: id of highest numbered node within cluster - * @highest_slave: (used for secondary node support) - */ - -struct cluster { - u32 addr; - struct _zone *owner; - struct tipc_node **nodes; - u32 highest_node; - u32 highest_slave; -}; - - -extern struct tipc_node **tipc_local_nodes; -extern u32 tipc_highest_allowed_slave; -extern struct tipc_node_map tipc_cltr_bcast_nodes; - -void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router); -void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest); -struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector); -u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref); -void tipc_cltr_recv_routing_table(struct sk_buff *buf); -struct cluster *tipc_cltr_create(u32 addr); -void tipc_cltr_delete(struct cluster *c_ptr); -void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr); -void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest); -void tipc_cltr_broadcast(struct sk_buff *buf); -int tipc_cltr_init(void); - -void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi); -void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest); -void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi); - -static inline struct cluster *tipc_cltr_find(u32 addr) -{ - struct _zone *z_ptr = tipc_zone_find(addr); - - if (z_ptr) - return z_ptr->clusters[1]; - return NULL; -} - -#endif diff --git a/net/tipc/config.c b/net/tipc/config.c index 50a6133a3668..e16750dcf3c1 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -35,30 +35,11 @@ */ #include "core.h" -#include "dbg.h" -#include "bearer.h" #include "port.h" -#include "link.h" -#include "zone.h" -#include "addr.h" #include "name_table.h" -#include "node.h" #include "config.h" -#include "discover.h" -struct subscr_data { - char usr_handle[8]; - u32 domain; - u32 port_ref; - struct list_head subd_list; -}; - -struct manager { - u32 user_ref; - u32 port_ref; -}; - -static struct manager mng = { 0}; +static u32 config_port_ref; static DEFINE_SPINLOCK(config_lock); @@ -83,10 +64,8 @@ int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf); int new_tlv_space = TLV_SPACE(tlv_data_size); - if (skb_tailroom(buf) < new_tlv_space) { - dbg("tipc_cfg_append_tlv unable to append TLV\n"); + if (skb_tailroom(buf) < new_tlv_space) return 0; - } skb_put(buf, new_tlv_space); tlv->tlv_type = htons(tlv_type); tlv->tlv_len = htons(TLV_LENGTH(tlv_data_size)); @@ -281,38 +260,6 @@ static struct sk_buff *cfg_set_max_ports(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_zones(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_zones) - return tipc_cfg_reply_none(); - if (value != delimit(value, 1, 255)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max zones must be 1-255)"); - if (tipc_mode == TIPC_NET_MODE) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max zones once TIPC has joined a network)"); - tipc_max_zones = value; - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_max_clusters(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value != delimit(value, 1, 1)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max clusters fixed at 1)"); - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_max_nodes(void) { u32 value; @@ -332,19 +279,6 @@ static struct sk_buff *cfg_set_max_nodes(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_slaves(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value != 0) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (max secondary nodes fixed at 0)"); - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_netid(void) { u32 value; @@ -388,8 +322,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area } else if (!tipc_remote_management) { rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NO_REMOTE); goto exit; - } - else if (cmd >= 0x4000) { + } else if (cmd >= 0x4000) { u32 domain = 0; if ((tipc_nametbl_translate(TIPC_ZM_SRV, 0, &domain) == 0) || @@ -464,18 +397,9 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_MAX_SUBSCR: rep_tlv_buf = cfg_set_max_subscriptions(); break; - case TIPC_CMD_SET_MAX_ZONES: - rep_tlv_buf = cfg_set_max_zones(); - break; - case TIPC_CMD_SET_MAX_CLUSTERS: - rep_tlv_buf = cfg_set_max_clusters(); - break; case TIPC_CMD_SET_MAX_NODES: rep_tlv_buf = cfg_set_max_nodes(); break; - case TIPC_CMD_SET_MAX_SLAVES: - rep_tlv_buf = cfg_set_max_slaves(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; @@ -491,18 +415,9 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_SUBSCR: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); break; - case TIPC_CMD_GET_MAX_ZONES: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_zones); - break; - case TIPC_CMD_GET_MAX_CLUSTERS: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_clusters); - break; case TIPC_CMD_GET_MAX_NODES: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes); break; - case TIPC_CMD_GET_MAX_SLAVES: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_slaves); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -510,6 +425,15 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); break; + case TIPC_CMD_SET_MAX_ZONES: + case TIPC_CMD_GET_MAX_ZONES: + case TIPC_CMD_SET_MAX_SLAVES: + case TIPC_CMD_GET_MAX_SLAVES: + case TIPC_CMD_SET_MAX_CLUSTERS: + case TIPC_CMD_GET_MAX_CLUSTERS: + rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (obsolete command)"); + break; default: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (unknown command)"); @@ -572,20 +496,16 @@ int tipc_cfg_init(void) struct tipc_name_seq seq; int res; - res = tipc_attach(&mng.user_ref, NULL, NULL); - if (res) - goto failed; - - res = tipc_createport(mng.user_ref, NULL, TIPC_CRITICAL_IMPORTANCE, + res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE, NULL, NULL, NULL, NULL, cfg_named_msg_event, NULL, - NULL, &mng.port_ref); + NULL, &config_port_ref); if (res) goto failed; seq.type = TIPC_CFG_SRV; seq.lower = seq.upper = tipc_own_addr; - res = tipc_nametbl_publish_rsv(mng.port_ref, TIPC_ZONE_SCOPE, &seq); + res = tipc_nametbl_publish_rsv(config_port_ref, TIPC_ZONE_SCOPE, &seq); if (res) goto failed; @@ -593,15 +513,13 @@ int tipc_cfg_init(void) failed: err("Unable to create configuration service\n"); - tipc_detach(mng.user_ref); - mng.user_ref = 0; return res; } void tipc_cfg_stop(void) { - if (mng.user_ref) { - tipc_detach(mng.user_ref); - mng.user_ref = 0; + if (config_port_ref) { + tipc_deleteport(config_port_ref); + config_port_ref = 0; } } diff --git a/net/tipc/config.h b/net/tipc/config.h index 481e12ece715..443159a166fd 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -39,7 +39,6 @@ /* ---------------------------------------------------------------------- */ -#include "core.h" #include "link.h" struct sk_buff *tipc_cfg_reply_alloc(int payload_size); diff --git a/net/tipc/core.c b/net/tipc/core.c index e2a09eb8efd4..e071579e0850 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -34,37 +34,17 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/random.h> - #include "core.h" -#include "dbg.h" #include "ref.h" -#include "net.h" -#include "user_reg.h" #include "name_table.h" #include "subscr.h" #include "config.h" -#ifndef CONFIG_TIPC_ZONES -#define CONFIG_TIPC_ZONES 3 -#endif - -#ifndef CONFIG_TIPC_CLUSTERS -#define CONFIG_TIPC_CLUSTERS 1 -#endif - #ifndef CONFIG_TIPC_NODES #define CONFIG_TIPC_NODES 255 #endif -#ifndef CONFIG_TIPC_SLAVE_NODES -#define CONFIG_TIPC_SLAVE_NODES 0 -#endif - #ifndef CONFIG_TIPC_PORTS #define CONFIG_TIPC_PORTS 8191 #endif @@ -85,10 +65,7 @@ const char tipc_alphabet[] = /* configurable TIPC parameters */ u32 tipc_own_addr; -int tipc_max_zones; -int tipc_max_clusters; int tipc_max_nodes; -int tipc_max_slaves; int tipc_max_ports; int tipc_max_subscriptions; int tipc_max_publications; @@ -138,10 +115,11 @@ int tipc_core_start_net(unsigned long addr) { int res; - if ((res = tipc_net_start(addr)) || - (res = tipc_eth_media_start())) { + res = tipc_net_start(addr); + if (!res) + res = tipc_eth_media_start(); + if (res) tipc_core_stop_net(); - } return res; } @@ -160,7 +138,6 @@ static void tipc_core_stop(void) tipc_handler_stop(); tipc_cfg_stop(); tipc_subscr_stop(); - tipc_reg_stop(); tipc_nametbl_stop(); tipc_ref_table_stop(); tipc_socket_stop(); @@ -181,16 +158,22 @@ static int tipc_core_start(void) get_random_bytes(&tipc_random, sizeof(tipc_random)); tipc_mode = TIPC_NODE_MODE; - if ((res = tipc_handler_start()) || - (res = tipc_ref_table_init(tipc_max_ports, tipc_random)) || - (res = tipc_reg_start()) || - (res = tipc_nametbl_init()) || - (res = tipc_k_signal((Handler)tipc_subscr_start, 0)) || - (res = tipc_k_signal((Handler)tipc_cfg_init, 0)) || - (res = tipc_netlink_start()) || - (res = tipc_socket_init())) { + res = tipc_handler_start(); + if (!res) + res = tipc_ref_table_init(tipc_max_ports, tipc_random); + if (!res) + res = tipc_nametbl_init(); + if (!res) + res = tipc_k_signal((Handler)tipc_subscr_start, 0); + if (!res) + res = tipc_k_signal((Handler)tipc_cfg_init, 0); + if (!res) + res = tipc_netlink_start(); + if (!res) + res = tipc_socket_init(); + if (res) tipc_core_stop(); - } + return res; } @@ -210,13 +193,11 @@ static int __init tipc_init(void) tipc_max_publications = 10000; tipc_max_subscriptions = 2000; tipc_max_ports = CONFIG_TIPC_PORTS; - tipc_max_zones = CONFIG_TIPC_ZONES; - tipc_max_clusters = CONFIG_TIPC_CLUSTERS; tipc_max_nodes = CONFIG_TIPC_NODES; - tipc_max_slaves = CONFIG_TIPC_SLAVE_NODES; tipc_net_id = 4711; - if ((res = tipc_core_start())) + res = tipc_core_start(); + if (res) err("Unable to start in single node mode\n"); else info("Started in single node mode\n"); @@ -236,43 +217,3 @@ module_exit(tipc_exit); MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(TIPC_MOD_VER); - -/* Native TIPC API for kernel-space applications (see tipc.h) */ - -EXPORT_SYMBOL(tipc_attach); -EXPORT_SYMBOL(tipc_detach); -EXPORT_SYMBOL(tipc_createport); -EXPORT_SYMBOL(tipc_deleteport); -EXPORT_SYMBOL(tipc_ownidentity); -EXPORT_SYMBOL(tipc_portimportance); -EXPORT_SYMBOL(tipc_set_portimportance); -EXPORT_SYMBOL(tipc_portunreliable); -EXPORT_SYMBOL(tipc_set_portunreliable); -EXPORT_SYMBOL(tipc_portunreturnable); -EXPORT_SYMBOL(tipc_set_portunreturnable); -EXPORT_SYMBOL(tipc_publish); -EXPORT_SYMBOL(tipc_withdraw); -EXPORT_SYMBOL(tipc_connect2port); -EXPORT_SYMBOL(tipc_disconnect); -EXPORT_SYMBOL(tipc_shutdown); -EXPORT_SYMBOL(tipc_send); -EXPORT_SYMBOL(tipc_send2name); -EXPORT_SYMBOL(tipc_send2port); -EXPORT_SYMBOL(tipc_multicast); - -/* TIPC API for external bearers (see tipc_bearer.h) */ - -EXPORT_SYMBOL(tipc_block_bearer); -EXPORT_SYMBOL(tipc_continue); -EXPORT_SYMBOL(tipc_disable_bearer); -EXPORT_SYMBOL(tipc_enable_bearer); -EXPORT_SYMBOL(tipc_recv_msg); -EXPORT_SYMBOL(tipc_register_media); - -/* TIPC API for external APIs (see tipc_port.h) */ - -EXPORT_SYMBOL(tipc_createport_raw); -EXPORT_SYMBOL(tipc_reject_msg); -EXPORT_SYMBOL(tipc_send_buf_fast); -EXPORT_SYMBOL(tipc_acknowledge); - diff --git a/net/tipc/core.h b/net/tipc/core.h index e19389e57227..997158546e25 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -39,10 +39,6 @@ #include <linux/tipc.h> #include <linux/tipc_config.h> -#include <net/tipc/tipc_msg.h> -#include <net/tipc/tipc_port.h> -#include <net/tipc/tipc_bearer.h> -#include <net/tipc/tipc.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -62,6 +58,9 @@ #define TIPC_MOD_VER "2.0.0" +struct tipc_msg; /* msg.h */ +struct print_buf; /* log.h */ + /* * TIPC sanity test macros */ @@ -84,6 +83,7 @@ * user-defined buffers can be configured to do the same thing. */ extern struct print_buf *const TIPC_NULL; +extern struct print_buf *const TIPC_CONS; extern struct print_buf *const TIPC_LOG; void tipc_printf(struct print_buf *, const char *fmt, ...); @@ -96,73 +96,35 @@ void tipc_printf(struct print_buf *, const char *fmt, ...); #define TIPC_OUTPUT TIPC_LOG #endif -/* - * TIPC can be configured to send system messages to TIPC_OUTPUT - * or to the system console only. - */ - -#ifdef CONFIG_TIPC_DEBUG - #define err(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ - KERN_ERR "TIPC: " fmt, ## arg) + KERN_ERR "TIPC: " fmt, ## arg) #define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ - KERN_WARNING "TIPC: " fmt, ## arg) + KERN_WARNING "TIPC: " fmt, ## arg) #define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ - KERN_NOTICE "TIPC: " fmt, ## arg) - -#else - -#define err(fmt, arg...) printk(KERN_ERR "TIPC: " fmt , ## arg) -#define info(fmt, arg...) printk(KERN_INFO "TIPC: " fmt , ## arg) -#define warn(fmt, arg...) printk(KERN_WARNING "TIPC: " fmt , ## arg) + KERN_NOTICE "TIPC: " fmt, ## arg) -#endif +#ifdef CONFIG_TIPC_DEBUG /* * DBG_OUTPUT is the destination print buffer for debug messages. - * It defaults to the the null print buffer, but can be redefined - * (typically in the individual .c files being debugged) to allow - * selected debug messages to be generated where needed. */ #ifndef DBG_OUTPUT -#define DBG_OUTPUT TIPC_NULL +#define DBG_OUTPUT TIPC_LOG #endif -/* - * TIPC can be configured to send debug messages to the specified print buffer - * (typically DBG_OUTPUT) or to suppress them entirely. - */ +#define dbg(fmt, arg...) tipc_printf(DBG_OUTPUT, KERN_DEBUG fmt, ## arg); -#ifdef CONFIG_TIPC_DEBUG - -#define dbg(fmt, arg...) \ - do { \ - if (DBG_OUTPUT != TIPC_NULL) \ - tipc_printf(DBG_OUTPUT, fmt, ## arg); \ - } while (0) -#define msg_dbg(msg, txt) \ - do { \ - if (DBG_OUTPUT != TIPC_NULL) \ - tipc_msg_dbg(DBG_OUTPUT, msg, txt); \ - } while (0) -#define dump(fmt, arg...) \ - do { \ - if (DBG_OUTPUT != TIPC_NULL) \ - tipc_dump_dbg(DBG_OUTPUT, fmt, ##arg); \ - } while (0) +#define msg_dbg(msg, txt) tipc_msg_dbg(DBG_OUTPUT, msg, txt); void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *); -void tipc_dump_dbg(struct print_buf *, const char *fmt, ...); #else #define dbg(fmt, arg...) do {} while (0) #define msg_dbg(msg, txt) do {} while (0) -#define dump(fmt, arg...) do {} while (0) -#define tipc_msg_dbg(...) do {} while (0) -#define tipc_dump_dbg(...) do {} while (0) +#define tipc_msg_dbg(buf, msg, txt) do {} while (0) #endif @@ -174,14 +136,18 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...); #define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ /* + * TIPC operating mode routines + */ +#define TIPC_NOT_RUNNING 0 +#define TIPC_NODE_MODE 1 +#define TIPC_NET_MODE 2 + +/* * Global configuration variables */ extern u32 tipc_own_addr; -extern int tipc_max_zones; -extern int tipc_max_clusters; extern int tipc_max_nodes; -extern int tipc_max_slaves; extern int tipc_max_ports; extern int tipc_max_subscriptions; extern int tipc_max_publications; @@ -240,7 +206,6 @@ u32 tipc_k_signal(Handler routine, unsigned long argument); static inline void k_init_timer(struct timer_list *timer, Handler routine, unsigned long argument) { - dbg("initializing timer %p\n", timer); setup_timer(timer, routine, argument); } @@ -260,7 +225,6 @@ static inline void k_init_timer(struct timer_list *timer, Handler routine, static inline void k_start_timer(struct timer_list *timer, unsigned long msec) { - dbg("starting timer %p for %u\n", timer, msec); mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1); } @@ -277,7 +241,6 @@ static inline void k_start_timer(struct timer_list *timer, unsigned long msec) static inline void k_cancel_timer(struct timer_list *timer) { - dbg("cancelling timer %p\n", timer); del_timer_sync(timer); } @@ -295,7 +258,6 @@ static inline void k_cancel_timer(struct timer_list *timer) static inline void k_term_timer(struct timer_list *timer) { - dbg("terminating timer %p\n", timer); } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 4a7cd3719b78..fa026bd91a68 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -35,12 +35,8 @@ */ #include "core.h" -#include "dbg.h" #include "link.h" -#include "zone.h" #include "discover.h" -#include "port.h" -#include "name_table.h" #define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */ #define TIPC_LINK_REQ_FAST 2000 /* normal delay if bearer has no links */ @@ -134,8 +130,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) u32 net_id = msg_bc_netid(msg); u32 type = msg_type(msg); - msg_get_media_addr(msg,&media_addr); - msg_dbg(msg, "RECV:"); + msg_get_media_addr(msg, &media_addr); buf_discard(buf); if (net_id != tipc_net_id) @@ -151,10 +146,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) } if (!tipc_in_scope(dest, tipc_own_addr)) return; - if (is_slave(tipc_own_addr) && is_slave(orig)) - return; - if (is_slave(orig) && !in_own_cluster(orig)) - return; if (in_own_cluster(orig)) { /* Always accept link here */ struct sk_buff *rbuf; @@ -162,7 +153,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) struct tipc_node *n_ptr = tipc_node_find(orig); int link_fully_up; - dbg(" in own cluster\n"); if (n_ptr == NULL) { n_ptr = tipc_node_create(orig); if (!n_ptr) @@ -179,7 +169,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) link = n_ptr->links[b_ptr->identity]; if (!link) { - dbg("creating link\n"); link = tipc_link_create(b_ptr, orig, &media_addr); if (!link) { spin_unlock_bh(&n_ptr->lock); @@ -204,7 +193,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) return; rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr); if (rbuf != NULL) { - msg_dbg(buf_msg(rbuf),"SEND:"); b_ptr->media->send_msg(rbuf, &b_ptr->publ, &media_addr); buf_discard(rbuf); } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index f8e750636123..d2c3cffb79fc 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -37,8 +37,6 @@ #ifndef _TIPC_DISCOVER_H #define _TIPC_DISCOVER_H -#include "core.h" - struct link_req; struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 6e988ba485fd..b69092eb95d8 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -34,12 +34,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <net/tipc/tipc.h> -#include <net/tipc/tipc_bearer.h> -#include <net/tipc/tipc_msg.h> -#include <linux/netdevice.h> -#include <linux/slab.h> -#include <net/net_namespace.h> +#include "core.h" +#include "bearer.h" #define MAX_ETH_BEARERS 2 #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI @@ -60,7 +56,7 @@ struct eth_bearer { }; static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; -static int eth_started = 0; +static int eth_started; static struct notifier_block notifier; /** @@ -148,7 +144,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Find device with specified name */ - for_each_netdev(&init_net, pdev){ + for_each_netdev(&init_net, pdev) { if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { dev = pdev; break; @@ -159,7 +155,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Find Ethernet bearer for device (or create one) */ - for (;(eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev); eb_ptr++); + while ((eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev)) + eb_ptr++; if (eb_ptr == stop) return -EDQUOT; if (!eb_ptr->dev) { diff --git a/net/tipc/handler.c b/net/tipc/handler.c index 0c70010a7dfe..274c98e164b7 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -45,7 +45,7 @@ struct queue_item { static struct kmem_cache *tipc_queue_item_cache; static struct list_head signal_queue_head; static DEFINE_SPINLOCK(qitem_lock); -static int handler_enabled = 0; +static int handler_enabled; static void process_signal_queue(unsigned long dummy); diff --git a/net/tipc/link.c b/net/tipc/link.c index b31992ccd5d3..18702f58d111 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -35,19 +35,11 @@ */ #include "core.h" -#include "dbg.h" #include "link.h" -#include "net.h" -#include "node.h" #include "port.h" -#include "addr.h" -#include "node_subscr.h" #include "name_distr.h" -#include "bearer.h" -#include "name_table.h" #include "discover.h" #include "config.h" -#include "bcast.h" /* @@ -57,12 +49,6 @@ #define INVALID_SESSION 0x10000 /* - * Limit for deferred reception queue: - */ - -#define DEF_QUEUE_LIMIT 256u - -/* * Link state events: */ @@ -110,75 +96,10 @@ static int link_send_sections_long(struct port *sender, static void link_check_defragm_bufs(struct link *l_ptr); static void link_state_event(struct link *l_ptr, u32 event); static void link_reset_statistics(struct link *l_ptr); -static void link_print(struct link *l_ptr, struct print_buf *buf, - const char *str); +static void link_print(struct link *l_ptr, const char *str); static void link_start(struct link *l_ptr); static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf); - -/* - * Debugging code used by link routines only - * - * When debugging link problems on a system that has multiple links, - * the standard TIPC debugging routines may not be useful since they - * allow the output from multiple links to be intermixed. For this reason - * routines of the form "dbg_link_XXX()" have been created that will capture - * debug info into a link's personal print buffer, which can then be dumped - * into the TIPC system log (TIPC_LOG) upon request. - * - * To enable per-link debugging, use LINK_LOG_BUF_SIZE to specify the size - * of the print buffer used by each link. If LINK_LOG_BUF_SIZE is set to 0, - * the dbg_link_XXX() routines simply send their output to the standard - * debug print buffer (DBG_OUTPUT), if it has been defined; this can be useful - * when there is only a single link in the system being debugged. - * - * Notes: - * - When enabled, LINK_LOG_BUF_SIZE should be set to at least TIPC_PB_MIN_SIZE - * - "l_ptr" must be valid when using dbg_link_XXX() macros - */ - -#define LINK_LOG_BUF_SIZE 0 - -#define dbg_link(fmt, arg...) \ - do { \ - if (LINK_LOG_BUF_SIZE) \ - tipc_printf(&l_ptr->print_buf, fmt, ## arg); \ - } while (0) -#define dbg_link_msg(msg, txt) \ - do { \ - if (LINK_LOG_BUF_SIZE) \ - tipc_msg_dbg(&l_ptr->print_buf, msg, txt); \ - } while (0) -#define dbg_link_state(txt) \ - do { \ - if (LINK_LOG_BUF_SIZE) \ - link_print(l_ptr, &l_ptr->print_buf, txt); \ - } while (0) -#define dbg_link_dump() do { \ - if (LINK_LOG_BUF_SIZE) { \ - tipc_printf(LOG, "\n\nDumping link <%s>:\n", l_ptr->name); \ - tipc_printbuf_move(LOG, &l_ptr->print_buf); \ - } \ -} while (0) - -static void dbg_print_link(struct link *l_ptr, const char *str) -{ - if (DBG_OUTPUT != TIPC_NULL) - link_print(l_ptr, DBG_OUTPUT, str); -} - -static void dbg_print_buf_chain(struct sk_buff *root_buf) -{ - if (DBG_OUTPUT != TIPC_NULL) { - struct sk_buff *buf = root_buf; - - while (buf) { - msg_dbg(buf_msg(buf), "In chain: "); - buf = buf->next; - } - } -} - /* * Simple link routines */ @@ -266,14 +187,17 @@ static int link_name_validate(const char *name, struct link_name *name_parts) /* ensure all component parts of link name are present */ addr_local = name_copy; - if ((if_local = strchr(addr_local, ':')) == NULL) + if_local = strchr(addr_local, ':'); + if (if_local == NULL) return 0; *(if_local++) = 0; - if ((addr_peer = strchr(if_local, '-')) == NULL) + addr_peer = strchr(if_local, '-'); + if (addr_peer == NULL) return 0; *(addr_peer++) = 0; if_local_len = addr_peer - if_local; - if ((if_peer = strchr(addr_peer, ':')) == NULL) + if_peer = strchr(addr_peer, ':'); + if (if_peer == NULL) return 0; *(if_peer++) = 0; if_peer_len = strlen(if_peer) + 1; @@ -392,17 +316,6 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, return NULL; } - if (LINK_LOG_BUF_SIZE) { - char *pb = kmalloc(LINK_LOG_BUF_SIZE, GFP_ATOMIC); - - if (!pb) { - kfree(l_ptr); - warn("Link creation failed, no memory for print buffer\n"); - return NULL; - } - tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE); - } - l_ptr->addr = peer; if_name = strchr(b_ptr->publ.name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:", @@ -437,8 +350,6 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, l_ptr->owner = tipc_node_attach_link(l_ptr); if (!l_ptr->owner) { - if (LINK_LOG_BUF_SIZE) - kfree(l_ptr->print_buf.buf); kfree(l_ptr); return NULL; } @@ -447,9 +358,6 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, list_add_tail(&l_ptr->link_list, &b_ptr->links); tipc_k_signal((Handler)link_start, (unsigned long)l_ptr); - dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n", - l_ptr->tolerance, l_ptr->continuity_interval, l_ptr->abort_limit); - return l_ptr; } @@ -469,8 +377,6 @@ void tipc_link_delete(struct link *l_ptr) return; } - dbg("tipc_link_delete()\n"); - k_cancel_timer(&l_ptr->timer); tipc_node_lock(l_ptr->owner); @@ -478,8 +384,6 @@ void tipc_link_delete(struct link *l_ptr) tipc_node_detach_link(l_ptr->owner, l_ptr); tipc_link_stop(l_ptr); list_del_init(&l_ptr->link_list); - if (LINK_LOG_BUF_SIZE) - kfree(l_ptr->print_buf.buf); tipc_node_unlock(l_ptr->owner); k_term_timer(&l_ptr->timer); kfree(l_ptr); @@ -487,7 +391,6 @@ void tipc_link_delete(struct link *l_ptr) static void link_start(struct link *l_ptr) { - dbg("link_start %x\n", l_ptr); link_state_event(l_ptr, STARTING_EVT); } @@ -639,7 +542,6 @@ void tipc_link_reset(struct link *l_ptr) link_init_max_pkt(l_ptr); l_ptr->state = RESET_UNKNOWN; - dbg_link_state("Resetting Link\n"); if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET)) return; @@ -713,25 +615,18 @@ static void link_state_event(struct link *l_ptr, unsigned event) return; /* Not yet. */ if (link_blocked(l_ptr)) { - if (event == TIMEOUT_EVT) { + if (event == TIMEOUT_EVT) link_set_timer(l_ptr, cont_intv); - } return; /* Changeover going on */ } - dbg_link("STATE_EV: <%s> ", l_ptr->name); switch (l_ptr->state) { case WORKING_WORKING: - dbg_link("WW/"); switch (event) { case TRAFFIC_MSG_EVT: - dbg_link("TRF-"); - /* fall through */ case ACTIVATE_MSG: - dbg_link("ACT\n"); break; case TIMEOUT_EVT: - dbg_link("TIM "); if (l_ptr->next_in_no != l_ptr->checkpoint) { l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { @@ -746,7 +641,6 @@ static void link_state_event(struct link *l_ptr, unsigned event) link_set_timer(l_ptr, cont_intv); break; } - dbg_link(" -> WU\n"); l_ptr->state = WORKING_UNKNOWN; l_ptr->fsm_msg_cnt = 0; tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); @@ -754,7 +648,6 @@ static void link_state_event(struct link *l_ptr, unsigned event) link_set_timer(l_ptr, cont_intv / 4); break; case RESET_MSG: - dbg_link("RES -> RR\n"); info("Resetting link <%s>, requested by peer\n", l_ptr->name); tipc_link_reset(l_ptr); @@ -769,18 +662,14 @@ static void link_state_event(struct link *l_ptr, unsigned event) } break; case WORKING_UNKNOWN: - dbg_link("WU/"); switch (event) { case TRAFFIC_MSG_EVT: - dbg_link("TRF-"); case ACTIVATE_MSG: - dbg_link("ACT -> WW\n"); l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: - dbg_link("RES -> RR\n"); info("Resetting link <%s>, requested by peer " "while probing\n", l_ptr->name); tipc_link_reset(l_ptr); @@ -791,9 +680,7 @@ static void link_state_event(struct link *l_ptr, unsigned event) link_set_timer(l_ptr, cont_intv); break; case TIMEOUT_EVT: - dbg_link("TIM "); if (l_ptr->next_in_no != l_ptr->checkpoint) { - dbg_link("-> WW\n"); l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; l_ptr->checkpoint = l_ptr->next_in_no; @@ -804,16 +691,11 @@ static void link_state_event(struct link *l_ptr, unsigned event) } link_set_timer(l_ptr, cont_intv); } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) { - dbg_link("Probing %u/%u,timer = %u ms)\n", - l_ptr->fsm_msg_cnt, l_ptr->abort_limit, - cont_intv / 4); tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ - dbg_link("-> RU (%u probes unanswered)\n", - l_ptr->fsm_msg_cnt); warn("Resetting link <%s>, peer not responding\n", l_ptr->name); tipc_link_reset(l_ptr); @@ -830,18 +712,13 @@ static void link_state_event(struct link *l_ptr, unsigned event) } break; case RESET_UNKNOWN: - dbg_link("RU/"); switch (event) { case TRAFFIC_MSG_EVT: - dbg_link("TRF-\n"); break; case ACTIVATE_MSG: other = l_ptr->owner->active_links[0]; - if (other && link_working_unknown(other)) { - dbg_link("ACT\n"); + if (other && link_working_unknown(other)) break; - } - dbg_link("ACT -> WW\n"); l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); @@ -850,8 +727,6 @@ static void link_state_event(struct link *l_ptr, unsigned event) link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: - dbg_link("RES\n"); - dbg_link(" -> RR\n"); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0, 0); @@ -859,11 +734,9 @@ static void link_state_event(struct link *l_ptr, unsigned event) link_set_timer(l_ptr, cont_intv); break; case STARTING_EVT: - dbg_link("START-"); l_ptr->started = 1; /* fall through */ case TIMEOUT_EVT: - dbg_link("TIM\n"); tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); @@ -873,18 +746,12 @@ static void link_state_event(struct link *l_ptr, unsigned event) } break; case RESET_RESET: - dbg_link("RR/ "); switch (event) { case TRAFFIC_MSG_EVT: - dbg_link("TRF-"); - /* fall through */ case ACTIVATE_MSG: other = l_ptr->owner->active_links[0]; - if (other && link_working_unknown(other)) { - dbg_link("ACT\n"); + if (other && link_working_unknown(other)) break; - } - dbg_link("ACT -> WW\n"); l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); @@ -893,14 +760,11 @@ static void link_state_event(struct link *l_ptr, unsigned event) link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: - dbg_link("RES\n"); break; case TIMEOUT_EVT: - dbg_link("TIM\n"); tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); - dbg_link("fsm_msg_cnt %u\n", l_ptr->fsm_msg_cnt); break; default: err("Unknown link event %u in RR state\n", event); @@ -940,9 +804,6 @@ static int link_bundle_buf(struct link *l_ptr, skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size); msg_set_size(bundler_msg, to_pos + size); msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1); - dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n", - msg_msgcnt(bundler_msg), size, to_pos, msg_seqno(bundler_msg)); - msg_dbg(msg, "PACKD:"); buf_discard(buf); l_ptr->stats.sent_bundled++; return 1; @@ -991,7 +852,6 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) return link_schedule_port(l_ptr, msg_origport(msg), size); } - msg_dbg(msg, "TIPC: Congestion, throwing away\n"); buf_discard(buf); if (imp > CONN_MANAGER) { warn("Resetting link <%s>, send queue full", l_ptr->name); @@ -1075,22 +935,16 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) int res = -ELINKCONG; read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_select(dest, selector); + n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) { - dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest); + if (l_ptr) res = tipc_link_send_buf(l_ptr, buf); - } else { - dbg("Attempt to send msg to unreachable node:\n"); - msg_dbg(buf_msg(buf),">>>"); + else buf_discard(buf); - } tipc_node_unlock(n_ptr); } else { - dbg("Attempt to send msg to unknown node:\n"); - msg_dbg(buf_msg(buf),">>>"); buf_discard(buf); } read_unlock_bh(&tipc_net_lock); @@ -1117,17 +971,14 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf, if (likely(tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr))) { l_ptr->unacked_window = 0; - msg_dbg(msg,"SENT_FAST:"); return res; } - dbg("failed sent fast...\n"); tipc_bearer_schedule(l_ptr->b_ptr, l_ptr); l_ptr->stats.bearer_congs++; l_ptr->next_out = buf; return res; } - } - else + } else *used_max_pkt = l_ptr->max_pkt; } return tipc_link_send_buf(l_ptr, buf); /* All other cases */ @@ -1151,12 +1002,10 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) return tipc_port_recv_msg(buf); read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_select(destnode, selector); + n_ptr = tipc_node_find(destnode); if (likely(n_ptr)) { tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[selector]; - dbg("send_fast: buf %x selected %x, destnode = %x\n", - buf, l_ptr, destnode); if (likely(l_ptr)) { res = link_send_buf_fast(l_ptr, buf, &dummy); tipc_node_unlock(n_ptr); @@ -1200,7 +1049,7 @@ again: !sender->user_port, &buf); read_lock_bh(&tipc_net_lock); - node = tipc_node_select(destaddr, selector); + node = tipc_node_find(destaddr); if (likely(node)) { tipc_node_lock(node); l_ptr = node->active_links[selector]; @@ -1283,10 +1132,10 @@ static int link_send_sections_long(struct port *sender, struct tipc_node *node; struct tipc_msg *hdr = &sender->publ.phdr; u32 dsz = msg_data_sz(hdr); - u32 max_pkt,fragm_sz,rest; + u32 max_pkt, fragm_sz, rest; struct tipc_msg fragm_hdr; - struct sk_buff *buf,*buf_chain,*prev; - u32 fragm_crs,fragm_rest,hsz,sect_rest; + struct sk_buff *buf, *buf_chain, *prev; + u32 fragm_crs, fragm_rest, hsz, sect_rest; const unchar *sect_crs; int curr_sect; u32 fragm_no; @@ -1306,7 +1155,6 @@ again: /* Prepare reusable fragment header: */ - msg_dbg(hdr, ">FRAGMENTING>"); tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(hdr)); msg_set_link_selector(&fragm_hdr, sender->publ.ref); @@ -1322,7 +1170,6 @@ again: skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); hsz = msg_hdr_sz(hdr); skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz); - msg_dbg(buf_msg(buf), ">BUILD>"); /* Chop up message: */ @@ -1365,7 +1212,7 @@ error: /* Initiate new fragment: */ if (rest <= fragm_sz) { fragm_sz = rest; - msg_set_type(&fragm_hdr,LAST_FRAGMENT); + msg_set_type(&fragm_hdr, LAST_FRAGMENT); } else { msg_set_type(&fragm_hdr, FRAGMENT); } @@ -1381,16 +1228,14 @@ error: skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); fragm_crs = INT_H_SIZE; fragm_rest = fragm_sz; - msg_dbg(buf_msg(buf)," >BUILD>"); } - } - while (rest > 0); + } while (rest > 0); /* * Now we have a buffer chain. Select a link and check * that packet size is still OK */ - node = tipc_node_select(destaddr, sender->publ.ref & 1); + node = tipc_node_find(destaddr); if (likely(node)) { tipc_node_lock(node); l_ptr = node->active_links[sender->publ.ref & 1]; @@ -1431,7 +1276,6 @@ reject: l_ptr->stats.sent_fragments++; msg_set_long_msgno(msg, l_ptr->long_msg_seq_no); link_add_to_outqueue(l_ptr, buf, msg); - msg_dbg(msg, ">ADD>"); buf = next; } @@ -1473,14 +1317,12 @@ u32 tipc_link_push_packet(struct link *l_ptr) msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { - msg_dbg(buf_msg(buf), ">DEF-RETR>"); l_ptr->retransm_queue_head = mod(++r_q_head); l_ptr->retransm_queue_size = --r_q_size; l_ptr->stats.retransmitted++; return 0; } else { l_ptr->stats.bearer_congs++; - msg_dbg(buf_msg(buf), "|>DEF-RETR>"); return PUSH_FAILED; } } @@ -1490,15 +1332,13 @@ u32 tipc_link_push_packet(struct link *l_ptr) buf = l_ptr->proto_msg_queue; if (buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); - msg_set_bcast_ack(buf_msg(buf),l_ptr->owner->bclink.last_in); + msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { - msg_dbg(buf_msg(buf), ">DEF-PROT>"); l_ptr->unacked_window = 0; buf_discard(buf); l_ptr->proto_msg_queue = NULL; return 0; } else { - msg_dbg(buf_msg(buf), "|>DEF-PROT>"); l_ptr->stats.bearer_congs++; return PUSH_FAILED; } @@ -1518,11 +1358,9 @@ u32 tipc_link_push_packet(struct link *l_ptr) if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { if (msg_user(msg) == MSG_BUNDLER) msg_set_type(msg, CLOSED_MSG); - msg_dbg(msg, ">PUSH-DATA>"); l_ptr->next_out = buf->next; return 0; } else { - msg_dbg(msg, "|PUSH-DATA|"); l_ptr->stats.bearer_congs++; return PUSH_FAILED; } @@ -1570,8 +1408,7 @@ static void link_reset_all(unsigned long addr) for (i = 0; i < MAX_BEARERS; i++) { if (n_ptr->links[i]) { - link_print(n_ptr->links[i], TIPC_OUTPUT, - "Resetting link\n"); + link_print(n_ptr->links[i], "Resetting link\n"); tipc_link_reset(n_ptr->links[i]); } } @@ -1585,13 +1422,12 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) struct tipc_msg *msg = buf_msg(buf); warn("Retransmission failure on link <%s>\n", l_ptr->name); - tipc_msg_dbg(TIPC_OUTPUT, msg, ">RETR-FAIL>"); if (l_ptr->addr) { /* Handle failure on standard link */ - link_print(l_ptr, TIPC_OUTPUT, "Resetting link\n"); + link_print(l_ptr, "Resetting link\n"); tipc_link_reset(l_ptr); } else { @@ -1601,21 +1437,21 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) struct tipc_node *n_ptr; char addr_string[16]; - tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg)); - tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n", - (unsigned long) TIPC_SKB_CB(buf)->handle); + info("Msg seq number: %u, ", msg_seqno(msg)); + info("Outstanding acks: %lu\n", + (unsigned long) TIPC_SKB_CB(buf)->handle); n_ptr = l_ptr->owner->next; tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); - tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string); - tipc_printf(TIPC_OUTPUT, "Supported: %d, ", n_ptr->bclink.supported); - tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked); - tipc_printf(TIPC_OUTPUT, "Last in: %u, ", n_ptr->bclink.last_in); - tipc_printf(TIPC_OUTPUT, "Gap after: %u, ", n_ptr->bclink.gap_after); - tipc_printf(TIPC_OUTPUT, "Gap to: %u\n", n_ptr->bclink.gap_to); - tipc_printf(TIPC_OUTPUT, "Nack sync: %u\n\n", n_ptr->bclink.nack_sync); + info("Multicast link info for %s\n", addr_string); + info("Supported: %d, ", n_ptr->bclink.supported); + info("Acked: %u\n", n_ptr->bclink.acked); + info("Last in: %u, ", n_ptr->bclink.last_in); + info("Gap after: %u, ", n_ptr->bclink.gap_after); + info("Gap to: %u\n", n_ptr->bclink.gap_to); + info("Nack sync: %u\n\n", n_ptr->bclink.nack_sync); tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr); @@ -1635,12 +1471,8 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, msg = buf_msg(buf); - dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); - if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { if (l_ptr->retransm_queue_size == 0) { - msg_dbg(msg, ">NO_RETR->BCONG>"); - dbg_print_link(l_ptr, " "); l_ptr->retransm_queue_head = msg_seqno(msg); l_ptr->retransm_queue_size = retransmits; } else { @@ -1667,7 +1499,6 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { - msg_dbg(buf_msg(buf), ">RETR>"); buf = buf->next; retransmits--; l_ptr->stats.retransmitted++; @@ -1793,9 +1624,8 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) /* Ensure message data is a single contiguous unit */ - if (unlikely(buf_linearize(buf))) { + if (unlikely(buf_linearize(buf))) goto cont; - } /* Handle arrival of a non-unicast link message */ @@ -1907,7 +1737,7 @@ deliver: continue; case ROUTE_DISTRIBUTOR: tipc_node_unlock(n_ptr); - tipc_cltr_recv_routing_table(buf); + buf_discard(buf); continue; case NAME_DISTRIBUTOR: tipc_node_unlock(n_ptr); @@ -1953,12 +1783,10 @@ deliver: tipc_node_unlock(n_ptr); continue; } - msg_dbg(msg,"NSEQ<REC<"); link_state_event(l_ptr, TRAFFIC_MSG_EVT); if (link_working_working(l_ptr)) { /* Re-insert in front of queue */ - msg_dbg(msg,"RECV-REINS:"); buf->next = head; head = buf; tipc_node_unlock(n_ptr); @@ -2012,13 +1840,11 @@ u32 tipc_link_defer_pkt(struct sk_buff **head, *head = buf; return 1; } - if (seq_no == msg_seqno(msg)) { + if (seq_no == msg_seqno(msg)) break; - } prev = crs; crs = crs->next; - } - while (crs); + } while (crs); /* Message is a duplicate of an existing message */ @@ -2040,9 +1866,6 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr, return; } - dbg("rx OOS msg: seq_no %u, expecting %u (%u)\n", - seq_no, mod(l_ptr->next_in_no), l_ptr->next_in_no); - /* Record OOS packet arrival (force mismatch on next timeout) */ l_ptr->checkpoint--; @@ -2132,11 +1955,10 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, msg_set_max_pkt(msg, l_ptr->max_pkt_target); } - if (tipc_node_has_redundant_links(l_ptr->owner)) { + if (tipc_node_has_redundant_links(l_ptr->owner)) msg_set_redundant_link(msg); - } else { + else msg_clear_redundant_link(msg); - } msg_set_linkprio(msg, l_ptr->priority); /* Ensure sequence number will not fit : */ @@ -2160,8 +1982,6 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, /* Message can be sent */ - msg_dbg(msg, ">>"); - buf = tipc_buf_acquire(msg_size); if (!buf) return; @@ -2195,8 +2015,6 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); - dbg("AT(%u):", jiffies_to_msecs(jiffies)); - msg_dbg(msg, "<<"); if (link_blocked(l_ptr)) goto exit; @@ -2215,11 +2033,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) case RESET_MSG: if (!link_working_unknown(l_ptr) && (l_ptr->peer_session != INVALID_SESSION)) { - if (msg_session(msg) == l_ptr->peer_session) { - dbg("Duplicate RESET: %u<->%u\n", - msg_session(msg), l_ptr->peer_session); + if (msg_session(msg) == l_ptr->peer_session) break; /* duplicate: ignore */ - } } /* fall thru' */ case ACTIVATE_MSG: @@ -2227,8 +2042,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg)); - if ((msg_tol = msg_link_tolerance(msg)) && - (msg_tol > l_ptr->tolerance)) + msg_tol = msg_link_tolerance(msg); + if (msg_tol > l_ptr->tolerance) link_set_supervision_props(l_ptr, msg_tol); if (msg_linkprio(msg) > l_ptr->priority) @@ -2251,13 +2066,13 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) l_ptr->peer_bearer_id = msg_bearer_id(msg); /* Synchronize broadcast sequence numbers */ - if (!tipc_node_has_redundant_links(l_ptr->owner)) { + if (!tipc_node_has_redundant_links(l_ptr->owner)) l_ptr->owner->bclink.last_in = mod(msg_last_bcast(msg)); - } break; case STATE_MSG: - if ((msg_tol = msg_link_tolerance(msg))) + msg_tol = msg_link_tolerance(msg); + if (msg_tol) link_set_supervision_props(l_ptr, msg_tol); if (msg_linkprio(msg) && @@ -2280,8 +2095,6 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) max_pkt_ack = msg_max_pkt(msg); if (max_pkt_ack > l_ptr->max_pkt) { - dbg("Link <%s> updated MTU %u -> %u\n", - l_ptr->name, l_ptr->max_pkt, max_pkt_ack); l_ptr->max_pkt = max_pkt_ack; l_ptr->max_pkt_probes = 0; } @@ -2289,9 +2102,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) max_pkt_ack = 0; if (msg_probe(msg)) { l_ptr->stats.recv_probes++; - if (msg_size(msg) > sizeof(l_ptr->proto_msg)) { + if (msg_size(msg) > sizeof(l_ptr->proto_msg)) max_pkt_ack = msg_size(msg); - } } /* Protocol message before retransmits, reduce loss risk */ @@ -2303,14 +2115,11 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) 0, rec_gap, 0, 0, max_pkt_ack); } if (msg_seq_gap(msg)) { - msg_dbg(msg, "With Gap:"); l_ptr->stats.recv_nacks++; tipc_link_retransmit(l_ptr, l_ptr->first_out, msg_seq_gap(msg)); } break; - default: - msg_dbg(buf_msg(buf), "<DISCARDING UNKNOWN<"); } exit: buf_discard(buf); @@ -2345,8 +2154,6 @@ static void tipc_link_tunnel(struct link *l_ptr, } skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE); skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length); - dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane); - msg_dbg(buf_msg(buf), ">SEND>"); tipc_link_send_buf(tunnel, buf); } @@ -2378,7 +2185,6 @@ void tipc_link_changeover(struct link *l_ptr) ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); - dbg("Link changeover requires %u tunnel messages\n", msgcount); if (!l_ptr->first_out) { struct sk_buff *buf; @@ -2387,9 +2193,6 @@ void tipc_link_changeover(struct link *l_ptr) if (buf) { skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE); msg_set_size(&tunnel_hdr, INT_H_SIZE); - dbg("%c->%c:", l_ptr->b_ptr->net_plane, - tunnel->b_ptr->net_plane); - msg_dbg(&tunnel_hdr, "EMPTY>SEND>"); tipc_link_send_buf(tunnel, buf); } else { warn("Link changeover error, " @@ -2406,11 +2209,11 @@ void tipc_link_changeover(struct link *l_ptr) if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) { struct tipc_msg *m = msg_get_wrapped(msg); - unchar* pos = (unchar*)m; + unchar *pos = (unchar *)m; msgcount = msg_msgcnt(msg); while (msgcount--) { - msg_set_seqno(m,msg_seqno(msg)); + msg_set_seqno(m, msg_seqno(msg)); tipc_link_tunnel(l_ptr, &tunnel_hdr, m, msg_link_selector(m)); pos += align(msg_size(m)); @@ -2453,9 +2256,6 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE); skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data, length); - dbg("%c->%c:", l_ptr->b_ptr->net_plane, - tunnel->b_ptr->net_plane); - msg_dbg(buf_msg(outbuf), ">SEND>"); tipc_link_send_buf(tunnel, outbuf); if (!tipc_link_is_up(l_ptr)) return; @@ -2502,31 +2302,24 @@ static int link_recv_changeover_msg(struct link **l_ptr, u32 msg_count = msg_msgcnt(tunnel_msg); dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)]; - if (!dest_link) { - msg_dbg(tunnel_msg, "NOLINK/<REC<"); + if (!dest_link) goto exit; - } if (dest_link == *l_ptr) { err("Unexpected changeover message on link <%s>\n", (*l_ptr)->name); goto exit; } - dbg("%c<-%c:", dest_link->b_ptr->net_plane, - (*l_ptr)->b_ptr->net_plane); *l_ptr = dest_link; msg = msg_get_wrapped(tunnel_msg); if (msg_typ == DUPLICATE_MSG) { - if (less(msg_seqno(msg), mod(dest_link->next_in_no))) { - msg_dbg(tunnel_msg, "DROP/<REC<"); + if (less(msg_seqno(msg), mod(dest_link->next_in_no))) goto exit; - } - *buf = buf_extract(tunnel_buf,INT_H_SIZE); + *buf = buf_extract(tunnel_buf, INT_H_SIZE); if (*buf == NULL) { warn("Link changeover error, duplicate msg dropped\n"); goto exit; } - msg_dbg(tunnel_msg, "TNL<REC<"); buf_discard(tunnel_buf); return 1; } @@ -2534,18 +2327,14 @@ static int link_recv_changeover_msg(struct link **l_ptr, /* First original message ?: */ if (tipc_link_is_up(dest_link)) { - msg_dbg(tunnel_msg, "UP/FIRST/<REC<"); info("Resetting link <%s>, changeover initiated by peer\n", dest_link->name); tipc_link_reset(dest_link); dest_link->exp_msg_count = msg_count; - dbg("Expecting %u tunnelled messages\n", msg_count); if (!msg_count) goto exit; } else if (dest_link->exp_msg_count == START_CHANGEOVER) { - msg_dbg(tunnel_msg, "BLK/FIRST/<REC<"); dest_link->exp_msg_count = msg_count; - dbg("Expecting %u tunnelled messages\n", msg_count); if (!msg_count) goto exit; } @@ -2555,18 +2344,14 @@ static int link_recv_changeover_msg(struct link **l_ptr, if (dest_link->exp_msg_count == 0) { warn("Link switchover error, " "got too many tunnelled messages\n"); - msg_dbg(tunnel_msg, "OVERDUE/DROP/<REC<"); - dbg_print_link(dest_link, "LINK:"); goto exit; } dest_link->exp_msg_count--; if (less(msg_seqno(msg), dest_link->reset_checkpoint)) { - msg_dbg(tunnel_msg, "DROP/DUPL/<REC<"); goto exit; } else { *buf = buf_extract(tunnel_buf, INT_H_SIZE); if (*buf != NULL) { - msg_dbg(tunnel_msg, "TNL<REC<"); buf_discard(tunnel_buf); return 1; } else { @@ -2588,7 +2373,6 @@ void tipc_link_recv_bundle(struct sk_buff *buf) u32 pos = INT_H_SIZE; struct sk_buff *obuf; - msg_dbg(buf_msg(buf), "<BNDL<: "); while (msgcount--) { obuf = buf_extract(buf, pos); if (obuf == NULL) { @@ -2596,7 +2380,6 @@ void tipc_link_recv_bundle(struct sk_buff *buf) break; } pos += align(msg_size(buf_msg(obuf))); - msg_dbg(buf_msg(obuf), " /"); tipc_net_route_msg(obuf); } buf_discard(buf); @@ -2733,7 +2516,6 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, u32 long_msg_seq_no = msg_long_msgno(fragm); *fb = NULL; - msg_dbg(fragm,"FRG<REC<"); /* Is there an incomplete message waiting for this fragment? */ @@ -2752,7 +2534,6 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, if (msg_type(imsg) == TIPC_MCAST_MSG) max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE; if (msg_size(imsg) > max) { - msg_dbg(fragm,"<REC<Oversized: "); buf_discard(fbuf); return 0; } @@ -2765,8 +2546,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, /* Prepare buffer for subsequent fragments. */ set_long_msg_seqno(pbuf, long_msg_seq_no); - set_fragm_size(pbuf,fragm_sz); - set_expected_frags(pbuf,exp_fragm_cnt - 1); + set_fragm_size(pbuf, fragm_sz); + set_expected_frags(pbuf, exp_fragm_cnt - 1); } else { warn("Link unable to reassemble fragmented message\n"); } @@ -2793,13 +2574,9 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, *m = buf_msg(pbuf); return 1; } - set_expected_frags(pbuf,exp_frags); + set_expected_frags(pbuf, exp_frags); return 0; } - dbg(" Discarding orphan fragment %x\n",fbuf); - msg_dbg(fragm,"ORPHAN:"); - dbg("Pending long buffers:\n"); - dbg_print_buf_chain(*pending); buf_discard(fbuf); return 0; } @@ -2827,11 +2604,6 @@ static void link_check_defragm_bufs(struct link *l_ptr) incr_timer_cnt(buf); prev = buf; } else { - dbg(" Discarding incomplete long buffer\n"); - msg_dbg(buf_msg(buf), "LONG:"); - dbg_print_link(l_ptr, "curr:"); - dbg("Pending long buffers:\n"); - dbg_print_buf_chain(l_ptr->defragm_buf); if (prev) prev->next = buf->next; else @@ -2866,7 +2638,6 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window) l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900; l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200; l_ptr->queue_limit[CONN_MANAGER] = 1200; - l_ptr->queue_limit[ROUTE_DISTRIBUTOR] = 1200; l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500; l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000; /* FRAGMENT and LAST_FRAGMENT packets */ @@ -3168,7 +2939,7 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) return MAX_MSG_SIZE; read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_select(dest, selector); + n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[selector & 1]; @@ -3180,27 +2951,22 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) return res; } -static void link_dump_send_queue(struct link *l_ptr) +static void link_print(struct link *l_ptr, const char *str) { - if (l_ptr->next_out) { - info("\nContents of unsent queue:\n"); - dbg_print_buf_chain(l_ptr->next_out); - } - info("\nContents of send queue:\n"); - if (l_ptr->first_out) { - dbg_print_buf_chain(l_ptr->first_out); - } - info("Empty send queue\n"); -} + char print_area[256]; + struct print_buf pb; + struct print_buf *buf = &pb; + + tipc_printbuf_init(buf, print_area, sizeof(print_area)); -static void link_print(struct link *l_ptr, struct print_buf *buf, - const char *str) -{ tipc_printf(buf, str); - if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr)) - return; tipc_printf(buf, "Link %x<%s>:", l_ptr->addr, l_ptr->b_ptr->publ.name); + +#ifdef CONFIG_TIPC_DEBUG + if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr)) + goto print_state; + tipc_printf(buf, ": NXO(%u):", mod(l_ptr->next_out_no)); tipc_printf(buf, "NXI(%u):", mod(l_ptr->next_in_no)); tipc_printf(buf, "SQUE"); @@ -3218,7 +2984,6 @@ static void link_print(struct link *l_ptr, struct print_buf *buf, tipc_printf(buf, "first_out= %x ", l_ptr->first_out); tipc_printf(buf, "next_out= %x ", l_ptr->next_out); tipc_printf(buf, "last_out= %x ", l_ptr->last_out); - link_dump_send_queue(l_ptr); } } else tipc_printf(buf, "[]"); @@ -3232,14 +2997,20 @@ static void link_print(struct link *l_ptr, struct print_buf *buf, l_ptr->deferred_inqueue_sz); } } +print_state: +#endif + if (link_working_unknown(l_ptr)) tipc_printf(buf, ":WU"); - if (link_reset_reset(l_ptr)) + else if (link_reset_reset(l_ptr)) tipc_printf(buf, ":RR"); - if (link_reset_unknown(l_ptr)) + else if (link_reset_unknown(l_ptr)) tipc_printf(buf, ":RU"); - if (link_working_working(l_ptr)) + else if (link_working_working(l_ptr)) tipc_printf(buf, ":WW"); tipc_printf(buf, "\n"); + + tipc_printbuf_validate(buf); + info("%s", print_area); } diff --git a/net/tipc/link.h b/net/tipc/link.h index f98bc613de67..70967e637027 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -37,9 +37,8 @@ #ifndef _TIPC_LINK_H #define _TIPC_LINK_H -#include "dbg.h" +#include "log.h" #include "msg.h" -#include "bearer.h" #include "node.h" #define PUSH_FAILED 1 @@ -108,7 +107,6 @@ * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @defragm_buf: list of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity - * @print_buf: print buffer used to log link activity */ struct link { @@ -211,8 +209,6 @@ struct link { u32 msg_lengths_total; u32 msg_length_profile[7]; } stats; - - struct print_buf print_buf; }; struct port; @@ -233,8 +229,8 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ void tipc_link_reset(struct link *l_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf); -u32 tipc_link_get_max_pkt(u32 dest,u32 selector); -int tipc_link_send_sections_fast(struct port* sender, +u32 tipc_link_get_max_pkt(u32 dest, u32 selector); +int tipc_link_send_sections_fast(struct port *sender, struct iovec const *msg_sect, const u32 num_sect, u32 destnode); diff --git a/net/tipc/dbg.c b/net/tipc/log.c index 46f51d208e5e..952c39f643e6 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/log.c @@ -1,5 +1,5 @@ /* - * net/tipc/dbg.c: TIPC print buffer routines for debugging + * net/tipc/log.c: TIPC print buffer routines for debugging * * Copyright (c) 1996-2006, Ericsson AB * Copyright (c) 2005-2007, Wind River Systems @@ -36,7 +36,7 @@ #include "core.h" #include "config.h" -#include "dbg.h" +#include "log.h" /* * TIPC pre-defines the following print buffers: @@ -52,7 +52,7 @@ static struct print_buf null_buf = { NULL, 0, NULL, 0 }; struct print_buf *const TIPC_NULL = &null_buf; static struct print_buf cons_buf = { NULL, 0, NULL, 1 }; -static struct print_buf *const TIPC_CONS = &cons_buf; +struct print_buf *const TIPC_CONS = &cons_buf; static struct print_buf log_buf = { NULL, 0, NULL, 1 }; struct print_buf *const TIPC_LOG = &log_buf; @@ -64,9 +64,9 @@ struct print_buf *const TIPC_LOG = &log_buf; * 'print_string' when writing to a print buffer. This also protects against * concurrent writes to the print buffer being written to. * - * 2) tipc_dump() and tipc_log_XXX() leverage the aforementioned - * use of 'print_lock' to protect against all types of concurrent operations - * on their associated print buffer (not just write operations). + * 2) tipc_log_XXX() leverages the aforementioned use of 'print_lock' to + * protect against all types of concurrent operations on their associated + * print buffer (not just write operations). * * Note: All routines of the form tipc_printbuf_XXX() are lock-free, and rely * on the caller to prevent simultaneous use of the print buffer(s) being @@ -76,18 +76,16 @@ struct print_buf *const TIPC_LOG = &log_buf; static char print_string[TIPC_PB_MAX_STR]; static DEFINE_SPINLOCK(print_lock); -static void tipc_printbuf_reset(struct print_buf *pb); -static int tipc_printbuf_empty(struct print_buf *pb); static void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from); -#define FORMAT(PTR,LEN,FMT) \ +#define FORMAT(PTR, LEN, FMT) \ {\ - va_list args;\ - va_start(args, FMT);\ - LEN = vsprintf(PTR, FMT, args);\ - va_end(args);\ - *(PTR + LEN) = '\0';\ + va_list args;\ + va_start(args, FMT);\ + LEN = vsprintf(PTR, FMT, args);\ + va_end(args);\ + *(PTR + LEN) = '\0';\ } /** @@ -268,81 +266,6 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...) spin_unlock_bh(&print_lock); } -#ifdef CONFIG_TIPC_DEBUG - -/** - * print_to_console - write string of bytes to console in multiple chunks - */ - -static void print_to_console(char *crs, int len) -{ - int rest = len; - - while (rest > 0) { - int sz = rest < TIPC_PB_MAX_STR ? rest : TIPC_PB_MAX_STR; - char c = crs[sz]; - - crs[sz] = 0; - printk((const char *)crs); - crs[sz] = c; - rest -= sz; - crs += sz; - } -} - -/** - * printbuf_dump - write print buffer contents to console - */ - -static void printbuf_dump(struct print_buf *pb) -{ - int len; - - if (!pb->buf) { - printk("*** PRINT BUFFER NOT ALLOCATED ***"); - return; - } - - /* Dump print buffer from char after cursor to end (if used) */ - - len = pb->buf + pb->size - pb->crs - 2; - if ((pb->buf[pb->size - 1] == 0) && (len > 0)) - print_to_console(pb->crs + 1, len); - - /* Dump print buffer from start to cursor (always) */ - - len = pb->crs - pb->buf; - print_to_console(pb->buf, len); -} - -/** - * tipc_dump_dbg - dump (non-console) print buffer to console - * @pb: pointer to print buffer - */ - -void tipc_dump_dbg(struct print_buf *pb, const char *fmt, ...) -{ - int len; - - if (pb == TIPC_CONS) - return; - - spin_lock_bh(&print_lock); - - FORMAT(print_string, len, fmt); - printk(print_string); - - printk("\n---- Start of %s log dump ----\n\n", - (pb == TIPC_LOG) ? "global" : "local"); - printbuf_dump(pb); - tipc_printbuf_reset(pb); - printk("\n---- End of dump ----\n"); - - spin_unlock_bh(&print_lock); -} - -#endif - /** * tipc_log_resize - change the size of the TIPC log buffer * @log_size: print buffer size to use @@ -353,10 +276,8 @@ int tipc_log_resize(int log_size) int res = 0; spin_lock_bh(&print_lock); - if (TIPC_LOG->buf) { - kfree(TIPC_LOG->buf); - TIPC_LOG->buf = NULL; - } + kfree(TIPC_LOG->buf); + TIPC_LOG->buf = NULL; if (log_size) { if (log_size < TIPC_PB_MIN_SIZE) log_size = TIPC_PB_MIN_SIZE; @@ -407,8 +328,7 @@ struct sk_buff *tipc_log_dump(void) } else if (tipc_printbuf_empty(TIPC_LOG)) { spin_unlock_bh(&print_lock); reply = tipc_cfg_reply_ultra_string("log is empty\n"); - } - else { + } else { struct tlv_desc *rep_tlv; struct print_buf pb; int str_len; @@ -429,4 +349,3 @@ struct sk_buff *tipc_log_dump(void) } return reply; } - diff --git a/net/tipc/dbg.h b/net/tipc/log.h index 3ba6ba8b434a..2248d96238e6 100644 --- a/net/tipc/dbg.h +++ b/net/tipc/log.h @@ -1,5 +1,5 @@ /* - * net/tipc/dbg.h: Include file for TIPC print buffer routines + * net/tipc/log.h: Include file for TIPC print buffer routines * * Copyright (c) 1997-2006, Ericsson AB * Copyright (c) 2005-2007, Wind River Systems @@ -34,8 +34,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _TIPC_DBG_H -#define _TIPC_DBG_H +#ifndef _TIPC_LOG_H +#define _TIPC_LOG_H /** * struct print_buf - TIPC print buffer structure diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ecb532fb0351..bb6180c4fcbb 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -35,10 +35,7 @@ */ #include "core.h" -#include "addr.h" -#include "dbg.h" #include "msg.h" -#include "bearer.h" u32 tipc_msg_tot_importance(struct tipc_msg *m) { @@ -94,7 +91,7 @@ int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect) int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, - int max_size, int usrmem, struct sk_buff** buf) + int max_size, int usrmem, struct sk_buff **buf) { int dsz, sz, hsz, pos, res, cnt; @@ -140,6 +137,7 @@ int tipc_msg_build(struct tipc_msg *hdr, void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) { u32 usr = msg_user(msg); + tipc_printf(buf, KERN_DEBUG); tipc_printf(buf, str); switch (usr) { @@ -163,10 +161,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "LAST:"); break; default: - tipc_printf(buf, "UNKNOWN:%x",msg_type(msg)); + tipc_printf(buf, "UNKNOWN:%x", msg_type(msg)); } - tipc_printf(buf, "NO(%u/%u):",msg_long_msgno(msg), + tipc_printf(buf, "NO(%u/%u):", msg_long_msgno(msg), msg_fragm_no(msg)); break; case TIPC_LOW_IMPORTANCE: @@ -192,7 +190,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "DIR:"); break; default: - tipc_printf(buf, "UNKNOWN TYPE %u",msg_type(msg)); + tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg)); } if (msg_routed(msg) && !msg_non_seq(msg)) tipc_printf(buf, "ROUT:"); @@ -210,7 +208,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "WDRW:"); break; default: - tipc_printf(buf, "UNKNOWN:%x",msg_type(msg)); + tipc_printf(buf, "UNKNOWN:%x", msg_type(msg)); } if (msg_routed(msg)) tipc_printf(buf, "ROUT:"); @@ -229,39 +227,39 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) break; case CONN_ACK: tipc_printf(buf, "CONN_ACK:"); - tipc_printf(buf, "ACK(%u):",msg_msgcnt(msg)); + tipc_printf(buf, "ACK(%u):", msg_msgcnt(msg)); break; default: - tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg)); + tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } if (msg_routed(msg)) tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) - tipc_printf(buf, "REROUTED(%u):",msg_reroute_cnt(msg)); + tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); break; case LINK_PROTOCOL: - tipc_printf(buf, "PROT:TIM(%u):",msg_timestamp(msg)); + tipc_printf(buf, "PROT:TIM(%u):", msg_timestamp(msg)); switch (msg_type(msg)) { case STATE_MSG: tipc_printf(buf, "STATE:"); - tipc_printf(buf, "%s:",msg_probe(msg) ? "PRB" :""); - tipc_printf(buf, "NXS(%u):",msg_next_sent(msg)); - tipc_printf(buf, "GAP(%u):",msg_seq_gap(msg)); - tipc_printf(buf, "LSTBC(%u):",msg_last_bcast(msg)); + tipc_printf(buf, "%s:", msg_probe(msg) ? "PRB" : ""); + tipc_printf(buf, "NXS(%u):", msg_next_sent(msg)); + tipc_printf(buf, "GAP(%u):", msg_seq_gap(msg)); + tipc_printf(buf, "LSTBC(%u):", msg_last_bcast(msg)); break; case RESET_MSG: tipc_printf(buf, "RESET:"); if (msg_size(msg) != msg_hdr_sz(msg)) - tipc_printf(buf, "BEAR:%s:",msg_data(msg)); + tipc_printf(buf, "BEAR:%s:", msg_data(msg)); break; case ACTIVATE_MSG: tipc_printf(buf, "ACTIVATE:"); break; default: - tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg)); + tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } - tipc_printf(buf, "PLANE(%c):",msg_net_plane(msg)); - tipc_printf(buf, "SESS(%u):",msg_session(msg)); + tipc_printf(buf, "PLANE(%c):", msg_net_plane(msg)); + tipc_printf(buf, "SESS(%u):", msg_session(msg)); break; case CHANGEOVER_PROTOCOL: tipc_printf(buf, "TUNL:"); @@ -271,10 +269,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) break; case ORIGINAL_MSG: tipc_printf(buf, "ORIG:"); - tipc_printf(buf, "EXP(%u)",msg_msgcnt(msg)); + tipc_printf(buf, "EXP(%u)", msg_msgcnt(msg)); break; default: - tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg)); + tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } break; case ROUTE_DISTRIBUTOR: @@ -282,26 +280,26 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) switch (msg_type(msg)) { case EXT_ROUTING_TABLE: tipc_printf(buf, "EXT_TBL:"); - tipc_printf(buf, "TO:%x:",msg_remote_node(msg)); + tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); break; case LOCAL_ROUTING_TABLE: tipc_printf(buf, "LOCAL_TBL:"); - tipc_printf(buf, "TO:%x:",msg_remote_node(msg)); + tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); break; case SLAVE_ROUTING_TABLE: tipc_printf(buf, "DP_TBL:"); - tipc_printf(buf, "TO:%x:",msg_remote_node(msg)); + tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); break; case ROUTE_ADDITION: tipc_printf(buf, "ADD:"); - tipc_printf(buf, "TO:%x:",msg_remote_node(msg)); + tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); break; case ROUTE_REMOVAL: tipc_printf(buf, "REMOVE:"); - tipc_printf(buf, "TO:%x:",msg_remote_node(msg)); + tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); break; default: - tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg)); + tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } break; case LINK_CONFIG: @@ -314,7 +312,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "DSC_RESP:"); break; default: - tipc_printf(buf, "UNKNOWN TYPE:%x:",msg_type(msg)); + tipc_printf(buf, "UNKNOWN TYPE:%x:", msg_type(msg)); break; } break; @@ -350,7 +348,8 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "UNKNOWN ERROR(%x):", msg_errcode(msg)); } - default:{} + default: + break; } tipc_printf(buf, "HZ(%u):", msg_hdr_sz(msg)); @@ -359,9 +358,8 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) if (msg_non_seq(msg)) tipc_printf(buf, "NOSEQ:"); - else { + else tipc_printf(buf, "ACK(%u):", msg_ack(msg)); - } tipc_printf(buf, "BACK(%u):", msg_bcast_ack(msg)); tipc_printf(buf, "PRND(%x)", msg_prevnode(msg)); @@ -389,14 +387,13 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) if (msg_user(msg) == NAME_DISTRIBUTOR) { tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg)); tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg)); - if (msg_routed(msg)) { + if (msg_routed(msg)) tipc_printf(buf, ":CSEQN(%u)", msg_transp_seqno(msg)); - } } if (msg_user(msg) == LINK_CONFIG) { - u32* raw = (u32*)msg; - struct tipc_media_addr* orig = (struct tipc_media_addr*)&raw[5]; + u32 *raw = (u32 *)msg; + struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5]; tipc_printf(buf, ":REQL(%u):", msg_req_links(msg)); tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg)); tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg)); @@ -407,12 +404,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "TO(%u):", msg_bcgap_to(msg)); } tipc_printf(buf, "\n"); - if ((usr == CHANGEOVER_PROTOCOL) && (msg_msgcnt(msg))) { + if ((usr == CHANGEOVER_PROTOCOL) && (msg_msgcnt(msg))) tipc_msg_dbg(buf, msg_get_wrapped(msg), " /"); - } - if ((usr == MSG_FRAGMENTER) && (msg_type(msg) == FIRST_FRAGMENT)) { + if ((usr == MSG_FRAGMENTER) && (msg_type(msg) == FIRST_FRAGMENT)) tipc_msg_dbg(buf, msg_get_wrapped(msg), " /"); - } } #endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 031aad18efce..92c4c4fd7b3f 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -37,10 +37,51 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "core.h" +#include "bearer.h" #define TIPC_VERSION 2 +/* + * TIPC user data message header format, version 2: + * + * + * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w0:|vers | user |hdr sz |n|d|s|-| message size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w1:|mstyp| error |rer cnt|lsc|opt p| broadcast ack no | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w2:| link level ack no | broadcast/link level seq no | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w3:| previous node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w4:| originating port | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w5:| destination port | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w6:| originating node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w7:| destination node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w8:| name type / transport sequence number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w9:| name instance/multicast lower bound | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * wA:| multicast upper bound | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / / + * \ options \ + * / / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +#define TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 + + #define SHORT_H_SIZE 24 /* Connected, in-cluster messages */ #define DIR_MSG_H_SIZE 32 /* Directly addressed messages */ #define LONG_H_SIZE 40 /* Named messages */ @@ -52,20 +93,26 @@ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -/* - TIPC user data message header format, version 2 +struct tipc_msg { + __be32 hdr[15]; +}; - - Fundamental definitions available to privileged TIPC users - are located in tipc_msg.h. - - Remaining definitions available to TIPC internal users appear below. -*/ +static inline u32 msg_word(struct tipc_msg *m, u32 pos) +{ + return ntohl(m->hdr[pos]); +} static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val) { m->hdr[w] = htonl(val); } +static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask) +{ + return (msg_word(m, w) >> pos) & mask; +} + static inline void msg_set_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask, u32 val) { @@ -112,16 +159,36 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n) msg_set_bits(m, 0, 25, 0xf, n); } +static inline u32 msg_importance(struct tipc_msg *m) +{ + return msg_bits(m, 0, 25, 0xf); +} + static inline void msg_set_importance(struct tipc_msg *m, u32 i) { msg_set_user(m, i); } -static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n) +static inline u32 msg_hdr_sz(struct tipc_msg *m) +{ + return msg_bits(m, 0, 21, 0xf) << 2; +} + +static inline void msg_set_hdr_sz(struct tipc_msg *m, u32 n) { msg_set_bits(m, 0, 21, 0xf, n>>2); } +static inline u32 msg_size(struct tipc_msg *m) +{ + return msg_bits(m, 0, 0, 0x1ffff); +} + +static inline u32 msg_data_sz(struct tipc_msg *m) +{ + return msg_size(m) - msg_hdr_sz(m); +} + static inline int msg_non_seq(struct tipc_msg *m) { return msg_bits(m, 0, 20, 1); @@ -162,11 +229,36 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz) * Word 1 */ +static inline u32 msg_type(struct tipc_msg *m) +{ + return msg_bits(m, 1, 29, 0x7); +} + static inline void msg_set_type(struct tipc_msg *m, u32 n) { msg_set_bits(m, 1, 29, 0x7, n); } +static inline u32 msg_named(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_NAMED_MSG; +} + +static inline u32 msg_mcast(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_MCAST_MSG; +} + +static inline u32 msg_connected(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_CONN_MSG; +} + +static inline u32 msg_errcode(struct tipc_msg *m) +{ + return msg_bits(m, 1, 25, 0xf); +} + static inline void msg_set_errcode(struct tipc_msg *m, u32 err) { msg_set_bits(m, 1, 25, 0xf, err); @@ -257,31 +349,68 @@ static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode) */ +static inline u32 msg_prevnode(struct tipc_msg *m) +{ + return msg_word(m, 3); +} + static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) { msg_set_word(m, 3, a); } +static inline u32 msg_origport(struct tipc_msg *m) +{ + return msg_word(m, 4); +} + static inline void msg_set_origport(struct tipc_msg *m, u32 p) { msg_set_word(m, 4, p); } +static inline u32 msg_destport(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + static inline void msg_set_destport(struct tipc_msg *m, u32 p) { msg_set_word(m, 5, p); } +static inline u32 msg_mc_netid(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) { msg_set_word(m, 5, p); } +static inline int msg_short(struct tipc_msg *m) +{ + return msg_hdr_sz(m) == 24; +} + +static inline u32 msg_orignode(struct tipc_msg *m) +{ + if (likely(msg_short(m))) + return msg_prevnode(m); + return msg_word(m, 6); +} + static inline void msg_set_orignode(struct tipc_msg *m, u32 a) { msg_set_word(m, 6, a); } +static inline u32 msg_destnode(struct tipc_msg *m) +{ + return msg_word(m, 7); +} + static inline void msg_set_destnode(struct tipc_msg *m, u32 a) { msg_set_word(m, 7, a); @@ -296,7 +425,12 @@ static inline u32 msg_routed(struct tipc_msg *m) { if (likely(msg_short(m))) return 0; - return(msg_destnode(m) ^ msg_orignode(m)) >> 11; + return (msg_destnode(m) ^ msg_orignode(m)) >> 11; +} + +static inline u32 msg_nametype(struct tipc_msg *m) +{ + return msg_word(m, 8); } static inline void msg_set_nametype(struct tipc_msg *m, u32 n) @@ -324,6 +458,16 @@ static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n) msg_set_word(m, 8, n); } +static inline u32 msg_nameinst(struct tipc_msg *m) +{ + return msg_word(m, 9); +} + +static inline u32 msg_namelower(struct tipc_msg *m) +{ + return msg_nameinst(m); +} + static inline void msg_set_namelower(struct tipc_msg *m, u32 n) { msg_set_word(m, 9, n); @@ -334,11 +478,21 @@ static inline void msg_set_nameinst(struct tipc_msg *m, u32 n) msg_set_namelower(m, n); } +static inline u32 msg_nameupper(struct tipc_msg *m) +{ + return msg_word(m, 10); +} + static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) { msg_set_word(m, 10, n); } +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) { return (struct tipc_msg *)msg_data(m); @@ -386,7 +540,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define MSG_BUNDLER 6 #define LINK_PROTOCOL 7 #define CONN_MANAGER 8 -#define ROUTE_DISTRIBUTOR 9 +#define ROUTE_DISTRIBUTOR 9 /* obsoleted */ #define CHANGEOVER_PROTOCOL 10 #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 @@ -665,11 +819,6 @@ static inline void msg_set_remote_node(struct tipc_msg *m, u32 a) msg_set_word(m, msg_hdr_sz(m)/4, a); } -static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos) -{ - msg_data(m)[pos + 4] = 1; -} - /* * Segmentation message types */ @@ -696,7 +845,7 @@ static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos) * Routing table message types */ #define EXT_ROUTING_TABLE 0 -#define LOCAL_ROUTING_TABLE 1 +#define LOCAL_ROUTING_TABLE 1 /* obsoleted */ #define SLAVE_ROUTING_TABLE 2 #define ROUTE_ADDITION 3 #define ROUTE_REMOVAL 4 @@ -714,7 +863,7 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, - int max_size, int usrmem, struct sk_buff** buf); + int max_size, int usrmem, struct sk_buff **buf); static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a) { @@ -723,7 +872,7 @@ static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr static inline void msg_get_media_addr(struct tipc_msg *m, struct tipc_media_addr *a) { - memcpy(a, &((int*)m)[5], sizeof(*a)); + memcpy(a, &((int *)m)[5], sizeof(*a)); } #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 7b907171f879..483c226c9581 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -35,10 +35,7 @@ */ #include "core.h" -#include "cluster.h" -#include "dbg.h" #include "link.h" -#include "msg.h" #include "name_distr.h" #define ITEM_SIZE sizeof(struct distr_item) @@ -76,7 +73,7 @@ struct distr_item { */ static LIST_HEAD(publ_root); -static u32 publ_cnt = 0; +static u32 publ_cnt; /** * publ_to_item - add publication info to a publication message @@ -89,7 +86,6 @@ static void publ_to_item(struct distr_item *i, struct publication *p) i->upper = htonl(p->upper); i->ref = htonl(p->ref); i->key = htonl(p->key); - dbg("publ_to_item: %u, %u, %u\n", p->type, p->lower, p->upper); } /** @@ -109,6 +105,26 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) return buf; } +static void named_cluster_distribute(struct sk_buff *buf) +{ + struct sk_buff *buf_copy; + struct tipc_node *n_ptr; + u32 n_num; + + for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { + n_ptr = tipc_net.nodes[n_num]; + if (n_ptr && tipc_node_has_active_links(n_ptr)) { + buf_copy = skb_copy(buf, GFP_ATOMIC); + if (!buf_copy) + break; + msg_set_destnode(buf_msg(buf_copy), n_ptr->addr); + tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr); + } + } + + buf_discard(buf); +} + /** * tipc_named_publish - tell other nodes about a new publication by this node */ @@ -129,8 +145,7 @@ void tipc_named_publish(struct publication *publ) item = (struct distr_item *)msg_data(buf_msg(buf)); publ_to_item(item, publ); - dbg("tipc_named_withdraw: broadcasting publish msg\n"); - tipc_cltr_broadcast(buf); + named_cluster_distribute(buf); } /** @@ -153,8 +168,7 @@ void tipc_named_withdraw(struct publication *publ) item = (struct distr_item *)msg_data(buf_msg(buf)); publ_to_item(item, publ); - dbg("tipc_named_withdraw: broadcasting withdraw msg\n"); - tipc_cltr_broadcast(buf); + named_cluster_distribute(buf); } /** @@ -191,9 +205,6 @@ void tipc_named_node_up(unsigned long node) left -= ITEM_SIZE; if (!left) { msg_set_link_selector(buf_msg(buf), node); - dbg("tipc_named_node_up: sending publish msg to " - "<%u.%u.%u>\n", tipc_zone(node), - tipc_cluster(node), tipc_node(node)); tipc_link_send(buf, node, node); buf = NULL; } @@ -218,8 +229,6 @@ static void node_is_down(struct publication *publ) struct publication *p; write_lock_bh(&tipc_nametbl_lock); - dbg("node_is_down: withdrawing %u, %u, %u\n", - publ->type, publ->lower, publ->upper); publ->key += 1222345; p = tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); @@ -231,9 +240,7 @@ static void node_is_down(struct publication *publ) publ->type, publ->lower, publ->node, publ->ref, publ->key); } - if (p) { - kfree(p); - } + kfree(p); } /** @@ -250,9 +257,6 @@ void tipc_named_recv(struct sk_buff *buf) write_lock_bh(&tipc_nametbl_lock); while (count--) { if (msg_type(msg) == PUBLICATION) { - dbg("tipc_named_recv: got publication for %u, %u, %u\n", - ntohl(item->type), ntohl(item->lower), - ntohl(item->upper)); publ = tipc_nametbl_insert_publ(ntohl(item->type), ntohl(item->lower), ntohl(item->upper), @@ -267,9 +271,6 @@ void tipc_named_recv(struct sk_buff *buf) (net_ev_handler)node_is_down); } } else if (msg_type(msg) == WITHDRAWAL) { - dbg("tipc_named_recv: got withdrawl for %u, %u, %u\n", - ntohl(item->type), ntohl(item->lower), - ntohl(item->upper)); publ = tipc_nametbl_remove_publ(ntohl(item->type), ntohl(item->lower), msg_orignode(msg), diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 3a8de4334da1..205ed4a4e186 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -36,15 +36,10 @@ #include "core.h" #include "config.h" -#include "dbg.h" #include "name_table.h" #include "name_distr.h" -#include "addr.h" -#include "node_subscr.h" #include "subscr.h" #include "port.h" -#include "cluster.h" -#include "bcast.h" static int tipc_nametbl_size = 1024; /* must be a power of 2 */ @@ -109,7 +104,7 @@ struct name_table { u32 local_publ_count; }; -static struct name_table table = { NULL } ; +static struct name_table table; static atomic_t rsv_publ_ok = ATOMIC_INIT(0); DEFINE_RWLOCK(tipc_nametbl_lock); @@ -177,8 +172,6 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea spin_lock_init(&nseq->lock); nseq->type = type; nseq->sseqs = sseq; - dbg("tipc_nameseq_create(): nseq = %p, type %u, ssseqs %p, ff: %u\n", - nseq, type, nseq->sseqs, nseq->first_free); nseq->alloc = 1; INIT_HLIST_NODE(&nseq->ns_list); INIT_LIST_HEAD(&nseq->subscriptions); @@ -256,8 +249,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, int created_subseq = 0; sseq = nameseq_find_subseq(nseq, lower); - dbg("nameseq_ins: for seq %p, {%u,%u}, found sseq %p\n", - nseq, type, lower, sseq); if (sseq) { /* Lower end overlaps existing entry => need an exact match */ @@ -294,38 +285,30 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, type, lower, upper); return NULL; } - dbg("Allocated %u more sseqs\n", nseq->alloc); memcpy(sseqs, nseq->sseqs, nseq->alloc * sizeof(struct sub_seq)); kfree(nseq->sseqs); nseq->sseqs = sseqs; nseq->alloc *= 2; } - dbg("Have %u sseqs for type %u\n", nseq->alloc, type); /* Insert new sub-sequence */ - dbg("ins in pos %u, ff = %u\n", inspos, nseq->first_free); sseq = &nseq->sseqs[inspos]; freesseq = &nseq->sseqs[nseq->first_free]; - memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof (*sseq)); - memset(sseq, 0, sizeof (*sseq)); + memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof(*sseq)); + memset(sseq, 0, sizeof(*sseq)); nseq->first_free++; sseq->lower = lower; sseq->upper = upper; created_subseq = 1; } - dbg("inserting {%u,%u,%u} from <0x%x:%u> into sseq %p(%u,%u) of seq %p\n", - type, lower, upper, node, port, sseq, - sseq->lower, sseq->upper, nseq); /* Insert a publication: */ publ = publ_create(type, lower, upper, scope, node, port, key); if (!publ) return NULL; - dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n", - publ, node, publ->node, publ->subscr.node); sseq->zone_list_size++; if (!sseq->zone_list) @@ -360,7 +343,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, * Any subscriptions waiting for notification? */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - dbg("calling report_overlap()\n"); tipc_subscr_report_overlap(s, publ->lower, publ->upper, @@ -398,9 +380,6 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i if (!sseq) return NULL; - dbg("tipc_nameseq_remove_publ: seq: %p, sseq %p, {%u,%u}, key %u\n", - nseq, sseq, nseq->type, inst, key); - /* Remove publication from zone scope list */ prev = sseq->zone_list; @@ -492,7 +471,7 @@ end_node: if (!sseq->zone_list) { free = &nseq->sseqs[nseq->first_free--]; - memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq)); + memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq)); removed_subseq = 1; } @@ -528,7 +507,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s while (sseq != &nseq->sseqs[nseq->first_free]) { struct publication *zl = sseq->zone_list; - if (zl && tipc_subscr_overlap(s,sseq->lower,sseq->upper)) { + if (zl && tipc_subscr_overlap(s, sseq->lower, sseq->upper)) { struct publication *crs = zl; int must_report = 1; @@ -554,15 +533,10 @@ static struct name_seq *nametbl_find_seq(u32 type) struct hlist_node *seq_node; struct name_seq *ns; - dbg("find_seq %u,(%u,0x%x) table = %p, hash[type] = %u\n", - type, htonl(type), type, table.types, hash(type)); - seq_head = &table.types[hash(type)]; hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { - if (ns->type == type) { - dbg("found %p\n", ns); + if (ns->type == type) return ns; - } } return NULL; @@ -573,18 +547,14 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, { struct name_seq *seq = nametbl_find_seq(type); - dbg("tipc_nametbl_insert_publ: {%u,%u,%u} found %p\n", type, lower, upper, seq); if (lower > upper) { warn("Failed to publish illegal {%u,%u,%u}\n", type, lower, upper); return NULL; } - dbg("Publishing {%u,%u,%u} from 0x%x\n", type, lower, upper, node); - if (!seq) { + if (!seq) seq = tipc_nameseq_create(type, &table.types[hash(type)]); - dbg("tipc_nametbl_insert_publ: created %p\n", seq); - } if (!seq) return NULL; @@ -601,7 +571,6 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, if (!seq) return NULL; - dbg("Withdrawing {%u,%u} from 0x%x\n", type, lower, node); publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); if (!seq->first_free && list_empty(&seq->subscriptions)) { @@ -782,9 +751,8 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, table.local_publ_count++; publ = tipc_nametbl_insert_publ(type, lower, upper, scope, tipc_own_addr, port_ref, key); - if (publ && (scope != TIPC_NODE_SCOPE)) { + if (publ && (scope != TIPC_NODE_SCOPE)) tipc_named_publish(publ); - } write_unlock_bh(&tipc_nametbl_lock); return publ; } @@ -797,7 +765,6 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) { struct publication *publ; - dbg("tipc_nametbl_withdraw: {%u,%u}, key=%u\n", type, lower, key); write_lock_bh(&tipc_nametbl_lock); publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); if (likely(publ)) { @@ -827,13 +794,10 @@ void tipc_nametbl_subscribe(struct subscription *s) write_lock_bh(&tipc_nametbl_lock); seq = nametbl_find_seq(type); - if (!seq) { + if (!seq) seq = tipc_nameseq_create(type, &table.types[hash(type)]); - } - if (seq){ + if (seq) { spin_lock_bh(&seq->lock); - dbg("tipc_nametbl_subscribe:found %p for {%u,%u,%u}\n", - seq, type, s->seq.lower, s->seq.upper); tipc_nameseq_subscribe(seq, s); spin_unlock_bh(&seq->lock); } else { @@ -853,7 +817,7 @@ void tipc_nametbl_unsubscribe(struct subscription *s) write_lock_bh(&tipc_nametbl_lock); seq = nametbl_find_seq(s->seq.type); - if (seq != NULL){ + if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); spin_unlock_bh(&seq->lock); @@ -886,7 +850,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth, } do { - sprintf (portIdStr, "<%u.%u.%u:%u>", + sprintf(portIdStr, "<%u.%u.%u:%u>", tipc_zone(publ->node), tipc_cluster(publ->node), tipc_node(publ->node), publ->ref); tipc_printf(buf, "%-26s ", portIdStr); diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 139882d4ed00..d228bd682655 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -46,7 +46,7 @@ struct port_list; * TIPC name types reserved for internal TIPC use (both current and planned) */ -#define TIPC_ZM_SRV 3 /* zone master service name type */ +#define TIPC_ZM_SRV 3 /* zone master service name type */ /** diff --git a/net/tipc/net.c b/net/tipc/net.c index 1a621cfd6604..9bacfd00b91e 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -35,18 +35,10 @@ */ #include "core.h" -#include "bearer.h" #include "net.h" -#include "zone.h" -#include "addr.h" -#include "name_table.h" #include "name_distr.h" #include "subscr.h" -#include "link.h" -#include "msg.h" #include "port.h" -#include "bcast.h" -#include "discover.h" #include "config.h" /* @@ -116,46 +108,25 @@ */ DEFINE_RWLOCK(tipc_net_lock); -static struct _zone *tipc_zones[256] = { NULL, }; -struct network tipc_net = { tipc_zones }; +struct network tipc_net; -struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) +static int net_start(void) { - return tipc_zone_select_remote_node(tipc_net.zones[tipc_zone(addr)], addr, ref); -} - -u32 tipc_net_select_router(u32 addr, u32 ref) -{ - return tipc_zone_select_router(tipc_net.zones[tipc_zone(addr)], addr, ref); -} - -void tipc_net_remove_as_router(u32 router) -{ - u32 z_num; - - for (z_num = 1; z_num <= tipc_max_zones; z_num++) { - if (!tipc_net.zones[z_num]) - continue; - tipc_zone_remove_as_router(tipc_net.zones[z_num], router); - } -} - -void tipc_net_send_external_routes(u32 dest) -{ - u32 z_num; + tipc_net.nodes = kcalloc(tipc_max_nodes + 1, + sizeof(*tipc_net.nodes), GFP_ATOMIC); + tipc_net.highest_node = 0; - for (z_num = 1; z_num <= tipc_max_zones; z_num++) { - if (tipc_net.zones[z_num]) - tipc_zone_send_external_routes(tipc_net.zones[z_num], dest); - } + return tipc_net.nodes ? 0 : -ENOMEM; } static void net_stop(void) { - u32 z_num; + u32 n_num; - for (z_num = 1; z_num <= tipc_max_zones; z_num++) - tipc_zone_delete(tipc_net.zones[z_num]); + for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) + tipc_node_delete(tipc_net.nodes[n_num]); + kfree(tipc_net.nodes); + tipc_net.nodes = NULL; } static void net_route_named_msg(struct sk_buff *buf) @@ -165,22 +136,18 @@ static void net_route_named_msg(struct sk_buff *buf) u32 dport; if (!msg_named(msg)) { - msg_dbg(msg, "tipc_net->drop_nam:"); buf_discard(buf); return; } dnode = addr_domain(msg_lookup_scope(msg)); dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode); - dbg("tipc_net->lookup<%u,%u>-><%u,%x>\n", - msg_nametype(msg), msg_nameinst(msg), dport, dnode); if (dport) { msg_set_destnode(msg, dnode); msg_set_destport(msg, dport); tipc_net_route_msg(buf); return; } - msg_dbg(msg, "tipc_net->rej:NO NAME: "); tipc_reject_msg(buf, TIPC_ERR_NO_NAME); } @@ -196,18 +163,14 @@ void tipc_net_route_msg(struct sk_buff *buf) msg_incr_reroute_cnt(msg); if (msg_reroute_cnt(msg) > 6) { if (msg_errcode(msg)) { - msg_dbg(msg, "NET>DISC>:"); buf_discard(buf); } else { - msg_dbg(msg, "NET>REJ>:"); tipc_reject_msg(buf, msg_destport(msg) ? TIPC_ERR_NO_PORT : TIPC_ERR_NO_NAME); } return; } - msg_dbg(msg, "tipc_net->rout: "); - /* Handle message for this node */ dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg); if (tipc_in_scope(dnode, tipc_own_addr)) { @@ -221,9 +184,6 @@ void tipc_net_route_msg(struct sk_buff *buf) return; } switch (msg_user(msg)) { - case ROUTE_DISTRIBUTOR: - tipc_cltr_recv_routing_table(buf); - break; case NAME_DISTRIBUTOR: tipc_named_recv(buf); break; @@ -231,14 +191,12 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_port_recv_proto_msg(buf); break; default: - msg_dbg(msg,"DROP/NET/<REC<"); buf_discard(buf); } return; } /* Handle message for another node */ - msg_dbg(msg, "NET>SEND>: "); skb_trim(buf, msg_size(msg)); tipc_link_send(buf, dnode, msg_link_selector(msg)); } @@ -259,10 +217,12 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - if ((res = tipc_cltr_init()) || - (res = tipc_bclink_init())) { + res = net_start(); + if (res) + return res; + res = tipc_bclink_init(); + if (res) return res; - } tipc_k_signal((Handler)tipc_subscr_start, 0); tipc_k_signal((Handler)tipc_cfg_init, 0); diff --git a/net/tipc/net.h b/net/tipc/net.h index de2b9ad8f646..4ae59ad04893 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,26 +37,26 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -struct _zone; +struct tipc_node; /** * struct network - TIPC network structure - * @zones: array of pointers to all zones within network + * @nodes: array of pointers to all nodes within cluster + * @highest_node: id of highest numbered node within cluster + * @links: number of (unicast) links to cluster */ struct network { - struct _zone **zones; + struct tipc_node **nodes; + u32 highest_node; + u32 links; }; extern struct network tipc_net; extern rwlock_t tipc_net_lock; -void tipc_net_remove_as_router(u32 router); -void tipc_net_send_external_routes(u32 dest); void tipc_net_route_msg(struct sk_buff *buf); -struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref); -u32 tipc_net_select_router(u32 addr, u32 ref); int tipc_net_start(u32 addr); void tipc_net_stop(void); diff --git a/net/tipc/node.c b/net/tipc/node.c index b4d87eb2dc5d..3af53e327f49 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -37,25 +37,14 @@ #include "core.h" #include "config.h" #include "node.h" -#include "cluster.h" -#include "net.h" -#include "addr.h" -#include "node_subscr.h" -#include "link.h" -#include "port.h" -#include "bearer.h" #include "name_distr.h" -void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str); static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); -/* sorted list of nodes within cluster */ -static struct tipc_node *tipc_nodes = NULL; - static DEFINE_SPINLOCK(node_create_lock); -u32 tipc_own_tag = 0; +u32 tipc_own_tag; /** * tipc_node_create - create neighboring node @@ -69,65 +58,51 @@ u32 tipc_own_tag = 0; struct tipc_node *tipc_node_create(u32 addr) { - struct cluster *c_ptr; struct tipc_node *n_ptr; - struct tipc_node **curr_node; + u32 n_num; spin_lock_bh(&node_create_lock); - for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { - if (addr < n_ptr->addr) - break; - if (addr == n_ptr->addr) { - spin_unlock_bh(&node_create_lock); - return n_ptr; - } + n_ptr = tipc_node_find(addr); + if (n_ptr) { + spin_unlock_bh(&node_create_lock); + return n_ptr; } - n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC); + n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); if (!n_ptr) { spin_unlock_bh(&node_create_lock); warn("Node creation failed, no memory\n"); return NULL; } - c_ptr = tipc_cltr_find(addr); - if (!c_ptr) { - c_ptr = tipc_cltr_create(addr); - } - if (!c_ptr) { - spin_unlock_bh(&node_create_lock); - kfree(n_ptr); - return NULL; - } - n_ptr->addr = addr; - spin_lock_init(&n_ptr->lock); + spin_lock_init(&n_ptr->lock); INIT_LIST_HEAD(&n_ptr->nsub); - n_ptr->owner = c_ptr; - tipc_cltr_attach_node(c_ptr, n_ptr); - n_ptr->last_router = -1; - - /* Insert node into ordered list */ - for (curr_node = &tipc_nodes; *curr_node; - curr_node = &(*curr_node)->next) { - if (addr < (*curr_node)->addr) { - n_ptr->next = *curr_node; - break; - } - } - (*curr_node) = n_ptr; + + n_num = tipc_node(addr); + tipc_net.nodes[n_num] = n_ptr; + if (n_num > tipc_net.highest_node) + tipc_net.highest_node = n_num; + spin_unlock_bh(&node_create_lock); return n_ptr; } void tipc_node_delete(struct tipc_node *n_ptr) { + u32 n_num; + if (!n_ptr) return; - dbg("node %x deleted\n", n_ptr->addr); + n_num = tipc_node(n_ptr->addr); + tipc_net.nodes[n_num] = NULL; kfree(n_ptr); + + while (!tipc_net.nodes[tipc_net.highest_node]) + if (--tipc_net.highest_node == 0) + break; } @@ -147,7 +122,6 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr) l_ptr->name, l_ptr->b_ptr->net_plane); if (!active[0]) { - dbg(" link %x into %x/%x\n", l_ptr, &active[0], &active[1]); active[0] = active[1] = l_ptr; node_established_contact(n_ptr); return; @@ -236,14 +210,9 @@ int tipc_node_has_redundant_links(struct tipc_node *n_ptr) return n_ptr->working_links > 1; } -static int tipc_node_has_active_routes(struct tipc_node *n_ptr) -{ - return n_ptr && (n_ptr->last_router >= 0); -} - int tipc_node_is_up(struct tipc_node *n_ptr) { - return tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr); + return tipc_node_has_active_links(n_ptr); } struct tipc_node *tipc_node_attach_link(struct link *l_ptr) @@ -264,7 +233,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) if (!n_ptr->links[bearer_id]) { n_ptr->links[bearer_id] = l_ptr; - tipc_net.zones[tipc_zone(l_ptr->addr)]->links++; + tipc_net.links++; n_ptr->link_cnt++; return n_ptr; } @@ -278,7 +247,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = NULL; - tipc_net.zones[tipc_zone(l_ptr->addr)]->links--; + tipc_net.links--; n_ptr->link_cnt--; } @@ -330,48 +299,16 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) static void node_established_contact(struct tipc_node *n_ptr) { - struct cluster *c_ptr; - - dbg("node_established_contact:-> %x\n", n_ptr->addr); - if (!tipc_node_has_active_routes(n_ptr) && in_own_cluster(n_ptr->addr)) { - tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); - } + tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); /* Syncronize broadcast acks */ n_ptr->bclink.acked = tipc_bclink_get_last_sent(); - if (is_slave(tipc_own_addr)) - return; - if (!in_own_cluster(n_ptr->addr)) { - /* Usage case 1 (see above) */ - c_ptr = tipc_cltr_find(tipc_own_addr); - if (!c_ptr) - c_ptr = tipc_cltr_create(tipc_own_addr); - if (c_ptr) - tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, 1, - tipc_max_nodes); - return; - } - - c_ptr = n_ptr->owner; - if (is_slave(n_ptr->addr)) { - /* Usage case 2 (see above) */ - tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, 1, tipc_max_nodes); - tipc_cltr_send_local_routes(c_ptr, n_ptr->addr); - return; - } - if (n_ptr->bclink.supported) { - tipc_nmap_add(&tipc_cltr_bcast_nodes, n_ptr->addr); + tipc_nmap_add(&tipc_bcast_nmap, n_ptr->addr); if (n_ptr->addr < tipc_own_addr) tipc_own_tag++; } - - /* Case 3 (see above) */ - tipc_net_send_external_routes(n_ptr->addr); - tipc_cltr_send_slave_routes(c_ptr, n_ptr->addr); - tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, LOWEST_SLAVE, - tipc_highest_allowed_slave); } static void node_cleanup_finished(unsigned long node_addr) @@ -390,7 +327,6 @@ static void node_cleanup_finished(unsigned long node_addr) static void node_lost_contact(struct tipc_node *n_ptr) { - struct cluster *c_ptr; struct tipc_node_subscr *ns, *tns; char addr_string[16]; u32 i; @@ -398,7 +334,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Clean up broadcast reception remains */ n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0; while (n_ptr->bclink.deferred_head) { - struct sk_buff* buf = n_ptr->bclink.deferred_head; + struct sk_buff *buf = n_ptr->bclink.deferred_head; n_ptr->bclink.deferred_head = buf->next; buf_discard(buf); } @@ -406,41 +342,14 @@ static void node_lost_contact(struct tipc_node *n_ptr) buf_discard(n_ptr->bclink.defragm); n_ptr->bclink.defragm = NULL; } - if (in_own_cluster(n_ptr->addr) && n_ptr->bclink.supported) { - tipc_bclink_acknowledge(n_ptr, mod(n_ptr->bclink.acked + 10000)); - } - /* Update routing tables */ - if (is_slave(tipc_own_addr)) { - tipc_net_remove_as_router(n_ptr->addr); - } else { - if (!in_own_cluster(n_ptr->addr)) { - /* Case 4 (see above) */ - c_ptr = tipc_cltr_find(tipc_own_addr); - tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1, - tipc_max_nodes); - } else { - /* Case 5 (see above) */ - c_ptr = tipc_cltr_find(n_ptr->addr); - if (is_slave(n_ptr->addr)) { - tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1, - tipc_max_nodes); - } else { - if (n_ptr->bclink.supported) { - tipc_nmap_remove(&tipc_cltr_bcast_nodes, - n_ptr->addr); - if (n_ptr->addr < tipc_own_addr) - tipc_own_tag--; - } - tipc_net_remove_as_router(n_ptr->addr); - tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, - LOWEST_SLAVE, - tipc_highest_allowed_slave); - } - } + if (n_ptr->bclink.supported) { + tipc_bclink_acknowledge(n_ptr, + mod(n_ptr->bclink.acked + 10000)); + tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr); + if (n_ptr->addr < tipc_own_addr) + tipc_own_tag--; } - if (tipc_node_has_active_routes(n_ptr)) - return; info("Lost contact with %s\n", tipc_addr_string_fill(addr_string, n_ptr->addr)); @@ -469,125 +378,6 @@ static void node_lost_contact(struct tipc_node *n_ptr) tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr); } -/** - * tipc_node_select_next_hop - find the next-hop node for a message - * - * Called by when cluster local lookup has failed. - */ - -struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector) -{ - struct tipc_node *n_ptr; - u32 router_addr; - - if (!tipc_addr_domain_valid(addr)) - return NULL; - - /* Look for direct link to destination processsor */ - n_ptr = tipc_node_find(addr); - if (n_ptr && tipc_node_has_active_links(n_ptr)) - return n_ptr; - - /* Cluster local system nodes *must* have direct links */ - if (!is_slave(addr) && in_own_cluster(addr)) - return NULL; - - /* Look for cluster local router with direct link to node */ - router_addr = tipc_node_select_router(n_ptr, selector); - if (router_addr) - return tipc_node_select(router_addr, selector); - - /* Slave nodes can only be accessed within own cluster via a - known router with direct link -- if no router was found,give up */ - if (is_slave(addr)) - return NULL; - - /* Inter zone/cluster -- find any direct link to remote cluster */ - addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0); - n_ptr = tipc_net_select_remote_node(addr, selector); - if (n_ptr && tipc_node_has_active_links(n_ptr)) - return n_ptr; - - /* Last resort -- look for any router to anywhere in remote zone */ - router_addr = tipc_net_select_router(addr, selector); - if (router_addr) - return tipc_node_select(router_addr, selector); - - return NULL; -} - -/** - * tipc_node_select_router - select router to reach specified node - * - * Uses a deterministic and fair algorithm for selecting router node. - */ - -u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref) -{ - u32 ulim; - u32 mask; - u32 start; - u32 r; - - if (!n_ptr) - return 0; - - if (n_ptr->last_router < 0) - return 0; - ulim = ((n_ptr->last_router + 1) * 32) - 1; - - /* Start entry must be random */ - mask = tipc_max_nodes; - while (mask > ulim) - mask >>= 1; - start = ref & mask; - r = start; - - /* Lookup upwards with wrap-around */ - do { - if (((n_ptr->routers[r / 32]) >> (r % 32)) & 1) - break; - } while (++r <= ulim); - if (r > ulim) { - r = 1; - do { - if (((n_ptr->routers[r / 32]) >> (r % 32)) & 1) - break; - } while (++r < start); - assert(r != start); - } - assert(r && (r <= ulim)); - return tipc_addr(own_zone(), own_cluster(), r); -} - -void tipc_node_add_router(struct tipc_node *n_ptr, u32 router) -{ - u32 r_num = tipc_node(router); - - n_ptr->routers[r_num / 32] = - ((1 << (r_num % 32)) | n_ptr->routers[r_num / 32]); - n_ptr->last_router = tipc_max_nodes / 32; - while ((--n_ptr->last_router >= 0) && - !n_ptr->routers[n_ptr->last_router]); -} - -void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router) -{ - u32 r_num = tipc_node(router); - - if (n_ptr->last_router < 0) - return; /* No routes */ - - n_ptr->routers[r_num / 32] = - ((~(1 << (r_num % 32))) & (n_ptr->routers[r_num / 32])); - n_ptr->last_router = tipc_max_nodes / 32; - while ((--n_ptr->last_router >= 0) && - !n_ptr->routers[n_ptr->last_router]); - - if (!tipc_node_is_up(n_ptr)) - node_lost_contact(n_ptr); -} - struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) { u32 domain; @@ -595,6 +385,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) struct tipc_node *n_ptr; struct tipc_node_info node_info; u32 payload_size; + u32 n_num; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -605,15 +396,15 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) " (network address)"); read_lock_bh(&tipc_net_lock); - if (!tipc_nodes) { + if (!tipc_net.nodes) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } - /* For now, get space for all other nodes - (will need to modify this when slave nodes are supported */ + /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1); + payload_size = TLV_SPACE(sizeof(node_info)) * + (tipc_net.highest_node - 1); if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -627,8 +418,9 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for all nodes in scope */ - for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { - if (!tipc_in_scope(domain, n_ptr->addr)) + for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { + n_ptr = tipc_net.nodes[n_num]; + if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); node_info.up = htonl(tipc_node_is_up(n_ptr)); @@ -647,6 +439,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) struct tipc_node *n_ptr; struct tipc_link_info link_info; u32 payload_size; + u32 n_num; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -663,8 +456,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Get space for all unicast links + multicast link */ - payload_size = TLV_SPACE(sizeof(link_info)) * - (tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1); + payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.links + 1); if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -685,10 +477,11 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for any other links in scope */ - for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { + for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { u32 i; - if (!tipc_in_scope(domain, n_ptr->addr)) + n_ptr = tipc_net.nodes[n_num]; + if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) continue; tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { diff --git a/net/tipc/node.h b/net/tipc/node.h index fff331b2d26c..206a8efa410e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -39,14 +39,13 @@ #include "node_subscr.h" #include "addr.h" -#include "cluster.h" +#include "net.h" #include "bearer.h" /** * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure - * @owner: pointer to cluster that node belongs to * @next: pointer to next node in sorted list of cluster's nodes * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node @@ -55,8 +54,6 @@ * @cleanup_required: non-zero if cleaning up after a prior loss of contact * @link_cnt: number of links to node * @permit_changeover: non-zero if node has redundant links to this system - * @routers: bitmap (used for multicluster communication) - * @last_router: (used for multicluster communication) * @bclink: broadcast-related info * @supported: non-zero if node supports TIPC b'cast capability * @acked: sequence # of last outbound b'cast message acknowledged by node @@ -72,7 +69,6 @@ struct tipc_node { u32 addr; spinlock_t lock; - struct cluster *owner; struct tipc_node *next; struct list_head nsub; struct link *active_links[2]; @@ -81,8 +77,6 @@ struct tipc_node { int working_links; int cleanup_required; int permit_changeover; - u32 routers[512/32]; - int last_router; struct { int supported; u32 acked; @@ -106,34 +100,17 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr); int tipc_node_has_active_links(struct tipc_node *n_ptr); int tipc_node_has_redundant_links(struct tipc_node *n_ptr); -u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref); -struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector); int tipc_node_is_up(struct tipc_node *n_ptr); -void tipc_node_add_router(struct tipc_node *n_ptr, u32 router); -void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); static inline struct tipc_node *tipc_node_find(u32 addr) { if (likely(in_own_cluster(addr))) - return tipc_local_nodes[tipc_node(addr)]; - else if (tipc_addr_domain_valid(addr)) { - struct cluster *c_ptr = tipc_cltr_find(addr); - - if (c_ptr) - return c_ptr->nodes[tipc_node(addr)]; - } + return tipc_net.nodes[tipc_node(addr)]; return NULL; } -static inline struct tipc_node *tipc_node_select(u32 addr, u32 selector) -{ - if (likely(in_own_cluster(addr))) - return tipc_local_nodes[tipc_node(addr)]; - return tipc_node_select_next_hop(addr, selector); -} - static inline void tipc_node_lock(struct tipc_node *n_ptr) { spin_lock_bh(&n_ptr->lock); diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 19194d476a9e..018a55332d91 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -35,10 +35,8 @@ */ #include "core.h" -#include "dbg.h" #include "node_subscr.h" #include "node.h" -#include "addr.h" /** * tipc_nodesub_subscribe - create "node down" subscription for specified node diff --git a/net/tipc/port.c b/net/tipc/port.c index 82092eaa1536..067bab2a0b98 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -36,15 +36,8 @@ #include "core.h" #include "config.h" -#include "dbg.h" #include "port.h" -#include "addr.h" -#include "link.h" -#include "node.h" #include "name_table.h" -#include "user_reg.h" -#include "msg.h" -#include "bcast.h" /* Connection management: */ #define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ @@ -53,16 +46,16 @@ #define MAX_REJECT_SIZE 1024 -static struct sk_buff *msg_queue_head = NULL; -static struct sk_buff *msg_queue_tail = NULL; +static struct sk_buff *msg_queue_head; +static struct sk_buff *msg_queue_tail; DEFINE_SPINLOCK(tipc_port_list_lock); static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); -static struct sk_buff* port_build_self_abort_msg(struct port *,u32 err); -static struct sk_buff* port_build_peer_abort_msg(struct port *,u32 err); +static struct sk_buff *port_build_self_abort_msg(struct port *, u32 err); +static struct sk_buff *port_build_peer_abort_msg(struct port *, u32 err); static void port_timeout(unsigned long ref); @@ -94,7 +87,7 @@ static void port_incr_out_seqno(struct port *p_ptr) * tipc_multicast - send a multicast message to local and remote destinations */ -int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain, +int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 num_sect, struct iovec const *msg_sect) { struct tipc_msg *hdr; @@ -138,9 +131,8 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain, } } res = tipc_bclink_send_msg(buf); - if ((res < 0) && (dports.count != 0)) { + if ((res < 0) && (dports.count != 0)) buf_discard(ibuf); - } } else { ibuf = buf; } @@ -162,7 +154,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain, void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp) { - struct tipc_msg* msg; + struct tipc_msg *msg; struct port_list dports = {0, NULL, }; struct port_list *item = dp; int cnt = 0; @@ -195,13 +187,11 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp) if (b == NULL) { warn("Unable to deliver multicast message(s)\n"); - msg_dbg(msg, "LOST:"); goto exit; } - if ((index == 0) && (cnt != 0)) { + if ((index == 0) && (cnt != 0)) item = item->next; - } - msg_set_destport(buf_msg(b),item->ports[index]); + msg_set_destport(buf_msg(b), item->ports[index]); tipc_port_recv_msg(b); } } @@ -277,10 +267,7 @@ int tipc_deleteport(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); } - if (p_ptr->user_port) { - tipc_reg_remove_port(p_ptr->user_port); - kfree(p_ptr->user_port); - } + kfree(p_ptr->user_port); spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); @@ -288,7 +275,6 @@ int tipc_deleteport(u32 ref) spin_unlock_bh(&tipc_port_list_lock); k_term_timer(&p_ptr->timer); kfree(p_ptr); - dbg("Deleted port %u\n", ref); tipc_net_route_msg(buf); return 0; } @@ -374,7 +360,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, msg_set_orignode(msg, orignode); msg_set_transp_seqno(msg, seqno); msg_set_msgcnt(msg, ack); - msg_dbg(msg, "PORT>SEND>:"); } return buf; } @@ -392,7 +377,6 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) data_sz = MAX_REJECT_SIZE; if (msg_connected(msg) && (imp < TIPC_CRITICAL_IMPORTANCE)) imp++; - msg_dbg(msg, "port->rej: "); /* discard rejected message if it shouldn't be returned to sender */ if (msg_errcode(msg) || msg_dest_droppable(msg)) { @@ -498,7 +482,7 @@ static void port_timeout(unsigned long ref) static void port_handle_node_down(unsigned long ref) { struct port *p_ptr = tipc_port_lock(ref); - struct sk_buff* buf = NULL; + struct sk_buff *buf = NULL; if (!p_ptr) return; @@ -555,8 +539,6 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf) struct sk_buff *r_buf = NULL; struct sk_buff *abort_buf = NULL; - msg_dbg(msg, "PORT<RECV<:"); - if (!p_ptr) { err = TIPC_ERR_NO_PORT; } else if (p_ptr->publ.connected) { @@ -636,8 +618,7 @@ static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id) tipc_printf(buf, " via {%u,%u}", p_ptr->publ.conn_type, p_ptr->publ.conn_instance); - } - else if (p_ptr->publ.published) { + } else if (p_ptr->publ.published) { tipc_printf(buf, " bound to"); list_for_each_entry(publ, &p_ptr->publications, pport_list) { if (publ->lower == publ->upper) @@ -940,12 +921,10 @@ void tipc_acknowledge(u32 ref, u32 ack) } /* - * tipc_createport(): user level call. Will add port to - * registry if non-zero user_ref. + * tipc_createport(): user level call. */ -int tipc_createport(u32 user_ref, - void *usr_handle, +int tipc_createport(void *usr_handle, unsigned int importance, tipc_msg_err_event error_cb, tipc_named_msg_err_event named_error_cb, @@ -972,7 +951,6 @@ int tipc_createport(u32 user_ref, } p_ptr->user_port = up_ptr; - up_ptr->user_ref = user_ref; up_ptr->usr_handle = usr_handle; up_ptr->ref = p_ptr->publ.ref; up_ptr->err_cb = error_cb; @@ -982,20 +960,11 @@ int tipc_createport(u32 user_ref, up_ptr->named_msg_cb = named_msg_cb; up_ptr->conn_msg_cb = conn_msg_cb; up_ptr->continue_event_cb = continue_event_cb; - INIT_LIST_HEAD(&up_ptr->uport_list); - tipc_reg_add_port(up_ptr); *portref = p_ptr->publ.ref; tipc_port_unlock(p_ptr); return 0; } -int tipc_ownidentity(u32 ref, struct tipc_portid *id) -{ - id->ref = ref; - id->node = tipc_own_addr; - return 0; -} - int tipc_portimportance(u32 ref, unsigned int *importance) { struct port *p_ptr; @@ -1035,9 +1004,6 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) if (!p_ptr) return -EINVAL; - dbg("tipc_publ %u, p_ptr = %x, conn = %x, scope = %x, " - "lower = %u, upper = %u\n", - ref, p_ptr, p_ptr->publ.connected, scope, seq->lower, seq->upper); if (p_ptr->publ.connected) goto exit; if (seq->lower > seq->upper) @@ -1123,17 +1089,14 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer) msg_set_origport(msg, p_ptr->publ.ref); msg_set_transp_seqno(msg, 42); msg_set_type(msg, TIPC_CONN_MSG); - if (!may_route(peer->node)) - msg_set_hdr_sz(msg, SHORT_H_SIZE); - else - msg_set_hdr_sz(msg, LONG_H_SIZE); + msg_set_hdr_sz(msg, SHORT_H_SIZE); p_ptr->probing_interval = PROBING_INTERVAL; p_ptr->probing_state = CONFIRMED; p_ptr->publ.connected = 1; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); - tipc_nodesub_subscribe(&p_ptr->subscription,peer->node, + tipc_nodesub_subscribe(&p_ptr->subscription, peer->node, (void *)(unsigned long)ref, (net_ev_handler)port_handle_node_down); res = 0; @@ -1271,16 +1234,11 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) } /** - * tipc_forward2name - forward message sections to port name + * tipc_send2name - send message sections to port name */ -static int tipc_forward2name(u32 ref, - struct tipc_name const *name, - u32 domain, - u32 num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *orig, - unsigned int importance) +int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, + unsigned int num_sect, struct iovec const *msg_sect) { struct port *p_ptr; struct tipc_msg *msg; @@ -1294,14 +1252,12 @@ static int tipc_forward2name(u32 ref, msg = &p_ptr->publ.phdr; msg_set_type(msg, TIPC_NAMED_MSG); - msg_set_orignode(msg, orig->node); - msg_set_origport(msg, orig->ref); + msg_set_orignode(msg, tipc_own_addr); + msg_set_origport(msg, ref); msg_set_hdr_sz(msg, LONG_H_SIZE); msg_set_nametype(msg, name->type); msg_set_nameinst(msg, name->instance); msg_set_lookup_scope(msg, tipc_addr_scope(domain)); - if (importance <= TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg,importance); destport = tipc_nametbl_translate(name->type, name->instance, &destnode); msg_set_destnode(msg, destnode); msg_set_destport(msg, destport); @@ -1325,33 +1281,11 @@ static int tipc_forward2name(u32 ref, } /** - * tipc_send2name - send message sections to port name - */ - -int tipc_send2name(u32 ref, - struct tipc_name const *name, - unsigned int domain, - unsigned int num_sect, - struct iovec const *msg_sect) -{ - struct tipc_portid orig; - - orig.ref = ref; - orig.node = tipc_own_addr; - return tipc_forward2name(ref, name, domain, num_sect, msg_sect, &orig, - TIPC_PORT_IMPORTANCE); -} - -/** - * tipc_forward2port - forward message sections to port identity + * tipc_send2port - send message sections to port identity */ -static int tipc_forward2port(u32 ref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *orig, - unsigned int importance) +int tipc_send2port(u32 ref, struct tipc_portid const *dest, + unsigned int num_sect, struct iovec const *msg_sect) { struct port *p_ptr; struct tipc_msg *msg; @@ -1363,13 +1297,11 @@ static int tipc_forward2port(u32 ref, msg = &p_ptr->publ.phdr; msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_orignode(msg, orig->node); - msg_set_origport(msg, orig->ref); + msg_set_orignode(msg, tipc_own_addr); + msg_set_origport(msg, ref); msg_set_destnode(msg, dest->node); msg_set_destport(msg, dest->ref); msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); - if (importance <= TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg, importance); p_ptr->sent++; if (dest->node == tipc_own_addr) return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); @@ -1384,31 +1316,11 @@ static int tipc_forward2port(u32 ref, } /** - * tipc_send2port - send message sections to port identity + * tipc_send_buf2port - send message buffer to port identity */ -int tipc_send2port(u32 ref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect) -{ - struct tipc_portid orig; - - orig.ref = ref; - orig.node = tipc_own_addr; - return tipc_forward2port(ref, dest, num_sect, msg_sect, &orig, - TIPC_PORT_IMPORTANCE); -} - -/** - * tipc_forward_buf2port - forward message buffer to port identity - */ -static int tipc_forward_buf2port(u32 ref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance) +int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, + struct sk_buff *buf, unsigned int dsz) { struct port *p_ptr; struct tipc_msg *msg; @@ -1420,20 +1332,17 @@ static int tipc_forward_buf2port(u32 ref, msg = &p_ptr->publ.phdr; msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_orignode(msg, orig->node); - msg_set_origport(msg, orig->ref); + msg_set_orignode(msg, tipc_own_addr); + msg_set_origport(msg, ref); msg_set_destnode(msg, dest->node); msg_set_destport(msg, dest->ref); msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); - if (importance <= TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg, importance); msg_set_size(msg, DIR_MSG_H_SIZE + dsz); if (skb_cow(buf, DIR_MSG_H_SIZE)) return -ENOMEM; skb_push(buf, DIR_MSG_H_SIZE); skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE); - msg_dbg(msg, "buf2port: "); p_ptr->sent++; if (dest->node == tipc_own_addr) return tipc_port_recv_msg(buf); @@ -1445,20 +1354,3 @@ static int tipc_forward_buf2port(u32 ref, return -ELINKCONG; } -/** - * tipc_send_buf2port - send message buffer to port identity - */ - -int tipc_send_buf2port(u32 ref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz) -{ - struct tipc_portid orig; - - orig.ref = ref; - orig.node = tipc_own_addr; - return tipc_forward_buf2port(ref, dest, buf, dsz, &orig, - TIPC_PORT_IMPORTANCE); -} - diff --git a/net/tipc/port.h b/net/tipc/port.h index 73bbf442b346..8e84b989949c 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -37,24 +37,52 @@ #ifndef _TIPC_PORT_H #define _TIPC_PORT_H -#include "core.h" #include "ref.h" #include "net.h" #include "msg.h" -#include "dbg.h" #include "node_subscr.h" +#define TIPC_FLOW_CONTROL_WIN 512 + +typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason, + struct tipc_portid const *attmpt_destid); + +typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason, + struct tipc_name_seq const *attmpt_dest); + +typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason); + +typedef void (*tipc_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, unsigned int importance, + struct tipc_portid const *origin); + +typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, unsigned int importance, + struct tipc_portid const *orig, + struct tipc_name_seq const *dest); + +typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size); + +typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); + /** * struct user_port - TIPC user port (used with native API) - * @user_ref: id of user who created user port * @usr_handle: user-specified field * @ref: object reference to associated TIPC port * <various callback routines> - * @uport_list: adjacent user ports in list of ports held by user */ struct user_port { - u32 user_ref; void *usr_handle; u32 ref; tipc_msg_err_event err_cb; @@ -64,7 +92,34 @@ struct user_port { tipc_named_msg_event named_msg_cb; tipc_conn_msg_event conn_msg_cb; tipc_continue_event continue_event_cb; - struct list_head uport_list; +}; + +/** + * struct tipc_port - TIPC port info available to socket API + * @usr_handle: pointer to additional user-defined information about port + * @lock: pointer to spinlock for controlling access to port + * @connected: non-zero if port is currently connected to a peer port + * @conn_type: TIPC type used when connection was established + * @conn_instance: TIPC instance used when connection was established + * @conn_unacked: number of unacknowledged messages received from peer port + * @published: non-zero if port has one or more associated names + * @congested: non-zero if cannot send because of link or port congestion + * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @ref: unique reference to port in TIPC object registry + * @phdr: preformatted message header used when sending messages + */ +struct tipc_port { + void *usr_handle; + spinlock_t *lock; + int connected; + u32 conn_type; + u32 conn_instance; + u32 conn_unacked; + int published; + u32 congested; + u32 max_pkt; + u32 ref; + struct tipc_msg phdr; }; /** @@ -109,11 +164,76 @@ struct port { extern spinlock_t tipc_port_list_lock; struct port_list; +/* + * TIPC port manipulation routines + */ +struct tipc_port *tipc_createport_raw(void *usr_handle, + u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), + void (*wakeup)(struct tipc_port *), const u32 importance); + +int tipc_reject_msg(struct sk_buff *buf, u32 err); + +int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); + +void tipc_acknowledge(u32 port_ref, u32 ack); + +int tipc_createport(void *usr_handle, + unsigned int importance, tipc_msg_err_event error_cb, + tipc_named_msg_err_event named_error_cb, + tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb, + tipc_named_msg_event named_msg_cb, + tipc_conn_msg_event conn_msg_cb, + tipc_continue_event continue_event_cb, u32 *portref); + +int tipc_deleteport(u32 portref); + +int tipc_portimportance(u32 portref, unsigned int *importance); +int tipc_set_portimportance(u32 portref, unsigned int importance); + +int tipc_portunreliable(u32 portref, unsigned int *isunreliable); +int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); + +int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); +int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); + +int tipc_publish(u32 portref, unsigned int scope, + struct tipc_name_seq const *name_seq); +int tipc_withdraw(u32 portref, unsigned int scope, + struct tipc_name_seq const *name_seq); + +int tipc_connect2port(u32 portref, struct tipc_portid const *port); + +int tipc_disconnect(u32 portref); + +int tipc_shutdown(u32 ref); + + +/* + * The following routines require that the port be locked on entry + */ +int tipc_disconnect_port(struct tipc_port *tp_ptr); + +/* + * TIPC messaging routines + */ +int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, + unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send2port(u32 portref, struct tipc_portid const *dest, + unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, + struct sk_buff *buf, unsigned int dsz); + +int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, + unsigned int section_count, struct iovec const *msg); + int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int err); struct sk_buff *tipc_port_get_ports(void); -struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space); void tipc_port_recv_proto_msg(struct sk_buff *buf); void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp); void tipc_port_reinit(void); @@ -138,7 +258,7 @@ static inline void tipc_port_unlock(struct port *p_ptr) spin_unlock_bh(p_ptr->publ.lock); } -static inline struct port* tipc_port_deref(u32 ref) +static inline struct port *tipc_port_deref(u32 ref) { return (struct port *)tipc_ref_deref(ref); } @@ -196,7 +316,6 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf) err = TIPC_ERR_NO_PORT; } reject: - dbg("port->rejecting, err = %x..\n",err); return tipc_reject_msg(buf, err); } diff --git a/net/tipc/ref.c b/net/tipc/ref.c index ab8ad32d8c20..83116892528b 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -89,7 +89,7 @@ struct ref_table { * have a reference value of 0 (although this is unlikely). */ -static struct ref_table tipc_ref_table = { NULL }; +static struct ref_table tipc_ref_table; static DEFINE_RWLOCK(ref_table_lock); @@ -178,14 +178,12 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) next_plus_upper = entry->ref; tipc_ref_table.first_free = next_plus_upper & index_mask; ref = (next_plus_upper & ~index_mask) + index; - } - else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { + } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { index = tipc_ref_table.init_point++; entry = &(tipc_ref_table.entries[index]); spin_lock_init(&entry->lock); ref = tipc_ref_table.start_mask + index; - } - else { + } else { ref = 0; } write_unlock_bh(&ref_table_lock); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 33217fc3d697..2b02a3a80313 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -34,25 +34,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/net.h> -#include <linux/socket.h> -#include <linux/errno.h> -#include <linux/mm.h> -#include <linux/poll.h> -#include <linux/fcntl.h> -#include <linux/gfp.h> -#include <asm/string.h> -#include <asm/atomic.h> #include <net/sock.h> #include <linux/tipc.h> #include <linux/tipc_config.h> -#include <net/tipc/tipc_msg.h> -#include <net/tipc/tipc_port.h> #include "core.h" +#include "port.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ @@ -80,7 +68,7 @@ static const struct proto_ops msg_ops; static struct proto tipc_proto; -static int sockets_enabled = 0; +static int sockets_enabled; static atomic_t tipc_queue_size = ATOMIC_INIT(0); @@ -387,7 +375,7 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) * * NOTE: This routine doesn't need to take the socket lock since it only * accesses socket information that is unchanging (or which changes in - * a completely predictable manner). + * a completely predictable manner). */ static int get_name(struct socket *sock, struct sockaddr *uaddr, @@ -396,6 +384,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct tipc_sock *tsock = tipc_sk(sock->sk); + memset(addr, 0, sizeof(*addr)); if (peer) { if ((sock->state != SS_CONNECTED) && ((peer != 2) || (sock->state != SS_DISCONNECTING))) @@ -403,7 +392,8 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.ref = tsock->peer_name.ref; addr->addr.id.node = tsock->peer_name.node; } else { - tipc_ownidentity(tsock->p->ref, &addr->addr.id); + addr->addr.id.ref = tsock->p->ref; + addr->addr.id.node = tipc_own_addr; } *uaddr_len = sizeof(*addr); @@ -573,37 +563,35 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, do { if (dest->addrtype == TIPC_ADDR_NAME) { - if ((res = dest_name_check(dest, m))) + res = dest_name_check(dest, m); + if (res) break; res = tipc_send2name(tport->ref, &dest->addr.name.name, dest->addr.name.domain, m->msg_iovlen, m->msg_iov); - } - else if (dest->addrtype == TIPC_ADDR_ID) { + } else if (dest->addrtype == TIPC_ADDR_ID) { res = tipc_send2port(tport->ref, &dest->addr.id, m->msg_iovlen, m->msg_iov); - } - else if (dest->addrtype == TIPC_ADDR_MCAST) { + } else if (dest->addrtype == TIPC_ADDR_MCAST) { if (needs_conn) { res = -EOPNOTSUPP; break; } - if ((res = dest_name_check(dest, m))) + res = dest_name_check(dest, m); + if (res) break; res = tipc_multicast(tport->ref, &dest->addr.nameseq, - 0, m->msg_iovlen, m->msg_iov); } if (likely(res != -ELINKCONG)) { - if (needs_conn && (res >= 0)) { + if (needs_conn && (res >= 0)) sock->state = SS_CONNECTING; - } break; } if (m->msg_flags & MSG_DONTWAIT) { @@ -662,9 +650,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, } res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov); - if (likely(res != -ELINKCONG)) { + if (likely(res != -ELINKCONG)) break; - } if (m->msg_flags & MSG_DONTWAIT) { res = -EWOULDBLOCK; break; @@ -763,7 +750,8 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, bytes_to_send = curr_left; my_iov.iov_base = curr_start; my_iov.iov_len = bytes_to_send; - if ((res = send_packet(NULL, sock, &my_msg, 0)) < 0) { + res = send_packet(NULL, sock, &my_msg, 0); + if (res < 0) { if (bytes_sent) res = bytes_sent; goto exit; @@ -823,8 +811,8 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) addr->addrtype = TIPC_ADDR_ID; addr->addr.id.ref = msg_origport(msg); addr->addr.id.node = msg_orignode(msg); - addr->addr.name.domain = 0; /* could leave uninitialized */ - addr->scope = 0; /* could leave uninitialized */ + addr->addr.name.domain = 0; /* could leave uninitialized */ + addr->scope = 0; /* could leave uninitialized */ m->msg_namelen = sizeof(struct sockaddr_tipc); } } @@ -858,12 +846,15 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, if (unlikely(err)) { anc_data[0] = err; anc_data[1] = msg_data_sz(msg); - if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data))) - return res; - if (anc_data[1] && - (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], - msg_data(msg)))) + res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data); + if (res) return res; + if (anc_data[1]) { + res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], + msg_data(msg)); + if (res) + return res; + } } /* Optionally capture message destination object */ @@ -891,9 +882,11 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, default: has_name = 0; } - if (has_name && - (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data))) - return res; + if (has_name) { + res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data); + if (res) + return res; + } return 0; } @@ -1226,42 +1219,25 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) */ if (sock->state == SS_READY) { - if (msg_connected(msg)) { - msg_dbg(msg, "dispatch filter 1\n"); + if (msg_connected(msg)) return TIPC_ERR_NO_PORT; - } } else { - if (msg_mcast(msg)) { - msg_dbg(msg, "dispatch filter 2\n"); + if (msg_mcast(msg)) return TIPC_ERR_NO_PORT; - } if (sock->state == SS_CONNECTED) { - if (!msg_connected(msg)) { - msg_dbg(msg, "dispatch filter 3\n"); + if (!msg_connected(msg)) return TIPC_ERR_NO_PORT; - } - } - else if (sock->state == SS_CONNECTING) { - if (!msg_connected(msg) && (msg_errcode(msg) == 0)) { - msg_dbg(msg, "dispatch filter 4\n"); + } else if (sock->state == SS_CONNECTING) { + if (!msg_connected(msg) && (msg_errcode(msg) == 0)) return TIPC_ERR_NO_PORT; - } - } - else if (sock->state == SS_LISTENING) { - if (msg_connected(msg) || msg_errcode(msg)) { - msg_dbg(msg, "dispatch filter 5\n"); + } else if (sock->state == SS_LISTENING) { + if (msg_connected(msg) || msg_errcode(msg)) return TIPC_ERR_NO_PORT; - } - } - else if (sock->state == SS_DISCONNECTING) { - msg_dbg(msg, "dispatch filter 6\n"); + } else if (sock->state == SS_DISCONNECTING) { return TIPC_ERR_NO_PORT; - } - else /* (sock->state == SS_UNCONNECTED) */ { - if (msg_connected(msg) || msg_errcode(msg)) { - msg_dbg(msg, "dispatch filter 7\n"); + } else /* (sock->state == SS_UNCONNECTED) */ { + if (msg_connected(msg) || msg_errcode(msg)) return TIPC_ERR_NO_PORT; - } } } @@ -1280,7 +1256,6 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) /* Enqueue message (finally!) */ - msg_dbg(msg, "<DISP<: "); TIPC_SKB_CB(buf)->handle = msg_data(msg); atomic_inc(&tipc_queue_size); __skb_queue_tail(&sk->sk_receive_queue, buf); @@ -1441,9 +1416,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, m.msg_name = dest; m.msg_namelen = destlen; res = send_msg(NULL, sock, &m, 0); - if (res < 0) { + if (res < 0) goto exit; - } /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ @@ -1465,11 +1439,10 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, advance_rx_queue(sk); } } else { - if (sock->state == SS_CONNECTED) { + if (sock->state == SS_CONNECTED) res = -EISCONN; - } else { + else res = -ECONNREFUSED; - } } } else { if (res == 0) @@ -1588,7 +1561,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) * Respond to 'SYN+' by queuing it on new socket. */ - msg_dbg(msg,"<ACC<: "); if (!msg_data_sz(msg)) { struct msghdr m = {NULL,}; @@ -1696,7 +1668,8 @@ static int setsockopt(struct socket *sock, return -ENOPROTOOPT; if (ol < sizeof(value)) return -EINVAL; - if ((res = get_user(value, (u32 __user *)ov))) + res = get_user(value, (u32 __user *)ov); + if (res) return res; lock_sock(sk); @@ -1754,7 +1727,8 @@ static int getsockopt(struct socket *sock, return put_user(0, ol); if (lvl != SOL_TIPC) return -ENOPROTOOPT; - if ((res = get_user(len, ol))) + res = get_user(len, ol); + if (res) return res; lock_sock(sk); @@ -1773,10 +1747,10 @@ static int getsockopt(struct socket *sock, value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout); /* no need to set "res", since already 0 at this point */ break; - case TIPC_NODE_RECVQ_DEPTH: + case TIPC_NODE_RECVQ_DEPTH: value = (u32)atomic_read(&tipc_queue_size); break; - case TIPC_SOCK_RECVQ_DEPTH: + case TIPC_SOCK_RECVQ_DEPTH: value = skb_queue_len(&sk->sk_receive_queue); break; default: @@ -1785,20 +1759,16 @@ static int getsockopt(struct socket *sock, release_sock(sk); - if (res) { - /* "get" failed */ - } - else if (len < sizeof(value)) { - res = -EINVAL; - } - else if (copy_to_user(ov, &value, sizeof(value))) { - res = -EFAULT; - } - else { - res = put_user(sizeof(value), ol); - } + if (res) + return res; /* "get" failed */ - return res; + if (len < sizeof(value)) + return -EINVAL; + + if (copy_to_user(ov, &value, sizeof(value))) + return -EFAULT; + + return put_user(sizeof(value), ol); } /** @@ -1806,7 +1776,7 @@ static int getsockopt(struct socket *sock, */ static const struct proto_ops msg_ops = { - .owner = THIS_MODULE, + .owner = THIS_MODULE, .family = AF_TIPC, .release = release, .bind = bind, @@ -1827,7 +1797,7 @@ static const struct proto_ops msg_ops = { }; static const struct proto_ops packet_ops = { - .owner = THIS_MODULE, + .owner = THIS_MODULE, .family = AF_TIPC, .release = release, .bind = bind, @@ -1848,7 +1818,7 @@ static const struct proto_ops packet_ops = { }; static const struct proto_ops stream_ops = { - .owner = THIS_MODULE, + .owner = THIS_MODULE, .family = AF_TIPC, .release = release, .bind = bind, @@ -1869,7 +1839,7 @@ static const struct proto_ops stream_ops = { }; static const struct net_proto_family tipc_family_ops = { - .owner = THIS_MODULE, + .owner = THIS_MODULE, .family = AF_TIPC, .create = tipc_create }; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 33313961d010..ca04479c3d42 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -35,10 +35,8 @@ */ #include "core.h" -#include "dbg.h" #include "name_table.h" #include "port.h" -#include "ref.h" #include "subscr.h" /** @@ -66,14 +64,13 @@ struct subscriber { */ struct top_srv { - u32 user_ref; u32 setup_port; atomic_t subscription_count; struct list_head subscriber_list; spinlock_t lock; }; -static struct top_srv topsrv = { 0 }; +static struct top_srv topsrv; /** * htohl - convert value to endianness used by destination @@ -252,8 +249,6 @@ static void subscr_terminate(struct subscriber *subscriber) k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); } - dbg("Term: Removing sub %u,%u,%u from subscriber %x list\n", - sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber); subscr_del(sub); } @@ -310,8 +305,6 @@ static void subscr_cancel(struct tipc_subscr *s, k_term_timer(&sub->timer); spin_lock_bh(subscriber->lock); } - dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n", - sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber); subscr_del(sub); } @@ -496,8 +489,7 @@ static void subscr_named_msg_event(void *usr_handle, /* Create server port & establish connection to subscriber */ - tipc_createport(topsrv.user_ref, - subscriber, + tipc_createport(subscriber, importance, NULL, NULL, @@ -544,21 +536,14 @@ static void subscr_named_msg_event(void *usr_handle, int tipc_subscr_start(void) { struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV}; - int res = -1; + int res; - memset(&topsrv, 0, sizeof (topsrv)); + memset(&topsrv, 0, sizeof(topsrv)); spin_lock_init(&topsrv.lock); INIT_LIST_HEAD(&topsrv.subscriber_list); spin_lock_bh(&topsrv.lock); - res = tipc_attach(&topsrv.user_ref, NULL, NULL); - if (res) { - spin_unlock_bh(&topsrv.lock); - return res; - } - - res = tipc_createport(topsrv.user_ref, - NULL, + res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE, NULL, NULL, @@ -572,16 +557,17 @@ int tipc_subscr_start(void) goto failed; res = tipc_nametbl_publish_rsv(topsrv.setup_port, TIPC_NODE_SCOPE, &seq); - if (res) + if (res) { + tipc_deleteport(topsrv.setup_port); + topsrv.setup_port = 0; goto failed; + } spin_unlock_bh(&topsrv.lock); return 0; failed: err("Failed to create subscription service\n"); - tipc_detach(topsrv.user_ref); - topsrv.user_ref = 0; spin_unlock_bh(&topsrv.lock); return res; } @@ -592,8 +578,10 @@ void tipc_subscr_stop(void) struct subscriber *subscriber_temp; spinlock_t *subscriber_lock; - if (topsrv.user_ref) { + if (topsrv.setup_port) { tipc_deleteport(topsrv.setup_port); + topsrv.setup_port = 0; + list_for_each_entry_safe(subscriber, subscriber_temp, &topsrv.subscriber_list, subscriber_list) { @@ -602,7 +590,5 @@ void tipc_subscr_stop(void) subscr_terminate(subscriber); spin_unlock_bh(subscriber_lock); } - tipc_detach(topsrv.user_ref); - topsrv.user_ref = 0; } } diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c deleted file mode 100644 index 506928803162..000000000000 --- a/net/tipc/user_reg.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * net/tipc/user_reg.c: TIPC user registry code - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" -#include "user_reg.h" - -/* - * TIPC user registry keeps track of users of the tipc_port interface. - * - * The registry utilizes an array of "TIPC user" entries; - * a user's ID is the index of their associated array entry. - * Array entry 0 is not used, so userid 0 is not valid; - * TIPC sometimes uses this value to denote an anonymous user. - * The list of free entries is initially chained from last entry to entry 1. - */ - -/** - * struct tipc_user - registered TIPC user info - * @next: index of next free registry entry (or -1 for an allocated entry) - * @callback: ptr to routine to call when TIPC mode changes (NULL if none) - * @usr_handle: user-defined value passed to callback routine - * @ports: list of user ports owned by the user - */ - -struct tipc_user { - int next; - tipc_mode_event callback; - void *usr_handle; - struct list_head ports; -}; - -#define MAX_USERID 64 -#define USER_LIST_SIZE ((MAX_USERID + 1) * sizeof(struct tipc_user)) - -static struct tipc_user *users = NULL; -static u32 next_free_user = MAX_USERID + 1; -static DEFINE_SPINLOCK(reg_lock); - -/** - * reg_init - create TIPC user registry (but don't activate it) - * - * If registry has been pre-initialized it is left "as is". - * NOTE: This routine may be called when TIPC is inactive. - */ - -static int reg_init(void) -{ - u32 i; - - spin_lock_bh(®_lock); - if (!users) { - users = kzalloc(USER_LIST_SIZE, GFP_ATOMIC); - if (users) { - for (i = 1; i <= MAX_USERID; i++) { - users[i].next = i - 1; - } - next_free_user = MAX_USERID; - } - } - spin_unlock_bh(®_lock); - return users ? 0 : -ENOMEM; -} - -/** - * reg_callback - inform TIPC user about current operating mode - */ - -static void reg_callback(struct tipc_user *user_ptr) -{ - tipc_mode_event cb; - void *arg; - - spin_lock_bh(®_lock); - cb = user_ptr->callback; - arg = user_ptr->usr_handle; - spin_unlock_bh(®_lock); - - if (cb) - cb(arg, tipc_mode, tipc_own_addr); -} - -/** - * tipc_reg_start - activate TIPC user registry - */ - -int tipc_reg_start(void) -{ - u32 u; - int res; - - if ((res = reg_init())) - return res; - - for (u = 1; u <= MAX_USERID; u++) { - if (users[u].callback) - tipc_k_signal((Handler)reg_callback, - (unsigned long)&users[u]); - } - return 0; -} - -/** - * tipc_reg_stop - shut down & delete TIPC user registry - */ - -void tipc_reg_stop(void) -{ - int id; - - if (!users) - return; - - for (id = 1; id <= MAX_USERID; id++) { - if (users[id].callback) - reg_callback(&users[id]); - } - kfree(users); - users = NULL; -} - -/** - * tipc_attach - register a TIPC user - * - * NOTE: This routine may be called when TIPC is inactive. - */ - -int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle) -{ - struct tipc_user *user_ptr; - - if ((tipc_mode == TIPC_NOT_RUNNING) && !cb) - return -ENOPROTOOPT; - if (!users) - reg_init(); - - spin_lock_bh(®_lock); - if (!next_free_user) { - spin_unlock_bh(®_lock); - return -EBUSY; - } - user_ptr = &users[next_free_user]; - *userid = next_free_user; - next_free_user = user_ptr->next; - user_ptr->next = -1; - spin_unlock_bh(®_lock); - - user_ptr->callback = cb; - user_ptr->usr_handle = usr_handle; - INIT_LIST_HEAD(&user_ptr->ports); - atomic_inc(&tipc_user_count); - - if (cb && (tipc_mode != TIPC_NOT_RUNNING)) - tipc_k_signal((Handler)reg_callback, (unsigned long)user_ptr); - return 0; -} - -/** - * tipc_detach - deregister a TIPC user - */ - -void tipc_detach(u32 userid) -{ - struct tipc_user *user_ptr; - struct list_head ports_temp; - struct user_port *up_ptr, *temp_up_ptr; - - if ((userid == 0) || (userid > MAX_USERID)) - return; - - spin_lock_bh(®_lock); - if ((!users) || (users[userid].next >= 0)) { - spin_unlock_bh(®_lock); - return; - } - - user_ptr = &users[userid]; - user_ptr->callback = NULL; - INIT_LIST_HEAD(&ports_temp); - list_splice(&user_ptr->ports, &ports_temp); - user_ptr->next = next_free_user; - next_free_user = userid; - spin_unlock_bh(®_lock); - - atomic_dec(&tipc_user_count); - - list_for_each_entry_safe(up_ptr, temp_up_ptr, &ports_temp, uport_list) { - tipc_deleteport(up_ptr->ref); - } -} - -/** - * tipc_reg_add_port - register a user's driver port - */ - -int tipc_reg_add_port(struct user_port *up_ptr) -{ - struct tipc_user *user_ptr; - - if (up_ptr->user_ref == 0) - return 0; - if (up_ptr->user_ref > MAX_USERID) - return -EINVAL; - if ((tipc_mode == TIPC_NOT_RUNNING) || !users ) - return -ENOPROTOOPT; - - spin_lock_bh(®_lock); - user_ptr = &users[up_ptr->user_ref]; - list_add(&up_ptr->uport_list, &user_ptr->ports); - spin_unlock_bh(®_lock); - return 0; -} - -/** - * tipc_reg_remove_port - deregister a user's driver port - */ - -int tipc_reg_remove_port(struct user_port *up_ptr) -{ - if (up_ptr->user_ref == 0) - return 0; - if (up_ptr->user_ref > MAX_USERID) - return -EINVAL; - if (!users ) - return -ENOPROTOOPT; - - spin_lock_bh(®_lock); - list_del_init(&up_ptr->uport_list); - spin_unlock_bh(®_lock); - return 0; -} - diff --git a/net/tipc/user_reg.h b/net/tipc/user_reg.h deleted file mode 100644 index 81dc12e2882f..000000000000 --- a/net/tipc/user_reg.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * net/tipc/user_reg.h: Include file for TIPC user registry code - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _TIPC_USER_REG_H -#define _TIPC_USER_REG_H - -#include "port.h" - -int tipc_reg_start(void); -void tipc_reg_stop(void); - -int tipc_reg_add_port(struct user_port *up_ptr); -int tipc_reg_remove_port(struct user_port *up_ptr); - -#endif diff --git a/net/tipc/zone.c b/net/tipc/zone.c deleted file mode 100644 index 83f8b5e91fc8..000000000000 --- a/net/tipc/zone.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * net/tipc/zone.c: TIPC zone management routines - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" -#include "zone.h" -#include "net.h" -#include "addr.h" -#include "node_subscr.h" -#include "cluster.h" -#include "node.h" - -struct _zone *tipc_zone_create(u32 addr) -{ - struct _zone *z_ptr; - u32 z_num; - - if (!tipc_addr_domain_valid(addr)) { - err("Zone creation failed, invalid domain 0x%x\n", addr); - return NULL; - } - - z_ptr = kzalloc(sizeof(*z_ptr), GFP_ATOMIC); - if (!z_ptr) { - warn("Zone creation failed, insufficient memory\n"); - return NULL; - } - - z_num = tipc_zone(addr); - z_ptr->addr = tipc_addr(z_num, 0, 0); - tipc_net.zones[z_num] = z_ptr; - return z_ptr; -} - -void tipc_zone_delete(struct _zone *z_ptr) -{ - u32 c_num; - - if (!z_ptr) - return; - for (c_num = 1; c_num <= tipc_max_clusters; c_num++) { - tipc_cltr_delete(z_ptr->clusters[c_num]); - } - kfree(z_ptr); -} - -void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr) -{ - u32 c_num = tipc_cluster(c_ptr->addr); - - assert(c_ptr->addr); - assert(c_num <= tipc_max_clusters); - assert(z_ptr->clusters[c_num] == NULL); - z_ptr->clusters[c_num] = c_ptr; -} - -void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router) -{ - u32 c_num; - - for (c_num = 1; c_num <= tipc_max_clusters; c_num++) { - if (z_ptr->clusters[c_num]) { - tipc_cltr_remove_as_router(z_ptr->clusters[c_num], - router); - } - } -} - -void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest) -{ - u32 c_num; - - for (c_num = 1; c_num <= tipc_max_clusters; c_num++) { - if (z_ptr->clusters[c_num]) { - if (in_own_cluster(z_ptr->addr)) - continue; - tipc_cltr_send_ext_routes(z_ptr->clusters[c_num], dest); - } - } -} - -struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref) -{ - struct cluster *c_ptr; - struct tipc_node *n_ptr; - u32 c_num; - - if (!z_ptr) - return NULL; - c_ptr = z_ptr->clusters[tipc_cluster(addr)]; - if (!c_ptr) - return NULL; - n_ptr = tipc_cltr_select_node(c_ptr, ref); - if (n_ptr) - return n_ptr; - - /* Links to any other clusters within this zone ? */ - for (c_num = 1; c_num <= tipc_max_clusters; c_num++) { - c_ptr = z_ptr->clusters[c_num]; - if (!c_ptr) - return NULL; - n_ptr = tipc_cltr_select_node(c_ptr, ref); - if (n_ptr) - return n_ptr; - } - return NULL; -} - -u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref) -{ - struct cluster *c_ptr; - u32 c_num; - u32 router; - - if (!z_ptr) - return 0; - c_ptr = z_ptr->clusters[tipc_cluster(addr)]; - router = c_ptr ? tipc_cltr_select_router(c_ptr, ref) : 0; - if (router) - return router; - - /* Links to any other clusters within the zone? */ - for (c_num = 1; c_num <= tipc_max_clusters; c_num++) { - c_ptr = z_ptr->clusters[c_num]; - router = c_ptr ? tipc_cltr_select_router(c_ptr, ref) : 0; - if (router) - return router; - } - return 0; -} diff --git a/net/tipc/zone.h b/net/tipc/zone.h deleted file mode 100644 index bd1c20ce9d06..000000000000 --- a/net/tipc/zone.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * net/tipc/zone.h: Include file for TIPC zone management routines - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _TIPC_ZONE_H -#define _TIPC_ZONE_H - -#include "node_subscr.h" -#include "net.h" - - -/** - * struct _zone - TIPC zone structure - * @addr: network address of zone - * @clusters: array of pointers to all clusters within zone - * @links: number of (unicast) links to zone - */ - -struct _zone { - u32 addr; - struct cluster *clusters[2]; /* currently limited to just 1 cluster */ - u32 links; -}; - -struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref); -u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref); -void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router); -void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest); -struct _zone *tipc_zone_create(u32 addr); -void tipc_zone_delete(struct _zone *z_ptr); -void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr); - -static inline struct _zone *tipc_zone_find(u32 addr) -{ - return tipc_net.zones[tipc_zone(addr)]; -} - -#endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7ff31c60186a..dd419d286204 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1157,7 +1157,7 @@ restart: goto restart; } - err = security_unix_stream_connect(sock, other->sk_socket, newsk); + err = security_unix_stream_connect(sk, other, newsk); if (err) { unix_state_unlock(sk); goto out_unlock; @@ -1344,9 +1344,25 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +#define MAX_RECURSION_LEVEL 4 + static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; + unsigned char max_level = 0; + int unix_sock_count = 0; + + for (i = scm->fp->count - 1; i >= 0; i--) { + struct sock *sk = unix_get_socket(scm->fp->fp[i]); + + if (sk) { + unix_sock_count++; + max_level = max(max_level, + unix_sk(sk)->recursion_level); + } + } + if (unlikely(max_level > MAX_RECURSION_LEVEL)) + return -ETOOMANYREFS; /* * Need to duplicate file references for the sake of garbage @@ -1357,9 +1373,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - for (i = scm->fp->count-1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - return 0; + if (unix_sock_count) { + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); + } + return max_level; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1394,6 +1412,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; long timeo; struct scm_cookie tmp_scm; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1432,8 +1451,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; err = unix_scm_to_skb(siocb->scm, skb, true); - if (err) + if (err < 0) goto out_free; + max_level = err + 1; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1515,6 +1535,8 @@ restart: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, len); sock_put(other); @@ -1545,6 +1567,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, int sent = 0; struct scm_cookie tmp_scm; bool fds_sent = false; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1608,10 +1631,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, /* Only send the fds in the first buffer */ err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); - if (err) { + if (err < 0) { kfree_skb(skb); goto out_err; } + max_level = err + 1; fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); @@ -1627,6 +1651,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto pipe_err_free; skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, size); sent += size; @@ -1847,6 +1873,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) { + unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index c8df6fda0b1f..f89f83bf828e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; -static struct sock *unix_get_socket(struct file *filp) +struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = filp->f_path.dentry->d_inode; @@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u) } static bool gc_in_progress = false; +#define UNIX_INFLIGHT_TRIGGER_GC 16000 void wait_for_unix_gc(void) { + /* + * If number of inflight sockets is insane, + * force a garbage collect right now. + */ + if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile index 9f188ab3dcd0..4da14bc48078 100644 --- a/net/wanrouter/Makefile +++ b/net/wanrouter/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_WAN_ROUTER) += wanrouter.o -wanrouter-objs := wanproc.o wanmain.o +wanrouter-y := wanproc.o wanmain.o diff --git a/net/wireless/Makefile b/net/wireless/Makefile index e77e508126fa..55a28ab21db9 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_WEXT_SPY) += wext-spy.o obj-$(CONFIG_WEXT_PRIV) += wext-priv.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o -cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o +cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o diff --git a/net/wireless/chan.c b/net/wireless/chan.c index d0c92dddb26b..17cd0c04d139 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -44,6 +44,38 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev, return chan; } +static bool can_beacon_sec_chan(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type) +{ + struct ieee80211_channel *sec_chan; + int diff; + + switch (channel_type) { + case NL80211_CHAN_HT40PLUS: + diff = 20; + break; + case NL80211_CHAN_HT40MINUS: + diff = -20; + break; + default: + return false; + } + + sec_chan = ieee80211_get_channel(wiphy, chan->center_freq + diff); + if (!sec_chan) + return false; + + /* we'll need a DFS capability later */ + if (sec_chan->flags & (IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_PASSIVE_SCAN | + IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR)) + return false; + + return true; +} + int cfg80211_set_freq(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, int freq, enum nl80211_channel_type channel_type) @@ -68,6 +100,28 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev, if (!chan) return -EINVAL; + /* Both channels should be able to initiate communication */ + if (wdev && (wdev->iftype == NL80211_IFTYPE_ADHOC || + wdev->iftype == NL80211_IFTYPE_AP || + wdev->iftype == NL80211_IFTYPE_AP_VLAN || + wdev->iftype == NL80211_IFTYPE_MESH_POINT || + wdev->iftype == NL80211_IFTYPE_P2P_GO)) { + switch (channel_type) { + case NL80211_CHAN_HT40PLUS: + case NL80211_CHAN_HT40MINUS: + if (!can_beacon_sec_chan(&rdev->wiphy, chan, + channel_type)) { + printk(KERN_DEBUG + "cfg80211: Secondary channel not " + "allowed to initiate communication\n"); + return -EINVAL; + } + break; + default: + break; + } + } + result = rdev->ops->set_channel(&rdev->wiphy, wdev ? wdev->netdev : NULL, chan, channel_type); diff --git a/net/wireless/core.c b/net/wireless/core.c index 9c21ebf9780e..e9a5f8ca4c27 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -4,6 +4,8 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/if.h> #include <linux/module.h> #include <linux/err.h> @@ -216,8 +218,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, rdev->wiphy.debugfsdir, rdev->wiphy.debugfsdir->d_parent, newname)) - printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", - newname); + pr_err("failed to rename debugfs dir to %s!\n", newname); nl80211_notify_dev_rename(rdev); @@ -331,6 +332,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) WARN_ON(ops->add_virtual_intf && !ops->del_virtual_intf); WARN_ON(ops->add_station && !ops->del_station); WARN_ON(ops->add_mpath && !ops->del_mpath); + WARN_ON(ops->join_mesh && !ops->leave_mesh); alloc_size = sizeof(*rdev) + sizeof_priv; @@ -699,8 +701,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, "phy80211")) { - printk(KERN_ERR "wireless: failed to add phy80211 " - "symlink to netdev!\n"); + pr_err("failed to add phy80211 symlink to netdev!\n"); } wdev->netdev = dev; wdev->sme_state = CFG80211_SME_IDLE; @@ -752,6 +753,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, cfg80211_mlme_down(rdev, dev); wdev_unlock(wdev); break; + case NL80211_IFTYPE_MESH_POINT: + cfg80211_leave_mesh(rdev, dev); + break; default: break; } @@ -775,20 +779,37 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, } cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); -#ifdef CONFIG_CFG80211_WEXT wdev_lock(wdev); switch (wdev->iftype) { +#ifdef CONFIG_CFG80211_WEXT case NL80211_IFTYPE_ADHOC: cfg80211_ibss_wext_join(rdev, wdev); break; case NL80211_IFTYPE_STATION: cfg80211_mgd_wext_connect(rdev, wdev); break; +#endif +#ifdef CONFIG_MAC80211_MESH + case NL80211_IFTYPE_MESH_POINT: + { + /* backward compat code... */ + struct mesh_setup setup; + memcpy(&setup, &default_mesh_setup, + sizeof(setup)); + /* back compat only needed for mesh_id */ + setup.mesh_id = wdev->ssid; + setup.mesh_id_len = wdev->mesh_id_up_len; + if (wdev->mesh_id_up_len) + __cfg80211_join_mesh(rdev, dev, + &setup, + &default_mesh_config); + break; + } +#endif default: break; } wdev_unlock(wdev); -#endif rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); diff --git a/net/wireless/core.h b/net/wireless/core.h index 6583cca0e2ee..26a0a084e16b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -285,6 +285,20 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid); int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +/* mesh */ +extern const struct mesh_config default_mesh_config; +extern const struct mesh_setup default_mesh_setup; +int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const struct mesh_setup *setup, + const struct mesh_config *conf); +int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const struct mesh_setup *setup, + const struct mesh_config *conf); +int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev); + /* MLME */ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -341,9 +355,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie); /* SME */ diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index 97d411f74507..3268fac5ab22 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -13,6 +13,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/ctype.h> #include <linux/ieee80211.h> @@ -224,8 +226,8 @@ int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops) return -EINVAL; found: - printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm " - "'%s'\n", ops->name); + printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm '%s'\n", + ops->name); list_del(&alg->list); spin_unlock_irqrestore(&lib80211_crypto_lock, flags); kfree(alg); @@ -270,7 +272,7 @@ static struct lib80211_crypto_ops lib80211_crypt_null = { static int __init lib80211_init(void) { - printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + pr_info(DRV_DESCRIPTION "\n"); return lib80211_register_crypto_ops(&lib80211_crypt_null); } diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 0fe40510e2cb..7ea4f2b0770e 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -10,6 +10,8 @@ * more details. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/err.h> #include <linux/module.h> #include <linux/init.h> @@ -99,8 +101,7 @@ static void *lib80211_tkip_init(int key_idx) priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_arc4)) { - printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " - "crypto API arc4\n"); + printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n")); priv->tx_tfm_arc4 = NULL; goto fail; } @@ -108,8 +109,7 @@ static void *lib80211_tkip_init(int key_idx) priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_michael)) { - printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " - "crypto API michael_mic\n"); + printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n")); priv->tx_tfm_michael = NULL; goto fail; } @@ -117,8 +117,7 @@ static void *lib80211_tkip_init(int key_idx) priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_arc4)) { - printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " - "crypto API arc4\n"); + printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n")); priv->rx_tfm_arc4 = NULL; goto fail; } @@ -126,8 +125,7 @@ static void *lib80211_tkip_init(int key_idx) priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_michael)) { - printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " - "crypto API michael_mic\n"); + printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n")); priv->rx_tfm_michael = NULL; goto fail; } @@ -536,7 +534,7 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, struct scatterlist sg[2]; if (tfm_michael == NULL) { - printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); + pr_warn("%s(): tfm_michael == NULL\n", __func__); return -1; } sg_init_table(sg, 2); diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c new file mode 100644 index 000000000000..73e39c171ffb --- /dev/null +++ b/net/wireless/mesh.c @@ -0,0 +1,142 @@ +#include <linux/ieee80211.h> +#include <net/cfg80211.h> +#include "core.h" + +/* Default values, timeouts in ms */ +#define MESH_TTL 31 +#define MESH_DEFAULT_ELEMENT_TTL 31 +#define MESH_MAX_RETR 3 +#define MESH_RET_T 100 +#define MESH_CONF_T 100 +#define MESH_HOLD_T 100 + +#define MESH_PATH_TIMEOUT 5000 + +/* + * Minimum interval between two consecutive PREQs originated by the same + * interface + */ +#define MESH_PREQ_MIN_INT 10 +#define MESH_DIAM_TRAVERSAL_TIME 50 + +/* + * A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds + * before timing out. This way it will remain ACTIVE and no data frames + * will be unnecessarily held in the pending queue. + */ +#define MESH_PATH_REFRESH_TIME 1000 +#define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME) + +/* Default maximum number of established plinks per interface */ +#define MESH_MAX_ESTAB_PLINKS 32 + +#define MESH_MAX_PREQ_RETRIES 4 + + +const struct mesh_config default_mesh_config = { + .dot11MeshRetryTimeout = MESH_RET_T, + .dot11MeshConfirmTimeout = MESH_CONF_T, + .dot11MeshHoldingTimeout = MESH_HOLD_T, + .dot11MeshMaxRetries = MESH_MAX_RETR, + .dot11MeshTTL = MESH_TTL, + .element_ttl = MESH_DEFAULT_ELEMENT_TTL, + .auto_open_plinks = true, + .dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS, + .dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT, + .dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT, + .dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME, + .dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES, + .path_refresh_time = MESH_PATH_REFRESH_TIME, + .min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT, +}; + +const struct mesh_setup default_mesh_setup = { + .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, + .path_metric = IEEE80211_PATH_METRIC_AIRTIME, + .vendor_ie = NULL, + .vendor_ie_len = 0, +}; + +int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const struct mesh_setup *setup, + const struct mesh_config *conf) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); + + ASSERT_WDEV_LOCK(wdev); + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (wdev->mesh_id_len) + return -EALREADY; + + if (!setup->mesh_id_len) + return -EINVAL; + + if (!rdev->ops->join_mesh) + return -EOPNOTSUPP; + + err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, setup); + if (!err) { + memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); + wdev->mesh_id_len = setup->mesh_id_len; + } + + return err; +} + +int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const struct mesh_setup *setup, + const struct mesh_config *conf) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_join_mesh(rdev, dev, setup, conf); + wdev_unlock(wdev); + + return err; +} + +static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + ASSERT_WDEV_LOCK(wdev); + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (!rdev->ops->leave_mesh) + return -EOPNOTSUPP; + + if (!wdev->mesh_id_len) + return -ENOTCONN; + + err = rdev->ops->leave_mesh(&rdev->wiphy, dev); + if (!err) + wdev->mesh_id_len = 0; + return err; +} + +int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_leave_mesh(rdev, dev); + wdev_unlock(wdev); + + return err; +} diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 26838d903b9a..aa5df8865ff7 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -263,6 +263,28 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_disassoc); +void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, + size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC); +} +EXPORT_SYMBOL(cfg80211_send_unprot_deauth); + +void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, + size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC); +} +EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); + static void __cfg80211_auth_remove(struct wireless_dev *wdev, const u8 *addr) { int i; @@ -864,9 +886,9 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -946,8 +968,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type, - channel_type_valid, buf, len, cookie); + return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan, + channel_type, channel_type_valid, + wait, buf, len, cookie); } bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, @@ -1028,3 +1051,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); } EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); + +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + /* Indicate roaming trigger event to user space */ + nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp); +} +EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c506241f8637..9b62710891a2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -121,8 +121,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, + [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 }, - [NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, .len = NL80211_HT_CAPABILITY_LEN }, @@ -163,10 +164,14 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, - [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, + [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, + [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, + [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, + [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -178,6 +183,14 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, [NL80211_KEY_TYPE] = { .type = NLA_U32 }, + [NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, +}; + +/* policy for the key default flags */ +static const struct nla_policy +nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = { + [NL80211_KEY_DEFAULT_TYPE_UNICAST] = { .type = NLA_FLAG }, + [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG }, }; /* ifidx get helper */ @@ -224,8 +237,8 @@ static int nl80211_prepare_netdev_dump(struct sk_buff *skb, } *rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); - if (IS_ERR(dev)) { - err = PTR_ERR(dev); + if (IS_ERR(*rdev)) { + err = PTR_ERR(*rdev); goto out_rtnl; } @@ -310,6 +323,7 @@ struct key_parse { int idx; int type; bool def, defmgmt; + bool def_uni, def_multi; }; static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) @@ -323,6 +337,13 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) k->def = !!tb[NL80211_KEY_DEFAULT]; k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT]; + if (k->def) { + k->def_uni = true; + k->def_multi = true; + } + if (k->defmgmt) + k->def_multi = true; + if (tb[NL80211_KEY_IDX]) k->idx = nla_get_u8(tb[NL80211_KEY_IDX]); @@ -345,6 +366,19 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) return -EINVAL; } + if (tb[NL80211_KEY_DEFAULT_TYPES]) { + struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; + int err = nla_parse_nested(kdt, + NUM_NL80211_KEY_DEFAULT_TYPES - 1, + tb[NL80211_KEY_DEFAULT_TYPES], + nl80211_key_default_policy); + if (err) + return err; + + k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST]; + k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST]; + } + return 0; } @@ -369,12 +403,32 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT]; k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]; + if (k->def) { + k->def_uni = true; + k->def_multi = true; + } + if (k->defmgmt) + k->def_multi = true; + if (info->attrs[NL80211_ATTR_KEY_TYPE]) { k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) return -EINVAL; } + if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { + struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; + int err = nla_parse_nested( + kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, + info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], + nl80211_key_default_policy); + if (err) + return err; + + k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST]; + k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST]; + } + return 0; } @@ -397,6 +451,11 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) if (k->def && k->defmgmt) return -EINVAL; + if (k->defmgmt) { + if (k->def_uni || !k->def_multi) + return -EINVAL; + } + if (k->idx != -1) { if (k->defmgmt) { if (k->idx < 4 || k->idx > 5) @@ -446,6 +505,8 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, goto error; def = 1; result->def = parse.idx; + if (!parse.def_uni || !parse.def_multi) + goto error; } else if (parse.defmgmt) goto error; err = cfg80211_validate_key_settings(rdev, &parse.p, @@ -526,7 +587,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.rts_threshold); NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, dev->wiphy.coverage_class); - NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, @@ -545,6 +605,22 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, + dev->wiphy.available_antennas_tx); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, + dev->wiphy.available_antennas_rx); + + if ((dev->wiphy.available_antennas_tx || + dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) { + u32 tx_ant = 0, rx_ant = 0; + int res; + res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant); + if (!res) { + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant); + } + } + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -649,19 +725,21 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(add_beacon, NEW_BEACON); CMD(add_station, NEW_STATION); CMD(add_mpath, NEW_MPATH); - CMD(set_mesh_params, SET_MESH_PARAMS); + CMD(update_mesh_config, SET_MESH_CONFIG); CMD(change_bss, SET_BSS); CMD(auth, AUTHENTICATE); CMD(assoc, ASSOCIATE); CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); + CMD(join_mesh, JOIN_MESH); CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -683,6 +761,14 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + if (dev->ops->remain_on_channel) + NLA_PUT_U32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + dev->wiphy.max_remain_on_channel_duration); + + /* for now at least assume all drivers have it */ + if (dev->ops->mgmt_tx) + NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK); + if (mgmt_stypes) { u16 stypes; struct nlattr *nl_ftypes, *nl_ifs; @@ -1024,6 +1110,35 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && + info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { + u32 tx_ant, rx_ant; + if ((!rdev->wiphy.available_antennas_tx && + !rdev->wiphy.available_antennas_rx) || + !rdev->ops->set_antenna) { + result = -EOPNOTSUPP; + goto bad_res; + } + + tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); + rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); + + /* reject antenna configurations which don't match the + * available antenna masks, except for the "all" mask */ + if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) || + (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) { + result = -EINVAL; + goto bad_res; + } + + tx_ant = tx_ant & rdev->wiphy.available_antennas_tx; + rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; + + result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); + if (result) + goto bad_res; + } + changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { @@ -1291,11 +1406,21 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_MESH_ID]) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; - params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); - params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); - change = true; + if (netif_running(dev)) + return -EBUSY; + + wdev_lock(wdev); + BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != + IEEE80211_MAX_MESH_ID_LEN); + wdev->mesh_id_up_len = + nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + wdev->mesh_id_up_len); + wdev_unlock(wdev); } if (info->attrs[NL80211_ATTR_4ADDR]) { @@ -1335,6 +1460,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; + struct net_device *dev; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; @@ -1354,12 +1480,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; - if (type == NL80211_IFTYPE_MESH_POINT && - info->attrs[NL80211_ATTR_MESH_ID]) { - params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); - params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); - } - if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); @@ -1370,11 +1490,27 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); - err = rdev->ops->add_virtual_intf(&rdev->wiphy, + dev = rdev->ops->add_virtual_intf(&rdev->wiphy, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); + if (IS_ERR(dev)) + return PTR_ERR(dev); - return err; + if (type == NL80211_IFTYPE_MESH_POINT && + info->attrs[NL80211_ATTR_MESH_ID]) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + + wdev_lock(wdev); + BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != + IEEE80211_MAX_MESH_ID_LEN); + wdev->mesh_id_up_len = + nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + wdev->mesh_id_up_len); + wdev_unlock(wdev); + } + + return 0; } static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) @@ -1519,8 +1655,6 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) struct key_parse key; int err; struct net_device *dev = info->user_ptr[1]; - int (*func)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index); err = nl80211_parse_key(info, &key); if (err) @@ -1533,27 +1667,61 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (!key.def && !key.defmgmt) return -EINVAL; - if (key.def) - func = rdev->ops->set_default_key; - else - func = rdev->ops->set_default_mgmt_key; + wdev_lock(dev->ieee80211_ptr); - if (!func) - return -EOPNOTSUPP; + if (key.def) { + if (!rdev->ops->set_default_key) { + err = -EOPNOTSUPP; + goto out; + } - wdev_lock(dev->ieee80211_ptr); - err = nl80211_key_allowed(dev->ieee80211_ptr); - if (!err) - err = func(&rdev->wiphy, dev, key.idx); + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (err) + goto out; + + if (!(rdev->wiphy.flags & + WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS)) { + if (!key.def_uni || !key.def_multi) { + err = -EOPNOTSUPP; + goto out; + } + } + + err = rdev->ops->set_default_key(&rdev->wiphy, dev, key.idx, + key.def_uni, key.def_multi); + + if (err) + goto out; #ifdef CONFIG_CFG80211_WEXT - if (!err) { - if (func == rdev->ops->set_default_key) - dev->ieee80211_ptr->wext.default_key = key.idx; - else - dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; - } + dev->ieee80211_ptr->wext.default_key = key.idx; #endif + } else { + if (key.def_uni || !key.def_multi) { + err = -EINVAL; + goto out; + } + + if (!rdev->ops->set_default_mgmt_key) { + err = -EOPNOTSUPP; + goto out; + } + + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (err) + goto out; + + err = rdev->ops->set_default_mgmt_key(&rdev->wiphy, + dev, key.idx); + if (err) + goto out; + +#ifdef CONFIG_CFG80211_WEXT + dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; +#endif + } + + out: wdev_unlock(dev->ieee80211_ptr); return err; @@ -1841,6 +2009,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_SIGNAL) NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, sinfo->signal); + if (sinfo->filled & STATION_INFO_SIGNAL_AVG) + NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, + sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); if (!txrate) @@ -2404,6 +2575,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.use_short_preamble = -1; params.use_short_slot_time = -1; params.ap_isolate = -1; + params.ht_opmode = -1; if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) params.use_cts_prot = @@ -2422,6 +2594,9 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_AP_ISOLATE]) params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]); + if (info->attrs[NL80211_ATTR_BSS_HT_OPMODE]) + params.ht_opmode = + nla_get_u16(info->attrs[NL80211_ATTR_BSS_HT_OPMODE]); if (!rdev->ops->change_bss) return -EOPNOTSUPP; @@ -2506,22 +2681,33 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) return r; } -static int nl80211_get_mesh_params(struct sk_buff *skb, - struct genl_info *info) +static int nl80211_get_mesh_config(struct sk_buff *skb, + struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct mesh_config cur_params; - int err; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct mesh_config cur_params; + int err = 0; void *hdr; struct nlattr *pinfoattr; struct sk_buff *msg; - if (!rdev->ops->get_mesh_params) + if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (!rdev->ops->get_mesh_config) return -EOPNOTSUPP; - /* Get the mesh params */ - err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, &cur_params); + wdev_lock(wdev); + /* If not connected, get default parameters */ + if (!wdev->mesh_id_len) + memcpy(&cur_params, &default_mesh_config, sizeof(cur_params)); + else + err = rdev->ops->get_mesh_config(&rdev->wiphy, dev, + &cur_params); + wdev_unlock(wdev); + if (err) return err; @@ -2530,10 +2716,10 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, - NL80211_CMD_GET_MESH_PARAMS); + NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto nla_put_failure; - pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_PARAMS); + pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); if (!pinfoattr) goto nla_put_failure; NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); @@ -2549,6 +2735,8 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, cur_params.dot11MeshMaxRetries); NLA_PUT_U8(msg, NL80211_MESHCONF_TTL, cur_params.dot11MeshTTL); + NLA_PUT_U8(msg, NL80211_MESHCONF_ELEMENT_TTL, + cur_params.element_ttl); NLA_PUT_U8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS, cur_params.auto_open_plinks); NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, @@ -2575,14 +2763,6 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, return -ENOBUFS; } -#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ -do {\ - if (table[attr_num]) {\ - cfg.param = nla_fn(table[attr_num]); \ - mask |= (1 << (attr_num - 1)); \ - } \ -} while (0);\ - static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = { [NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 }, @@ -2590,6 +2770,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 }, [NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 }, [NL80211_MESHCONF_TTL] = { .type = NLA_U8 }, + [NL80211_MESHCONF_ELEMENT_TTL] = { .type = NLA_U8 }, [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 }, [NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 }, @@ -2600,31 +2781,42 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 }, }; -static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) +static const struct nla_policy + nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = { + [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, + [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, + [NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, +}; + +static int nl80211_parse_mesh_config(struct genl_info *info, + struct mesh_config *cfg, + u32 *mask_out) { - u32 mask; - struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; - struct mesh_config cfg; struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; - struct nlattr *parent_attr; + u32 mask = 0; + +#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ +do {\ + if (table[attr_num]) {\ + cfg->param = nla_fn(table[attr_num]); \ + mask |= (1 << (attr_num - 1)); \ + } \ +} while (0);\ + - parent_attr = info->attrs[NL80211_ATTR_MESH_PARAMS]; - if (!parent_attr) + if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) return -EINVAL; if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX, - parent_attr, nl80211_meshconf_params_policy)) + info->attrs[NL80211_ATTR_MESH_CONFIG], + nl80211_meshconf_params_policy)) return -EINVAL; - if (!rdev->ops->set_mesh_params) - return -EOPNOTSUPP; - /* This makes sure that there aren't more than 32 mesh config * parameters (otherwise our bitfield scheme would not work.) */ BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); /* Fill in the params struct */ - mask = 0; FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, @@ -2637,6 +2829,8 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, mask, NL80211_MESHCONF_TTL, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, + mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, @@ -2661,12 +2855,82 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) dot11MeshHWMPRootMode, mask, NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); + if (mask_out) + *mask_out = mask; - /* Apply changes */ - return rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask); -} + return 0; #undef FILL_IN_MESH_PARAM_IF_SET +} + +static int nl80211_parse_mesh_setup(struct genl_info *info, + struct mesh_setup *setup) +{ + struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; + + if (!info->attrs[NL80211_ATTR_MESH_SETUP]) + return -EINVAL; + if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX, + info->attrs[NL80211_ATTR_MESH_SETUP], + nl80211_mesh_setup_params_policy)) + return -EINVAL; + + if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL]) + setup->path_sel_proto = + (nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])) ? + IEEE80211_PATH_PROTOCOL_VENDOR : + IEEE80211_PATH_PROTOCOL_HWMP; + + if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC]) + setup->path_metric = + (nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])) ? + IEEE80211_PATH_METRIC_VENDOR : + IEEE80211_PATH_METRIC_AIRTIME; + + if (tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]) { + struct nlattr *ieattr = + tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]; + if (!is_valid_ie_attr(ieattr)) + return -EINVAL; + setup->vendor_ie = nla_data(ieattr); + setup->vendor_ie_len = nla_len(ieattr); + } + + return 0; +} + +static int nl80211_update_mesh_config(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct mesh_config cfg; + u32 mask; + int err; + + if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (!rdev->ops->update_mesh_config) + return -EOPNOTSUPP; + + err = nl80211_parse_mesh_config(info, &cfg, &mask); + if (err) + return err; + + wdev_lock(wdev); + if (!wdev->mesh_id_len) + err = -ENOLINK; + + if (!err) + err = rdev->ops->update_mesh_config(&rdev->wiphy, dev, + mask, &cfg); + + wdev_unlock(wdev); + + return err; +} static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) { @@ -3569,6 +3833,34 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change); } +static bool +nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev, + int mcast_rate[IEEE80211_NUM_BANDS], + int rateval) +{ + struct wiphy *wiphy = &rdev->wiphy; + bool found = false; + int band, i; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; + + sband = wiphy->bands[band]; + if (!sband) + continue; + + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == rateval) { + mcast_rate[band] = i + 1; + found = true; + break; + } + } + } + + return found; +} + static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -3653,6 +3945,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_MCAST_RATE] && + !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate, + nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) + return -EINVAL; + if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { connkeys = nl80211_parse_connkeys(rdev, info->attrs[NL80211_ATTR_KEYS]); @@ -3987,7 +4284,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, * We should be on that channel for at least one jiffie, * and more than 5 seconds seems excessive. */ - if (!duration || !msecs_to_jiffies(duration) || duration > 5000) + if (!duration || !msecs_to_jiffies(duration) || + duration > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; if (!rdev->ops->remain_on_channel) @@ -4155,6 +4453,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; @@ -4180,6 +4479,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) void *hdr; u64 cookie; struct sk_buff *msg; + unsigned int wait = 0; + bool offchan; if (!info->attrs[NL80211_ATTR_FRAME] || !info->attrs[NL80211_ATTR_WIPHY_FREQ]) @@ -4193,9 +4494,16 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_DURATION]) { + if (!rdev->ops->mgmt_tx_cancel_wait) + return -EINVAL; + wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + } + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { channel_type = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); @@ -4207,6 +4515,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) channel_type_valid = true; } + offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); chan = rdev_freq_to_chan(rdev, freq, channel_type); if (chan == NULL) @@ -4223,8 +4533,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) err = PTR_ERR(hdr); goto free_msg; } - err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type, - channel_type_valid, + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type, + channel_type_valid, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), &cookie); @@ -4243,6 +4553,31 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + u64 cookie; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + if (!rdev->ops->mgmt_tx_cancel_wait) + return -EOPNOTSUPP; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, dev, cookie); +} + static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4381,6 +4716,50 @@ out: return err; } +static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct mesh_config cfg; + struct mesh_setup setup; + int err; + + /* start with default */ + memcpy(&cfg, &default_mesh_config, sizeof(cfg)); + memcpy(&setup, &default_mesh_setup, sizeof(setup)); + + if (info->attrs[NL80211_ATTR_MESH_CONFIG]) { + /* and parse parameters if given */ + err = nl80211_parse_mesh_config(info, &cfg, NULL); + if (err) + return err; + } + + if (!info->attrs[NL80211_ATTR_MESH_ID] || + !nla_len(info->attrs[NL80211_ATTR_MESH_ID])) + return -EINVAL; + + setup.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); + setup.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + + if (info->attrs[NL80211_ATTR_MESH_SETUP]) { + /* parse additional setup parameters if given */ + err = nl80211_parse_mesh_setup(info, &setup); + if (err) + return err; + } + + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); +} + +static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + + return cfg80211_leave_mesh(rdev, dev); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -4636,19 +5015,19 @@ static struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_GET_MESH_PARAMS, - .doit = nl80211_get_mesh_params, + .cmd = NL80211_CMD_GET_MESH_CONFIG, + .doit = nl80211_get_mesh_config, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { - .cmd = NL80211_CMD_SET_MESH_PARAMS, - .doit = nl80211_set_mesh_params, + .cmd = NL80211_CMD_SET_MESH_CONFIG, + .doit = nl80211_update_mesh_config, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -4816,6 +5195,14 @@ static struct genl_ops nl80211_ops[] = { NL80211_FLAG_NEED_RTNL, }, { + .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, + .doit = nl80211_tx_mgmt_cancel_wait, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { .cmd = NL80211_CMD_SET_POWER_SAVE, .doit = nl80211_set_power_save, .policy = nl80211_policy, @@ -4855,6 +5242,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_JOIN_MESH, + .doit = nl80211_join_mesh, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_LEAVE_MESH, + .doit = nl80211_leave_mesh, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -5133,6 +5536,22 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE, gfp); } +void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, + size_t len, gfp_t gfp) +{ + nl80211_send_mlme_event(rdev, netdev, buf, len, + NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp); +} + +void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, + size_t len, gfp_t gfp) +{ + nl80211_send_mlme_event(rdev, netdev, buf, len, + NL80211_CMD_UNPROT_DISASSOCIATE, gfp); +} + static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, const u8 *addr, gfp_t gfp) @@ -5651,6 +6070,51 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *pinfoattr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, peer); + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); + if (!pinfoattr) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets); + + nla_nest_end(msg, pinfoattr); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 30d2f939150d..e3f7fa886966 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -25,6 +25,12 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev, void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp); @@ -87,5 +93,9 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp); #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4b9f8912526c..37693b6ef23a 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -32,6 +32,9 @@ * rely on some SHA1 checksum of the regdomain for example. * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> @@ -48,7 +51,7 @@ #ifdef CONFIG_CFG80211_REG_DEBUG #define REG_DBG_PRINT(format, args...) \ do { \ - printk(KERN_DEBUG format , ## args); \ + printk(KERN_DEBUG pr_fmt(format), ##args); \ } while (0) #else #define REG_DBG_PRINT(args...) @@ -96,6 +99,9 @@ struct reg_beacon { struct ieee80211_channel chan; }; +static void reg_todo(struct work_struct *work); +static DECLARE_WORK(reg_work, reg_todo); + /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 5, @@ -367,11 +373,10 @@ static int call_crda(const char *alpha2) }; if (!is_world_regdom((char *) alpha2)) - printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n", + pr_info("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); else - printk(KERN_INFO "cfg80211: Calling CRDA to update world " - "regulatory domain\n"); + pr_info("Calling CRDA to update world regulatory domain\n"); /* query internal regulatory database (if it exists) */ reg_regdb_query(alpha2); @@ -656,7 +661,8 @@ static int freq_reg_info_regd(struct wiphy *wiphy, * Follow the driver's regulatory domain, if present, unless a country * IE has been processed or a user wants to help complaince further */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + if (!custom_regd && + last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && last_request->initiator != NL80211_REGDOM_SET_BY_USER && wiphy->regd) regd = wiphy->regd; @@ -711,6 +717,60 @@ int freq_reg_info(struct wiphy *wiphy, } EXPORT_SYMBOL(freq_reg_info); +#ifdef CONFIG_CFG80211_REG_DEBUG +static const char *reg_initiator_name(enum nl80211_reg_initiator initiator) +{ + switch (initiator) { + case NL80211_REGDOM_SET_BY_CORE: + return "Set by core"; + case NL80211_REGDOM_SET_BY_USER: + return "Set by user"; + case NL80211_REGDOM_SET_BY_DRIVER: + return "Set by driver"; + case NL80211_REGDOM_SET_BY_COUNTRY_IE: + return "Set by country IE"; + default: + WARN_ON(1); + return "Set by bug"; + } +} + +static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, + u32 desired_bw_khz, + const struct ieee80211_reg_rule *reg_rule) +{ + const struct ieee80211_power_rule *power_rule; + const struct ieee80211_freq_range *freq_range; + char max_antenna_gain[32]; + + power_rule = ®_rule->power_rule; + freq_range = ®_rule->freq_range; + + if (!power_rule->max_antenna_gain) + snprintf(max_antenna_gain, 32, "N/A"); + else + snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain); + + REG_DBG_PRINT("Updating information on frequency %d MHz " + "for a %d MHz width channel with regulatory rule:\n", + chan->center_freq, + KHZ_TO_MHZ(desired_bw_khz)); + + REG_DBG_PRINT("%d KHz - %d KHz @ KHz), (%s mBi, %d mBm)\n", + freq_range->start_freq_khz, + freq_range->end_freq_khz, + max_antenna_gain, + power_rule->max_eirp); +} +#else +static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, + u32 desired_bw_khz, + const struct ieee80211_reg_rule *reg_rule) +{ + return; +} +#endif + /* * Note that right now we assume the desired channel bandwidth * is always 20 MHz for each individual channel (HT40 uses 20 MHz @@ -720,7 +780,9 @@ EXPORT_SYMBOL(freq_reg_info); * on the wiphy with the target_bw specified. Then we can simply use * that below for the desired_bw_khz below. */ -static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, +static void handle_channel(struct wiphy *wiphy, + enum nl80211_reg_initiator initiator, + enum ieee80211_band band, unsigned int chan_idx) { int r; @@ -748,8 +810,27 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, desired_bw_khz, ®_rule); - if (r) + if (r) { + /* + * We will disable all channels that do not match our + * recieved regulatory rule unless the hint is coming + * from a Country IE and the Country IE had no information + * about a band. The IEEE 802.11 spec allows for an AP + * to send only a subset of the regulatory rules allowed, + * so an AP in the US that only supports 2.4 GHz may only send + * a country IE with information for the 2.4 GHz band + * while 5 GHz is still supported. + */ + if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + r == -ERANGE) + return; + + REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); + chan->flags = IEEE80211_CHAN_DISABLED; return; + } + + chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; @@ -784,7 +865,9 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) +static void handle_band(struct wiphy *wiphy, + enum ieee80211_band band, + enum nl80211_reg_initiator initiator) { unsigned int i; struct ieee80211_supported_band *sband; @@ -793,24 +876,42 @@ static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) sband = wiphy->bands[band]; for (i = 0; i < sband->n_channels; i++) - handle_channel(wiphy, band, i); + handle_channel(wiphy, initiator, band, i); } static bool ignore_reg_update(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { - if (!last_request) + if (!last_request) { + REG_DBG_PRINT("Ignoring regulatory request %s since " + "last_request is not set\n", + reg_initiator_name(initiator)); return true; + } + if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { + REG_DBG_PRINT("Ignoring regulatory request %s " + "since the driver uses its own custom " + "regulatory domain ", + reg_initiator_name(initiator)); return true; + } + /* * wiphy->regd will be set once the device has its own * desired regulatory domain set */ if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && - !is_world_regdom(last_request->alpha2)) + initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + !is_world_regdom(last_request->alpha2)) { + REG_DBG_PRINT("Ignoring regulatory request %s " + "since the driver requires its own regulaotry " + "domain to be set first", + reg_initiator_name(initiator)); return true; + } + return false; } @@ -1030,7 +1131,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy, goto out; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) - handle_band(wiphy, band); + handle_band(wiphy, band, initiator); } out: reg_process_beacons(wiphy); @@ -1066,10 +1167,17 @@ static void handle_channel_custom(struct wiphy *wiphy, regd); if (r) { + REG_DBG_PRINT("Disabling freq %d MHz as custom " + "regd has no rule that fits a %d MHz " + "wide channel\n", + chan->center_freq, + KHZ_TO_MHZ(desired_bw_khz)); chan->flags = IEEE80211_CHAN_DISABLED; return; } + chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; @@ -1215,6 +1323,21 @@ static int ignore_request(struct wiphy *wiphy, return -EINVAL; } +static void reg_set_request_processed(void) +{ + bool need_more_processing = false; + + last_request->processed = true; + + spin_lock(®_requests_lock); + if (!list_empty(®_requests_list)) + need_more_processing = true; + spin_unlock(®_requests_lock); + + if (need_more_processing) + schedule_work(®_work); +} + /** * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if the hint comes from country information from an AP, this @@ -1290,8 +1413,10 @@ new_request: * have applied the requested regulatory domain before we just * inform userspace we have processed the request */ - if (r == -EALREADY) + if (r == -EALREADY) { nl80211_send_reg_change_event(last_request); + reg_set_request_processed(); + } return r; } @@ -1307,16 +1432,13 @@ static void reg_process_hint(struct regulatory_request *reg_request) BUG_ON(!reg_request->alpha2); - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); - if (wiphy_idx_valid(reg_request->wiphy_idx)) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); - goto out; + return; } r = __regulatory_hint(wiphy, reg_request); @@ -1324,28 +1446,46 @@ static void reg_process_hint(struct regulatory_request *reg_request) if (r == -EALREADY && wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) wiphy_update_regulatory(wiphy, initiator); -out: - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } -/* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */ +/* + * Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* + * Regulatory hints come on a first come first serve basis and we + * must process each one atomically. + */ static void reg_process_pending_hints(void) - { +{ struct regulatory_request *reg_request; + mutex_lock(&cfg80211_mutex); + mutex_lock(®_mutex); + + /* When last_request->processed becomes true this will be rescheduled */ + if (last_request && !last_request->processed) { + REG_DBG_PRINT("Pending regulatory request, waiting " + "for it to be processed..."); + goto out; + } + spin_lock(®_requests_lock); - while (!list_empty(®_requests_list)) { - reg_request = list_first_entry(®_requests_list, - struct regulatory_request, - list); - list_del_init(®_request->list); + if (list_empty(®_requests_list)) { spin_unlock(®_requests_lock); - reg_process_hint(reg_request); - spin_lock(®_requests_lock); + goto out; } + + reg_request = list_first_entry(®_requests_list, + struct regulatory_request, + list); + list_del_init(®_request->list); + spin_unlock(®_requests_lock); + + reg_process_hint(reg_request); + +out: + mutex_unlock(®_mutex); + mutex_unlock(&cfg80211_mutex); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -1392,8 +1532,6 @@ static void reg_todo(struct work_struct *work) reg_process_pending_beacon_hints(); } -static DECLARE_WORK(reg_work, reg_todo); - static void queue_regulatory_request(struct regulatory_request *request) { if (isalpha(request->alpha2[0])) @@ -1428,12 +1566,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; - /* - * This ensures last_request is populated once modules - * come swinging in and calling regulatory hints and - * wiphy_apply_custom_regulatory(). - */ - reg_process_hint(request); + queue_regulatory_request(request); return 0; } @@ -1559,7 +1692,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) if (is_user_regdom_saved()) { /* Unless we're asked to ignore it and reset it */ if (reset_user) { - REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + REG_DBG_PRINT("Restoring regulatory settings " "including user preference\n"); user_alpha2[0] = '9'; user_alpha2[1] = '7'; @@ -1570,7 +1703,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) * back as they were for a full restore. */ if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("cfg80211: Keeping preference on " + REG_DBG_PRINT("Keeping preference on " "module parameter ieee80211_regdom: %c%c\n", ieee80211_regdom[0], ieee80211_regdom[1]); @@ -1578,7 +1711,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) alpha2[1] = ieee80211_regdom[1]; } } else { - REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + REG_DBG_PRINT("Restoring regulatory settings " "while preserving user preference for: %c%c\n", user_alpha2[0], user_alpha2[1]); @@ -1586,14 +1719,14 @@ static void restore_alpha2(char *alpha2, bool reset_user) alpha2[1] = user_alpha2[1]; } } else if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("cfg80211: Keeping preference on " + REG_DBG_PRINT("Keeping preference on " "module parameter ieee80211_regdom: %c%c\n", ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } else - REG_DBG_PRINT("cfg80211: Restoring regulatory settings\n"); + REG_DBG_PRINT("Restoring regulatory settings\n"); } /* @@ -1661,7 +1794,7 @@ static void restore_regulatory_settings(bool reset_user) void regulatory_hint_disconnect(void) { - REG_DBG_PRINT("cfg80211: All devices are disconnected, going to " + REG_DBG_PRINT("All devices are disconnected, going to " "restore regulatory settings\n"); restore_regulatory_settings(false); } @@ -1691,7 +1824,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, if (!reg_beacon) return -ENOMEM; - REG_DBG_PRINT("cfg80211: Found new beacon on " + REG_DBG_PRINT("Found new beacon on " "frequency: %d MHz (Ch %d) on %s\n", beacon_chan->center_freq, ieee80211_frequency_to_channel(beacon_chan->center_freq), @@ -1721,8 +1854,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) const struct ieee80211_freq_range *freq_range = NULL; const struct ieee80211_power_rule *power_rule = NULL; - printk(KERN_INFO " (start_freq - end_freq @ bandwidth), " - "(max_antenna_gain, max_eirp)\n"); + pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n"); for (i = 0; i < rd->n_reg_rules; i++) { reg_rule = &rd->reg_rules[i]; @@ -1734,16 +1866,14 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) * in certain regions */ if (power_rule->max_antenna_gain) - printk(KERN_INFO " (%d KHz - %d KHz @ %d KHz), " - "(%d mBi, %d mBm)\n", + pr_info(" (%d KHz - %d KHz @ %d KHz), (%d mBi, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, freq_range->max_bandwidth_khz, power_rule->max_antenna_gain, power_rule->max_eirp); else - printk(KERN_INFO " (%d KHz - %d KHz @ %d KHz), " - "(N/A, %d mBm)\n", + pr_info(" (%d KHz - %d KHz @ %d KHz), (N/A, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, freq_range->max_bandwidth_khz, @@ -1762,27 +1892,20 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) rdev = cfg80211_rdev_by_wiphy_idx( last_request->wiphy_idx); if (rdev) { - printk(KERN_INFO "cfg80211: Current regulatory " - "domain updated by AP to: %c%c\n", + pr_info("Current regulatory domain updated by AP to: %c%c\n", rdev->country_ie_alpha2[0], rdev->country_ie_alpha2[1]); } else - printk(KERN_INFO "cfg80211: Current regulatory " - "domain intersected:\n"); + pr_info("Current regulatory domain intersected:\n"); } else - printk(KERN_INFO "cfg80211: Current regulatory " - "domain intersected:\n"); + pr_info("Current regulatory domain intersected:\n"); } else if (is_world_regdom(rd->alpha2)) - printk(KERN_INFO "cfg80211: World regulatory " - "domain updated:\n"); + pr_info("World regulatory domain updated:\n"); else { if (is_unknown_alpha2(rd->alpha2)) - printk(KERN_INFO "cfg80211: Regulatory domain " - "changed to driver built-in settings " - "(unknown country)\n"); + pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); else - printk(KERN_INFO "cfg80211: Regulatory domain " - "changed to country: %c%c\n", + pr_info("Regulatory domain changed to country: %c%c\n", rd->alpha2[0], rd->alpha2[1]); } print_rd_rules(rd); @@ -1790,8 +1913,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) static void print_regdomain_info(const struct ieee80211_regdomain *rd) { - printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", - rd->alpha2[0], rd->alpha2[1]); + pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]); print_rd_rules(rd); } @@ -1842,8 +1964,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return -EINVAL; if (!is_valid_rd(rd)) { - printk(KERN_ERR "cfg80211: Invalid " - "regulatory domain detected:\n"); + pr_err("Invalid regulatory domain detected:\n"); print_regdomain_info(rd); return -EINVAL; } @@ -1959,6 +2080,8 @@ int set_regdom(const struct ieee80211_regdomain *rd) nl80211_send_reg_change_event(last_request); + reg_set_request_processed(); + mutex_unlock(®_mutex); return r; @@ -2015,8 +2138,7 @@ int __init regulatory_init(void) * early boot for call_usermodehelper(). For now treat these * errors as non-fatal. */ - printk(KERN_ERR "cfg80211: kobject_uevent_env() was unable " - "to call CRDA during init"); + pr_err("kobject_uevent_env() was unable to call CRDA during init\n"); #ifdef CONFIG_CFG80211_REG_DEBUG /* We want to find out exactly why when debugging */ WARN_ON(err); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 503ebb86ba18..ea427f418f64 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -464,6 +464,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, if (res->pub.beacon_ies) { size_t used = dev->wiphy.bss_priv_size + sizeof(*res); size_t ielen = res->pub.len_beacon_ies; + bool information_elements_is_beacon_ies = + (found->pub.information_elements == + found->pub.beacon_ies); if (found->pub.beacon_ies && !found->beacon_ies_allocated && @@ -487,6 +490,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found->pub.len_beacon_ies = ielen; } } + + /* Override IEs if they were from a beacon before */ + if (information_elements_is_beacon_ies) { + found->pub.information_elements = + found->pub.beacon_ies; + found->pub.len_information_elements = + found->pub.len_beacon_ies; + } } kref_put(&res->ref, bss_release); diff --git a/net/wireless/util.c b/net/wireless/util.c index 76120aeda57d..7620ae2fcf18 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -502,7 +502,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, skb_orphan(skb); if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) { - printk(KERN_ERR "failed to reallocate Tx buffer\n"); + pr_err("failed to reallocate Tx buffer\n"); return -ENOMEM; } skb->truesize += head_need; @@ -685,20 +685,18 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) continue; if (rdev->ops->add_key(wdev->wiphy, dev, i, false, NULL, &wdev->connect_keys->params[i])) { - printk(KERN_ERR "%s: failed to set key %d\n", - dev->name, i); + netdev_err(dev, "failed to set key %d\n", i); continue; } if (wdev->connect_keys->def == i) - if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) { - printk(KERN_ERR "%s: failed to set defkey %d\n", - dev->name, i); + if (rdev->ops->set_default_key(wdev->wiphy, dev, + i, true, true)) { + netdev_err(dev, "failed to set defkey %d\n", i); continue; } if (wdev->connect_keys->defmgmt == i) if (rdev->ops->set_default_mgmt_key(wdev->wiphy, dev, i)) - printk(KERN_ERR "%s: failed to set mgtdef %d\n", - dev->name, i); + netdev_err(dev, "failed to set mgtdef %d\n", i); } kfree(wdev->connect_keys); @@ -795,6 +793,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (ntype != otype) { dev->ieee80211_ptr->use_4addr = false; + dev->ieee80211_ptr->mesh_id_up_len = 0; switch (otype) { case NL80211_IFTYPE_ADHOC: diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 12222ee6ebf2..3e5dbd4e4cd5 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -548,8 +548,8 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, __cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } - err = rdev->ops->set_default_key(&rdev->wiphy, - dev, idx); + err = rdev->ops->set_default_key(&rdev->wiphy, dev, + idx, true, true); } if (!err) { wdev->wext.default_key = idx; @@ -627,8 +627,8 @@ int cfg80211_wext_siwencode(struct net_device *dev, err = 0; wdev_lock(wdev); if (wdev->current_bss) - err = rdev->ops->set_default_key(&rdev->wiphy, - dev, idx); + err = rdev->ops->set_default_key(&rdev->wiphy, dev, + idx, true, true); if (!err) wdev->wext.default_key = idx; wdev_unlock(wdev); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index dc675a3daa3d..fdbc23c10d8c 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -467,8 +467,8 @@ void wireless_send_event(struct net_device * dev, * The best the driver could do is to log an error message. * We will do it ourselves instead... */ - printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n", - dev->name, cmd); + netdev_err(dev, "(WE) : Invalid/Unknown Wireless Event (0x%04X)\n", + cmd); return; } @@ -476,11 +476,13 @@ void wireless_send_event(struct net_device * dev, if (descr->header_type == IW_HEADER_TYPE_POINT) { /* Check if number of token fits within bounds */ if (wrqu->data.length > descr->max_tokens) { - printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); + netdev_err(dev, "(WE) : Wireless Event too big (%d)\n", + wrqu->data.length); return; } if (wrqu->data.length < descr->min_tokens) { - printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); + netdev_err(dev, "(WE) : Wireless Event too small (%d)\n", + wrqu->data.length); return; } /* Calculate extra_len - extra is NULL for restricted events */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index f7af98dff409..ad96ee90fe27 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1357,11 +1357,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; int rc; - lock_kernel(); switch (cmd) { case TIOCOUTQ: { - int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + int amount; + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; rc = put_user(amount, (unsigned int __user *)argp); @@ -1375,8 +1375,10 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) * These two are safe on a single CPU system as * only user tasks fiddle here */ + lock_sock(sk); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; + release_sock(sk); rc = put_user(amount, (unsigned int __user *)argp); break; } @@ -1422,9 +1424,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = x25_subscr_ioctl(cmd, argp); break; case SIOCX25GFACILITIES: { - struct x25_facilities fac = x25->facilities; - rc = copy_to_user(argp, &fac, - sizeof(fac)) ? -EFAULT : 0; + lock_sock(sk); + rc = copy_to_user(argp, &x25->facilities, + sizeof(x25->facilities)) + ? -EFAULT : 0; + release_sock(sk); break; } @@ -1435,18 +1439,19 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) sizeof(facilities))) break; rc = -EINVAL; + lock_sock(sk); if (sk->sk_state != TCP_LISTEN && sk->sk_state != TCP_CLOSE) - break; + goto out_fac_release; if (facilities.pacsize_in < X25_PS16 || facilities.pacsize_in > X25_PS4096) - break; + goto out_fac_release; if (facilities.pacsize_out < X25_PS16 || facilities.pacsize_out > X25_PS4096) - break; + goto out_fac_release; if (facilities.winsize_in < 1 || facilities.winsize_in > 127) - break; + goto out_fac_release; if (facilities.throughput) { int out = facilities.throughput & 0xf0; int in = facilities.throughput & 0x0f; @@ -1454,24 +1459,28 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) facilities.throughput |= X25_DEFAULT_THROUGHPUT << 4; else if (out < 0x30 || out > 0xD0) - break; + goto out_fac_release; if (!in) facilities.throughput |= X25_DEFAULT_THROUGHPUT; else if (in < 0x03 || in > 0x0D) - break; + goto out_fac_release; } if (facilities.reverse && (facilities.reverse & 0x81) != 0x81) - break; + goto out_fac_release; x25->facilities = facilities; rc = 0; +out_fac_release: + release_sock(sk); break; } case SIOCX25GDTEFACILITIES: { + lock_sock(sk); rc = copy_to_user(argp, &x25->dte_facilities, sizeof(x25->dte_facilities)); + release_sock(sk); if (rc) rc = -EFAULT; break; @@ -1483,26 +1492,31 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (copy_from_user(&dtefacs, argp, sizeof(dtefacs))) break; rc = -EINVAL; + lock_sock(sk); if (sk->sk_state != TCP_LISTEN && sk->sk_state != TCP_CLOSE) - break; + goto out_dtefac_release; if (dtefacs.calling_len > X25_MAX_AE_LEN) - break; + goto out_dtefac_release; if (dtefacs.calling_ae == NULL) - break; + goto out_dtefac_release; if (dtefacs.called_len > X25_MAX_AE_LEN) - break; + goto out_dtefac_release; if (dtefacs.called_ae == NULL) - break; + goto out_dtefac_release; x25->dte_facilities = dtefacs; rc = 0; +out_dtefac_release: + release_sock(sk); break; } case SIOCX25GCALLUSERDATA: { - struct x25_calluserdata cud = x25->calluserdata; - rc = copy_to_user(argp, &cud, - sizeof(cud)) ? -EFAULT : 0; + lock_sock(sk); + rc = copy_to_user(argp, &x25->calluserdata, + sizeof(x25->calluserdata)) + ? -EFAULT : 0; + release_sock(sk); break; } @@ -1516,16 +1530,19 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EINVAL; if (calluserdata.cudlength > X25_MAX_CUD_LEN) break; + lock_sock(sk); x25->calluserdata = calluserdata; + release_sock(sk); rc = 0; break; } case SIOCX25GCAUSEDIAG: { - struct x25_causediag causediag; - causediag = x25->causediag; - rc = copy_to_user(argp, &causediag, - sizeof(causediag)) ? -EFAULT : 0; + lock_sock(sk); + rc = copy_to_user(argp, &x25->causediag, + sizeof(x25->causediag)) + ? -EFAULT : 0; + release_sock(sk); break; } @@ -1534,7 +1551,9 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EFAULT; if (copy_from_user(&causediag, argp, sizeof(causediag))) break; + lock_sock(sk); x25->causediag = causediag; + release_sock(sk); rc = 0; break; @@ -1543,31 +1562,37 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCX25SCUDMATCHLEN: { struct x25_subaddr sub_addr; rc = -EINVAL; + lock_sock(sk); if(sk->sk_state != TCP_CLOSE) - break; + goto out_cud_release; rc = -EFAULT; if (copy_from_user(&sub_addr, argp, sizeof(sub_addr))) - break; + goto out_cud_release; rc = -EINVAL; if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN) - break; + goto out_cud_release; x25->cudmatchlength = sub_addr.cudmatchlength; rc = 0; +out_cud_release: + release_sock(sk); break; } case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; + lock_kernel(); if (sk->sk_state != TCP_CLOSE) break; clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + unlock_kernel(); rc = 0; break; } case SIOCX25SENDCALLACCPT: { rc = -EINVAL; + lock_kernel(); if (sk->sk_state != TCP_ESTABLISHED) break; /* must call accptapprv above */ @@ -1575,6 +1600,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; + unlock_kernel(); rc = 0; break; } @@ -1583,7 +1609,6 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -ENOIOCTLCMD; break; } - unlock_kernel(); return rc; } @@ -1619,16 +1644,20 @@ static int compat_x25_subscr_ioctl(unsigned int cmd, dev_put(dev); if (cmd == SIOCX25GSUBSCRIP) { + read_lock_bh(&x25_neigh_list_lock); x25_subscr.extended = nb->extended; x25_subscr.global_facil_mask = nb->global_facil_mask; + read_unlock_bh(&x25_neigh_list_lock); rc = copy_to_user(x25_subscr32, &x25_subscr, sizeof(*x25_subscr32)) ? -EFAULT : 0; } else { rc = -EINVAL; if (x25_subscr.extended == 0 || x25_subscr.extended == 1) { rc = 0; + write_lock_bh(&x25_neigh_list_lock); nb->extended = x25_subscr.extended; nb->global_facil_mask = x25_subscr.global_facil_mask; + write_unlock_bh(&x25_neigh_list_lock); } } x25_neigh_put(nb); @@ -1654,19 +1683,15 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, break; case SIOCGSTAMP: rc = -EINVAL; - lock_kernel(); if (sk) rc = compat_sock_get_timestamp(sk, (struct timeval __user*)argp); - unlock_kernel(); break; case SIOCGSTAMPNS: rc = -EINVAL; - lock_kernel(); if (sk) rc = compat_sock_get_timestampns(sk, (struct timespec __user*)argp); - unlock_kernel(); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -1685,22 +1710,16 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - lock_kernel(); rc = x25_route_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25GSUBSCRIP: - lock_kernel(); rc = compat_x25_subscr_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25SSUBSCRIP: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - lock_kernel(); rc = compat_x25_subscr_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25GFACILITIES: case SIOCX25SFACILITIES: diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 3a8c4c419cd4..55187c8f6420 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -61,6 +61,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, while (len > 0) { switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: + if (len < 2) + return 0; switch (*p) { case X25_FAC_REVERSE: if((p[1] & 0x81) == 0x81) { @@ -104,6 +106,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, len -= 2; break; case X25_FAC_CLASS_B: + if (len < 3) + return 0; switch (*p) { case X25_FAC_PACKET_SIZE: facilities->pacsize_in = p[1]; @@ -125,6 +129,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, len -= 3; break; case X25_FAC_CLASS_C: + if (len < 4) + return 0; printk(KERN_DEBUG "X.25: unknown facility %02X, " "values %02X, %02X, %02X\n", p[0], p[1], p[2], p[3]); @@ -132,6 +138,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, len -= 4; break; case X25_FAC_CLASS_D: + if (len < p[1] + 2) + return 0; switch (*p) { case X25_FAC_CALLING_AE: if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) @@ -149,9 +157,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; default: printk(KERN_DEBUG "X.25: unknown facility %02X," - "length %d, values %02X, %02X, " - "%02X, %02X\n", - p[0], p[1], p[2], p[3], p[4], p[5]); + "length %d\n", p[0], p[1]); break; } len -= p[1] + 2; diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 73e7b954ad28..4cbc942f762a 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -31,8 +31,8 @@ #include <linux/init.h> #include <net/x25.h> -static LIST_HEAD(x25_neigh_list); -static DEFINE_RWLOCK(x25_neigh_list_lock); +LIST_HEAD(x25_neigh_list); +DEFINE_RWLOCK(x25_neigh_list_lock); static void x25_t20timer_expiry(unsigned long); @@ -360,16 +360,20 @@ int x25_subscr_ioctl(unsigned int cmd, void __user *arg) dev_put(dev); if (cmd == SIOCX25GSUBSCRIP) { + read_lock_bh(&x25_neigh_list_lock); x25_subscr.extended = nb->extended; x25_subscr.global_facil_mask = nb->global_facil_mask; + read_unlock_bh(&x25_neigh_list_lock); rc = copy_to_user(arg, &x25_subscr, sizeof(x25_subscr)) ? -EFAULT : 0; } else { rc = -EINVAL; if (!(x25_subscr.extended && x25_subscr.extended != 1)) { rc = 0; + write_lock_bh(&x25_neigh_list_lock); nb->extended = x25_subscr.extended; nb->global_facil_mask = x25_subscr.global_facil_mask; + write_unlock_bh(&x25_neigh_list_lock); } } x25_neigh_put(nb); @@ -394,6 +398,7 @@ void __exit x25_link_free(void) list_for_each_safe(entry, tmp, &x25_neigh_list) { nb = list_entry(entry, struct x25_neigh, node); __x25_remove_neigh(nb); + dev_put(nb->dev); } write_unlock_bh(&x25_neigh_list_lock); } diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index a2023ec52329..1e98bc0fe0a5 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz) if (sz <= PAGE_SIZE) n = kzalloc(sz, GFP_KERNEL); else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + n = vzalloc(sz); else n = (struct hlist_head *) __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 044e77898512..8b3ef404c794 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1433,7 +1433,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, } xdst->route = dst; - memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics)); + dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; @@ -2271,7 +2271,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) if (pmtu > route_mtu_cached) pmtu = route_mtu_cached; - dst->metrics[RTAX_MTU-1] = pmtu; + dst_metric_set(dst, RTAX_MTU, pmtu); } while ((dst = dst->next)); } @@ -2349,7 +2349,7 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, mtu = xfrm_state_mtu(dst->xfrm, mtu); if (mtu > last->route_mtu_cached) mtu = last->route_mtu_cached; - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); if (last == first) break; @@ -2361,6 +2361,16 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 1; } +static unsigned int xfrm_default_advmss(const struct dst_entry *dst) +{ + return dst_metric_advmss(dst->path); +} + +static unsigned int xfrm_default_mtu(const struct dst_entry *dst) +{ + return dst_mtu(dst->path); +} + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { struct net *net; @@ -2378,6 +2388,10 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->kmem_cachep = xfrm_dst_cache; if (likely(dst_ops->check == NULL)) dst_ops->check = xfrm_dst_check; + if (likely(dst_ops->default_advmss == NULL)) + dst_ops->default_advmss = xfrm_default_advmss; + if (likely(dst_ops->default_mtu == NULL)) + dst_ops->default_mtu = xfrm_default_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index eb96ce52f178..220ebc05c7af 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1268,7 +1268,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, return xc; error: - kfree(xc); + xfrm_state_put(xc); return NULL; } EXPORT_SYMBOL(xfrm_state_migrate); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8bae6b22c846..8eb889510916 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -148,7 +148,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, !attrs[XFRMA_ALG_AUTH_TRUNC]) || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || - attrs[XFRMA_ALG_COMP]) + attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_TFCPAD]) goto out; break; @@ -165,6 +166,9 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_CRYPT]) && attrs[XFRMA_ALG_AEAD]) goto out; + if (attrs[XFRMA_TFCPAD] && + p->mode != XFRM_MODE_TUNNEL) + goto out; break; case IPPROTO_COMP: @@ -172,7 +176,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || - attrs[XFRMA_ALG_CRYPT]) + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_TFCPAD]) goto out; break; @@ -186,6 +191,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ENCAP] || attrs[XFRMA_SEC_CTX] || + attrs[XFRMA_TFCPAD] || !attrs[XFRMA_COADDR]) goto out; break; @@ -439,6 +445,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + if (attrs[XFRMA_TFCPAD]) + x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); + if (attrs[XFRMA_COADDR]) { x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), sizeof(*x->coaddr), GFP_KERNEL); @@ -688,6 +697,9 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (x->encap) NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (x->tfcpad) + NLA_PUT_U32(skb, XFRMA_TFCPAD, x->tfcpad); + if (xfrm_mark_put(skb, &x->mark)) goto nla_put_failure; @@ -2122,6 +2134,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, + [XFRMA_TFCPAD] = { .type = NLA_U32 }, }; static struct xfrm_link { @@ -2301,6 +2314,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(*x->calg)); if (x->encap) l += nla_total_size(sizeof(*x->encap)); + if (x->tfcpad) + l += nla_total_size(sizeof(x->tfcpad)); if (x->security) l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + x->security->ctx_len); |