Diffstat (limited to 'net')
164 files changed, 2502 insertions, 1092 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 7850412f52b7..e47600b4e2e3 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -327,10 +327,6 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { - u32 old_features = vlandev->features; - - vlandev->features &= ~dev->vlan_features; - vlandev->features |= dev->features & dev->vlan_features; vlandev->gso_max_size = dev->gso_max_size; if (dev->features & NETIF_F_HW_VLAN_TX) @@ -341,8 +337,8 @@ static void vlan_transfer_features(struct net_device *dev, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif - if (old_features != vlandev->features) - netdev_features_change(vlandev); + + netdev_update_features(vlandev); } static void __vlan_device_event(struct net_device *dev, unsigned long event) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e34ea9e5e28b..b84a46b30c0c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -704,8 +704,8 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - dev->features |= real_dev->features & real_dev->vlan_features; - dev->features |= NETIF_F_LLTX; + dev->hw_features = real_dev->vlan_features & NETIF_F_ALL_TX_OFFLOADS; + dev->features |= real_dev->vlan_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; /* ipv6 shared card related stuff */ @@ -759,6 +759,17 @@ static void vlan_dev_uninit(struct net_device *dev) } } +static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) +{ + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + + features &= (real_dev->features | NETIF_F_LLTX); + if (dev_ethtool_get_rx_csum(real_dev)) + features |= NETIF_F_RXCSUM; + + return features; +} + static int vlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { @@ -774,18 +785,6 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, strcpy(info->fw_version, "N/A"); } -static u32 vlan_ethtool_get_rx_csum(struct net_device *dev) -{ - const struct vlan_dev_info *vlan = vlan_dev_info(dev); - return dev_ethtool_get_rx_csum(vlan->real_dev); -} - -static u32 vlan_ethtool_get_flags(struct net_device *dev) -{ - const struct vlan_dev_info *vlan = vlan_dev_info(dev); - return dev_ethtool_get_flags(vlan->real_dev); -} - static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -823,32 +822,10 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st return stats; } -static int vlan_ethtool_set_tso(struct net_device *dev, u32 data) -{ - if (data) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; - - /* Underlying device must support TSO for VLAN-tagged packets - * and must have TSO enabled now. 
- */ - if (!(real_dev->vlan_features & NETIF_F_TSO)) - return -EOPNOTSUPP; - if (!(real_dev->features & NETIF_F_TSO)) - return -EINVAL; - dev->features |= NETIF_F_TSO; - } else { - dev->features &= ~NETIF_F_TSO; - } - return 0; -} - static const struct ethtool_ops vlan_ethtool_ops = { .get_settings = vlan_ethtool_get_settings, .get_drvinfo = vlan_ethtool_get_drvinfo, .get_link = ethtool_op_get_link, - .get_rx_csum = vlan_ethtool_get_rx_csum, - .get_flags = vlan_ethtool_get_flags, - .set_tso = vlan_ethtool_set_tso, }; static const struct net_device_ops vlan_netdev_ops = { @@ -874,6 +851,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif + .ndo_fix_features = vlan_dev_fix_features, }; void vlan_setup(struct net_device *dev) diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index d1314cf18adf..d940c49d168a 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -54,7 +54,7 @@ static const char name_conf[] = "config"; /* * Structures for interfacing with the /proc filesystem. - * VLAN creates its own directory /proc/net/vlan with the folowing + * VLAN creates its own directory /proc/net/vlan with the following * entries: * config device status/configuration * <device> entry for each device diff --git a/net/9p/client.c b/net/9p/client.c index 2ccbf04d37df..48b8e084e710 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -178,7 +178,7 @@ free_and_return: * @tag: numeric id for transaction * * this is a simple array lookup, but will grow the - * request_slots as necessary to accomodate transaction + * request_slots as necessary to accommodate transaction * ids which did not previously have a slot. * * this code relies on the client spinlock to manage locks, its diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 9172ab78fcb0..d47880e971dd 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -36,7 +36,7 @@ p9_release_req_pages(struct trans_rpage_info *rpinfo) EXPORT_SYMBOL(p9_release_req_pages); /** - * p9_nr_pages - Return number of pages needed to accomodate the payload. + * p9_nr_pages - Return number of pages needed to accommodate the payload. */ int p9_nr_pages(struct p9_req_t *req) @@ -55,7 +55,7 @@ EXPORT_SYMBOL(p9_nr_pages); * @req: Request to be sent to server. * @pdata_off: data offset into the first page after translation (gup). * @pdata_len: Total length of the IO. gup may not return requested # of pages. - * @nr_pages: number of pages to accomodate the payload + * @nr_pages: number of pages to accommodate the payload * @rw: Indicates if the pages are for read or write. 
*/ int diff --git a/net/9p/util.c b/net/9p/util.c index b84619b5ba22..da6af81e59d9 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL(p9_idpool_create); /** * p9_idpool_destroy - create a new per-connection id pool - * @p: idpool to destory + * @p: idpool to destroy */ void p9_idpool_destroy(struct p9_idpool *p) diff --git a/net/atm/br2684.c b/net/atm/br2684.c index fce2eae8d476..2252c2085dac 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -509,7 +509,7 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) write_lock_irq(&devs_lock); net_dev = br2684_find_dev(&be.ifspec); if (net_dev == NULL) { - pr_err("tried to attach to non-existant device\n"); + pr_err("tried to attach to non-existent device\n"); err = -ENXIO; goto error; } diff --git a/net/atm/lec.h b/net/atm/lec.h index 9d14d196cc1d..dfc071966463 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -35,7 +35,7 @@ struct lecdatahdr_8025 { * Operations that LANE2 capable device can do. Two first functions * are used to make the device do things. See spec 3.1.3 and 3.1.4. * - * The third function is intented for the MPOA component sitting on + * The third function is intended for the MPOA component sitting on * top of the LANE device. The MPOA component assigns it's own function * to (*associate_indicator)() and the LANE device will use that * function to tell about TLVs it sees floating through. diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 9ed26140a269..824e1f6e50f2 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -474,7 +474,7 @@ void interface_rx(struct net_device *soft_iface, goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); - /* should not be neccesary anymore as we use skb_pull_rcsum() + /* should not be necessary anymore as we use skb_pull_rcsum() * TODO: please verify this and remove this TODO * -- Dec 21st 2009, Simon Wunderlich */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a80bc1cdb35b..1ad4907766c7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1948,7 +1948,7 @@ static void hci_tx_task(unsigned long arg) read_unlock(&hci_task_lock); } -/* ----- HCI RX task (incoming data proccessing) ----- */ +/* ----- HCI RX task (incoming data processing) ----- */ /* ACL data packet */ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f77308e63e58..299fe56a9668 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -679,7 +679,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch if (opt == BT_FLUSHABLE_OFF) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; - /* proceed futher only when we have l2cap_conn and + /* proceed further only when we have l2cap_conn and No Flush support in the LM */ if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) { err = -EINVAL; diff --git a/net/bridge/br.c b/net/bridge/br.c index 84bbb82599b2..f20c4fd915a8 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -104,3 +104,4 @@ module_init(br_init) module_exit(br_deinit) MODULE_LICENSE("GPL"); MODULE_VERSION(BR_VERSION); +MODULE_ALIAS_RTNL_LINK("bridge"); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 21e5901186ea..45cfd54b06d3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -74,6 +74,17 @@ out: return NETDEV_TX_OK; } +static int br_dev_init(struct net_device *dev) +{ + 
struct net_bridge *br = netdev_priv(dev); + + br->stats = alloc_percpu(struct br_cpu_netstats); + if (!br->stats) + return -ENOMEM; + + return 0; +} + static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -334,6 +345,7 @@ static const struct ethtool_ops br_ethtool_ops = { static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, + .ndo_init = br_dev_init, .ndo_start_xmit = br_dev_xmit, .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, @@ -357,18 +369,47 @@ static void br_dev_free(struct net_device *dev) free_netdev(dev); } +static struct device_type br_type = { + .name = "bridge", +}; + void br_dev_setup(struct net_device *dev) { + struct net_bridge *br = netdev_priv(dev); + random_ether_addr(dev->dev_addr); ether_setup(dev); dev->netdev_ops = &br_netdev_ops; dev->destructor = br_dev_free; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); + SET_NETDEV_DEVTYPE(dev, &br_type); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX; + + br->dev = dev; + spin_lock_init(&br->lock); + INIT_LIST_HEAD(&br->port_list); + spin_lock_init(&br->hash_lock); + + br->bridge_id.prio[0] = 0x80; + br->bridge_id.prio[1] = 0x00; + + memcpy(br->group_addr, br_group_address, ETH_ALEN); + + br->feature_mask = dev->features; + br->stp_enabled = BR_NO_STP; + br->designated_root = br->bridge_id; + br->bridge_max_age = br->max_age = 20 * HZ; + br->bridge_hello_time = br->hello_time = 2 * HZ; + br->bridge_forward_delay = br->forward_delay = 15 * HZ; + br->ageing_time = 300 * HZ; + + br_netfilter_rtable_init(br); + br_stp_timer_init(br); + br_multicast_init(br); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 88485cc74dc3..e0dfbc151dd7 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -28,6 +28,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); +static void fdb_notify(const struct net_bridge_fdb_entry *, int); static u32 fdb_salt __read_mostly; @@ -62,7 +63,7 @@ static inline int has_expired(const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb) { return !fdb->is_static && - time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); + time_before_eq(fdb->updated + hold_time(br), jiffies); } static inline int br_mac_hash(const unsigned char *mac) @@ -81,6 +82,7 @@ static void fdb_rcu_free(struct rcu_head *head) static inline void fdb_delete(struct net_bridge_fdb_entry *f) { + fdb_notify(f, RTM_DELNEIGH); hlist_del_rcu(&f->hlist); call_rcu(&f->rcu, fdb_rcu_free); } @@ -140,7 +142,7 @@ void br_fdb_cleanup(unsigned long _data) unsigned long this_timer; if (f->is_static) continue; - this_timer = f->ageing_timer + delay; + this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) fdb_delete(f); else if (time_before(this_timer, next_timer)) @@ -169,7 +171,7 @@ void br_fdb_flush(struct net_bridge *br) spin_unlock_bh(&br->hash_lock); } -/* Flush all entries refering to a specific port. +/* Flush all entries referring to a specific port. 
* if do_all is set also flush static entries */ void br_fdb_delete_by_port(struct net_bridge *br, @@ -293,7 +295,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, fe->is_local = f->is_local; if (!f->is_static) - fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->ageing_timer); + fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated); ++fe; ++num; } @@ -305,8 +307,21 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, return num; } -static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr) +static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, + const unsigned char *addr) +{ + struct hlist_node *h; + struct net_bridge_fdb_entry *fdb; + + hlist_for_each_entry(fdb, h, head, hlist) { + if (!compare_ether_addr(fdb->addr.addr, addr)) + return fdb; + } + return NULL; +} + +static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, + const unsigned char *addr) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; @@ -320,8 +335,7 @@ static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, - const unsigned char *addr, - int is_local) + const unsigned char *addr) { struct net_bridge_fdb_entry *fdb; @@ -329,11 +343,11 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); fdb->dst = source; - fdb->is_local = is_local; - fdb->is_static = is_local; - fdb->ageing_timer = jiffies; - + fdb->is_local = 0; + fdb->is_static = 0; + fdb->updated = fdb->used = jiffies; hlist_add_head_rcu(&fdb->hlist, head); + fdb_notify(fdb, RTM_NEWNEIGH); } return fdb; } @@ -360,12 +374,15 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, fdb_delete(fdb); } - if (!fdb_create(head, source, addr, 1)) + fdb = fdb_create(head, source, addr); + if (!fdb) return -ENOMEM; + fdb->is_local = fdb->is_static = 1; return 0; } +/* Add entry for local address of interface */ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr) { @@ -392,7 +409,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->state == BR_STATE_FORWARDING)) return; - fdb = fdb_find(head, addr); + fdb = fdb_find_rcu(head, addr); if (likely(fdb)) { /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { @@ -403,15 +420,277 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } else { /* fastpath: update of existing entry */ fdb->dst = source; - fdb->ageing_timer = jiffies; + fdb->updated = jiffies; } } else { spin_lock(&br->hash_lock); - if (!fdb_find(head, addr)) - fdb_create(head, source, addr, 0); + if (likely(!fdb_find(head, addr))) + fdb_create(head, source, addr); + /* else we lose race and someone else inserts * it first, don't bother updating */ spin_unlock(&br->hash_lock); } } + +static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) +{ + if (fdb->is_local) + return NUD_PERMANENT; + else if (fdb->is_static) + return NUD_NOARP; + else if (has_expired(fdb->dst->br, fdb)) + return NUD_STALE; + else + return NUD_REACHABLE; +} + +static int fdb_fill_info(struct sk_buff *skb, + const struct net_bridge_fdb_entry *fdb, + u32 pid, u32 seq, int type, unsigned int flags) +{ + unsigned long now = jiffies; + struct nda_cacheinfo ci; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, pid, 
seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = 0; + ndm->ndm_type = 0; + ndm->ndm_ifindex = fdb->dst->dev->ifindex; + ndm->ndm_state = fdb_to_nud(fdb); + + NLA_PUT(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr); + + ci.ndm_used = jiffies_to_clock_t(now - fdb->used); + ci.ndm_confirmed = 0; + ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); + ci.ndm_refcnt = 0; + NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t fdb_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(struct nda_cacheinfo)); +} + +static void fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) +{ + struct net *net = dev_net(fdb->dst->dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = fdb_fill_info(skb, fdb, 0, 0, type, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); +} + +/* Dump information about entries, in response to GETNEIGH */ +int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx = 0; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + struct net_bridge *br = netdev_priv(dev); + int i; + + if (!(dev->priv_flags & IFF_EBRIDGE)) + continue; + + for (i = 0; i < BR_HASH_SIZE; i++) { + struct hlist_node *h; + struct net_bridge_fdb_entry *f; + + hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + if (idx < cb->args[0]) + goto skip; + + if (fdb_fill_info(skb, f, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI) < 0) + break; +skip: + ++idx; + } + } + } + rcu_read_unlock(); + + cb->args[0] = idx; + + return skb->len; +} + +/* Create new static fdb entry */ +static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, + __u16 state) +{ + struct net_bridge *br = source->br; + struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct net_bridge_fdb_entry *fdb; + + fdb = fdb_find(head, addr); + if (fdb) + return -EEXIST; + + fdb = fdb_create(head, source, addr); + if (!fdb) + return -ENOMEM; + + if (state & NUD_PERMANENT) + fdb->is_local = fdb->is_static = 1; + else if (state & NUD_NOARP) + fdb->is_static = 1; + return 0; +} + +/* Add new permanent fdb entry with RTM_NEWNEIGH */ +int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ndmsg *ndm; + struct nlattr *tb[NDA_MAX+1]; + struct net_device *dev; + struct net_bridge_port *p; + const __u8 *addr; + int err; + + ASSERT_RTNL(); + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex == 0) { + pr_info("bridge: RTM_NEWNEIGH with invalid ifindex\n"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, ndm->ndm_ifindex); + if (dev == NULL) { + pr_info("bridge: RTM_NEWNEIGH with unknown ifindex\n"); + return -ENODEV; + } + + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("bridge: RTM_NEWNEIGH 
with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); + return -EINVAL; + } + + p = br_port_get_rtnl(dev); + if (p == NULL) { + pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", + dev->name); + return -EINVAL; + } + + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + +static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +{ + struct net_bridge *br = p->br; + struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct net_bridge_fdb_entry *fdb; + + fdb = fdb_find(head, addr); + if (!fdb) + return -ENOENT; + + fdb_delete(fdb); + return 0; +} + +/* Remove neighbor entry with RTM_DELNEIGH */ +int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ndmsg *ndm; + struct net_bridge_port *p; + struct nlattr *llattr; + const __u8 *addr; + struct net_device *dev; + int err; + + ASSERT_RTNL(); + if (nlmsg_len(nlh) < sizeof(*ndm)) + return -EINVAL; + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex == 0) { + pr_info("bridge: RTM_DELNEIGH with invalid ifindex\n"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, ndm->ndm_ifindex); + if (dev == NULL) { + pr_info("bridge: RTM_DELNEIGH with unknown ifindex\n"); + return -ENODEV; + } + + llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); + if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { + pr_info("bridge: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(llattr); + + p = br_port_get_rtnl(dev); + if (p == NULL) { + pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", + dev->name); + return -EINVAL; + } + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p, addr); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 718b60366dfe..7f5379c593d9 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -175,56 +175,6 @@ static void del_br(struct net_bridge *br, struct list_head *head) unregister_netdevice_queue(br->dev, head); } -static struct net_device *new_bridge_dev(struct net *net, const char *name) -{ - struct net_bridge *br; - struct net_device *dev; - - dev = alloc_netdev(sizeof(struct net_bridge), name, - br_dev_setup); - - if (!dev) - return NULL; - dev_net_set(dev, net); - - br = netdev_priv(dev); - br->dev = dev; - - br->stats = alloc_percpu(struct br_cpu_netstats); - if (!br->stats) { - free_netdev(dev); - return NULL; - } - - spin_lock_init(&br->lock); - INIT_LIST_HEAD(&br->port_list); - spin_lock_init(&br->hash_lock); - - br->bridge_id.prio[0] = 0x80; - br->bridge_id.prio[1] = 0x00; - - memcpy(br->group_addr, br_group_address, ETH_ALEN); - - br->feature_mask = dev->features; - br->stp_enabled = BR_NO_STP; - br->designated_root = br->bridge_id; - br->root_path_cost = 0; - br->root_port = 0; - br->bridge_max_age = br->max_age = 20 * HZ; - br->bridge_hello_time = br->hello_time = 2 * HZ; - br->bridge_forward_delay = br->forward_delay = 15 * HZ; - br->topology_change = 0; - br->topology_change_detected = 0; - br->ageing_time = 300 * HZ; - - br_netfilter_rtable_init(br); - - br_stp_timer_init(br); - br_multicast_init(br); - - return dev; -} - /* find an available port number */ static int find_portno(struct net_bridge *br) { @@ -277,42 +227,19 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, return p; 
} -static struct device_type br_type = { - .name = "bridge", -}; - int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; - int ret; - dev = new_bridge_dev(net, name); + dev = alloc_netdev(sizeof(struct net_bridge), name, + br_dev_setup); + if (!dev) return -ENOMEM; - rtnl_lock(); - if (strchr(dev->name, '%')) { - ret = dev_alloc_name(dev, dev->name); - if (ret < 0) - goto out_free; - } - - SET_NETDEV_DEVTYPE(dev, &br_type); - - ret = register_netdevice(dev); - if (ret) - goto out_free; - - ret = br_sysfs_addbr(dev); - if (ret) - unregister_netdevice(dev); - out: - rtnl_unlock(); - return ret; + dev_net_set(dev, net); -out_free: - free_netdev(dev); - goto out; + return register_netdev(dev); } int br_del_bridge(struct net *net, const char *name) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e2160792e1bc..785932d7ad32 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -98,9 +98,10 @@ int br_handle_frame_finish(struct sk_buff *skb) } if (skb) { - if (dst) + if (dst) { + dst->used = jiffies; br_forward(dst->dst, skb, skb2); - else + } else br_flood_forward(br, skb, skb2); } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index cb43312b846e..7222fe1d5460 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -106,7 +106,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) /* * Legacy ioctl's through SIOCDEVPRIVATE * This interface is deprecated because it was too difficult to - * to do the translation for 32/64bit ioctl compatability. + * to do the translation for 32/64bit ioctl compatibility. */ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { @@ -181,40 +181,19 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); - br->bridge_forward_delay = clock_t_to_jiffies(args[1]); - if (br_is_root_bridge(br)) - br->forward_delay = br->bridge_forward_delay; - spin_unlock_bh(&br->lock); - return 0; + return br_set_forward_delay(br, args[1]); case BRCTL_SET_BRIDGE_HELLO_TIME: - { - unsigned long t = clock_t_to_jiffies(args[1]); if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (t < HZ) - return -EINVAL; - - spin_lock_bh(&br->lock); - br->bridge_hello_time = t; - if (br_is_root_bridge(br)) - br->hello_time = br->bridge_hello_time; - spin_unlock_bh(&br->lock); - return 0; - } + return br_set_hello_time(br, args[1]); case BRCTL_SET_BRIDGE_MAX_AGE: if (!capable(CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); - br->bridge_max_age = clock_t_to_jiffies(args[1]); - if (br_is_root_bridge(br)) - br->max_age = br->bridge_max_age; - spin_unlock_bh(&br->lock); - return 0; + return br_set_max_age(br, args[1]); case BRCTL_SET_AGEING_TIME: if (!capable(CAP_NET_ADMIN)) @@ -275,19 +254,16 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case BRCTL_SET_PORT_PRIORITY: { struct net_bridge_port *p; - int ret = 0; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (args[2] >= (1<<(16-BR_PORT_BITS))) - return -ERANGE; - spin_lock_bh(&br->lock); if ((p = br_get_port(br, args[1])) == NULL) ret = -EINVAL; else - br_stp_set_port_priority(p, args[2]); + ret = br_stp_set_port_priority(p, args[2]); spin_unlock_bh(&br->lock); return ret; } @@ -295,15 +271,17 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case BRCTL_SET_PATH_COST: { struct net_bridge_port *p; - int ret = 0; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; + 
spin_lock_bh(&br->lock); if ((p = br_get_port(br, args[1])) == NULL) ret = -EINVAL; else - br_stp_set_path_cost(p, args[2]); + ret = br_stp_set_path_cost(p, args[2]); + spin_unlock_bh(&br->lock); return ret; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f8bf4c7f842c..134a2ff6b98b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -12,9 +12,11 @@ #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/etherdevice.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> + #include "br_private.h" static inline size_t br_nlmsg_size(void) @@ -188,20 +190,61 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return 0; } +static int br_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + return 0; +} + +static struct rtnl_link_ops br_link_ops __read_mostly = { + .kind = "bridge", + .priv_size = sizeof(struct net_bridge), + .setup = br_dev_setup, + .validate = br_validate, +}; int __init br_netlink_init(void) { - if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo)) - return -ENOBUFS; + int err; - /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL); + err = rtnl_link_register(&br_link_ops); + if (err < 0) + goto err1; + + err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo); + if (err) + goto err2; + err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL); + if (err) + goto err3; + err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, br_fdb_add, NULL); + if (err) + goto err3; + err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, br_fdb_delete, NULL); + if (err) + goto err3; + err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, br_fdb_dump); + if (err) + goto err3; return 0; + +err3: + rtnl_unregister_all(PF_BRIDGE); +err2: + rtnl_link_unregister(&br_link_ops); +err1: + return err; } void __exit br_netlink_fini(void) { + rtnl_link_unregister(&br_link_ops); rtnl_unregister_all(PF_BRIDGE); } - diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 7d337c9b6082..7a03bb975375 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -36,6 +36,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge *br; int err; + /* register of bridge completed, add sysfs entries */ + if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { + br_sysfs_addbr(dev); + return NOTIFY_DONE; + } + /* not a port of a bridge */ p = br_port_get_rtnl(dev); if (!p) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 387013d33745..e2a40343aa09 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -64,7 +64,8 @@ struct net_bridge_fdb_entry struct net_bridge_port *dst; struct rcu_head rcu; - unsigned long ageing_timer; + unsigned long updated; + unsigned long used; mac_addr addr; unsigned char is_local; unsigned char is_static; @@ -353,6 +354,9 @@ extern int br_fdb_insert(struct net_bridge *br, extern void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); +extern int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb); +extern int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); +extern int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); /*
br_forward.c */ extern void br_deliver(const struct net_bridge_port *to, @@ -491,6 +495,11 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br, extern void br_init_port(struct net_bridge_port *p); extern void br_become_designated_port(struct net_bridge_port *p); +extern int br_set_forward_delay(struct net_bridge *br, unsigned long x); +extern int br_set_hello_time(struct net_bridge *br, unsigned long x); +extern int br_set_max_age(struct net_bridge *br, unsigned long x); + + /* br_stp_if.c */ extern void br_stp_enable_bridge(struct net_bridge *br); extern void br_stp_disable_bridge(struct net_bridge *br); @@ -501,10 +510,10 @@ extern bool br_stp_recalculate_bridge_id(struct net_bridge *br); extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); extern void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio); -extern void br_stp_set_port_priority(struct net_bridge_port *p, - u8 newprio); -extern void br_stp_set_path_cost(struct net_bridge_port *p, - u32 path_cost); +extern int br_stp_set_port_priority(struct net_bridge_port *p, + unsigned long newprio); +extern int br_stp_set_path_cost(struct net_bridge_port *p, + unsigned long path_cost); extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); /* br_stp_bpdu.c */ diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h index 8b650f7fbfa0..642ef47a867e 100644 --- a/net/bridge/br_private_stp.h +++ b/net/bridge/br_private_stp.h @@ -16,6 +16,19 @@ #define BPDU_TYPE_CONFIG 0 #define BPDU_TYPE_TCN 0x80 +/* IEEE 802.1D-1998 timer values */ +#define BR_MIN_HELLO_TIME (1*HZ) +#define BR_MAX_HELLO_TIME (10*HZ) + +#define BR_MIN_FORWARD_DELAY (2*HZ) +#define BR_MAX_FORWARD_DELAY (30*HZ) + +#define BR_MIN_MAX_AGE (6*HZ) +#define BR_MAX_MAX_AGE (40*HZ) + +#define BR_MIN_PATH_COST 1 +#define BR_MAX_PATH_COST 65535 + struct br_config_bpdu { unsigned topology_change:1; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 7370d14f634d..bb4383e84de9 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -484,3 +484,51 @@ void br_received_tcn_bpdu(struct net_bridge_port *p) br_topology_change_acknowledge(p); } } + +/* Change bridge STP parameter */ +int br_set_hello_time(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + + if (t < BR_MIN_HELLO_TIME || t > BR_MAX_HELLO_TIME) + return -ERANGE; + + spin_lock_bh(&br->lock); + br->bridge_hello_time = t; + if (br_is_root_bridge(br)) + br->hello_time = br->bridge_hello_time; + spin_unlock_bh(&br->lock); + return 0; +} + +int br_set_max_age(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + + if (t < BR_MIN_MAX_AGE || t > BR_MAX_MAX_AGE) + return -ERANGE; + + spin_lock_bh(&br->lock); + br->bridge_max_age = t; + if (br_is_root_bridge(br)) + br->max_age = br->bridge_max_age; + spin_unlock_bh(&br->lock); + return 0; + +} + +int br_set_forward_delay(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + + if (br->stp_enabled != BR_NO_STP && + (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) + return -ERANGE; + + spin_lock_bh(&br->lock); + br->bridge_forward_delay = t; + if (br_is_root_bridge(br)) + br->forward_delay = br->bridge_forward_delay; + spin_unlock_bh(&br->lock); + return 0; +} diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 9b61d09de9b9..6f615b8192f4 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -20,7 +20,7 @@ /* Port id is composed of priority 
and port number. - * NB: least significant bits of priority are dropped to + * NB: some bits of priority are dropped to * make room for more ports. */ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) @@ -29,6 +29,8 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) | (port_no & ((1<<BR_PORT_BITS)-1)); } +#define BR_MAX_PORT_PRIORITY ((u16)~0 >> BR_PORT_BITS) + /* called under bridge lock */ void br_init_port(struct net_bridge_port *p) { @@ -255,10 +257,14 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) } /* called under bridge lock */ -void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) +int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio) { - port_id new_port_id = br_make_port_id(newprio, p->port_no); + port_id new_port_id; + + if (newprio > BR_MAX_PORT_PRIORITY) + return -ERANGE; + new_port_id = br_make_port_id(newprio, p->port_no); if (br_is_designated_port(p)) p->designated_port = new_port_id; @@ -269,14 +275,21 @@ void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) br_become_designated_port(p); br_port_state_selection(p->br); } + + return 0; } /* called under bridge lock */ -void br_stp_set_path_cost(struct net_bridge_port *p, u32 path_cost) +int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) { + if (path_cost < BR_MIN_PATH_COST || + path_cost > BR_MAX_PATH_COST) + return -ERANGE; + p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); + return 0; } ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 5c1e5559ebba..68b893ea8c3a 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -43,9 +43,7 @@ static ssize_t store_bridge_parm(struct device *d, if (endp == buf) return -EINVAL; - spin_lock_bh(&br->lock); err = (*set)(br, val); - spin_unlock_bh(&br->lock); return err ? 
err : len; } @@ -57,20 +55,11 @@ static ssize_t show_forward_delay(struct device *d, return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static int set_forward_delay(struct net_bridge *br, unsigned long val) -{ - unsigned long delay = clock_t_to_jiffies(val); - br->forward_delay = delay; - if (br_is_root_bridge(br)) - br->bridge_forward_delay = delay; - return 0; -} - static ssize_t store_forward_delay(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - return store_bridge_parm(d, buf, len, set_forward_delay); + return store_bridge_parm(d, buf, len, br_set_forward_delay); } static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, show_forward_delay, store_forward_delay); @@ -82,24 +71,11 @@ static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static int set_hello_time(struct net_bridge *br, unsigned long val) -{ - unsigned long t = clock_t_to_jiffies(val); - - if (t < HZ) - return -EINVAL; - - br->hello_time = t; - if (br_is_root_bridge(br)) - br->bridge_hello_time = t; - return 0; -} - static ssize_t store_hello_time(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - return store_bridge_parm(d, buf, len, set_hello_time); + return store_bridge_parm(d, buf, len, br_set_hello_time); } static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, store_hello_time); @@ -111,19 +87,10 @@ static ssize_t show_max_age(struct device *d, struct device_attribute *attr, jiffies_to_clock_t(to_bridge(d)->max_age)); } -static int set_max_age(struct net_bridge *br, unsigned long val) -{ - unsigned long t = clock_t_to_jiffies(val); - br->max_age = t; - if (br_is_root_bridge(br)) - br->bridge_max_age = t; - return 0; -} - static ssize_t store_max_age(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - return store_bridge_parm(d, buf, len, set_max_age); + return store_bridge_parm(d, buf, len, br_set_max_age); } static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index fd5799c9bc8d..6229b62749e8 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -23,7 +23,7 @@ struct brport_attribute { struct attribute attr; ssize_t (*show)(struct net_bridge_port *, char *); - ssize_t (*store)(struct net_bridge_port *, unsigned long); + int (*store)(struct net_bridge_port *, unsigned long); }; #define BRPORT_ATTR(_name,_mode,_show,_store) \ @@ -38,27 +38,17 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%d\n", p->path_cost); } -static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v) -{ - br_stp_set_path_cost(p, v); - return 0; -} + static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, - show_path_cost, store_path_cost); + show_path_cost, br_stp_set_path_cost); static ssize_t show_priority(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%d\n", p->priority); } -static ssize_t store_priority(struct net_bridge_port *p, unsigned long v) -{ - if (v >= (1<<(16-BR_PORT_BITS))) - return -ERANGE; - br_stp_set_port_priority(p, v); - return 0; -} + static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, - show_priority, store_priority); + show_priority, br_stp_set_port_priority); static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) { @@ -136,7 +126,7 @@ static ssize_t show_hold_timer(struct net_bridge_port *p, } static BRPORT_ATTR(hold_timer, S_IRUGO, 
show_hold_timer, NULL); -static ssize_t store_flush(struct net_bridge_port *p, unsigned long v) +static int store_flush(struct net_bridge_port *p, unsigned long v) { br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry return 0; @@ -148,7 +138,7 @@ static ssize_t show_hairpin_mode(struct net_bridge_port *p, char *buf) int hairpin_mode = (p->flags & BR_HAIRPIN_MODE) ? 1 : 0; return sprintf(buf, "%d\n", hairpin_mode); } -static ssize_t store_hairpin_mode(struct net_bridge_port *p, unsigned long v) +static int store_hairpin_mode(struct net_bridge_port *p, unsigned long v) { if (v) p->flags |= BR_HAIRPIN_MODE; @@ -165,7 +155,7 @@ static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) return sprintf(buf, "%d\n", p->multicast_router); } -static ssize_t store_multicast_router(struct net_bridge_port *p, +static int store_multicast_router(struct net_bridge_port *p, unsigned long v) { return br_multicast_set_port_router(p, v); diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c index d522d8c1703e..9b63e4e3910e 100644 --- a/net/caif/caif_config_util.c +++ b/net/caif/caif_config_util.c @@ -10,9 +10,9 @@ #include <net/caif/cfcnfg.h> #include <net/caif/caif_dev.h> -int connect_req_to_link_param(struct cfcnfg *cnfg, - struct caif_connect_request *s, - struct cfctrl_link_param *l) +int caif_connect_req_to_link_param(struct cfcnfg *cnfg, + struct caif_connect_request *s, + struct cfctrl_link_param *l) { struct dev_info *dev_info; enum cfcnfg_phy_preference pref; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index a42a408306e4..a518fdd4da0a 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -120,25 +120,12 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt) { struct caif_device_entry *caifd = container_of(layer, struct caif_device_entry, layer); - struct sk_buff *skb, *skb2; - int ret = -EINVAL; + struct sk_buff *skb; + skb = cfpkt_tonative(pkt); skb->dev = caifd->netdev; - /* - * Don't allow SKB to be destroyed upon error, but signal resend - * notification to clients. We can't rely on the return value as - * congestion (NET_XMIT_CN) sometimes drops the packet, sometimes don't. - */ - if (netif_queue_stopped(caifd->netdev)) - return -EAGAIN; - skb2 = skb_get(skb); - - ret = dev_queue_xmit(skb2); - - if (!ret) - kfree_skb(skb); - else - return -EAGAIN; + + dev_queue_xmit(skb); return 0; } @@ -257,7 +244,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, break; } dev_hold(dev); - cfcnfg_add_phy_layer(get_caif_conf(), + cfcnfg_add_phy_layer(cfg, phy_type, dev, &caifd->layer, @@ -300,7 +287,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, if (atomic_read(&caifd->in_use)) netdev_warn(dev, "Unregistering an active CAIF device\n"); - cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); + cfcnfg_del_phy_layer(cfg, &caifd->layer); dev_put(dev); atomic_set(&caifd->state, what); break; @@ -322,24 +309,18 @@ static struct notifier_block caif_device_notifier = { .priority = 0, }; - -struct cfcnfg *get_caif_conf(void) -{ - return cfg; -} -EXPORT_SYMBOL(get_caif_conf); - int caif_connect_client(struct caif_connect_request *conn_req, struct cflayer *client_layer, int *ifindex, int *headroom, int *tailroom) { struct cfctrl_link_param param; int ret; - ret = connect_req_to_link_param(get_caif_conf(), conn_req, ¶m); + + ret = caif_connect_req_to_link_param(cfg, conn_req, ¶m); if (ret) return ret; /* Hook up the adaptation layer. 
*/ - return cfcnfg_add_adaptation_layer(get_caif_conf(), ¶m, + return cfcnfg_add_adaptation_layer(cfg, ¶m, client_layer, ifindex, headroom, tailroom); } @@ -347,16 +328,10 @@ EXPORT_SYMBOL(caif_connect_client); int caif_disconnect_client(struct cflayer *adap_layer) { - return cfcnfg_disconn_adapt_layer(get_caif_conf(), adap_layer); + return cfcnfg_disconn_adapt_layer(cfg, adap_layer); } EXPORT_SYMBOL(caif_disconnect_client); -void caif_release_client(struct cflayer *adap_layer) -{ - cfcnfg_release_adap_layer(adap_layer); -} -EXPORT_SYMBOL(caif_release_client); - /* Per-namespace Caif devices handling */ static int caif_init_net(struct net *net) { diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 8184c031d028..20212424e2e8 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -519,43 +519,14 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, int noblock, long timeo) { struct cfpkt *pkt; - int ret, loopcnt = 0; pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb); memset(cfpkt_info(pkt), 0, sizeof(struct caif_payload_info)); - do { - - ret = -ETIMEDOUT; - /* Slight paranoia, probably not needed. */ - if (unlikely(loopcnt++ > 1000)) { - pr_warn("transmit retries failed, error = %d\n", ret); - break; - } + if (cf_sk->layer.dn == NULL) + return -EINVAL; - if (cf_sk->layer.dn != NULL) - ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); - if (likely(ret >= 0)) - break; - /* if transmit return -EAGAIN, then retry */ - if (noblock && ret == -EAGAIN) - break; - timeo = caif_wait_for_flow_on(cf_sk, 0, timeo, &ret); - if (signal_pending(current)) { - ret = sock_intr_errno(timeo); - break; - } - if (ret) - break; - if (cf_sk->sk.sk_state != CAIF_CONNECTED || - sock_flag(&cf_sk->sk, SOCK_DEAD) || - (cf_sk->sk.sk_shutdown & RCV_SHUTDOWN)) { - ret = -EPIPE; - cf_sk->sk.sk_err = EPIPE; - break; - } - } while (ret == -EAGAIN); - return ret; + return cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); } /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ @@ -852,7 +823,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTING; sk->sk_state = CAIF_CONNECTING; - /* Check priority value comming from socket */ + /* Check priority value coming from socket */ /* if priority value is out of range it will be ajusted */ if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX) cf_sk->conn_req.priority = CAIF_PRIO_MAX; diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index f1f98d967d8a..25c0b198e285 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -253,7 +253,7 @@ static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id) { } -int protohead[CFCTRL_SRV_MASK] = { +static const int protohead[CFCTRL_SRV_MASK] = { [CFCTRL_SRV_VEI] = 4, [CFCTRL_SRV_DATAGRAM] = 7, [CFCTRL_SRV_UTIL] = 4, diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 3cd8f978e309..397a2c099e2c 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -58,7 +58,8 @@ struct cflayer *cfctrl_create(void) return &this->serv.layer; } -static bool param_eq(struct cfctrl_link_param *p1, struct cfctrl_link_param *p2) +static bool param_eq(const struct cfctrl_link_param *p1, + const struct cfctrl_link_param *p2) { bool eq = p1->linktype == p2->linktype && @@ -100,8 +101,8 @@ static bool param_eq(struct cfctrl_link_param *p1, struct cfctrl_link_param *p2) return false; } -bool cfctrl_req_eq(struct cfctrl_request_info *r1, - struct cfctrl_request_info *r2) +static bool cfctrl_req_eq(const struct cfctrl_request_info *r1, + const struct 
cfctrl_request_info *r2) { if (r1->cmd != r2->cmd) return false; @@ -112,7 +113,7 @@ bool cfctrl_req_eq(struct cfctrl_request_info *r1, } /* Insert request at the end */ -void cfctrl_insert_req(struct cfctrl *ctrl, +static void cfctrl_insert_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) { spin_lock(&ctrl->info_list_lock); @@ -123,8 +124,8 @@ void cfctrl_insert_req(struct cfctrl *ctrl, } /* Compare and remove request */ -struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, - struct cfctrl_request_info *req) +static struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, + struct cfctrl_request_info *req) { struct cfctrl_request_info *p, *tmp, *first; @@ -154,16 +155,6 @@ struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer) return &this->res; } -void cfctrl_set_dnlayer(struct cflayer *this, struct cflayer *dn) -{ - this->dn = dn; -} - -void cfctrl_set_uplayer(struct cflayer *this, struct cflayer *up) -{ - this->up = up; -} - static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl) { info->hdr_len = 0; @@ -304,58 +295,6 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, return ret; } -void cfctrl_sleep_req(struct cflayer *layer) -{ - int ret; - struct cfctrl *cfctrl = container_obj(layer); - struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) { - pr_warn("Out of memory\n"); - return; - } - cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP); - init_info(cfpkt_info(pkt), cfctrl); - ret = - cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); - if (ret < 0) - cfpkt_destroy(pkt); -} - -void cfctrl_wake_req(struct cflayer *layer) -{ - int ret; - struct cfctrl *cfctrl = container_obj(layer); - struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) { - pr_warn("Out of memory\n"); - return; - } - cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE); - init_info(cfpkt_info(pkt), cfctrl); - ret = - cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); - if (ret < 0) - cfpkt_destroy(pkt); -} - -void cfctrl_getstartreason_req(struct cflayer *layer) -{ - int ret; - struct cfctrl *cfctrl = container_obj(layer); - struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) { - pr_warn("Out of memory\n"); - return; - } - cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON); - init_info(cfpkt_info(pkt), cfctrl); - ret = - cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); - if (ret < 0) - cfpkt_destroy(pkt); -} - - void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) { struct cfctrl_request_info *p, *tmp; diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index 27dab26ad3b8..0382dec84fdc 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -13,6 +13,7 @@ #include <net/caif/cfsrvl.h> #include <net/caif/cfpkt.h> + #define container_obj(layr) ((struct cfsrvl *) layr) #define DGM_CMD_BIT 0x80 @@ -83,6 +84,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt) static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) { + u8 packet_type; u32 zero = 0; struct caif_payload_info *info; struct cfsrvl *service = container_obj(layr); @@ -94,7 +96,9 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) if (cfpkt_getlen(pkt) > DGM_MTU) return -EMSGSIZE; - cfpkt_add_head(pkt, &zero, 4); + cfpkt_add_head(pkt, &zero, 3); + packet_type = 0x08; /* B9 set - UNCLASSIFIED */ + cfpkt_add_head(pkt, &packet_type, 1); /* Add info for MUX-layer to route the packet out. 
*/ info = cfpkt_info(pkt); @@ -104,10 +108,5 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) */ info->hdr_len = 4; info->dev_info = &service->dev_info; - ret = layr->dn->transmit(layr->dn, pkt); - if (ret < 0) { - u32 tmp32; - cfpkt_extr_head(pkt, &tmp32, 4); - } - return ret; + return layr->dn->transmit(layr->dn, pkt); } diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index a445043931ae..2423fed8e26c 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -120,7 +120,6 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) int tmp; u16 chks; u16 len; - int ret; struct cffrml *this = container_obj(layr); if (this->dofcs) { chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); @@ -137,12 +136,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) pr_err("Packet is erroneous!\n"); return -EPROTO; } - ret = layr->dn->transmit(layr->dn, pkt); - if (ret < 0) { - /* Remove header on faulty packet. */ - cfpkt_extr_head(pkt, &tmp, 2); - } - return ret; + return layr->dn->transmit(layr->dn, pkt); } static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 46f34b2e0478..fc2497468571 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -71,41 +71,6 @@ int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid) return 0; } -bool cfmuxl_is_phy_inuse(struct cflayer *layr, u8 phyid) -{ - struct list_head *node; - struct cflayer *layer; - struct cfmuxl *muxl = container_obj(layr); - bool match = false; - spin_lock(&muxl->receive_lock); - - list_for_each(node, &muxl->srvl_list) { - layer = list_entry(node, struct cflayer, node); - if (cfsrvl_phyid_match(layer, phyid)) { - match = true; - break; - } - - } - spin_unlock(&muxl->receive_lock); - return match; -} - -u8 cfmuxl_get_phyid(struct cflayer *layr, u8 channel_id) -{ - struct cflayer *up; - int phyid; - struct cfmuxl *muxl = container_obj(layr); - spin_lock(&muxl->receive_lock); - up = get_up(muxl, channel_id); - if (up != NULL) - phyid = cfsrvl_getphyid(up); - else - phyid = 0; - spin_unlock(&muxl->receive_lock); - return phyid; -} - int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid) { struct cfmuxl *muxl = (struct cfmuxl *) layr; @@ -219,12 +184,12 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt) static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt) { - int ret; struct cfmuxl *muxl = container_obj(layr); u8 linkid; struct cflayer *dn; struct caif_payload_info *info = cfpkt_info(pkt); - dn = get_dn(muxl, cfpkt_info(pkt)->dev_info); + BUG_ON(!info); + dn = get_dn(muxl, info->dev_info); if (dn == NULL) { pr_warn("Send data on unknown phy ID = %d (0x%x)\n", info->dev_info->id, info->dev_info->id); @@ -233,20 +198,16 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt) info->hdr_len += 1; linkid = info->channel_id; cfpkt_add_head(pkt, &linkid, 1); - ret = dn->transmit(dn, pkt); - /* Remove MUX protocol header upon error. 
*/ - if (ret < 0) - cfpkt_extr_head(pkt, &linkid, 1); - return ret; + return dn->transmit(dn, pkt); } static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid) { struct cfmuxl *muxl = container_obj(layr); - struct list_head *node; + struct list_head *node, *next; struct cflayer *layer; - list_for_each(node, &muxl->srvl_list) { + list_for_each_safe(node, next, &muxl->srvl_list) { layer = list_entry(node, struct cflayer, node); if (cfsrvl_phyid_match(layer, phyid)) layer->ctrlcmd(layer, ctrl, phyid); diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index d7e865e2ff65..20c6cb3522e0 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -42,22 +42,22 @@ struct cfpkt_priv_data { bool erronous; }; -inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt) +static inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt) { return (struct cfpkt_priv_data *) pkt->skb.cb; } -inline bool is_erronous(struct cfpkt *pkt) +static inline bool is_erronous(struct cfpkt *pkt) { return cfpkt_priv(pkt)->erronous; } -inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt) +static inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt) { return &pkt->skb; } -inline struct cfpkt *skb_to_pkt(struct sk_buff *skb) +static inline struct cfpkt *skb_to_pkt(struct sk_buff *skb) { return (struct cfpkt *) skb; } @@ -317,17 +317,6 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len) } EXPORT_SYMBOL(cfpkt_setlen); -struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len) -{ - struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX); - if (!pkt) - return NULL; - if (unlikely(data != NULL)) - cfpkt_add_body(pkt, data, len); - return pkt; -} -EXPORT_SYMBOL(cfpkt_create_uplink); - struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, struct cfpkt *addpkt, u16 expectlen) @@ -408,169 +397,12 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) } EXPORT_SYMBOL(cfpkt_split); -char *cfpkt_log_pkt(struct cfpkt *pkt, char *buf, int buflen) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - char *p = buf; - int i; - - /* - * Sanity check buffer length, it needs to be at least as large as - * the header info: ~=50+ bytes - */ - if (buflen < 50) - return NULL; - - snprintf(buf, buflen, "%s: pkt:%p len:%ld(%ld+%ld) {%ld,%ld} data: [", - is_erronous(pkt) ? "ERRONOUS-SKB" : - (skb->data_len != 0 ? 
"COMPLEX-SKB" : "SKB"), - skb, - (long) skb->len, - (long) (skb_tail_pointer(skb) - skb->data), - (long) skb->data_len, - (long) (skb->data - skb->head), - (long) (skb_tail_pointer(skb) - skb->head)); - p = buf + strlen(buf); - - for (i = 0; i < skb_tail_pointer(skb) - skb->data && i < 300; i++) { - if (p > buf + buflen - 10) { - sprintf(p, "..."); - p = buf + strlen(buf); - break; - } - sprintf(p, "%02x,", skb->data[i]); - p = buf + strlen(buf); - } - sprintf(p, "]\n"); - return buf; -} -EXPORT_SYMBOL(cfpkt_log_pkt); - -int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - struct sk_buff *lastskb; - - caif_assert(buf != NULL); - if (unlikely(is_erronous(pkt))) - return -EPROTO; - /* Make sure SKB is writable */ - if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { - PKT_ERROR(pkt, "skb_cow_data failed\n"); - return -EPROTO; - } - - if (unlikely(skb_linearize(skb) != 0)) { - PKT_ERROR(pkt, "linearize failed\n"); - return -EPROTO; - } - - if (unlikely(skb_tailroom(skb) < buflen)) { - PKT_ERROR(pkt, "buffer too short - failed\n"); - return -EPROTO; - } - - *buf = skb_put(skb, buflen); - return 1; -} -EXPORT_SYMBOL(cfpkt_raw_append); - -int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - - caif_assert(buf != NULL); - if (unlikely(is_erronous(pkt))) - return -EPROTO; - - if (unlikely(buflen > skb->len)) { - PKT_ERROR(pkt, "buflen too large - failed\n"); - return -EPROTO; - } - - if (unlikely(buflen > skb_headlen(skb))) { - if (unlikely(skb_linearize(skb) != 0)) { - PKT_ERROR(pkt, "linearize failed\n"); - return -EPROTO; - } - } - - *buf = skb->data; - skb_pull(skb, buflen); - - return 1; -} -EXPORT_SYMBOL(cfpkt_raw_extract); - -inline bool cfpkt_erroneous(struct cfpkt *pkt) +bool cfpkt_erroneous(struct cfpkt *pkt) { return cfpkt_priv(pkt)->erronous; } EXPORT_SYMBOL(cfpkt_erroneous); -struct cfpktq *cfpktq_create(void) -{ - struct cfpktq *q = kmalloc(sizeof(struct cfpktq), GFP_ATOMIC); - if (!q) - return NULL; - skb_queue_head_init(&q->head); - atomic_set(&q->count, 0); - spin_lock_init(&q->lock); - return q; -} -EXPORT_SYMBOL(cfpktq_create); - -void cfpkt_queue(struct cfpktq *pktq, struct cfpkt *pkt, unsigned short prio) -{ - atomic_inc(&pktq->count); - spin_lock(&pktq->lock); - skb_queue_tail(&pktq->head, pkt_to_skb(pkt)); - spin_unlock(&pktq->lock); - -} -EXPORT_SYMBOL(cfpkt_queue); - -struct cfpkt *cfpkt_qpeek(struct cfpktq *pktq) -{ - struct cfpkt *tmp; - spin_lock(&pktq->lock); - tmp = skb_to_pkt(skb_peek(&pktq->head)); - spin_unlock(&pktq->lock); - return tmp; -} -EXPORT_SYMBOL(cfpkt_qpeek); - -struct cfpkt *cfpkt_dequeue(struct cfpktq *pktq) -{ - struct cfpkt *pkt; - spin_lock(&pktq->lock); - pkt = skb_to_pkt(skb_dequeue(&pktq->head)); - if (pkt) { - atomic_dec(&pktq->count); - caif_assert(atomic_read(&pktq->count) >= 0); - } - spin_unlock(&pktq->lock); - return pkt; -} -EXPORT_SYMBOL(cfpkt_dequeue); - -int cfpkt_qcount(struct cfpktq *pktq) -{ - return atomic_read(&pktq->count); -} -EXPORT_SYMBOL(cfpkt_qcount); - -struct cfpkt *cfpkt_clone_release(struct cfpkt *pkt) -{ - struct cfpkt *clone; - clone = skb_to_pkt(skb_clone(pkt_to_skb(pkt), GFP_ATOMIC)); - /* Free original packet. 
*/ - cfpkt_destroy(pkt); - if (!clone) - return NULL; - return clone; -} -EXPORT_SYMBOL(cfpkt_clone_release); struct caif_payload_info *cfpkt_info(struct cfpkt *pkt) { diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 8303fe3ebf89..2715c84cfa87 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -179,15 +179,10 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt) { struct cfserl *layr = container_obj(layer); - int ret; u8 tmp8 = CFSERL_STX; if (layr->usestx) cfpkt_add_head(newpkt, &tmp8, 1); - ret = layer->dn->transmit(layer->dn, newpkt); - if (ret < 0) - cfpkt_extr_head(newpkt, &tmp8, 1); - - return ret; + return layer->dn->transmit(layer->dn, newpkt); } static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index ab5e542526bf..24ba392f203b 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -151,12 +151,7 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) return -EINVAL; } -void cfservl_destroy(struct cflayer *layer) -{ - kfree(layer); -} - -void cfsrvl_release(struct kref *kref) +static void cfsrvl_release(struct kref *kref) { struct cfsrvl *service = container_of(kref, struct cfsrvl, ref); kfree(service); diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 315c0d601368..98e027db18ed 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -100,10 +100,5 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt) */ info->hdr_len = 1; info->dev_info = &service->dev_info; - ret = layr->dn->transmit(layr->dn, pkt); - if (ret < 0) { - u32 tmp32; - cfpkt_extr_head(pkt, &tmp32, 4); - } - return ret; + return layr->dn->transmit(layr->dn, pkt); } diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index c3b1dec4acf6..1a588cd818ea 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -96,8 +96,5 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) info->channel_id = service->layer.id; info->hdr_len = 1; info->dev_info = &service->dev_info; - ret = layr->dn->transmit(layr->dn, pkt); - if (ret < 0) - cfpkt_extr_head(pkt, &tmp, 1); - return ret; + return layr->dn->transmit(layr->dn, pkt); } diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index bf6fef2a0eff..b2f5989ad455 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -60,8 +60,5 @@ static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt) info = cfpkt_info(pkt); info->channel_id = service->layer.id; info->dev_info = &service->dev_info; - ret = layr->dn->transmit(layr->dn, pkt); - if (ret < 0) - cfpkt_extr_head(pkt, &videoheader, 4); - return ret; + return layr->dn->transmit(layr->dn, pkt); } diff --git a/net/can/af_can.c b/net/can/af_can.c index 733d66f1b05a..a8dcaa49675a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -85,7 +85,7 @@ static struct kmem_cache *rcv_cache __read_mostly; /* table of registered CAN protocols */ static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; -static DEFINE_SPINLOCK(proto_tab_lock); +static DEFINE_MUTEX(proto_tab_lock); struct timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ @@ -115,6 +115,19 @@ static void can_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } +static struct can_proto *can_try_module_get(int protocol) +{ + struct can_proto *cp; + + rcu_read_lock(); + cp = rcu_dereference(proto_tab[protocol]); + if (cp && 
!try_module_get(cp->prot->owner)) + cp = NULL; + rcu_read_unlock(); + + return cp; +} + static int can_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -130,9 +143,12 @@ static int can_create(struct net *net, struct socket *sock, int protocol, if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; + cp = can_try_module_get(protocol); + #ifdef CONFIG_MODULES - /* try to load protocol module kernel is modular */ - if (!proto_tab[protocol]) { + if (!cp) { + /* try to load protocol module if kernel is modular */ + err = request_module("can-proto-%d", protocol); /* @@ -143,22 +159,18 @@ static int can_create(struct net *net, struct socket *sock, int protocol, if (err && printk_ratelimit()) printk(KERN_ERR "can: request_module " "(can-proto-%d) failed.\n", protocol); + + cp = can_try_module_get(protocol); } #endif - spin_lock(&proto_tab_lock); - cp = proto_tab[protocol]; - if (cp && !try_module_get(cp->prot->owner)) - cp = NULL; - spin_unlock(&proto_tab_lock); - /* check for available protocol and correct usage */ if (!cp) return -EPROTONOSUPPORT; if (cp->type != sock->type) { - err = -EPROTONOSUPPORT; + err = -EPROTOTYPE; goto errout; } @@ -694,15 +706,16 @@ int can_proto_register(struct can_proto *cp) if (err < 0) return err; - spin_lock(&proto_tab_lock); + mutex_lock(&proto_tab_lock); + if (proto_tab[proto]) { printk(KERN_ERR "can: protocol %d already registered\n", proto); err = -EBUSY; } else - proto_tab[proto] = cp; + rcu_assign_pointer(proto_tab[proto], cp); - spin_unlock(&proto_tab_lock); + mutex_unlock(&proto_tab_lock); if (err < 0) proto_unregister(cp->prot); @@ -719,13 +732,12 @@ void can_proto_unregister(struct can_proto *cp) { int proto = cp->protocol; - spin_lock(&proto_tab_lock); - if (!proto_tab[proto]) { - printk(KERN_ERR "BUG: can: protocol %d is not registered\n", - proto); - } - proto_tab[proto] = NULL; - spin_unlock(&proto_tab_lock); + mutex_lock(&proto_tab_lock); + BUG_ON(proto_tab[proto] != cp); + rcu_assign_pointer(proto_tab[proto], NULL); + mutex_unlock(&proto_tab_lock); + + synchronize_rcu(); proto_unregister(cp->prot); } diff --git a/net/can/bcm.c b/net/can/bcm.c index 871a0ad51025..57b1aed79014 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -387,7 +387,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data) } /* - * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions + * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) { diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index ad424049b0cf..be683f2d401f 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -4,6 +4,7 @@ config CEPH_LIB select LIBCRC32C select CRYPTO_AES select CRYPTO + select KEYS default n help Choose Y or M here to include cephlib, which provides the diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 549c1f43e1d5..b4bf4ac090f1 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -35,12 +35,12 @@ static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) /* * setup, teardown. 
*/ -struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret) +struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key) { struct ceph_auth_client *ac; int ret; - dout("auth_init name '%s' secret '%s'\n", name, secret); + dout("auth_init name '%s'\n", name); ret = -ENOMEM; ac = kzalloc(sizeof(*ac), GFP_NOFS); @@ -52,8 +52,8 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret) ac->name = name; else ac->name = CEPH_AUTH_NAME_DEFAULT; - dout("auth_init name %s secret %s\n", ac->name, secret); - ac->secret = secret; + dout("auth_init name %s\n", ac->name); + ac->key = key; return ac; out: diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 7fd5dfcf6e18..1587dc6010c6 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -662,14 +662,16 @@ int ceph_x_init(struct ceph_auth_client *ac) goto out; ret = -EINVAL; - if (!ac->secret) { + if (!ac->key) { pr_err("no secret set (for auth_x protocol)\n"); goto out_nomem; } - ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); - if (ret) + ret = ceph_crypto_key_clone(&xi->secret, ac->key); + if (ret < 0) { + pr_err("cannot clone key: %d\n", ret); goto out_nomem; + } xi->starting = true; xi->ticket_handlers = RB_ROOT; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 95f96ab94bba..132963abc266 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -5,6 +5,8 @@ #include <linux/fs.h> #include <linux/inet.h> #include <linux/in6.h> +#include <linux/key.h> +#include <keys/ceph-type.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/parser.h> @@ -20,6 +22,7 @@ #include <linux/ceph/decode.h> #include <linux/ceph/mon_client.h> #include <linux/ceph/auth.h> +#include "crypto.h" @@ -117,9 +120,29 @@ int ceph_compare_options(struct ceph_options *new_opt, if (ret) return ret; - ret = strcmp_null(opt1->secret, opt2->secret); - if (ret) - return ret; + if (opt1->key && !opt2->key) + return -1; + if (!opt1->key && opt2->key) + return 1; + if (opt1->key && opt2->key) { + if (opt1->key->type != opt2->key->type) + return -1; + if (opt1->key->created.tv_sec != opt2->key->created.tv_sec) + return -1; + if (opt1->key->created.tv_nsec != opt2->key->created.tv_nsec) + return -1; + if (opt1->key->len != opt2->key->len) + return -1; + if (opt1->key->key && !opt2->key->key) + return -1; + if (!opt1->key->key && opt2->key->key) + return 1; + if (opt1->key->key && opt2->key->key) { + ret = memcmp(opt1->key->key, opt2->key->key, opt1->key->len); + if (ret) + return ret; + } + } /* any matching mon ip implies a match */ for (i = 0; i < opt1->num_mon; i++) { @@ -176,6 +199,7 @@ enum { Opt_fsid, Opt_name, Opt_secret, + Opt_key, Opt_ip, Opt_last_string, /* string args above */ @@ -192,6 +216,7 @@ static match_table_t opt_tokens = { {Opt_fsid, "fsid=%s"}, {Opt_name, "name=%s"}, {Opt_secret, "secret=%s"}, + {Opt_key, "key=%s"}, {Opt_ip, "ip=%s"}, /* string args above */ {Opt_noshare, "noshare"}, @@ -203,11 +228,56 @@ void ceph_destroy_options(struct ceph_options *opt) { dout("destroy_options %p\n", opt); kfree(opt->name); - kfree(opt->secret); + if (opt->key) { + ceph_crypto_key_destroy(opt->key); + kfree(opt->key); + } kfree(opt); } EXPORT_SYMBOL(ceph_destroy_options); +/* get secret from key store */ +static int get_secret(struct ceph_crypto_key *dst, const char *name) { + struct key *ukey; + int key_err; + int err = 0; + struct ceph_crypto_key *ckey; + + ukey = request_key(&key_type_ceph, name, NULL); + if (!ukey || IS_ERR(ukey)) { + /* request_key 
errors don't map nicely to mount(2) + errors; don't even try, but still printk */ + key_err = PTR_ERR(ukey); + switch (key_err) { + case -ENOKEY: + pr_warning("ceph: Mount failed due to key not found: %s\n", name); + break; + case -EKEYEXPIRED: + pr_warning("ceph: Mount failed due to expired key: %s\n", name); + break; + case -EKEYREVOKED: + pr_warning("ceph: Mount failed due to revoked key: %s\n", name); + break; + default: + pr_warning("ceph: Mount failed due to unknown key error" + " %d: %s\n", key_err, name); + } + err = -EPERM; + goto out; + } + + ckey = ukey->payload.data; + err = ceph_crypto_key_clone(dst, ckey); + if (err) + goto out_key; + /* pass through, err is 0 */ + +out_key: + key_put(ukey); +out: + return err; +} + int ceph_parse_options(struct ceph_options **popt, char *options, const char *dev_name, const char *dev_name_end, int (*parse_extra_token)(char *c, void *private), @@ -295,9 +365,24 @@ int ceph_parse_options(struct ceph_options **popt, char *options, GFP_KERNEL); break; case Opt_secret: - opt->secret = kstrndup(argstr[0].from, - argstr[0].to-argstr[0].from, - GFP_KERNEL); + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); + if (!opt->key) { + err = -ENOMEM; + goto out; + } + err = ceph_crypto_key_unarmor(opt->key, argstr[0].from); + if (err < 0) + goto out; + break; + case Opt_key: + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); + if (!opt->key) { + err = -ENOMEM; + goto out; + } + err = get_secret(opt->key, argstr[0].from); + if (err < 0) + goto out; break; /* misc */ @@ -394,8 +479,8 @@ void ceph_destroy_client(struct ceph_client *client) ceph_osdc_stop(&client->osdc); /* - * make sure mds and osd connections close out before destroying - * the auth module, which is needed to free those connections' + * make sure osd connections close out before destroying the + * auth module, which is needed to free those connections' * ceph_authorizers. 
*/ ceph_msgr_flush(); @@ -496,10 +581,14 @@ static int __init init_ceph_lib(void) if (ret < 0) goto out; - ret = ceph_msgr_init(); + ret = ceph_crypto_init(); if (ret < 0) goto out_debugfs; + ret = ceph_msgr_init(); + if (ret < 0) + goto out_crypto; + pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, @@ -507,6 +596,8 @@ static int __init init_ceph_lib(void) return 0; +out_crypto: + ceph_crypto_shutdown(); out_debugfs: ceph_debugfs_cleanup(); out: @@ -517,6 +608,7 @@ static void __exit exit_ceph_lib(void) { dout("exit_ceph_lib\n"); ceph_msgr_exit(); + ceph_crypto_shutdown(); ceph_debugfs_cleanup(); } diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 7b505b0c983f..5a8009c9e0cd 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -5,10 +5,23 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <crypto/hash.h> +#include <linux/key-type.h> +#include <keys/ceph-type.h> #include <linux/ceph/decode.h> #include "crypto.h" +int ceph_crypto_key_clone(struct ceph_crypto_key *dst, + const struct ceph_crypto_key *src) +{ + memcpy(dst, src, sizeof(struct ceph_crypto_key)); + dst->key = kmalloc(src->len, GFP_NOFS); + if (!dst->key) + return -ENOMEM; + memcpy(dst->key, src->key, src->len); + return 0; +} + int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) { if (*p + sizeof(u16) + sizeof(key->created) + @@ -410,3 +423,63 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, return -EINVAL; } } + +int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) +{ + struct ceph_crypto_key *ckey; + int ret; + void *p; + + ret = -EINVAL; + if (datalen <= 0 || datalen > 32767 || !data) + goto err; + + ret = key_payload_reserve(key, datalen); + if (ret < 0) + goto err; + + ret = -ENOMEM; + ckey = kmalloc(sizeof(*ckey), GFP_KERNEL); + if (!ckey) + goto err; + + /* TODO ceph_crypto_key_decode should really take const input */ + p = (void*)data; + ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen); + if (ret < 0) + goto err_ckey; + + key->payload.data = ckey; + return 0; + +err_ckey: + kfree(ckey); +err: + return ret; +} + +int ceph_key_match(const struct key *key, const void *description) +{ + return strcmp(key->description, description) == 0; +} + +void ceph_key_destroy(struct key *key) { + struct ceph_crypto_key *ckey = key->payload.data; + + ceph_crypto_key_destroy(ckey); +} + +struct key_type key_type_ceph = { + .name = "ceph", + .instantiate = ceph_key_instantiate, + .match = ceph_key_match, + .destroy = ceph_key_destroy, +}; + +int ceph_crypto_init(void) { + return register_key_type(&key_type_ceph); +} + +void ceph_crypto_shutdown(void) { + unregister_key_type(&key_type_ceph); +} diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index f9eccace592b..1919d1550d75 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -19,6 +19,8 @@ static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) kfree(key->key); } +extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst, + const struct ceph_crypto_key *src); extern int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end); extern int ceph_crypto_key_decode(struct ceph_crypto_key *key, @@ -40,6 +42,8 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, const void *src1, size_t src1_len, const void *src2, size_t src2_len); +extern int ceph_crypto_init(void); +extern void ceph_crypto_shutdown(void); 
/* armor.c */ extern int ceph_armor(char *dst, const char *src, const char *end); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 8a079399174a..cbe31fa45508 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -759,7 +759,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) /* authentication */ monc->auth = ceph_auth_init(cl->options->name, - cl->options->secret); + cl->options->key); if (IS_ERR(monc->auth)) return PTR_ERR(monc->auth); monc->auth->want_keys = diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 02212ed50852..50af02737a3d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -837,8 +837,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, dout("moving osd to %p lru\n", req->r_osd); __move_osd_to_lru(osdc, req->r_osd); } - if (list_empty(&req->r_osd_item) && - list_empty(&req->r_linger_item)) + if (list_empty(&req->r_linger_item)) req->r_osd = NULL; } @@ -883,7 +882,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, dout("moving osd to %p lru\n", req->r_osd); __move_osd_to_lru(osdc, req->r_osd); } - req->r_osd = NULL; + if (list_empty(&req->r_osd_item)) + req->r_osd = NULL; } } @@ -917,7 +917,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); /* * Pick an osd (the first 'up' osd in the pg), allocate the osd struct * (as needed), and set the request r_osd appropriately. If there is - * no up osd, set r_osd to NULL. Move the request to the appropiate list + * no up osd, set r_osd to NULL. Move the request to the appropriate list * (unsent, homeless) or leave on in-flight lru. * * Return 0 if unchanged, 1 if changed, or negative on error. @@ -1602,11 +1602,11 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) cookie, ver, event); if (event) { event_work = kmalloc(sizeof(*event_work), GFP_NOIO); - INIT_WORK(&event_work->work, do_event_work); if (!event_work) { dout("ERROR: could not allocate event_work\n"); goto done_err; } + INIT_WORK(&event_work->work, do_event_work); event_work->event = event; event_work->ver = ver; event_work->notify_id = notify_id; @@ -1672,7 +1672,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, if (req->r_sent == 0) { rc = __map_request(osdc, req); if (rc < 0) - return rc; + goto out_unlock; if (req->r_osd == NULL) { dout("send_request %p no up osds in pg\n", req); ceph_monc_request_next_osdmap(&osdc->client->monc); @@ -1689,6 +1689,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, } } } + +out_unlock: mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); return rc; diff --git a/net/core/dev.c b/net/core/dev.c index 3da9fb06d47a..95897ff3a76f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2091,7 +2091,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 features; /* - * If device doesnt need skb->dst, release it right now while + * If device doesn't need skb->dst, release it right now while * its hot in this cpu cache */ if (dev->priv_flags & IFF_XMIT_DST_RELEASE) @@ -2151,7 +2151,7 @@ gso: nskb->next = NULL; /* - * If device doesnt need nskb->dst, release it right now while + * If device doesn't need nskb->dst, release it right now while * its hot in this cpu cache */ if (dev->priv_flags & IFF_XMIT_DST_RELEASE) @@ -2970,8 +2970,8 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); * when CONFIG_NET_CLS_ACT is? 
otherwise some useless instructions * a compare and 2 stores extra right now if we dont have it on * but have CONFIG_NET_CLS_ACT - * NOTE: This doesnt stop any functionality; if you dont have - * the ingress scheduler, you just cant add policies on ingress. + * NOTE: This doesn't stop any functionality; if you dont have + * the ingress scheduler, you just can't add policies on ingress. * */ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) @@ -3800,7 +3800,7 @@ static void net_rx_action(struct softirq_action *h) * with netpoll's poll_napi(). Only the entity which * obtains the lock and sees NAPI_STATE_SCHED set will * actually make the ->poll() call. Therefore we avoid - * accidently calling ->poll() when NAPI is not scheduled. + * accidentally calling ->poll() when NAPI is not scheduled. */ work = 0; if (test_bit(NAPI_STATE_SCHED, &n->state)) { @@ -5236,7 +5236,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); -void netdev_update_features(struct net_device *dev) +int __netdev_update_features(struct net_device *dev) { u32 features; int err = 0; @@ -5250,7 +5250,7 @@ void netdev_update_features(struct net_device *dev) features = netdev_fix_features(dev, features); if (dev->features == features) - return; + return 0; netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); @@ -5258,12 +5258,23 @@ void netdev_update_features(struct net_device *dev) if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); - if (!err) - dev->features = features; - else if (err < 0) + if (unlikely(err < 0)) { netdev_err(dev, "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", err, features, dev->features); + return -1; + } + + if (!err) + dev->features = features; + + return 1; +} + +void netdev_update_features(struct net_device *dev) +{ + if (__netdev_update_features(dev)) + netdev_features_change(dev); } EXPORT_SYMBOL(netdev_update_features); @@ -5414,6 +5425,14 @@ int register_netdevice(struct net_device *dev) dev->features &= ~NETIF_F_GSO; } + /* Turn on no cache copy if HW is doing checksum */ + dev->hw_features |= NETIF_F_NOCACHE_COPY; + if ((dev->features & NETIF_F_ALL_CSUM) && + !(dev->features & NETIF_F_NO_CSUM)) { + dev->wanted_features |= NETIF_F_NOCACHE_COPY; + dev->features |= NETIF_F_NOCACHE_COPY; + } + /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. 
@@ -5430,7 +5449,7 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; - netdev_update_features(dev); + __netdev_update_features(dev); /* * Default initial state at registry is that the @@ -6171,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) } } + /* If device can't no cache copy, don't do for all */ + if (!(one & NETIF_F_NOCACHE_COPY)) + all &= ~NETIF_F_NOCACHE_COPY; + one |= NETIF_F_ALL_CSUM; one |= all & NETIF_F_ONE_FOR_ALL; @@ -6336,7 +6359,7 @@ static void __net_exit default_device_exit(struct net *net) if (dev->rtnl_link_ops) continue; - /* Push remaing network devices to init_net */ + /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9eca126..704e176ad3a9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -21,6 +21,8 @@ #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/slab.h> +#include <linux/rtnetlink.h> +#include <linux/sched.h> /* * Some useful ethtool_ops methods that're device independent. @@ -317,7 +319,7 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) dev->wanted_features &= ~features[0].valid; dev->wanted_features |= features[0].valid & features[0].requested; - netdev_update_features(dev); + __netdev_update_features(dev); if ((dev->wanted_features ^ dev->features) & features[0].valid) ret |= ETHTOOL_F_WISH; @@ -359,7 +361,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_NTUPLE */ "rx-ntuple-filter", /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", - "", + /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" "", }; @@ -499,7 +501,7 @@ static int ethtool_set_one_feature(struct net_device *dev, else dev->wanted_features &= ~mask; - netdev_update_features(dev); + __netdev_update_features(dev); return 0; } @@ -551,7 +553,7 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) dev->wanted_features = (dev->wanted_features & ~changed) | data; - netdev_update_features(dev); + __netdev_update_features(dev); return 0; } @@ -908,6 +910,9 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; int ret; + if (!ops->set_rx_ntuple) + return -EOPNOTSUPP; + if (!(dev->features & NETIF_F_NTUPLE)) return -EINVAL; @@ -1618,14 +1623,63 @@ out: static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; + static bool busy; + int rc; - if (!dev->ethtool_ops->phys_id) + if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id) return -EOPNOTSUPP; + if (busy) + return -EBUSY; + if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - return dev->ethtool_ops->phys_id(dev, id.data); + if (!dev->ethtool_ops->set_phys_id) + /* Do it the old way */ + return dev->ethtool_ops->phys_id(dev, id.data); + + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); + if (rc && rc != -EINVAL) + return rc; + + /* Drop the RTNL lock while waiting, but prevent reentry or + * removal of the device. + */ + busy = true; + dev_hold(dev); + rtnl_unlock(); + + if (rc == 0) { + /* Driver will handle this itself */ + schedule_timeout_interruptible( + id.data ? 
id.data : MAX_SCHEDULE_TIMEOUT); + } else { + /* Driver expects to be called periodically */ + do { + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(HZ / 2); + + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(HZ / 2); + } while (!signal_pending(current) && + (id.data == 0 || --id.data != 0)); + } + + rtnl_lock(); + dev_put(dev); + busy = false; + + (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); + return rc; } static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) diff --git a/net/core/filter.c b/net/core/filter.c index 232b1873bb28..afb8afb066bb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -425,7 +425,7 @@ EXPORT_SYMBOL(sk_run_filter); * As we dont want to clear mem[] array for each packet going through * sk_run_filter(), we check that filter loaded by user never try to read * a cell if not previously written, and we check all branches to be sure - * a malicious user doesnt try to abuse us. + * a malicious user doesn't try to abuse us. */ static int check_load_and_stores(struct sock_filter *filter, int flen) { diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 01a1101b5936..a7b342131869 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -129,7 +129,7 @@ static void linkwatch_schedule_work(int urgent) if (!cancel_delayed_work(&linkwatch_work)) return; - /* Otherwise we reschedule it again for immediate exection. */ + /* Otherwise we reschedule it again for immediate execution. */ schedule_delayed_work(&linkwatch_work, 0); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 49f7ea5b4c75..d7c4bb4b1820 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -196,7 +196,7 @@ EXPORT_SYMBOL_GPL(__rtnl_register); * as failure of this function is very unlikely, it can only happen due * to lack of memory when allocating the chain to store all message * handlers for a protocol. Meant for use in init functions where lack - * of memory implies no sense in continueing. + * of memory implies no sense in continuing. */ void rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit) @@ -1440,7 +1440,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, errout: if (err < 0 && modified && net_ratelimit()) printk(KERN_WARNING "A link change request failed with " - "some changes comitted already. Interface %s may " + "some changes committed already. Interface %s may " "have been left with an inconsistent configuration, " "please check.\n", dev->name); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 801dd08908f9..7ebeed0a877c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2267,7 +2267,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read); * of bytes already consumed and the next call to * skb_seq_read() will return the remaining part of the block. * - * Note 1: The size of each block of data returned can be arbitary, + * Note 1: The size of each block of data returned can be arbitrary, * this limitation is the cost for zerocopy seqeuental * reads of potentially non linear data. 
* diff --git a/net/core/sock.c b/net/core/sock.c index 7dfed792434d..6e819780c232 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -215,7 +215,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; -/* Maximal space eaten by iovec or ancilliary data plus some space */ +/* Maximal space eaten by iovec or ancillary data plus some space */ int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); @@ -1175,7 +1175,7 @@ static void __sk_free(struct sock *sk) void sk_free(struct sock *sk) { /* - * We substract one from sk_wmem_alloc and can know if + * We subtract one from sk_wmem_alloc and can know if * some packets are still in some tx queue. * If not null, sock_wfree() will call __sk_free(sk) later */ @@ -1185,10 +1185,10 @@ void sk_free(struct sock *sk) EXPORT_SYMBOL(sk_free); /* - * Last sock_put should drop referrence to sk->sk_net. It has already - * been dropped in sk_change_net. Taking referrence to stopping namespace + * Last sock_put should drop reference to sk->sk_net. It has already + * been dropped in sk_change_net. Taking reference to stopping namespace * is not an option. - * Take referrence to a socket to remove it from hash _alive_ and after that + * Take reference to a socket to remove it from hash _alive_ and after that * destroy it in the context of init_net. */ void sk_release_kernel(struct sock *sk) diff --git a/net/dccp/output.c b/net/dccp/output.c index 784d30210543..136d41cbcd02 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -143,7 +143,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) } /** - * dccp_determine_ccmps - Find out about CCID-specfic packet-size limits + * dccp_determine_ccmps - Find out about CCID-specific packet-size limits * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), * since the RX CCID is restricted to feedback packets (Acks), which are small * in comparison with the data traffic. A value of 0 means "no current CCMPS". diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index bb2b41bc854e..3da418894efc 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -14,6 +14,13 @@ #include "dsa_priv.h" #include "mv88e6xxx.h" +/* + * Switch product IDs + */ +#define ID_6085 0x04a0 +#define ID_6095 0x0950 +#define ID_6131 0x1060 + static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) { int ret; @@ -21,9 +28,11 @@ static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; - if (ret == 0x0950) + if (ret == ID_6085) + return "Marvell 88E6085"; + if (ret == ID_6095) return "Marvell 88E6095/88E6095F"; - if (ret == 0x1060) + if (ret == ID_6131) return "Marvell 88E6131"; } @@ -124,7 +133,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) * Ignore removed tag data on doubly tagged packets, disable * flow control messages, force flow control priority to the * highest, and send all special multicast frames to the CPU - * port at the higest priority. + * port at the highest priority. 
*/ REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); @@ -164,6 +173,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) static int mv88e6131_setup_port(struct dsa_switch *ds, int p) { + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); int addr = REG_PORT(p); u16 val; @@ -171,10 +181,13 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) * MAC Forcing register: don't force link, speed, duplex * or flow control state to any particular values on physical * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. + * (100 Mb/s on 6085) full duplex. */ if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, 0x003e); + if (ps->id == ID_6085) + REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */ + else + REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */ else REG_WRITE(addr, 0x01, 0x0003); @@ -286,6 +299,8 @@ static int mv88e6131_setup(struct dsa_switch *ds) mv88e6xxx_ppu_state_init(ds); mutex_init(&ps->stats_mutex); + ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0; + ret = mv88e6131_switch_reset(ds); if (ret < 0) return ret; diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h index eb0e0aaa9f1b..61156ca26a0d 100644 --- a/net/dsa/mv88e6xxx.h +++ b/net/dsa/mv88e6xxx.h @@ -39,6 +39,8 @@ struct mv88e6xxx_priv_state { * Hold this mutex over snapshot + dump sequences. */ struct mutex stats_mutex; + + int id; /* switch product id */ }; struct mv88e6xxx_hw_stat { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 094e150c6260..a0af7ea87870 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -112,7 +112,7 @@ int cipso_v4_rbm_strictvalid = 1; /* The maximum number of category ranges permitted in the ranged category tag * (tag #5). You may note that the IETF draft states that the maximum number * of category ranges is 7, but if the low end of the last category range is - * zero then it is possibile to fit 8 category ranges because the zero should + * zero then it is possible to fit 8 category ranges because the zero should * be omitted. */ #define CIPSO_V4_TAG_RNG_CAT_MAX 8 @@ -438,7 +438,7 @@ cache_add_failure: * * Description: * Search the DOI definition list for a DOI definition with a DOI value that - * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). + * matches @doi. The caller is responsible for calling rcu_read_[un]lock(). * Returns a pointer to the DOI definition on success and NULL on failure. */ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) @@ -1293,7 +1293,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, return ret_val; /* This will send packets using the "optimized" format when - * possibile as specified in section 3.4.2.6 of the + * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) tag_len = 14; @@ -1752,7 +1752,7 @@ validate_return: } /** - * cipso_v4_error - Send the correct reponse for a bad packet + * cipso_v4_error - Send the correct response for a bad packet * @skb: the packet * @error: the error code * @gateway: CIPSO gateway flag diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 451088330bbb..22524716fe70 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -44,6 +44,7 @@ #include <net/arp.h> #include <net/ip_fib.h> #include <net/rtnetlink.h> +#include <net/xfrm.h> #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type); * - check, that packet arrived from expected physical interface. 
* called with rcu_read_lock() */ -int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, - struct net_device *dev, __be32 *spec_dst, - u32 *itag, u32 mark) +int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, + int oif, struct net_device *dev, __be32 *spec_dst, + u32 *itag) { struct in_device *in_dev; struct flowi4 fl4; @@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, fl4.flowi4_oif = 0; fl4.flowi4_iif = oif; - fl4.flowi4_mark = mark; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; @@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; - rpf = IN_DEV_RPFILTER(in_dev); + + /* Ignore rp_filter for packets protected by IPsec. */ + rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev); + accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); - if (mark && !IN_DEV_SRC_VMARK(in_dev)) - fl4.flowi4_mark = 0; + fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; } if (in_dev == NULL) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b92c86f6e9b3..bde80c450b52 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -12,7 +12,7 @@ * * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet * - * This work is based on the LPC-trie which is originally descibed in: + * This work is based on the LPC-trie which is originally described in: * * An experimental study of compression methods for dynamic tries * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. @@ -126,7 +126,7 @@ struct tnode { struct work_struct work; struct tnode *tnode_free; }; - struct rt_trie_node *child[0]; + struct rt_trie_node __rcu *child[0]; }; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -151,7 +151,7 @@ struct trie_stat { }; struct trie { - struct rt_trie_node *trie; + struct rt_trie_node __rcu *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats stats; #endif @@ -177,16 +177,29 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -static inline struct tnode *node_parent(struct rt_trie_node *node) +/* + * caller must hold RTNL + */ +static inline struct tnode *node_parent(const struct rt_trie_node *node) { - return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); + unsigned long parent; + + parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); + + return (struct tnode *)(parent & ~NODE_TYPE_MASK); } -static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) +/* + * caller must hold RCU read lock or RTNL + */ +static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) { - struct tnode *ret = node_parent(node); + unsigned long parent; + + parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || + lockdep_rtnl_is_held()); - return rcu_dereference_rtnl(ret); + return (struct tnode *)(parent & ~NODE_TYPE_MASK); } /* Same as rcu_assign_pointer @@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) node->parent = (unsigned long)ptr | NODE_TYPE(node); } -static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) +/* + * caller must hold RTNL + */ +static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) { BUG_ON(i >= 1U << tn->bits); - return tn->child[i]; + return rtnl_dereference(tn->child[i]); } -static inline struct rt_trie_node 
*tnode_get_child_rcu(struct tnode *tn, unsigned int i) +/* + * caller must hold RCU read lock or RTNL + */ +static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) { - struct rt_trie_node *ret = tnode_get_child(tn, i); + BUG_ON(i >= 1U << tn->bits); - return rcu_dereference_rtnl(ret); + return rcu_dereference_rtnl(tn->child[i]); } static inline int tnode_child_length(const struct tnode *tn) @@ -487,7 +506,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull) { - struct rt_trie_node *chi = tn->child[i]; + struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); int isfull; BUG_ON(i >= 1<<tn->bits); @@ -665,7 +684,7 @@ one_child: for (i = 0; i < tnode_child_length(tn); i++) { struct rt_trie_node *n; - n = tn->child[i]; + n = rtnl_dereference(tn->child[i]); if (!n) continue; @@ -679,6 +698,20 @@ one_child: return (struct rt_trie_node *) tn; } + +static void tnode_clean_free(struct tnode *tn) +{ + int i; + struct tnode *tofree; + + for (i = 0; i < tnode_child_length(tn); i++) { + tofree = (struct tnode *)rtnl_dereference(tn->child[i]); + if (tofree) + tnode_free(tofree); + } + tnode_free(tn); +} + static struct tnode *inflate(struct trie *t, struct tnode *tn) { struct tnode *oldtnode = tn; @@ -755,8 +788,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) inode = (struct tnode *) node; if (inode->bits == 1) { - put_child(t, tn, 2*i, inode->child[0]); - put_child(t, tn, 2*i+1, inode->child[1]); + put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); + put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); tnode_free_safe(inode); continue; @@ -797,8 +830,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) size = tnode_child_length(left); for (j = 0; j < size; j++) { - put_child(t, left, j, inode->child[j]); - put_child(t, right, j, inode->child[j + size]); + put_child(t, left, j, rtnl_dereference(inode->child[j])); + put_child(t, right, j, rtnl_dereference(inode->child[j + size])); } put_child(t, tn, 2*i, resize(t, left)); put_child(t, tn, 2*i+1, resize(t, right)); @@ -808,18 +841,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) tnode_free_safe(oldtnode); return tn; nomem: - { - int size = tnode_child_length(tn); - int j; - - for (j = 0; j < size; j++) - if (tn->child[j]) - tnode_free((struct tnode *)tn->child[j]); - - tnode_free(tn); - - return ERR_PTR(-ENOMEM); - } + tnode_clean_free(tn); + return ERR_PTR(-ENOMEM); } static struct tnode *halve(struct trie *t, struct tnode *tn) @@ -890,18 +913,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) tnode_free_safe(oldtnode); return tn; nomem: - { - int size = tnode_child_length(tn); - int j; - - for (j = 0; j < size; j++) - if (tn->child[j]) - tnode_free((struct tnode *)tn->child[j]); - - tnode_free(tn); - - return ERR_PTR(-ENOMEM); - } + tnode_clean_free(tn); + return ERR_PTR(-ENOMEM); } /* readside must use rcu_read_lock currently dump routines @@ -1033,7 +1046,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) t_key cindex; pos = 0; - n = t->trie; + n = rtnl_dereference(t->trie); /* If we point to NULL, stop. 
Either the tree is empty and we should * just put a new leaf in if, or we have reached an empty child slot, @@ -1756,7 +1769,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) continue; if (IS_LEAF(c)) { - prefetch(p->child[idx]); + prefetch(rcu_dereference_rtnl(p->child[idx])); return (struct leaf *) c; } @@ -2272,7 +2285,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* walk rest of this hash chain */ h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); - while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { + while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { tb = hlist_entry(tb_node, struct fib_table, tb_hlist); n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a91dc1611081..e5f8a71d3a2a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -704,7 +704,7 @@ static void icmp_unreach(struct sk_buff *skb) */ /* - * Check the other end isnt violating RFC 1122. Some routers send + * Check the other end isn't violating RFC 1122. Some routers send * bogus responses to broadcast frames. If you see this message * first check your netmask matches at both ends, if it does then * get the other vendor to fix their kit. diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6c0b7f4a3d7d..f784608a4c45 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -356,20 +356,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; - struct flowi4 fl4 = { - .flowi4_oif = sk->sk_bound_dev_if, - .flowi4_mark = sk->sk_mark, - .daddr = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .saddr = ireq->loc_addr, - .flowi4_tos = RT_CONN_FLAGS(sk), - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = inet_sk_flowi_flags(sk), - .fl4_sport = inet_sk(sk)->inet_sport, - .fl4_dport = ireq->rmt_port, - }; struct net *net = sock_net(sk); + struct flowi4 fl4; + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), + (opt && opt->srr) ? 
opt->faddr : ireq->rmt_addr, + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 67f241b97649..bdad3d60aa82 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -603,7 +603,7 @@ slow_path: /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) len = mtu; - /* IF: we are not sending upto and including the packet end + /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < left) { len &= ~7; @@ -1474,16 +1474,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi4 fl4 = { - .flowi4_oif = arg->bound_dev_if, - .daddr = daddr, - .saddr = rt->rt_spec_dst, - .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl4_sport = tcp_hdr(skb)->dest, - .fl4_dport = tcp_hdr(skb)->source, - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = ip_reply_arg_flowi_flags(arg), - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, arg->bound_dev_if, 0, + RT_TOS(ip_hdr(skb)->tos), + RT_SCOPE_UNIVERSE, sk->sk_protocol, + ip_reply_arg_flowi_flags(arg), + daddr, rt->rt_spec_dst, + tcp_hdr(skb)->source, tcp_hdr(skb)->dest); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 2b097752426b..cbff2ecccf3d 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1444,7 +1444,7 @@ static int __init ip_auto_config(void) root_server_addr = addr; /* - * Use defaults whereever applicable. + * Use defaults wherever applicable. */ if (ic_defaults() < 0) return -1; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f3c0b549b8e1..4614babdc45f 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -221,9 +221,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, return csum; } -static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) +static int nf_ip_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict __always_unused) { - struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); + struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); if (IS_ERR(rt)) return PTR_ERR(rt); *dst = &rt->dst; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4b5d457c2d76..89bc7e66d598 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -76,7 +76,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, } /* - * Unfortunatly, _b and _mask are not aligned to an int (or long int) + * Unfortunately, _b and _mask are not aligned to an int (or long int) * Some arches dont care, unrolling the loop is a win on them. * For other arches, we only have a 16bit alignement. 
*/ @@ -1874,7 +1874,7 @@ static int __init arp_tables_init(void) if (ret < 0) goto err1; - /* Noone else will be downing sem now, so we won't sleep */ + /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); if (ret < 0) goto err2; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ffcea0d1678e..704915028009 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2233,7 +2233,7 @@ static int __init ip_tables_init(void) if (ret < 0) goto err1; - /* Noone else will be downing sem now, so we won't sleep */ + /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); if (ret < 0) goto err2; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 21bcf471b25a..9c71b2755ce3 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -521,7 +521,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) } EXPORT_SYMBOL(nf_nat_protocol_register); -/* Noone stores the protocol anywhere; simply delete it. */ +/* No one stores the protocol anywhere; simply delete it. */ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) { spin_lock_bh(&nf_nat_lock); @@ -532,7 +532,7 @@ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) } EXPORT_SYMBOL(nf_nat_protocol_unregister); -/* Noone using conntrack by the time this called. */ +/* No one using conntrack by the time this called. */ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2d3c72e5bbbf..2b50cc2da90a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -548,17 +548,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } { - struct flowi4 fl4 = { - .flowi4_oif = ipc.oif, - .flowi4_mark = sk->sk_mark, - .daddr = daddr, - .saddr = saddr, - .flowi4_tos = tos, - .flowi4_proto = (inet->hdrincl ? - IPPROTO_RAW : - sk->sk_protocol), - .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + RT_SCOPE_UNIVERSE, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); + if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); if (err) @@ -622,7 +618,7 @@ do_confirm: static void raw_close(struct sock *sk, long timeout) { /* - * Raw sockets may have direct kernel refereneces. Kill them. + * Raw sockets may have direct kernel references. Kill them. */ ip_ra_control(sk, 0, NULL); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4b0c81180804..0e7430c327a7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -821,7 +821,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) } /* - * Pertubation of rt_genid by a small quantity [1..256] + * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() * many times (2^24) without giving recent rt_genid. * Jenkins hash is strong enough that litle changes of rt_genid are OK. @@ -1191,7 +1191,7 @@ restart: #endif /* * Since lookup is lockfree, we must make sure - * previous writes to rt are comitted to memory + * previous writes to rt are committed to memory * before making rt visible to other CPUS. 
*/ rcu_assign_pointer(rt_hash_table[hash].chain, rt); @@ -1871,8 +1871,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); } else { - err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, - &itag, 0); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, + &itag); if (err < 0) goto e_err; } @@ -1891,6 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif + rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->dst.dev = init_net.loopback_dev; dev_hold(rth->dst.dev); @@ -1980,8 +1981,8 @@ static int __mkroute_input(struct sk_buff *skb, } - err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, &spec_dst, &itag, skb->mark); + err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), + in_dev->dev, &spec_dst, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2026,6 +2027,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_key_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; + rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->dst.dev = (out_dev)->dev; dev_hold(rth->dst.dev); @@ -2148,9 +2150,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; if (res.type == RTN_LOCAL) { - err = fib_validate_source(saddr, daddr, tos, + err = fib_validate_source(skb, saddr, daddr, tos, net->loopback_dev->ifindex, - dev, &spec_dst, &itag, skb->mark); + dev, &spec_dst, &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2174,8 +2176,8 @@ brd_input: if (ipv4_is_zeronet(saddr)) spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); else { - err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, - &itag, skb->mark); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, + &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2202,6 +2204,7 @@ local_input: #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif + rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->dst.dev = net->loopback_dev; dev_hold(rth->dst.dev); @@ -2401,7 +2404,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_mark = oldflp4->flowi4_mark; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; - rth->rt_iif = 0; + rth->rt_route_iif = 0; + rth->rt_iif = oldflp4->flowi4_oif ? 
: dev_out->ifindex; /* get references to the devices that are to be hold by the routing cache entry */ rth->dst.dev = dev_out; @@ -2716,6 +2720,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_key_dst = ort->rt_key_dst; rt->rt_key_src = ort->rt_key_src; rt->rt_tos = ort->rt_tos; + rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; @@ -2725,7 +2730,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; - rt->rt_iif = ort->rt_iif; rt->rt_gateway = ort->rt_gateway; rt->rt_spec_dst = ort->rt_spec_dst; rt->peer = ort->peer; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8b44c6d2a79b..71e029691908 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi4 fl4 = { - .flowi4_mark = sk->sk_mark, - .daddr = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .saddr = ireq->loc_addr, - .flowi4_tos = RT_CONN_FLAGS(sk), - .flowi4_proto = IPPROTO_TCP, - .flowi4_flags = inet_sk_flowi_flags(sk), - .fl4_sport = th->dest, - .fl4_dport = th->source, - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + RT_SCOPE_UNIVERSE, IPPROTO_TCP, + inet_sk_flowi_flags(sk), + (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, + ireq->loc_addr, th->source, th->dest); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b22d45010545..054a59d21eb0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -999,7 +999,8 @@ new_segment: /* We have some space in skb head. Superb! */ if (copy > skb_tailroom(skb)) copy = skb_tailroom(skb); - if ((err = skb_add_data(skb, from, copy)) != 0) + err = skb_add_data_nocache(sk, skb, from, copy); + if (err) goto do_fault; } else { int merge = 0; @@ -1042,8 +1043,8 @@ new_segment: /* Time to copy data. We are close to * the end! */ - err = skb_copy_to_page(sk, from, skb, page, - off, copy); + err = skb_copy_to_page_nocache(sk, from, skb, + page, off, copy); if (err) { /* If this page was new, give it to the * socket so it does not get leaked. diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 656d431c99ad..72f7218b03f5 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -12,7 +12,7 @@ * within cong_avoid. * o Error correcting in remote HZ, therefore remote HZ will be keeped * on checking and updating. - * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, sicne + * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since * OWD have a similar meaning as RTT. Also correct the buggy formular. * o Handle reaction for Early Congestion Indication (ECI) within * pkts_acked, as mentioned within pseudo code. 
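Several hunks in this region (inet_connection_sock.c, ip_output.c, raw.c, syncookies.c, and udp.c below) make the same conversion: an open-coded struct flowi4 designated initializer becomes a single flowi4_init_output() call, which also makes the scope explicit (RT_SCOPE_UNIVERSE) where the initializers left it implicit. A rough sketch of such a helper, with the flow key trimmed to the fields these call sites use; the argument order mirrors the calls in the diff, but the struct layout here is illustrative, not the kernel's (the real struct flowi4 lives in include/net/flow.h and carries more state).

	#include <linux/types.h>

	/* Trimmed flow key for illustration only. */
	struct flowi4_sketch {
		int	flowi4_oif;
		__u32	flowi4_mark;
		__u8	flowi4_tos;
		__u8	flowi4_scope;
		__u8	flowi4_proto;
		__u8	flowi4_flags;
		__be32	daddr;
		__be32	saddr;
		__be16	fl4_dport;
		__be16	fl4_sport;
	};

	/* Fill every field, in one fixed order, for every caller. */
	static inline void flowi4_init_output_sketch(struct flowi4_sketch *fl4,
			int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto,
			__u8 flags, __be32 daddr, __be32 saddr,
			__be16 dport, __be16 sport)
	{
		fl4->flowi4_oif   = oif;
		fl4->flowi4_mark  = mark;
		fl4->flowi4_tos   = tos;
		fl4->flowi4_scope = scope;
		fl4->flowi4_proto = proto;
		fl4->flowi4_flags = flags;
		fl4->daddr        = daddr;
		fl4->saddr        = saddr;
		fl4->fl4_dport    = dport;
		fl4->fl4_sport    = sport;
	}

One helper means every caller supplies every field in one order, so a later addition to the flow key lands in a single place instead of in half a dozen hand-rolled initializers scattered across net/ipv4.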
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8b0d0167e44a..17388c7f49c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -73,7 +73,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - /* Don't override Nagle indefinately with F-RTO */ + /* Don't override Nagle indefinitely with F-RTO */ if (tp->frto_counter == 2) tp->frto_counter = 3; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index dc7f43179c9a..05c3b6f0e8e1 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -20,7 +20,7 @@ #define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss #define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion #define TCP_YEAH_PHY 8 //lin maximum delta from base -#define TCP_YEAH_RHO 16 //lin minumum number of consecutive rtt to consider competition on loss +#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss #define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count #define TCP_SCALABLE_AI_CNT 100U diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 588f47af5faf..a15c8fb653af 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -189,7 +189,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, * @sk: socket struct in question * @snum: port number to look up * @saddr_comp: AF-dependent comparison of bound local IP addresses - * @hash2_nulladdr: AF-dependant hash value in secondary hash chains, + * @hash2_nulladdr: AF-dependent hash value in secondary hash chains, * with NULL address */ int udp_lib_get_port(struct sock *sk, unsigned short snum, @@ -909,20 +909,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi4 fl4 = { - .flowi4_oif = ipc.oif, - .flowi4_mark = sk->sk_mark, - .daddr = faddr, - .saddr = saddr, - .flowi4_tos = tos, - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = (inet_sk_flowi_flags(sk) | - FLOWI_FLAG_CAN_SLEEP), - .fl4_sport = inet->inet_sport, - .fl4_dport = dport, - }; + struct flowi4 fl4; struct net *net = sock_net(sk); + flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + RT_SCOPE_UNIVERSE, sk->sk_protocol, + inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, + faddr, saddr, dport, inet->inet_sport); + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 13e0e7f659ff..d20a05e970d8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -74,6 +74,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, rt->rt_key_dst = fl4->daddr; rt->rt_key_src = fl4->saddr; rt->rt_tos = fl4->flowi4_tos; + rt->rt_route_iif = fl4->flowi4_iif; rt->rt_iif = fl4->flowi4_iif; rt->rt_oif = fl4->flowi4_oif; rt->rt_mark = fl4->flowi4_mark; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3daaf3c7703c..1493534116df 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1084,7 +1084,7 @@ static int ipv6_get_saddr_eval(struct net *net, case IPV6_SADDR_RULE_PRIVACY: { /* Rule 7: Prefer public address - * Note: prefer temprary address if use_tempaddr >= 2 + * Note: prefer temporary address if use_tempaddr >= 2 */ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? 
!!(dst->prefs & IPV6_PREFER_SRC_TMP) : @@ -1968,7 +1968,7 @@ ok: * to the stored lifetime since we'll * be updating the timestamp below, * else we'll set it back to the - * minumum. + * minimum. */ if (prefered_lft != ifp->prefered_lft) { valid_lft = stored_lft; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4b13d5d8890e..afcc7099f96d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1113,7 +1113,7 @@ static int __init inet6_init(void) /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance - * in a host availiable by both INET and INET6 APIs and + * in a host available by both INET and INET6 APIs and * able to communicate via both network protocols. */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 18208876aa8a..46cf7bea6769 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -779,7 +779,7 @@ slow_path: /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) len = mtu; - /* IF: we are not sending upto and including the packet end + /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < left) { len &= ~7; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 39aaca2b4fd2..28bc1f644b7b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -90,9 +90,18 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) +static int nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { - *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6); + static const struct ipv6_pinfo fake_pinfo; + static const struct inet_sock fake_sk = { + /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */ + .sk.sk_bound_dev_if = 1, + .pinet6 = (struct ipv6_pinfo *) &fake_pinfo, + }; + const void *sk = strict ? 
&fake_sk : NULL; + + *dst = ip6_route_output(net, sk, &fl->u.ip6); return (*dst)->error; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0b2af9b85cec..5a1c6f27ffaf 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2248,7 +2248,7 @@ static int __init ip6_tables_init(void) if (ret < 0) goto err1; - /* Noone else will be downing sem now, so we won't sleep */ + /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); if (ret < 0) goto err2; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 97c5b21b9674..cdd6d045e42e 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -71,7 +71,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == NULL) return NF_STOLEN; - /* error occured or not fragmented */ + /* error occurred or not fragmented */ if (reasm == skb) return NF_ACCEPT; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b0c186862c8..4f49e5dd41bb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -503,6 +503,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { err = PTR_ERR(dst); + dst = NULL; goto done; } skb = tcp_make_synack(sk, dst, req, rvp); @@ -1621,6 +1622,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1648,7 +1650,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) __kfree_skb(opt_skb); return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d7037c006e13..15c37746845e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -505,6 +505,9 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) int rc; int is_udplite = IS_UDPLITE(sk); + if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) + sock_rps_save_rxhash(sk, skb->rxhash); + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 783c5f367d29..005b424494a0 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -165,7 +165,7 @@ struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, irlap_apply_default_connection_parameters(self); - self->N3 = 3; /* # connections attemts to try before giving up */ + self->N3 = 3; /* # connections attempts to try before giving up */ self->state = LAP_NDM; diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index d434c8880745..bb47021c9a55 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -708,7 +708,7 @@ static int irlap_state_reply(struct irlap_cb *self, IRLAP_EVENT event, self->frame_sent = TRUE; } - /* Readjust our timer to accomodate devices + /* Readjust our timer to accommodate devices * doing faster or slower discovery than us... 
* Jean II */ irlap_start_query_timer(self, info->S, info->s); @@ -931,7 +931,7 @@ static int irlap_state_setup(struct irlap_cb *self, IRLAP_EVENT event, irlap_send_rr_frame(self, CMD_FRAME); /* The timer is set to half the normal timer to quickly - * detect a failure to negociate the new connection + * detect a failure to negotiate the new connection * parameters. IrLAP 6.11.3.2, note 3. * Note that currently we don't process this failure * properly, as we should do a quick disconnect. @@ -1052,7 +1052,7 @@ static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event, return -EPROTO; } - /* Substract space used by this skb */ + /* Subtract space used by this skb */ self->bytes_left -= skb->len; #else /* CONFIG_IRDA_DYNAMIC_WINDOW */ /* Window has been adjusted for the max packet @@ -1808,7 +1808,7 @@ static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event, return -EPROTO; /* Try again later */ } - /* Substract space used by this skb */ + /* Subtract space used by this skb */ self->bytes_left -= skb->len; #else /* CONFIG_IRDA_DYNAMIC_WINDOW */ /* Window has been adjusted for the max packet diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 688222cbf55b..8c004161a843 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -848,7 +848,7 @@ void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb) * though IrLAP is currently sending the *last* frame of the * tx-window, the driver most likely has only just started * sending the *first* frame of the same tx-window. - * I.e. we are always at the very begining of or Tx window. + * I.e. we are always at the very beginning of or Tx window. * Now, we are supposed to set the final timer from the end * of our tx-window to let the other peer reply. So, we need * to add extra time to compensate for the fact that we diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c index c1fb5db81042..9505a7d06f1a 100644 --- a/net/irda/irlmp_event.c +++ b/net/irda/irlmp_event.c @@ -498,7 +498,7 @@ static int irlmp_state_disconnected(struct lsap_cb *self, IRLMP_EVENT event, switch (event) { #ifdef CONFIG_IRDA_ULTRA case LM_UDATA_INDICATION: - /* This is most bizzare. Those packets are aka unreliable + /* This is most bizarre. Those packets are aka unreliable * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA. * Why do we pass them as Ultra ??? Jean II */ irlmp_connless_data_indication(self, skb); diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index 0d82ff5aeff1..979ecb2435a7 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -73,7 +73,7 @@ * Infinite thanks to those brave souls for providing the infrastructure * upon which IrNET is built. * - * Thanks to all my collegues in HP for helping me. In particular, + * Thanks to all my colleagues in HP for helping me. In particular, * thanks to Salil Pradhan and Bill Serra for W2k testing... * Thanks to Luiz Magalhaes for irnetd and much testing... 
* diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index 849aaf0dabb5..9715e6e5900b 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -40,7 +40,7 @@ * o the hash function for ints is pathetic (but could be changed) * o locking is sometime suspicious (especially during enumeration) * o most users have only a few elements (== overhead) - * o most users never use seach, so don't benefit from hashing + * o most users never use search, so don't benefit from hashing * Problem already fixed : * o not 64 bit compliant (most users do hashv = (int) self) * o hashbin_remove() is broken => use hashbin_remove_this() diff --git a/net/irda/irttp.c b/net/irda/irttp.c index f6054f9ccbe3..9d9af4606970 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1193,7 +1193,7 @@ EXPORT_SYMBOL(irttp_connect_request); /* * Function irttp_connect_confirm (handle, qos, skb) * - * Sevice user confirms TSAP connection with peer. + * Service user confirms TSAP connection with peer. * */ static void irttp_connect_confirm(void *instance, void *sap, diff --git a/net/irda/qos.c b/net/irda/qos.c index 2b00974e5bae..1b51bcf42394 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -39,16 +39,16 @@ #include <net/irda/irlap_frame.h> /* - * Maximum values of the baud rate we negociate with the other end. + * Maximum values of the baud rate we negotiate with the other end. * Most often, you don't have to change that, because Linux-IrDA will * use the maximum offered by the link layer, which usually works fine. * In some very rare cases, you may want to limit it to lower speeds... */ int sysctl_max_baud_rate = 16000000; /* - * Maximum value of the lap disconnect timer we negociate with the other end. + * Maximum value of the lap disconnect timer we negotiate with the other end. * Most often, the value below represent the best compromise, but some user - * may want to keep the LAP alive longuer or shorter in case of link failure. + * may want to keep the LAP alive longer or shorter in case of link failure. * Remember that the threshold time (early warning) is fixed to 3s... */ int sysctl_max_noreply_time = 12; @@ -411,7 +411,7 @@ static void irlap_adjust_qos_settings(struct qos_info *qos) * Fix tx data size according to user limits - Jean II */ if (qos->data_size.value > sysctl_max_tx_data_size) - /* Allow non discrete adjustement to avoid loosing capacity */ + /* Allow non discrete adjustement to avoid losing capacity */ qos->data_size.value = sysctl_max_tx_data_size; /* * Override Tx window if user request it. - Jean II diff --git a/net/irda/timer.c b/net/irda/timer.c index 0335ba0cc593..f418cb2ad49c 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -59,7 +59,7 @@ void irlap_start_query_timer(struct irlap_cb *self, int S, int s) * slot time, plus add some extra time to properly receive the last * discovery packet (which is longer due to extra discovery info), * to avoid messing with for incomming connections requests and - * to accomodate devices that perform discovery slower than us. + * to accommodate devices that perform discovery slower than us. 
* Jean II */ timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s) + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 9637e45744fa..986b2a5e8769 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -250,7 +250,7 @@ static struct device *af_iucv_dev; * PRMDATA[0..6] socket data (max 7 bytes); * PRMDATA[7] socket data length value (len is 0xff - PRMDATA[7]) * - * The socket data length is computed by substracting the socket data length + * The socket data length is computed by subtracting the socket data length * value from 0xFF. * If the socket data len is greater 7, then PRMDATA can be used for special * notifications (see iucv_sock_shutdown); and further, diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 1ee5dab3cfae..8f156bd86be7 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -735,7 +735,7 @@ static void iucv_cleanup_queue(void) struct iucv_irq_list *p, *n; /* - * When a path is severed, the pathid can be reused immediatly + * When a path is severed, the pathid can be reused immediately * on a iucv connect or a connection pending interrupt. Remove * all entries from the task queue that refer to a stale pathid * (iucv_path_table[ix] == NULL). Only then do the iucv connect @@ -807,7 +807,7 @@ void iucv_unregister(struct iucv_handler *handler, int smp) spin_lock_bh(&iucv_table_lock); /* Remove handler from the iucv_handler_list. */ list_del_init(&handler->list); - /* Sever all pathids still refering to the handler. */ + /* Sever all pathids still referring to the handler. */ list_for_each_entry_safe(p, n, &handler->paths, list) { iucv_sever_pathid(p->pathid, NULL); iucv_path_table[p->pathid] = NULL; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6eb2c8523eeb..89ce1e329b5d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -97,7 +97,7 @@ struct ieee80211_bss { size_t supp_rates_len; /* - * During assocation, we save an ERP value from a probe response so + * During association, we save an ERP value from a probe response so * that we can feed ERP info to the driver when handling the * association completes. these fields probably won't be up-to-date * otherwise, you probably don't want to use them. 
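The af_iucv hunk above documents the parameter-list trick: messages of at most seven bytes travel inline in PRMDATA[0..6], with PRMDATA[7] carrying 0xff minus the length. A self-contained sketch of that encoding follows (the helper names are hypothetical, chosen for illustration; the real logic lives in net/iucv/af_iucv.c):

#include <stdint.h>
#include <string.h>

/* Pack up to 7 bytes of socket data into an 8-byte IUCV parameter list. */
static int prmdata_pack(uint8_t prmdata[8], const void *buf, size_t len)
{
	if (len > 7)
		return -1;			/* payload does not fit inline */
	memcpy(prmdata, buf, len);
	memset(prmdata + len, 0, 7 - len);
	prmdata[7] = 0xff - (uint8_t)len;	/* length stored as 0xff - len */
	return 0;
}

/* Recover the payload length on the receive side. */
static size_t prmdata_len(const uint8_t prmdata[8])
{
	return 0xff - prmdata[7];
}

Because the receiver computes the length as 0xff - PRMDATA[7], any decoded value above 7 cannot describe inline data, which is what leaves room for the special notifications (see iucv_sock_shutdown) mentioned in the comment.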
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 8d65b47d9837..336ca9d0c5c4 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -628,7 +628,7 @@ void mesh_path_discard_frame(struct sk_buff *skb, * * @mpath: mesh path whose queue has to be freed * - * Locking: the function must me called withing a rcu_read_lock region + * Locking: the function must me called within a rcu_read_lock region */ void mesh_path_flush_pending(struct mesh_path *mpath) { diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index dbdebeda097f..c06aa3ac6b9d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -259,7 +259,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } } - /* try to sample up to half of the availble rates during each interval */ + /* try to sample up to half of the available rates during each interval */ mi->sample_count *= 4; cur_prob = 0; diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 6510f8ee738e..19111c7bf454 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -77,7 +77,7 @@ union rc_pid_event_data { }; struct rc_pid_event { - /* The time when the event occured */ + /* The time when the event occurred */ unsigned long timestamp; /* Event ID number */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fc2ff78582ca..1f0b010904b8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -381,7 +381,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) * specs were sane enough this time around to require padding each A-MSDU * subframe to a length that is a multiple of four. * - * Padding like Atheros hardware adds which is inbetween the 802.11 header and + * Padding like Atheros hardware adds which is between the 802.11 header and * the payload is not supported, the driver is required to move the 802.11 * header to be directly in front of the payload in that case. */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 999f8fbf0b4b..52d4b1a695c9 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -47,9 +47,9 @@ * Station entries are added by mac80211 when you establish a link with a * peer. This means different things for the different type of interfaces * we support. For a regular station this mean we add the AP sta when we - * receive an assocation response from the AP. For IBSS this occurs when + * receive an association response from the AP. For IBSS this occurs when * get to know about a peer on the same IBSS. For WDS we add the sta for - * the peer imediately upon device open. When using AP mode we add stations + * the peer immediately upon device open. When using AP mode we add stations * for each respective station upon request from userspace through nl80211. 
* * In order to remove a STA info structure, various sta_info_destroy_*() diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 43238e99cfb3..87b18ba1e0e9 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -173,7 +173,7 @@ struct sta_ampdu_mlme { /** * enum plink_state - state of a mesh peer link finite state machine * - * @PLINK_LISTEN: initial state, considered the implicit state of non existant + * @PLINK_LISTEN: initial state, considered the implicit state of non existent * mesh peer links * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh peer * @PLINK_OPN_RCVD: mesh plink open frame has been received from this mesh peer diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c3f988aa1152..32bff6d86cb2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -652,7 +652,6 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED - depends on (IPV6 || IPV6=n) ---help--- This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index bca96990218d..a113ff066928 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -338,8 +338,7 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); if (map->netmask != 32) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 5e790172deff..00a33242e90c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -434,8 +434,7 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + (map->last_ip - map->first_ip + 1) * map->dsize)); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 165f09b1a9cb..6b38eb8f6ed8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -320,8 +320,7 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d6b48230a540..9152e69a162d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -26,6 +26,7 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static 
DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ +static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ static struct ip_set **ip_set_list; /* all individual sets */ static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ @@ -301,13 +302,18 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); static inline void __ip_set_get(ip_set_id_t index) { - atomic_inc(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + ip_set_list[index]->ref++; + write_unlock_bh(&ip_set_ref_lock); } static inline void __ip_set_put(ip_set_id_t index) { - atomic_dec(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + BUG_ON(ip_set_list[index]->ref == 0); + ip_set_list[index]->ref--; + write_unlock_bh(&ip_set_ref_lock); } /* @@ -324,7 +330,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -356,7 +362,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -378,7 +384,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -397,7 +403,6 @@ EXPORT_SYMBOL_GPL(ip_set_del); * Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * - * The nfnl mutex must already be activated. */ ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set) @@ -423,15 +428,12 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * - * The nfnl mutex must already be activated. */ void ip_set_put_byindex(ip_set_id_t index) { - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + if (ip_set_list[index] != NULL) __ip_set_put(index); - } } EXPORT_SYMBOL_GPL(ip_set_put_byindex); @@ -441,7 +443,6 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * can't be destroyed. The set cannot be renamed due to * the referencing either. * - * The nfnl mutex must already be activated. */ const char * ip_set_name_byindex(ip_set_id_t index) @@ -449,7 +450,7 @@ ip_set_name_byindex(ip_set_id_t index) const struct ip_set *set = ip_set_list[index]; BUG_ON(set == NULL); - BUG_ON(atomic_read(&set->ref) == 0); + BUG_ON(set->ref == 0); /* Referenced, so it's safe */ return set->name; @@ -515,10 +516,7 @@ void ip_set_nfnl_put(ip_set_id_t index) { nfnl_lock(); - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); - __ip_set_put(index); - } + ip_set_put_byindex(index); nfnl_unlock(); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -526,7 +524,7 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put); /* * Communication protocol with userspace over netlink. * - * We already locked by nfnl_lock. + * The commands are serialized by the nfnl mutex. 
*/ static inline bool @@ -657,7 +655,6 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, return -ENOMEM; rwlock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); - atomic_set(&set->ref, 0); set->family = family; /* @@ -690,8 +687,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* * Here, we have a valid, constructed set and we are protected - * by nfnl_lock. Find the first free index in ip_set_list and - * check clashing. + * by the nfnl mutex. Find the first free index in ip_set_list + * and check clashing. */ if ((ret = find_free_id(set->name, &index, &clash)) != 0) { /* If this is the same set and requested, ignore error */ @@ -751,31 +748,51 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[]) { ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* References are protected by the nfnl mutex */ + /* Commands are serialized and references are + * protected by the ip_set_ref_lock. + * External systems (i.e. xt_set) must call + * ip_set_put|get_nfnl_* functions, that way we + * can safely check references here. + * + * list:set timer can only decrement the reference + * counter, so if it's already zero, we can proceed + * without holding the lock. + */ + read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL && - (atomic_read(&ip_set_list[i]->ref))) - return -IPSET_ERR_BUSY; + if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { + ret = IPSET_ERR_BUSY; + goto out; + } } + read_unlock_bh(&ip_set_ref_lock); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL) ip_set_destroy_set(i); } } else { i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (i == IPSET_INVALID_ID) - return -ENOENT; - else if (atomic_read(&ip_set_list[i]->ref)) - return -IPSET_ERR_BUSY; + if (i == IPSET_INVALID_ID) { + ret = -ENOENT; + goto out; + } else if (ip_set_list[i]->ref) { + ret = -IPSET_ERR_BUSY; + goto out; + } + read_unlock_bh(&ip_set_ref_lock); ip_set_destroy_set(i); } return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Flush sets */ @@ -834,6 +851,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set; const char *name2; ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -843,25 +861,33 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; - if (atomic_read(&set->ref) != 0) - return -IPSET_ERR_REFERENCED; + + read_lock_bh(&ip_set_ref_lock); + if (set->ref != 0) { + ret = -IPSET_ERR_REFERENCED; + goto out; + } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && - STREQ(ip_set_list[i]->name, name2)) - return -IPSET_ERR_EXIST_SETNAME2; + STREQ(ip_set_list[i]->name, name2)) { + ret = -IPSET_ERR_EXIST_SETNAME2; + goto out; + } } strncpy(set->name, name2, IPSET_MAXNAMELEN); - return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Swap two sets so that name/index points to the other. * References and set names are also swapped. * - * We are protected by the nfnl mutex and references are - * manipulated only by holding the mutex. The kernel interfaces + * The commands are serialized by the nfnl mutex and references are + * protected by the ip_set_ref_lock. 
The kernel interfaces * do not hold the mutex but the pointer settings are atomic * so the ip_set_list always contains valid pointers to the sets. */ @@ -874,7 +900,6 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - u32 from_ref; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -893,23 +918,21 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, to = ip_set_list[to_id]; /* Features must not change. - * Not an artifical restriction anymore, as we must prevent + * Not an artificial restriction anymore, as we must prevent * possible loops created by swapping in setlist type of sets. */ if (!(from->type->features == to->type->features && from->type->family == to->type->family)) return -IPSET_ERR_TYPE_MISMATCH; - /* No magic here: ref munging protected by the nfnl_lock */ strncpy(from_name, from->name, IPSET_MAXNAMELEN); - from_ref = atomic_read(&from->ref); - strncpy(from->name, to->name, IPSET_MAXNAMELEN); - atomic_set(&from->ref, atomic_read(&to->ref)); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - atomic_set(&to->ref, from_ref); + write_lock_bh(&ip_set_ref_lock); + swap(from->ref, to->ref); ip_set_list[from_id] = to; ip_set_list[to_id] = from; + write_unlock_bh(&ip_set_ref_lock); return 0; } @@ -926,7 +949,7 @@ ip_set_dump_done(struct netlink_callback *cb) { if (cb->args[2]) { pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); - __ip_set_put((ip_set_id_t) cb->args[1]); + ip_set_put_byindex((ip_set_id_t) cb->args[1]); } return 0; } @@ -1068,7 +1091,7 @@ release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { pr_debug("release set %s\n", ip_set_list[index]->name); - __ip_set_put(index); + ip_set_put_byindex(index); } /* If we dump all sets, continue with dumping last ones */ diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a47c32982f06..e9159e99fc4b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -43,14 +43,19 @@ struct list_set { static inline struct set_elem * list_set_elem(const struct list_set *map, u32 id) { - return (struct set_elem *)((char *)map->members + id * map->dsize); + return (struct set_elem *)((void *)map->members + id * map->dsize); +} + +static inline struct set_telem * +list_set_telem(const struct list_set *map, u32 id) +{ + return (struct set_telem *)((void *)map->members + id * map->dsize); } static inline bool list_set_timeout(const struct list_set *map, u32 id) { - const struct set_telem *elem = - (const struct set_telem *) list_set_elem(map, id); + const struct set_telem *elem = list_set_telem(map, id); return ip_set_timeout_test(elem->timeout); } @@ -58,19 +63,11 @@ list_set_timeout(const struct list_set *map, u32 id) static inline bool list_set_expired(const struct list_set *map, u32 id) { - const struct set_telem *elem = - (const struct set_telem *) list_set_elem(map, id); + const struct set_telem *elem = list_set_telem(map, id); return ip_set_timeout_expired(elem->timeout); } -static inline int -list_set_exist(const struct set_telem *elem) -{ - return elem->id != IPSET_INVALID_ID && - !ip_set_timeout_expired(elem->timeout); -} - /* Set list without and with timeout */ static int @@ -146,11 +143,11 @@ list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, struct set_telem *e; for (; i < map->size; i++) { - e = (struct set_telem *)list_set_elem(map, i); + e = list_set_telem(map, i); 
swap(e->id, id); + swap(e->timeout, timeout); if (e->id == IPSET_INVALID_ID) break; - swap(e->timeout, timeout); } } @@ -164,7 +161,7 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); if (with_timeout(map->timeout)) - list_elem_tadd(map, i, id, timeout); + list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else list_elem_add(map, i, id); @@ -172,11 +169,11 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, } static int -list_set_del(struct list_set *map, ip_set_id_t id, u32 i) +list_set_del(struct list_set *map, u32 i) { struct set_elem *a = list_set_elem(map, i), *b; - ip_set_put_byindex(id); + ip_set_put_byindex(a->id); for (; i < map->size - 1; i++) { b = list_set_elem(map, i + 1); @@ -308,11 +305,11 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], (before == 0 || (before > 0 && next_id_eq(map, i, refid)))) - ret = list_set_del(map, id, i); + ret = list_set_del(map, i); else if (before < 0 && elem->id == refid && next_id_eq(map, i, id)) - ret = list_set_del(map, id, i + 1); + ret = list_set_del(map, i + 1); } break; default: @@ -369,8 +366,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); if (with_timeout(map->timeout)) NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->size * map->dsize)); ipset_nest_end(skb, nested); @@ -461,16 +457,13 @@ list_set_gc(unsigned long ul_set) struct set_telem *e; u32 i; - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); - for (i = map->size - 1; i >= 0; i--) { - e = (struct set_telem *) list_set_elem(map, i); - if (e->id != IPSET_INVALID_ID && - list_set_expired(map, i)) - list_set_del(map, e->id, i); + write_lock_bh(&set->lock); + for (i = 0; i < map->size; i++) { + e = list_set_telem(map, i); + if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) + list_set_del(map, i); } - read_unlock_bh(&set->lock); + write_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; add_timer(&map->gc); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index f289306cbf12..c97bd45975be 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -595,7 +595,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) atomic_inc(&dest->inactconns); } else { /* It is a persistent connection/template, so increase - the peristent connection counter */ + the persistent connection counter */ atomic_inc(&dest->persistconns); } @@ -657,7 +657,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) } } else { /* It is a persistent connection/template, so decrease - the peristent connection counter */ + the persistent connection counter */ atomic_dec(&dest->persistconns); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 33733c8872e7..ae47090bf45f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3120,7 +3120,7 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb_net(skb); + struct net *net = skb_sknet(skb); struct netns_ipvs *ipvs = net_ipvs(net); 
mutex_lock(&__ip_vs_mutex); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index f276df9896b3..87e40ea77a95 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -131,7 +131,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); /* - * We don't kfree dest because it is refered either by its service + * We don't kfree dest because it is referred either by its service * or the trash dest list. */ atomic_dec(&en->dest->refcnt); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index cb1c9913d38b..90f618ab6dda 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -152,7 +152,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) write_lock(&set->lock); list_for_each_entry_safe(e, ep, &set->list, list) { /* - * We don't kfree dest because it is refered either + * We don't kfree dest because it is referred either * by its service or by the trash dest list. */ atomic_dec(&e->dest->refcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index b027ccc49f43..d12ed53ec95f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -566,7 +566,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -633,7 +633,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -701,7 +701,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -771,7 +771,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 941286ca911d..2e1c11f78419 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -453,7 +453,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) REJECT will give spurious warnings here. */ /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ - /* No external references means noone else could have + /* No external references means no one else could have confirmed us. 
*/ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); @@ -901,7 +901,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { - pr_debug("not prepared to track yet or error occured\n"); + pr_debug("not prepared to track yet or error occurred\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); ret = -ret; diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 867882313e49..bcd5ed6b7130 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -631,7 +631,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f, CHECK_BOUND(bs, 2); count = *bs->cur++; count <<= 8; - count = *bs->cur++; + count += *bs->cur++; break; case SEMI: BYTE_ALIGN(bs); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 533a183e6661..18b2ce5c8ced 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -731,10 +731,10 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; - if (!afinfo->route((struct dst_entry **)&rt1, - flowi4_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, - flowi4_to_flowi(&fl2))) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + flowi4_to_flowi(&fl1), false)) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + flowi4_to_flowi(&fl2), false)) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; @@ -755,10 +755,10 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); ipv6_addr_copy(&fl2.daddr, &dst->in6); - if (!afinfo->route((struct dst_entry **)&rt1, - flowi6_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, - flowi6_to_flowi(&fl2))) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + flowi6_to_flowi(&fl1), false)) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2), false)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 9ae57c57c50e..2e664a69d7db 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -98,7 +98,7 @@ static const char * const dccp_state_names[] = { #define sIV CT_DCCP_INVALID /* - * DCCP state transistion table + * DCCP state transition table * * The assumption is the same as for TCP tracking: * diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6f4ee70f460b..6772b1154654 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -107,9 +107,9 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir 
*/ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */ /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} }, { @@ -121,7 +121,7 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */ /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index bcf47eb518ef..237cc1981b89 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -707,7 +707,7 @@ static const char *ct_sdp_header_search(const char *dptr, const char *limit, } /* Locate a SDP header (optionally a substring within the header value), - * optionally stopping at the first occurence of the term header, parse + * optionally stopping at the first occurrence of the term header, parse * it and return the offset and length of the data we're interested in. */ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5ab22e2bbd7d..5b466cd1272f 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -134,7 +134,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; - /* QUEUE == DROP if noone is waiting, to be safe. */ + /* QUEUE == DROP if no one is waiting, to be safe. */ rcu_read_lock(); qh = rcu_dereference(queue_handler[pf]); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 6e6b46cb1db9..9e63b43faeed 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); + ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); rcu_read_unlock(); if (rt != NULL) { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 2220b85e9519..b77d383cec78 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -32,11 +32,32 @@ MODULE_ALIAS("ipt_addrtype"); MODULE_ALIAS("ip6t_addrtype"); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) +static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, + const struct in6_addr *addr) { + const struct nf_afinfo *afinfo; + struct flowi6 flow; + struct rt6_info *rt; u32 ret; + int route_err; - if (!rt) + memset(&flow, 0, sizeof(flow)); + ipv6_addr_copy(&flow.daddr, addr); + if (dev) + flow.flowi6_oif = dev->ifindex; + + rcu_read_lock(); + + afinfo = nf_get_afinfo(NFPROTO_IPV6); + if (afinfo != NULL) + route_err = afinfo->route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), !!dev); + else + route_err = 1; + + rcu_read_unlock(); + + if (route_err) return XT_ADDRTYPE_UNREACHABLE; if (rt->rt6i_flags & RTF_REJECT) @@ -48,6 +69,9 @@ static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) ret |= XT_ADDRTYPE_LOCAL; if (rt->rt6i_flags & RTF_ANYCAST) ret |= XT_ADDRTYPE_ANYCAST; + + + dst_release(&rt->dst); return 
ret; } @@ -65,18 +89,8 @@ static bool match_type6(struct net *net, const struct net_device *dev, return false; if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | - XT_ADDRTYPE_UNREACHABLE) & mask) { - struct rt6_info *rt; - u32 type; - int ifindex = dev ? dev->ifindex : 0; - - rt = rt6_lookup(net, addr, NULL, ifindex, !!dev); - - type = xt_addrtype_rt6_to_type(rt); - - dst_release(&rt->dst); - return !!(mask & type); - } + XT_ADDRTYPE_UNREACHABLE) & mask) + return !!(mask & match_lookup_rt6(net, dev, addr)); return true; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 2c0086a4751e..481a86fdc409 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -195,7 +195,7 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return info->match_flags & XT_CONNTRACK_STATE; if ((info->match_flags & XT_CONNTRACK_DIRECTION) && (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ - !!(info->invert_flags & XT_CONNTRACK_DIRECTION)) + !(info->invert_flags & XT_CONNTRACK_DIRECTION)) return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index d37b7f80fa37..de0d8e4cbfb6 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -109,7 +109,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) * * Description: * This is the hashing function for the domain hash table, it returns the - * correct bucket number for the domain. The caller is responsibile for + * correct bucket number for the domain. The caller is responsible for * ensuring that the hash table is protected with either a RCU read lock or the * hash table lock. * @@ -134,7 +134,7 @@ static u32 netlbl_domhsh_hash(const char *key) * * Description: * Searches the domain hash table and returns a pointer to the hash table - * entry if found, otherwise NULL is returned. The caller is responsibile for + * entry if found, otherwise NULL is returned. The caller is responsible for * ensuring that the hash table is protected with either a RCU read lock or the * hash table lock. * @@ -165,7 +165,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) * Searches the domain hash table and returns a pointer to the hash table * entry if an exact match is found, if an exact match is not present in the * hash table then the default entry is returned if valid otherwise NULL is - * returned. The caller is responsibile ensuring that the hash table is + * returned. The caller is responsible ensuring that the hash table is * protected with either a RCU read lock or the hash table lock. * */ @@ -193,7 +193,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) * * Description: * Generate an audit record for adding a new NetLabel/LSM mapping entry with - * the given information. Caller is responsibile for holding the necessary + * the given information. Caller is responsible for holding the necessary * locks. * */ @@ -605,7 +605,7 @@ int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info) * * Description: * Look through the domain hash table searching for an entry to match @domain, - * return a pointer to a copy of the entry or NULL. The caller is responsibile + * return a pointer to a copy of the entry or NULL. The caller is responsible * for ensuring that rcu_read_[un]lock() is called. 
* */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 998e85e895d0..4f251b19fbcc 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -259,7 +259,7 @@ add_failure: * * Description: * This function is a helper function used by the LISTALL and LISTDEF command - * handlers. The caller is responsibile for ensuring that the RCU read lock + * handlers. The caller is responsible for ensuring that the RCU read lock * is held. Returns zero on success, negative values on failure. * */ diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index c47a511f203d..7c4dce8fa5e6 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -355,7 +355,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) * * Conceptually, we have two counters: * - send credits: this tells us how many WRs we're allowed - * to submit without overruning the reciever's queue. For + * to submit without overruning the receiver's queue. For * each SEND WR we post, we decrement this by one. * * - posted credits: this tells us how many WRs we recently diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 712cf2d1f28e..3a60a15d1b4a 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -181,7 +181,7 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, unsigned int send_size, recv_size; int ret; - /* The offset of 1 is to accomodate the additional ACK WR. */ + /* The offset of 1 is to accommodate the additional ACK WR. */ send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1); recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1); rds_iw_ring_resize(send_ring, send_size - 1); diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 59509e9a9e72..6deaa77495e3 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -122,7 +122,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd #else /* FIXME - needs to compare the local and remote * ipaddr/port tuple, but the ipaddr is the only - * available infomation in the rds_sock (as the rest are + * available information in the rds_sock (as the rest are * zero'ed. It doesn't appear to be properly populated * during connection setup... */ diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 6280ea020d4e..545d8ee3efb1 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -307,7 +307,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) * * Conceptually, we have two counters: * - send credits: this tells us how many WRs we're allowed - * to submit without overruning the reciever's queue. For + * to submit without overruning the receiver's queue. For * each SEND WR we post, we decrement this by one. * * - posted credits: this tells us how many WRs we recently diff --git a/net/rds/send.c b/net/rds/send.c index 35b9c2e9caf1..d58ae5f9339e 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -116,7 +116,7 @@ static void release_in_xmit(struct rds_connection *conn) } /* - * We're making the concious trade-off here to only send one message + * We're making the conscious trade-off here to only send one message * down the connection at a time. * Pro: * - tx queueing is a simple fifo list diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 08dcd2f29cdc..479cae57d187 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -587,7 +587,7 @@ static int rose_clear_routes(void) /* * Check that the device given is a valid AX.25 interface that is "up". 
- * called whith RTNL + * called with RTNL */ static struct net_device *rose_ax25_dev_find(char *devname) { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a7a5583d4f68..aeaa2110b699 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -239,6 +239,17 @@ config NET_SCH_CHOKE To compile this code as a module, choose M here: the module will be called sch_choke. +config NET_SCH_QFQ + tristate "Quick Fair Queueing scheduler (QFQ)" + help + Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ) + packet scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_qfq. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 2e77b8dba22e..dc5889c0a15a 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o +obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 15873e14cb54..14b42f4ad791 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -999,7 +999,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) switch (n->nlmsg_type) { case RTM_NEWACTION: /* we are going to assume all other flags - * imply create only if it doesnt exist + * imply create only if it doesn't exist * Note that CREATE | EXCL implies that * but since we want avoid ambiguity (eg when flags * is zero) then just set this diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 50c7c06c019d..7affe9a92757 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -161,7 +161,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } if (offset > 0 && offset > skb->len) { pr_info("tc filter pedit" - " offset %d cant exceed pkt length %d\n", + " offset %d can't exceed pkt length %d\n", offset, skb->len); goto bad; } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index a4de67eca824..49130e8abff0 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -47,7 +47,7 @@ * on the meta type. Obviously, the length of the data must also * be provided for non-numeric types. * - * Additionaly, type dependant modifiers such as shift operators + * Additionally, type dependent modifiers such as shift operators * or mask may be applied to extend the functionaliy. As of now, * the variable length type supports shifting the byte string to * the right, eating up any number of octets and thus supporting diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index e1429a85091f..29b942ce9e82 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -183,7 +183,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) * filters in qdisc and in inner nodes (if higher filter points to the inner * node). If we end up with classid MAJOR:0 we enqueue the skb into special * internal fifo (direct). These packets then go directly thru. If we still - * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull + * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful * then finish and return direct queue. 
*/ #define HTB_DIRECT ((struct htb_class *)-1L) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index edbbf7ad6623..69c35f6cd13f 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -160,7 +160,7 @@ static bool loss_4state(struct netem_sched_data *q) u32 rnd = net_random(); /* - * Makes a comparision between rnd and the transition + * Makes a comparison between rnd and the transition * probabilities outgoing from the current state, then decides the * next state and if the next packet has to be transmitted or lost. * The four states correspond to: @@ -212,9 +212,9 @@ static bool loss_4state(struct netem_sched_data *q) * Generates losses according to the Gilbert-Elliot loss model or * its special cases (Gilbert or Simple Gilbert) * - * Makes a comparision between random number and the transition + * Makes a comparison between random number and the transition * probabilities outgoing from the current state, then decides the - * next state. A second random number is extracted and the comparision + * next state. A second random number is extracted and the comparison * with the loss probability of the current state decides if the next * packet will be transmitted or lost. */ diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c new file mode 100644 index 000000000000..103343408593 --- /dev/null +++ b/net/sched/sch_qfq.c @@ -0,0 +1,1137 @@ +/* + * net/sched/sch_qfq.c Quick Fair Queueing Scheduler. + * + * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/pkt_sched.h> +#include <net/sch_generic.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> + + +/* Quick Fair Queueing + =================== + + Sources: + + Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient + Packet Scheduling with Tight Bandwidth Distribution Guarantees." + + See also: + http://retis.sssup.it/~fabio/linux/qfq/ + */ + +/* + + Virtual time computations. + + S, F and V are all computed in fixed point arithmetic with + FRAC_BITS decimal bits. + + QFQ_MAX_INDEX is the maximum index allowed for a group. We need + one bit per index. + QFQ_MAX_WSHIFT is the maximum power of two supported as a weight. + + The layout of the bits is as below: + + [ MTU_SHIFT ][ FRAC_BITS ] + [ MAX_INDEX ][ MIN_SLOT_SHIFT ] + ^.__grp->index = 0 + *.__grp->slot_shift + + where MIN_SLOT_SHIFT is derived by difference from the others. + + The max group index corresponds to Lmax/w_min, where + Lmax=1<<MTU_SHIFT, w_min = 1 . + From this, and knowing how many groups (MAX_INDEX) we want, + we can derive the shift corresponding to each group. + + Because we often need to compute + F = S + len/w_i and V = V + len/wsum + instead of storing w_i store the value + inv_w = (1<<FRAC_BITS)/w_i + so we can do F = S + len * inv_w * wsum. + We use W_TOT in the formulas so we can easily move between + static and adaptive weight sum. + + The per-scheduler-instance data contain all the data structures + for the scheduler: bitmaps and bucket lists. + + */ + +/* + * Maximum number of consecutive slots occupied by backlogged classes + * inside a group. + */ +#define QFQ_MAX_SLOTS 32 + +/* + * Shifts used for class<->group mapping. 
We allow class weights that are + * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the + * group with the smallest index that can support the L_i / r_i configured + * for the class. + * + * grp->index is the index of the group; and grp->slot_shift + * is the shift for the corresponding (scaled) sigma_i. + */ +#define QFQ_MAX_INDEX 19 +#define QFQ_MAX_WSHIFT 16 + +#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT) +#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT) + +#define FRAC_BITS 30 /* fixed point arithmetic */ +#define ONE_FP (1UL << FRAC_BITS) +#define IWSUM (ONE_FP/QFQ_MAX_WSUM) + +#define QFQ_MTU_SHIFT 11 +#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX) + +/* + * Possible group states. These values are used as indexes for the bitmaps + * array of struct qfq_queue. + */ +enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE }; + +struct qfq_group; + +struct qfq_class { + struct Qdisc_class_common common; + + unsigned int refcnt; + unsigned int filter_cnt; + + struct gnet_stats_basic_packed bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct Qdisc *qdisc; + + struct hlist_node next; /* Link for the slot list. */ + u64 S, F; /* flow timestamps (exact) */ + + /* group we belong to. In principle we would need the index, + * which is log_2(lmax/weight), but we never reference it + * directly, only the group. + */ + struct qfq_group *grp; + + /* these are copied from the flowset. */ + u32 inv_w; /* ONE_FP/weight */ + u32 lmax; /* Max packet size for this flow. */ +}; + +struct qfq_group { + u64 S, F; /* group timestamps (approx). */ + unsigned int slot_shift; /* Slot shift. */ + unsigned int index; /* Group index. */ + unsigned int front; /* Index of the front slot. */ + unsigned long full_slots; /* non-empty slots */ + + /* Array of RR lists of active classes. */ + struct hlist_head slots[QFQ_MAX_SLOTS]; +}; + +struct qfq_sched { + struct tcf_proto *filter_list; + struct Qdisc_class_hash clhash; + + u64 V; /* Precise virtual time. */ + u32 wsum; /* weight sum */ + + unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ + struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ +}; + +static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct Qdisc_class_common *clc; + + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct qfq_class, common); +} + +static void qfq_purge_queue(struct qfq_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + qdisc_tree_decrease_qlen(cl->qdisc, len); +} + +static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { + [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, + [TCA_QFQ_LMAX] = { .type = NLA_U32 }, +}; + +/* + * Calculate a flow index, given its weight and maximum packet length. + * index = log_2(maxlen/weight) but we need to apply the scaling. + * This is used only once at flow creation. 
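+ * For example (illustrative numbers only): with FRAC_BITS = 30, QFQ_MTU_SHIFT = 11 and QFQ_MAX_INDEX = 19, QFQ_MIN_SLOT_SHIFT is 22. + * A flow with weight 1 (inv_w = ONE_FP) and maxlen = 2048 has slot_size = 2^41 and size_map = 2^19, so __fls() gives 19 and index starts + * at 20; slot_size is an exact power of two, so the correction below subtracts one and the final index is 19 = QFQ_MAX_INDEX.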
+ */ +static int qfq_calc_index(u32 inv_w, unsigned int maxlen) +{ + u64 slot_size = (u64)maxlen * inv_w; + unsigned long size_map; + int index = 0; + + size_map = slot_size >> QFQ_MIN_SLOT_SHIFT; + if (!size_map) + goto out; + + index = __fls(size_map) + 1; /* basically a log_2 */ + index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1))); + + if (index < 0) + index = 0; +out: + pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n", + (unsigned long) ONE_FP/inv_w, maxlen, index); + + return index; +} + +static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)*arg; + struct nlattr *tb[TCA_QFQ_MAX + 1]; + u32 weight, lmax, inv_w; + int i, err; + + if (tca[TCA_OPTIONS] == NULL) { + pr_notice("qfq: no options\n"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy); + if (err < 0) + return err; + + if (tb[TCA_QFQ_WEIGHT]) { + weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); + if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) { + pr_notice("qfq: invalid weight %u\n", weight); + return -EINVAL; + } + } else + weight = 1; + + inv_w = ONE_FP / weight; + weight = ONE_FP / inv_w; + if (q->wsum + weight > QFQ_MAX_WSUM) { + pr_notice("qfq: total weight out of range (%u + %u)\n", + weight, q->wsum); + return -EINVAL; + } + + if (tb[TCA_QFQ_LMAX]) { + lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); + if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) { + pr_notice("qfq: invalid max length %u\n", lmax); + return -EINVAL; + } + } else + lmax = 1UL << QFQ_MTU_SHIFT; + + if (cl != NULL) { + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + + sch_tree_lock(sch); + if (tb[TCA_QFQ_WEIGHT]) { + q->wsum += weight - ONE_FP / cl->inv_w; /* swap old weight for new in the sum */ + cl->inv_w = inv_w; + } + sch_tree_unlock(sch); + + return 0; + } + + cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + + cl->refcnt = 1; + cl->common.classid = classid; + cl->lmax = lmax; + cl->inv_w = inv_w; + i = qfq_calc_index(cl->inv_w, cl->lmax); + + cl->grp = &q->groups[i]; + q->wsum += weight; + + cl->qdisc = qdisc_create_dflt(sch->dev_queue, + &pfifo_qdisc_ops, classid); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + qdisc_destroy(cl->qdisc); + kfree(cl); + return err; + } + } + + sch_tree_lock(sch); + qdisc_class_hash_insert(&q->clhash, &cl->common); + sch_tree_unlock(sch); + + qdisc_class_hash_grow(sch, &q->clhash); + + *arg = (unsigned long)cl; + return 0; +} + +static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) +{ + struct qfq_sched *q = qdisc_priv(sch); + + if (cl->inv_w) { + q->wsum -= ONE_FP / cl->inv_w; + cl->inv_w = 0; + } + + gen_kill_estimator(&cl->bstats, &cl->rate_est); + qdisc_destroy(cl->qdisc); + kfree(cl); +} + +static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)arg; + + if (cl->filter_cnt > 0) + return -EBUSY; + + sch_tree_lock(sch); + + qfq_purge_queue(cl); + qdisc_class_hash_remove(&q->clhash, &cl->common); + + BUG_ON(--cl->refcnt == 0); + /* + * This shouldn't happen: we "hold" one cops->get() when called + * from tc_ctl_tclass; the destroy method is done
from cops->put(). + */ + + sch_tree_unlock(sch); + return 0; +} + +static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid) +{ + struct qfq_class *cl = qfq_find_class(sch, classid); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void qfq_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + if (--cl->refcnt == 0) + qfq_destroy_class(sch, cl); +} + +static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl) +{ + struct qfq_sched *q = qdisc_priv(sch); + + if (cl) + return NULL; + + return &q->filter_list; +} + +static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + struct qfq_class *cl = qfq_find_class(sch, classid); + + if (cl != NULL) + cl->filter_cnt++; + + return (unsigned long)cl; +} + +static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + cl->filter_cnt--; +} + +static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + if (new == NULL) { + new = qdisc_create_dflt(sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + qfq_purge_queue(cl); + *old = cl->qdisc; + cl->qdisc = new; + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + return cl->qdisc; +} + +static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w); + NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax); + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + struct tc_qfq_stats xstats; + + memset(&xstats, 0, sizeof(xstats)); + cl->qdisc->qstats.qlen = cl->qdisc->q.qlen; + + xstats.weight = ONE_FP/cl->inv_w; + xstats.lmax = cl->lmax; + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + return -1; + + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct hlist_node *n; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { + pr_debug("qfq_classify: found %d\n", skb->priority); + 
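+ /* skb->priority is interpreted as a classid here; e.g. (an illustrative value) priority 0x10002 names class 1:2 and, with qdisc handle 1:, passes the TC_H_MAJ test above. */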
cl = qfq_find_class(sch, skb->priority); + if (cl != NULL) + return cl; + } + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + cl = (struct qfq_class *)res.class; + if (cl == NULL) + cl = qfq_find_class(sch, res.classid); + return cl; + } + + return NULL; +} + +/* Generic comparison function, handling wraparound. */ +static inline int qfq_gt(u64 a, u64 b) +{ + return (s64)(a - b) > 0; +} + +/* Round a precise timestamp to its slotted value. */ +static inline u64 qfq_round_down(u64 ts, unsigned int shift) +{ + return ts & ~((1ULL << shift) - 1); +} + +/* return the pointer to the group with lowest index in the bitmap */ +static inline struct qfq_group *qfq_ffs(struct qfq_sched *q, + unsigned long bitmap) +{ + int index = __ffs(bitmap); + return &q->groups[index]; +} +/* Calculate a mask to mimic what would be ffs_from(). */ +static inline unsigned long mask_from(unsigned long bitmap, int from) +{ + return bitmap & ~((1UL << from) - 1); +} + +/* + * The state computation relies on ER=0, IR=1, EB=2, IB=3 + * First compute eligibility comparing grp->S, q->V, + * then check if someone is blocking us and possibly add EB + */ +static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp) +{ + /* if S > V we are not eligible */ + unsigned int state = qfq_gt(grp->S, q->V); + unsigned long mask = mask_from(q->bitmaps[ER], grp->index); + struct qfq_group *next; + + if (mask) { + next = qfq_ffs(q, mask); + if (qfq_gt(grp->F, next->F)) + state |= EB; + } + + return state; +} + + +/* + * In principle + * q->bitmaps[dst] |= q->bitmaps[src] & mask; + * q->bitmaps[src] &= ~mask; + * but we should make sure that src != dst + */ +static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask, + int src, int dst) +{ + q->bitmaps[dst] |= q->bitmaps[src] & mask; + q->bitmaps[src] &= ~mask; +} + +static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F) +{ + unsigned long mask = mask_from(q->bitmaps[ER], index + 1); + struct qfq_group *next; + + if (mask) { + next = qfq_ffs(q, mask); + if (!qfq_gt(next->F, old_F)) + return; + } + + mask = (1UL << index) - 1; + qfq_move_groups(q, mask, EB, ER); + qfq_move_groups(q, mask, IB, IR); +} + +/* + * perhaps + * + old_V ^= q->V; + old_V >>= QFQ_MIN_SLOT_SHIFT; + if (old_V) { + ... + } + * + */ +static void qfq_make_eligible(struct qfq_sched *q, u64 old_V) +{ + unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT; + unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT; + + if (vslot != old_vslot) { + unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1; + qfq_move_groups(q, mask, IR, ER); + qfq_move_groups(q, mask, IB, EB); + } +} + + +/* + * XXX we should make sure that slot becomes less than 32. + * This is guaranteed by the input values. + * roundedS is always cl->S rounded on grp->slot_shift bits. + */ +static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, + u64 roundedS) +{ + u64 slot = (roundedS - grp->S) >> grp->slot_shift; + unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS; + + hlist_add_head(&cl->next, &grp->slots[i]); + __set_bit(slot, &grp->full_slots); +} + +/* Maybe introduce hlist_first_entry?? 
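+ * (qfq_slot_head below just returns the first class queued in the group's front bucket; callers rely on full_slots to guarantee the bucket is non-empty.)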
*/ +static struct qfq_class *qfq_slot_head(struct qfq_group *grp) +{ + return hlist_entry(grp->slots[grp->front].first, + struct qfq_class, next); +} + +/* + * remove the entry from the slot + */ +static void qfq_front_slot_remove(struct qfq_group *grp) +{ + struct qfq_class *cl = qfq_slot_head(grp); + + BUG_ON(!cl); + hlist_del(&cl->next); + if (hlist_empty(&grp->slots[grp->front])) + __clear_bit(0, &grp->full_slots); +} + +/* + * Returns the first full queue in a group. As a side effect, + * adjust the bucket list so the first non-empty bucket is at + * position 0 in full_slots. + */ +static struct qfq_class *qfq_slot_scan(struct qfq_group *grp) +{ + unsigned int i; + + pr_debug("qfq slot_scan: grp %u full %#lx\n", + grp->index, grp->full_slots); + + if (grp->full_slots == 0) + return NULL; + + i = __ffs(grp->full_slots); /* zero based */ + if (i > 0) { + grp->front = (grp->front + i) % QFQ_MAX_SLOTS; + grp->full_slots >>= i; + } + + return qfq_slot_head(grp); +} + +/* + * adjust the bucket list. When the start time of a group decreases, + * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to + * move the objects. The mask of occupied slots must be shifted + * because we use ffs() to find the first non-empty slot. + * This covers decreases in the group's start time, but what about + * increases of the start time ? + * Here too we should make sure that i is less than 32 + */ +static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS) +{ + unsigned int i = (grp->S - roundedS) >> grp->slot_shift; + + grp->full_slots <<= i; + grp->front = (grp->front - i) % QFQ_MAX_SLOTS; +} + +static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) +{ + struct qfq_group *grp; + unsigned long ineligible; + + ineligible = q->bitmaps[IR] | q->bitmaps[IB]; + if (ineligible) { + if (!q->bitmaps[ER]) { + grp = qfq_ffs(q, ineligible); + if (qfq_gt(grp->S, q->V)) + q->V = grp->S; + } + qfq_make_eligible(q, old_V); + } +} + +/* What is length of next packet in queue (0 if queue is empty) */ +static unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + + skb = sch->ops->peek(sch); + return skb ? qdisc_pkt_len(skb) : 0; +} + +/* + * Updates the class, returns true if also the group needs to be updated. 
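+ * (Concretely: if the class still has packets and its new rounded start equals grp->S, it stays in the current front slot and we return + * false; if its queue emptied or the rounded start moved to a later slot, grp->S/F and the bitmaps must be recomputed, so we return true.)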
+ */ +static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl) +{ + unsigned int len = qdisc_peek_len(cl->qdisc); + + cl->S = cl->F; + if (!len) + qfq_front_slot_remove(grp); /* queue is empty */ + else { + u64 roundedS; + + cl->F = cl->S + (u64)len * cl->inv_w; + roundedS = qfq_round_down(cl->S, grp->slot_shift); + if (roundedS == grp->S) + return false; + + qfq_front_slot_remove(grp); + qfq_slot_insert(grp, cl, roundedS); + } + + return true; +} + +static struct sk_buff *qfq_dequeue(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + struct sk_buff *skb; + unsigned int len; + u64 old_V; + + if (!q->bitmaps[ER]) + return NULL; + + grp = qfq_ffs(q, q->bitmaps[ER]); + + cl = qfq_slot_head(grp); + skb = qdisc_dequeue_peeked(cl->qdisc); + if (!skb) { + WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); + return NULL; + } + + sch->q.qlen--; + qdisc_bstats_update(sch, skb); + + old_V = q->V; + len = qdisc_pkt_len(skb); + q->V += (u64)len * IWSUM; + pr_debug("qfq dequeue: len %u F %lld now %lld\n", + len, (unsigned long long) cl->F, (unsigned long long) q->V); + + if (qfq_update_class(grp, cl)) { + u64 old_F = grp->F; + + cl = qfq_slot_scan(grp); + if (!cl) + __clear_bit(grp->index, &q->bitmaps[ER]); + else { + u64 roundedS = qfq_round_down(cl->S, grp->slot_shift); + unsigned int s; + + if (grp->S == roundedS) + goto skip_unblock; + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + __clear_bit(grp->index, &q->bitmaps[ER]); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + } + + qfq_unblock_groups(q, grp->index, old_F); + } + +skip_unblock: + qfq_update_eligible(q, old_V); + + return skb; +} + +/* + * Assign a reasonable start time for a new flow k in group i. + * Admissible values for \hat(F) are multiples of \sigma_i + * no greater than V+\sigma_i . Larger values mean that + * we had a wraparound so we consider the timestamp to be stale. + * + * If F is not stale and F >= V then we set S = F. + * Otherwise we should assign S = V, but this may violate + * the ordering in ER. So, if we have groups in ER, set S to + * the F_j of the first group j which would be blocking us. + * We are guaranteed not to move S backward because + * otherwise our group i would still be blocked. 
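+ * (Illustrative numbers: with V = 100 and a stale rounded F of 96, we restart from S = V = 100 unless the first ER group at or above our + * index has F_j = 90 < 96, in which case we take S = F_j = 90 so that group is not leapfrogged.)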
+ */ +static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) +{ + unsigned long mask; + uint32_t limit, roundedF; + int slot_shift = cl->grp->slot_shift; + + roundedF = qfq_round_down(cl->F, slot_shift); + limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift); + + if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) { + /* timestamp was stale */ + mask = mask_from(q->bitmaps[ER], cl->grp->index); + if (mask) { + struct qfq_group *next = qfq_ffs(q, mask); + if (qfq_gt(roundedF, next->F)) { + cl->S = next->F; + return; + } + } + cl->S = q->V; + } else /* timestamp is not stale */ + cl->S = cl->F; +} + +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + int err; + u64 roundedS; + int s; + + cl = qfq_classify(skb, sch, &err); + if (cl == NULL) { + if (err & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return err; + } + pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); + + err = qdisc_enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + pr_debug("qfq_enqueue: enqueue failed %d\n", err); + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } + return err; + } + + bstats_update(&cl->bstats, skb); + ++sch->q.qlen; + + /* If the new skb is not the head of queue, then done here. */ + if (cl->qdisc->q.qlen != 1) + return err; + + /* If we reach this point, queue q was idle */ + grp = cl->grp; + qfq_update_start(q, cl); + + /* compute new finish time and rounded start. */ + cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; + roundedS = qfq_round_down(cl->S, grp->slot_shift); + + /* + * insert cl in the correct bucket. + * If cl->S >= grp->S we don't need to adjust the + * bucket list and simply go to the insertion phase. + * Otherwise grp->S is decreasing, we must make room + * in the bucket list, and also recompute the group state. + * Finally, if there were no flows in this group and nobody + * was in ER make sure to adjust V. + */ + if (grp->full_slots) { + if (!qfq_gt(grp->S, cl->S)) + goto skip_update; + + /* create a slot for this cl->S */ + qfq_slot_rotate(grp, roundedS); + /* group was surely ineligible, remove */ + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[IB]); + } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) + q->V = roundedS; + + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + + pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n", + s, q->bitmaps[s], + (unsigned long long) cl->S, + (unsigned long long) cl->F, + (unsigned long long) q->V); + +skip_update: + qfq_slot_insert(grp, cl, roundedS); + + return err; +} + + +static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, + struct qfq_class *cl) +{ + unsigned int i, offset; + u64 roundedS; + + roundedS = qfq_round_down(cl->S, grp->slot_shift); + offset = (roundedS - grp->S) >> grp->slot_shift; + i = (grp->front + offset) % QFQ_MAX_SLOTS; + + hlist_del(&cl->next); + if (hlist_empty(&grp->slots[i])) + __clear_bit(offset, &grp->full_slots); +} + +/* + * called to forcibly destroy a queue. + * If the queue is not in the front bucket, or if it has + * other queues in the front bucket, we can simply remove + * the queue with no other side effects. + * Otherwise we must propagate the event up.
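+ * (Propagation example: if the destroyed class was the last one in its group, the group is cleared from every bitmap and groups it was + * blocking may move from EB/IB back to ER/IR; if it merely emptied the front bucket, the remaining slots are rescanned and grp->S/F are + * recomputed from the new head class.)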
+ */ +static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) +{ + struct qfq_group *grp = cl->grp; + unsigned long mask; + u64 roundedS; + int s; + + cl->F = cl->S; + qfq_slot_remove(q, grp, cl); + + if (!grp->full_slots) { + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[EB]); + __clear_bit(grp->index, &q->bitmaps[IB]); + + if (test_bit(grp->index, &q->bitmaps[ER]) && + !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) { + mask = q->bitmaps[ER] & ((1UL << grp->index) - 1); + if (mask) + mask = ~((1UL << __fls(mask)) - 1); + else + mask = ~0UL; + qfq_move_groups(q, mask, EB, ER); + qfq_move_groups(q, mask, IB, IR); + } + __clear_bit(grp->index, &q->bitmaps[ER]); + } else if (hlist_empty(&grp->slots[grp->front])) { + cl = qfq_slot_scan(grp); + roundedS = qfq_round_down(cl->S, grp->slot_shift); + if (grp->S != roundedS) { + __clear_bit(grp->index, &q->bitmaps[ER]); + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[EB]); + __clear_bit(grp->index, &q->bitmaps[IB]); + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + } + } + + qfq_update_eligible(q, q->V); +} + +static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)arg; + + if (cl->qdisc->q.qlen == 0) + qfq_deactivate_class(q, cl); +} + +static unsigned int qfq_drop(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + unsigned int i, j, len; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + for (j = 0; j < QFQ_MAX_SLOTS; j++) { + struct qfq_class *cl; + struct hlist_node *n; + + hlist_for_each_entry(cl, n, &grp->slots[j], next) { + + if (!cl->qdisc->ops->drop) + continue; + + len = cl->qdisc->ops->drop(cl->qdisc); + if (len > 0) { + sch->q.qlen--; + if (!cl->qdisc->q.qlen) + qfq_deactivate_class(q, cl); + + return len; + } + } + } + } + + return 0; +} + +static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + int i, j, err; + + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + grp->index = i; + grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS + - (QFQ_MAX_INDEX - i); + for (j = 0; j < QFQ_MAX_SLOTS; j++) + INIT_HLIST_HEAD(&grp->slots[j]); + } + + return 0; +} + +static void qfq_reset_qdisc(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + struct hlist_node *n, *tmp; + unsigned int i, j; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + for (j = 0; j < QFQ_MAX_SLOTS; j++) { + hlist_for_each_entry_safe(cl, n, tmp, + &grp->slots[j], next) { + qfq_deactivate_class(q, cl); + } + } + } + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + qdisc_reset(cl->qdisc); + } + sch->q.qlen = 0; +} + +static void qfq_destroy_qdisc(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct hlist_node *n, *next; + unsigned int i; + + tcf_destroy_chain(&q->filter_list); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) { + qfq_destroy_class(sch, cl); + } + } + qdisc_class_hash_destroy(&q->clhash); +} + +static const struct 
Qdisc_class_ops qfq_class_ops = { + .change = qfq_change_class, + .delete = qfq_delete_class, + .get = qfq_get_class, + .put = qfq_put_class, + .tcf_chain = qfq_tcf_chain, + .bind_tcf = qfq_bind_tcf, + .unbind_tcf = qfq_unbind_tcf, + .graft = qfq_graft_class, + .leaf = qfq_class_leaf, + .qlen_notify = qfq_qlen_notify, + .dump = qfq_dump_class, + .dump_stats = qfq_dump_class_stats, + .walk = qfq_walk, +}; + +static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { + .cl_ops = &qfq_class_ops, + .id = "qfq", + .priv_size = sizeof(struct qfq_sched), + .enqueue = qfq_enqueue, + .dequeue = qfq_dequeue, + .peek = qdisc_peek_dequeued, + .drop = qfq_drop, + .init = qfq_init_qdisc, + .reset = qfq_reset_qdisc, + .destroy = qfq_destroy_qdisc, + .owner = THIS_MODULE, +}; + +static int __init qfq_init(void) +{ + return register_qdisc(&qfq_qdisc_ops); +} + +static void __exit qfq_exit(void) +{ + unregister_qdisc(&qfq_qdisc_ops); +} + +module_init(qfq_init); +module_exit(qfq_exit); +MODULE_LICENSE("GPL"); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 6b04287913cd..0698cad61763 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1593,7 +1593,7 @@ void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc) struct sctp_chunk *ack; struct sctp_chunk *tmp; - /* We can remove all the entries from the queue upto + /* We can remove all the entries from the queue up to * the "Peer-Sequence-Number". */ list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list, diff --git a/net/sctp/auth.c b/net/sctp/auth.c index ddbbf7c81fa1..865e68fef21c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -113,7 +113,7 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp) return new; } -/* Free the shared key stucture */ +/* Free the shared key structure */ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) { BUG_ON(!list_empty(&sh_key->key_list)); @@ -122,7 +122,7 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) kfree(sh_key); } -/* Destory the entire key list. This is done during the +/* Destroy the entire key list. This is done during the * association and endpoint free process. */ void sctp_auth_destroy_keys(struct list_head *keys) @@ -324,7 +324,7 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( if (!peer_key_vector || !local_key_vector) goto out; - /* Figure out the order in wich the key_vectors will be + /* Figure out the order in which the key_vectors will be * added to the endpoint shared key. * SCTP-AUTH, Section 6.1: * This is performed by selecting the numerically smaller key diff --git a/net/sctp/input.c b/net/sctp/input.c index 826661be73e7..5436c6921167 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1034,7 +1034,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( * association. * * This means that any chunks that can help us identify the association need -* to be looked at to find this assocation. +* to be looked at to find this association.
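* (Illustrative note: an ASCONF chunk, for example, carries an Address Parameter from which the owning association can be looked up, as * __sctp_rcv_asconf_lookup() above does.)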
*/ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, const union sctp_addr *laddr, diff --git a/net/sctp/output.c b/net/sctp/output.c index 60600d337a3a..b4f3cf06d8da 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -510,7 +510,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) sh->checksum = sctp_end_cksum(crc32); } else { if (dst->dev->features & NETIF_F_SCTP_CSUM) { - /* no need to seed psuedo checksum for SCTP */ + /* no need to seed pseudo checksum for SCTP */ nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (skb_transport_header(nskb) - nskb->head); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 26dc005113a0..bf92a5b68f8b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -177,13 +177,13 @@ static inline int sctp_cacc_skip_3_2(struct sctp_transport *primary, __u32 tsn) * 3) If the missing report count for TSN t is to be * incremented according to [RFC2960] and * [SCTP_STEWART-2002], and CHANGEOVER_ACTIVE is set, - * then the sender MUST futher execute steps 3.1 and + * then the sender MUST further execute steps 3.1 and * 3.2 to determine if the missing report count for * TSN t SHOULD NOT be incremented. * * 3.3) If 3.1 and 3.2 do not dictate that the missing * report count for t should not be incremented, then - * the sender SOULD increment missing report count for + * the sender SHOULD increment missing report count for * t (according to [RFC2960] and [SCTP_STEWART_2002]). */ static inline int sctp_cacc_skip(struct sctp_transport *primary, @@ -843,7 +843,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) case SCTP_CID_ECN_CWR: case SCTP_CID_ASCONF_ACK: one_packet = 1; - /* Fall throught */ + /* Fall through */ case SCTP_CID_SACK: case SCTP_CID_HEARTBEAT: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b21b218d564f..5f86ee4b54c1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -482,7 +482,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * If the timer was a heartbeat, we only increment error counts * when we already have an outstanding HEARTBEAT that has not * been acknowledged. - * Additionaly, some tranport states inhibit error increments. + * Additionally, some transport states inhibit error increments. */ if (!is_hb) { asoc->overall_error_count++; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 4b4eb7c96bbd..76792083c379 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -551,7 +551,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * * This means that if we only want to abort associations * in an authenticated way (i.e AUTH+ABORT), then we - * can't destroy this association just becuase the packet + * can't destroy this association just because the packet * was malformed. */ if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) @@ -1546,7 +1546,7 @@ cleanup: } /* - * Handle simultanous INIT. + * Handle simultaneous INIT. * This means we started an INIT and then we got an INIT request from * our peer. * @@ -2079,7 +2079,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. - * Becasue the length is "invalid", we can't really discard just + * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet.
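* (That is, an ABORT whose length field is corrupt cannot be parsed past reliably, so the whole packet is dropped rather than just the chunk.)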
*/ @@ -2120,7 +2120,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. - * Becasue the length is "invalid", we can't really discard just + * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ @@ -2381,7 +2381,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. - * Becasue the length is "invalid", we can't really discard just + * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ @@ -2448,7 +2448,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. - * Becasue the length is "invalid", we can't really discard just + * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ @@ -3855,7 +3855,7 @@ gen_shutdown: } /* - * SCTP-AUTH Section 6.3 Receving authenticated chukns + * SCTP-AUTH Section 6.3 Receiving authenticated chunks * * The receiver MUST use the HMAC algorithm indicated in the HMAC * Identifier field. If this algorithm was not specified by the @@ -4231,7 +4231,7 @@ static sctp_disposition_t sctp_sf_abort_violation( * * This means that if we only want to abort associations * in an authenticated way (i.e AUTH+ABORT), then we - * can't destroy this association just becuase the packet + * can't destroy this association just because the packet * was malformed. */ if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) @@ -4402,9 +4402,9 @@ static sctp_disposition_t sctp_sf_violation_ctsn( } /* Handle protocol violation of an invalid chunk bundling. For example, - * when we have an association and we recieve bundled INIT-ACK, or + * when we have an association and we receive bundled INIT-ACK, or * SHUTDOWN-COMPLETE, our peer is clearly violating the "MUST NOT bundle" - * statement from the specs. Additinally, there might be an attacker + * statement from the specs. Additionally, there might be an attacker * on the path and we may not want to continue this communication. */ static sctp_disposition_t sctp_sf_violation_chunk( diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3951a10605bc..deb82e35a107 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1193,7 +1193,7 @@ out_free: * an endpoint that is multi-homed. Much like sctp_bindx() this call * allows a caller to specify multiple addresses at which a peer can be * reached. The way the SCTP stack uses the list of addresses to set up - * the association is implementation dependant. This function only + * the association is implementation dependent. This function only * specifies that the stack will try to make use of all the addresses in * the list when needed.
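* (Usage sketch, illustrative only: the caller packs consecutive struct sockaddr entries, one per peer address, into a single buffer and * passes that buffer with its total byte length; the stack may then use any of those addresses for the association.)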
* diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index aa72e89c3ee1..dff27d5e22fd 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -554,7 +554,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo)); /* Per TSVWG discussion with Randy. Allow the application to - * ressemble a fragmented message. + * reassemble a fragmented message. */ ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 17678189d054..f2d1de7f2ffb 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -240,7 +240,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) } else { /* * If fragment interleave is enabled, we - * can queue this to the recieve queue instead + * can queue this to the receive queue instead * of the lobby. */ if (sctp_sk(sk)->frag_interleave) diff --git a/net/socket.c b/net/socket.c index 5212447c86e7..d25f5a9d6fa2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2643,13 +2643,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) return -EFAULT; if (convert_in) { - /* We expect there to be holes between fs.m_ext and * fs.ring_cookie and at the end of fs, but nowhere else. */ - BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + - sizeof(compat_rxnfc->fs.m_u) != - offsetof(struct ethtool_rxnfc, fs.m_u) + - sizeof(rxnfc->fs.m_u)); + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + + sizeof(compat_rxnfc->fs.m_ext) != + offsetof(struct ethtool_rxnfc, fs.m_ext) + + sizeof(rxnfc->fs.m_ext)); BUILD_BUG_ON( offsetof(struct compat_ethtool_rxnfc, fs.location) - offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != @@ -2657,7 +2657,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) offsetof(struct ethtool_rxnfc, fs.ring_cookie)); if (copy_in_user(rxnfc, compat_rxnfc, - (void *)(&rxnfc->fs.m_u + 1) - + (void *)(&rxnfc->fs.m_ext + 1) - (void *)rxnfc) || copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, @@ -2674,7 +2674,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (convert_out) { if (copy_in_user(compat_rxnfc, rxnfc, - (const void *)(&rxnfc->fs.m_u + 1) - + (const void *)(&rxnfc->fs.m_ext + 1) - (const void *)rxnfc) || copy_in_user(&compat_rxnfc->fs.ring_cookie, &rxnfc->fs.ring_cookie, @@ -2986,7 +2986,7 @@ out: /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that - * use compatiable ioctls + * use compatible ioctls */ static int old_bridge_ioctl(compat_ulong_t __user *argp) { diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 9022f0a6503e..0a9a2ec2e469 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -427,7 +427,7 @@ static int context_derive_keys_rc4(struct krb5_ctx *ctx) { struct crypto_hash *hmac; - static const char sigkeyconstant[] = "signaturekey"; + char sigkeyconstant[] = "signaturekey"; int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ struct hash_desc desc; struct scatterlist sg[1]; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index bcdae78fdfc6..8d0f7d3c71c8 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1101,7 +1101,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp,
__be32 *authp) /* credential is: * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle - * at least 5 u32s, and is preceeded by length, so that makes 6. + * at least 5 u32s, and is preceded by length, so that makes 6. */ if (argv->iov_len < 5 * 4) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1e336a06d3e6..bf005d3c65ef 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -504,7 +504,7 @@ static int xs_nospace(struct rpc_task *task) * EAGAIN: The socket was blocked, please call again later to * complete the request * ENOTCONN: Caller needs to invoke connect logic then call again - * other: Some other error occured, the request was not sent + * other: Some other error occurred, the request was not sent */ static int xs_udp_send_request(struct rpc_task *task) { @@ -590,7 +590,7 @@ static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) * EAGAIN: The socket was blocked, please call again later to * complete the request * ENOTCONN: Caller needs to invoke connect logic then call again - * other: Some other error occured, the request was not sent + * other: Some other error occurred, the request was not sent * * XXX: In the case of soft timeouts, should we eventually give up * if sendmsg is not able to make progress? diff --git a/net/tipc/link.c b/net/tipc/link.c index 43639ff1cbec..ebf338f7b14e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2471,7 +2471,7 @@ exit: * A pending message being re-assembled must store certain values * to handle subsequent fragments correctly. The following functions * help storing these values in unused, available fields in the - * pending message. This makes dynamic memory allocation unecessary. + * pending message. This makes dynamic memory allocation unnecessary. */ static void set_long_msg_seqno(struct sk_buff *buf, u32 seqno) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c9fa6dfcf287..80025a1b3bfd 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -160,7 +160,7 @@ void tipc_named_withdraw(struct publication *publ) buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { - warn("Withdrawl distribution failure\n"); + warn("Withdrawal distribution failure\n"); return; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1663e1a2efdd..3a43a8304768 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -207,7 +207,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) /* * This may look like an off by one error but it is a bit more * subtle. 108 is the longest valid AF_UNIX path for a binding. - * sun_path[108] doesnt as such exist. However in kernel space + * sun_path[108] doesn't as such exist. However in kernel space * we are guaranteed that it is a valid memory location in our * kernel address buffer. */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 11f25c7a7a05..f346395314ba 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -51,7 +51,7 @@ /* * Structures for interfacing with the /proc filesystem. 
- * Router creates its own directory /proc/net/router with the folowing + * Router creates its own directory /proc/net/router with the following * entries: * config device configuration * status global device statistics diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2714379ce2d6..58d69959ab28 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -812,7 +812,7 @@ static void handle_channel(struct wiphy *wiphy, if (r) { /* * We will disable all channels that do not match our - * recieved regulatory rule unless the hint is coming + * received regulatory rule unless the hint is coming * from a Country IE and the Country IE had no information * about a band. The IEEE 802.11 spec allows for an AP * to send only a subset of the regulatory rules allowed, @@ -841,7 +841,7 @@ static void handle_channel(struct wiphy *wiphy, request_wiphy && request_wiphy == wiphy && request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { /* - * This gaurantees the driver's requested regulatory domain + * This guarantees the driver's requested regulatory domain * will always be used as a base for further regulatory * settings */ diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 406207515b5e..f77e4e75f914 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -31,7 +31,7 @@ * x25_parse_facilities - Parse facilities from skb into the facilities structs * * @skb: sk_buff to parse - * @facilities: Regular facilites, updated as facilities are found + * @facilities: Regular facilities, updated as facilities are found * @dte_facs: ITU DTE facilities, updated as DTE facilities are found * @vc_fac_mask: mask is updated with all facilities found * diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c index 25a810793968..c541b622ae16 100644 --- a/net/x25/x25_forward.c +++ b/net/x25/x25_forward.c @@ -31,7 +31,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, goto out_no_route; if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) { - /* This shouldnt happen, if it occurs somehow + /* This shouldn't happen, if it occurs somehow * do something sensible */ goto out_put_route; @@ -45,7 +45,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, } /* Remote end sending a call request on an already - * established LCI? It shouldnt happen, just in case.. + * established LCI? It shouldn't happen, just in case.. */ read_lock_bh(&x25_forward_list_lock); list_for_each(entry, &x25_forward_list) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3d15d3e1b2c4..5d1d60d3ca83 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -894,7 +894,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, u32 *f; nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); - if (nlh == NULL) /* shouldnt really happen ... */ + if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; f = nlmsg_data(nlh); @@ -954,7 +954,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, u32 *f; nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); - if (nlh == NULL) /* shouldnt really happen ... */ + if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; f = nlmsg_data(nlh); @@ -1361,7 +1361,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (!xp) return err; - /* shouldnt excl be based on nlh flags?? + /* shouldn't excl be based on nlh flags?? * Aha! 
this is anti-netlink really i.e more pfkey derived in netlink excl is a flag and you wouldn't need a type XFRM_MSG_UPDPOLICY - JHS */