diff options
Diffstat (limited to 'net')
364 files changed, 13605 insertions, 7757 deletions
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index e4a02ef55102..7fa0f382e7d1 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -1,6 +1,61 @@ -config 6LOWPAN +menuconfig 6LOWPAN tristate "6LoWPAN Support" depends on IPV6 ---help--- This enables IPv6 over Low power Wireless Personal Area Network - "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. + +menuconfig 6LOWPAN_NHC + tristate "Next Header Compression Support" + depends on 6LOWPAN + default y + ---help--- + Support for next header compression. + +if 6LOWPAN_NHC + +config 6LOWPAN_NHC_DEST + tristate "Destination Options Header Support" + default y + ---help--- + 6LoWPAN IPv6 Destination Options Header compression according to + RFC6282. + +config 6LOWPAN_NHC_FRAGMENT + tristate "Fragment Header Support" + default y + ---help--- + 6LoWPAN IPv6 Fragment Header compression according to RFC6282. + +config 6LOWPAN_NHC_HOP + tristate "Hop-by-Hop Options Header Support" + default y + ---help--- + 6LoWPAN IPv6 Hop-by-Hop Options Header compression according to + RFC6282. + +config 6LOWPAN_NHC_IPV6 + tristate "IPv6 Header Support" + default y + ---help--- + 6LoWPAN IPv6 Header compression according to RFC6282. + +config 6LOWPAN_NHC_MOBILITY + tristate "Mobility Header Support" + default y + ---help--- + 6LoWPAN IPv6 Mobility Header compression according to RFC6282. + +config 6LOWPAN_NHC_ROUTING + tristate "Routing Header Support" + default y + ---help--- + 6LoWPAN IPv6 Routing Header compression according to RFC6282. + +config 6LOWPAN_NHC_UDP + tristate "UDP Header Support" + default y + ---help--- + 6LoWPAN IPv6 UDP Header compression according to RFC6282. + +endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 415886bb456a..eb8baa72adc8 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -1,3 +1,12 @@ -obj-$(CONFIG_6LOWPAN) := 6lowpan.o +obj-$(CONFIG_6LOWPAN) += 6lowpan.o -6lowpan-y := iphc.o +6lowpan-y := iphc.o nhc.o + +#rfc6282 nhcs +obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o +obj-$(CONFIG_6LOWPAN_NHC_FRAGMENT) += nhc_fragment.o +obj-$(CONFIG_6LOWPAN_NHC_HOP) += nhc_hop.o +obj-$(CONFIG_6LOWPAN_NHC_IPV6) += nhc_ipv6.o +obj-$(CONFIG_6LOWPAN_NHC_MOBILITY) += nhc_mobility.o +obj-$(CONFIG_6LOWPAN_NHC_ROUTING) += nhc_routing.o +obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 32ffec6ef164..94a375c04f21 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -54,6 +54,8 @@ #include <net/ipv6.h> #include <net/af_ieee802154.h> +#include "nhc.h" + /* Uncompress address function for source and * destination address(non-multicast). * @@ -224,77 +226,6 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, return 0; } -static int uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) -{ - bool fail; - u8 tmp = 0, val = 0; - - fail = lowpan_fetch_skb(skb, &tmp, sizeof(tmp)); - - if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) { - pr_debug("UDP header uncompression\n"); - switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { - case LOWPAN_NHC_UDP_CS_P_00: - fail |= lowpan_fetch_skb(skb, &uh->source, - sizeof(uh->source)); - fail |= lowpan_fetch_skb(skb, &uh->dest, - sizeof(uh->dest)); - break; - case LOWPAN_NHC_UDP_CS_P_01: - fail |= lowpan_fetch_skb(skb, &uh->source, - sizeof(uh->source)); - fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); - uh->dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); - break; - case LOWPAN_NHC_UDP_CS_P_10: - fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); - uh->source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); - fail |= lowpan_fetch_skb(skb, &uh->dest, - sizeof(uh->dest)); - break; - case LOWPAN_NHC_UDP_CS_P_11: - fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); - uh->source = htons(LOWPAN_NHC_UDP_4BIT_PORT + - (val >> 4)); - uh->dest = htons(LOWPAN_NHC_UDP_4BIT_PORT + - (val & 0x0f)); - break; - default: - pr_debug("ERROR: unknown UDP format\n"); - goto err; - } - - pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", - ntohs(uh->source), ntohs(uh->dest)); - - /* checksum */ - if (tmp & LOWPAN_NHC_UDP_CS_C) { - pr_debug_ratelimited("checksum elided currently not supported\n"); - goto err; - } else { - fail |= lowpan_fetch_skb(skb, &uh->check, - sizeof(uh->check)); - } - - /* UDP length needs to be infered from the lower layers - * here, we obtain the hint from the remaining size of the - * frame - */ - uh->len = htons(skb->len + sizeof(struct udphdr)); - pr_debug("uncompressed UDP length: src = %d", ntohs(uh->len)); - } else { - pr_debug("ERROR: unsupported NH format\n"); - goto err; - } - - if (fail) - goto err; - - return 0; -err: - return -EINVAL; -} - /* TTL uncompression values */ static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 }; @@ -425,29 +356,11 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, return -EINVAL; } - /* UDP data uncompression */ + /* Next header data uncompression */ if (iphc0 & LOWPAN_IPHC_NH_C) { - struct udphdr uh; - const int needed = sizeof(struct udphdr) + sizeof(hdr); - - if (uncompress_udp_header(skb, &uh)) - return -EINVAL; - - /* replace the compressed UDP head by the uncompressed UDP - * header - */ - err = skb_cow(skb, needed); - if (unlikely(err)) + err = lowpan_nhc_do_uncompression(skb, dev, &hdr); + if (err < 0) return err; - - skb_push(skb, sizeof(struct udphdr)); - skb_reset_transport_header(skb); - skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); - - raw_dump_table(__func__, "raw UDP header dump", - (u8 *)&uh, sizeof(uh)); - - hdr.nexthdr = UIP_PROTO_UDP; } else { err = skb_cow(skb, sizeof(hdr)); if (unlikely(err)) @@ -500,71 +413,6 @@ static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift, return rol8(val, shift); } -static void compress_udp_header(u8 **hc_ptr, struct sk_buff *skb) -{ - struct udphdr *uh; - u8 tmp; - - /* In the case of RAW sockets the transport header is not set by - * the ip6 stack so we must set it ourselves - */ - if (skb->transport_header == skb->network_header) - skb_set_transport_header(skb, sizeof(struct ipv6hdr)); - - uh = udp_hdr(skb); - - if (((ntohs(uh->source) & LOWPAN_NHC_UDP_4BIT_MASK) == - LOWPAN_NHC_UDP_4BIT_PORT) && - ((ntohs(uh->dest) & LOWPAN_NHC_UDP_4BIT_MASK) == - LOWPAN_NHC_UDP_4BIT_PORT)) { - pr_debug("UDP header: both ports compression to 4 bits\n"); - /* compression value */ - tmp = LOWPAN_NHC_UDP_CS_P_11; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* source and destination port */ - tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT + - ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4); - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) == - LOWPAN_NHC_UDP_8BIT_PORT) { - pr_debug("UDP header: remove 8 bits of dest\n"); - /* compression value */ - tmp = LOWPAN_NHC_UDP_CS_P_01; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* source port */ - lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); - /* destination port */ - tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) == - LOWPAN_NHC_UDP_8BIT_PORT) { - pr_debug("UDP header: remove 8 bits of source\n"); - /* compression value */ - tmp = LOWPAN_NHC_UDP_CS_P_10; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* source port */ - tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* destination port */ - lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); - } else { - pr_debug("UDP header: can't compress\n"); - /* compression value */ - tmp = LOWPAN_NHC_UDP_CS_P_00; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* source port */ - lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); - /* destination port */ - lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); - } - - /* checksum is always inline */ - lowpan_push_hc_data(hc_ptr, &uh->check, sizeof(uh->check)); - - /* skip the UDP header */ - skb_pull(skb, sizeof(struct udphdr)); -} - int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) @@ -572,7 +420,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, u8 tmp, iphc0, iphc1, *hc_ptr; struct ipv6hdr *hdr; u8 head[100] = {}; - int addr_type; + int ret, addr_type; if (type != ETH_P_IPV6) return -EINVAL; @@ -649,13 +497,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, /* NOTE: payload length is always compressed */ - /* Next Header is compress if UDP */ - if (hdr->nexthdr == UIP_PROTO_UDP) - iphc0 |= LOWPAN_IPHC_NH_C; - - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) - lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr, - sizeof(hdr->nexthdr)); + /* Check if we provide the nhc format for nexthdr and compression + * functionality. If not nexthdr is handled inline and not compressed. + */ + ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr, &iphc0); + if (ret < 0) + return ret; /* Hop limit * if 1: compress, encoding is 01 @@ -741,9 +588,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, } } - /* UDP header compression */ - if (hdr->nexthdr == UIP_PROTO_UDP) - compress_udp_header(&hc_ptr, skb); + /* next header compression */ + if (iphc0 & LOWPAN_IPHC_NH_C) { + ret = lowpan_nhc_do_compression(skb, hdr, &hc_ptr); + if (ret < 0) + return ret; + } head[0] = iphc0; head[1] = iphc1; @@ -761,4 +611,18 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL_GPL(lowpan_header_compress); +static int __init lowpan_module_init(void) +{ + request_module_nowait("nhc_dest"); + request_module_nowait("nhc_fragment"); + request_module_nowait("nhc_hop"); + request_module_nowait("nhc_ipv6"); + request_module_nowait("nhc_mobility"); + request_module_nowait("nhc_routing"); + request_module_nowait("nhc_udp"); + + return 0; +} +module_init(lowpan_module_init); + MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c new file mode 100644 index 000000000000..fd20fc51a7c4 --- /dev/null +++ b/net/6lowpan/nhc.c @@ -0,0 +1,241 @@ +/* + * 6LoWPAN next header compression + * + * + * Authors: + * Alexander Aring <aar@pengutronix.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/netdevice.h> + +#include <net/ipv6.h> + +#include "nhc.h" + +static struct rb_root rb_root = RB_ROOT; +static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX]; +static DEFINE_SPINLOCK(lowpan_nhc_lock); + +static int lowpan_nhc_insert(struct lowpan_nhc *nhc) +{ + struct rb_node **new = &rb_root.rb_node, *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct lowpan_nhc *this = container_of(*new, struct lowpan_nhc, + node); + int result, len_dif, len; + + len_dif = nhc->idlen - this->idlen; + + if (nhc->idlen < this->idlen) + len = nhc->idlen; + else + len = this->idlen; + + result = memcmp(nhc->id, this->id, len); + if (!result) + result = len_dif; + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&nhc->node, parent, new); + rb_insert_color(&nhc->node, &rb_root); + + return 0; +} + +static void lowpan_nhc_remove(struct lowpan_nhc *nhc) +{ + rb_erase(&nhc->node, &rb_root); +} + +static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb) +{ + struct rb_node *node = rb_root.rb_node; + const u8 *nhcid_skb_ptr = skb->data; + + while (node) { + struct lowpan_nhc *nhc = container_of(node, struct lowpan_nhc, + node); + u8 nhcid_skb_ptr_masked[LOWPAN_NHC_MAX_ID_LEN]; + int result, i; + + if (nhcid_skb_ptr + nhc->idlen > skb->data + skb->len) + return NULL; + + /* copy and mask afterwards the nhid value from skb */ + memcpy(nhcid_skb_ptr_masked, nhcid_skb_ptr, nhc->idlen); + for (i = 0; i < nhc->idlen; i++) + nhcid_skb_ptr_masked[i] &= nhc->idmask[i]; + + result = memcmp(nhcid_skb_ptr_masked, nhc->id, nhc->idlen); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return nhc; + } + + return NULL; +} + +int lowpan_nhc_check_compression(struct sk_buff *skb, + const struct ipv6hdr *hdr, u8 **hc_ptr, + u8 *iphc0) +{ + struct lowpan_nhc *nhc; + + spin_lock_bh(&lowpan_nhc_lock); + + nhc = lowpan_nexthdr_nhcs[hdr->nexthdr]; + if (nhc && nhc->compress) + *iphc0 |= LOWPAN_IPHC_NH_C; + else + lowpan_push_hc_data(hc_ptr, &hdr->nexthdr, + sizeof(hdr->nexthdr)); + + spin_unlock_bh(&lowpan_nhc_lock); + + return 0; +} + +int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, + u8 **hc_ptr) +{ + int ret; + struct lowpan_nhc *nhc; + + spin_lock_bh(&lowpan_nhc_lock); + + nhc = lowpan_nexthdr_nhcs[hdr->nexthdr]; + /* check if the nhc module was removed in unlocked part. + * TODO: this is a workaround we should prevent unloading + * of nhc modules while unlocked part, this will always drop + * the lowpan packet but it's very unlikely. + * + * Solution isn't easy because we need to decide at + * lowpan_nhc_check_compression if we do a compression or not. + * Because the inline data which is added to skb, we can't move this + * handling. + */ + if (unlikely(!nhc || !nhc->compress)) { + ret = -EINVAL; + goto out; + } + + /* In the case of RAW sockets the transport header is not set by + * the ip6 stack so we must set it ourselves + */ + if (skb->transport_header == skb->network_header) + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + ret = nhc->compress(skb, hc_ptr); + if (ret < 0) + goto out; + + /* skip the transport header */ + skb_pull(skb, nhc->nexthdrlen); + +out: + spin_unlock_bh(&lowpan_nhc_lock); + + return ret; +} + +int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev, + struct ipv6hdr *hdr) +{ + struct lowpan_nhc *nhc; + int ret; + + spin_lock_bh(&lowpan_nhc_lock); + + nhc = lowpan_nhc_by_nhcid(skb); + if (nhc) { + if (nhc->uncompress) { + ret = nhc->uncompress(skb, sizeof(struct ipv6hdr) + + nhc->nexthdrlen); + if (ret < 0) { + spin_unlock_bh(&lowpan_nhc_lock); + return ret; + } + } else { + spin_unlock_bh(&lowpan_nhc_lock); + netdev_warn(dev, "received nhc id for %s which is not implemented.\n", + nhc->name); + return -ENOTSUPP; + } + } else { + spin_unlock_bh(&lowpan_nhc_lock); + netdev_warn(dev, "received unknown nhc id which was not found.\n"); + return -ENOENT; + } + + hdr->nexthdr = nhc->nexthdr; + skb_reset_transport_header(skb); + raw_dump_table(__func__, "raw transport header dump", + skb_transport_header(skb), nhc->nexthdrlen); + + spin_unlock_bh(&lowpan_nhc_lock); + + return 0; +} + +int lowpan_nhc_add(struct lowpan_nhc *nhc) +{ + int ret; + + if (!nhc->idlen || !nhc->idsetup) + return -EINVAL; + + WARN_ONCE(nhc->idlen > LOWPAN_NHC_MAX_ID_LEN, + "LOWPAN_NHC_MAX_ID_LEN should be updated to %zd.\n", + nhc->idlen); + + nhc->idsetup(nhc); + + spin_lock_bh(&lowpan_nhc_lock); + + if (lowpan_nexthdr_nhcs[nhc->nexthdr]) { + ret = -EEXIST; + goto out; + } + + ret = lowpan_nhc_insert(nhc); + if (ret < 0) + goto out; + + lowpan_nexthdr_nhcs[nhc->nexthdr] = nhc; +out: + spin_unlock_bh(&lowpan_nhc_lock); + return ret; +} +EXPORT_SYMBOL(lowpan_nhc_add); + +void lowpan_nhc_del(struct lowpan_nhc *nhc) +{ + spin_lock_bh(&lowpan_nhc_lock); + + lowpan_nhc_remove(nhc); + lowpan_nexthdr_nhcs[nhc->nexthdr] = NULL; + + spin_unlock_bh(&lowpan_nhc_lock); + + synchronize_net(); +} +EXPORT_SYMBOL(lowpan_nhc_del); diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h new file mode 100644 index 000000000000..ed44938eb5de --- /dev/null +++ b/net/6lowpan/nhc.h @@ -0,0 +1,146 @@ +#ifndef __6LOWPAN_NHC_H +#define __6LOWPAN_NHC_H + +#include <linux/skbuff.h> +#include <linux/rbtree.h> +#include <linux/module.h> + +#include <net/6lowpan.h> +#include <net/ipv6.h> + +#define LOWPAN_NHC_MAX_ID_LEN 1 + +/** + * LOWPAN_NHC - helper macro to generate nh id fields and lowpan_nhc struct + * + * @__nhc: variable name of the lowpan_nhc struct. + * @_name: const char * of common header compression name. + * @_nexthdr: ipv6 nexthdr field for the header compression. + * @_nexthdrlen: ipv6 nexthdr len for the reserved space. + * @_idsetup: callback to setup id and mask values. + * @_idlen: len for the next header id and mask, should be always the same. + * @_uncompress: callback for uncompression call. + * @_compress: callback for compression call. + */ +#define LOWPAN_NHC(__nhc, _name, _nexthdr, \ + _hdrlen, _idsetup, _idlen, \ + _uncompress, _compress) \ +static u8 __nhc##_val[_idlen]; \ +static u8 __nhc##_mask[_idlen]; \ +static struct lowpan_nhc __nhc = { \ + .name = _name, \ + .nexthdr = _nexthdr, \ + .nexthdrlen = _hdrlen, \ + .id = __nhc##_val, \ + .idmask = __nhc##_mask, \ + .idlen = _idlen, \ + .idsetup = _idsetup, \ + .uncompress = _uncompress, \ + .compress = _compress, \ +} + +#define module_lowpan_nhc(__nhc) \ +static int __init __nhc##_init(void) \ +{ \ + return lowpan_nhc_add(&(__nhc)); \ +} \ +module_init(__nhc##_init); \ +static void __exit __nhc##_exit(void) \ +{ \ + lowpan_nhc_del(&(__nhc)); \ +} \ +module_exit(__nhc##_exit); + +/** + * struct lowpan_nhc - hold 6lowpan next hdr compression ifnformation + * + * @node: holder for the rbtree. + * @name: name of the specific next header compression + * @nexthdr: next header value of the protocol which should be compressed. + * @nexthdrlen: ipv6 nexthdr len for the reserved space. + * @id: array for nhc id. Note this need to be in network byteorder. + * @mask: array for nhc id mask. Note this need to be in network byteorder. + * @len: the length of the next header id and mask. + * @setup: callback to setup fill the next header id value and mask. + * @compress: callback to do the header compression. + * @uncompress: callback to do the header uncompression. + */ +struct lowpan_nhc { + struct rb_node node; + const char *name; + const u8 nexthdr; + const size_t nexthdrlen; + u8 *id; + u8 *idmask; + const size_t idlen; + + void (*idsetup)(struct lowpan_nhc *nhc); + int (*uncompress)(struct sk_buff *skb, size_t needed); + int (*compress)(struct sk_buff *skb, u8 **hc_ptr); +}; + +/** + * lowpan_nhc_by_nexthdr - return the 6lowpan nhc by ipv6 nexthdr. + * + * @nexthdr: ipv6 nexthdr value. + */ +struct lowpan_nhc *lowpan_nhc_by_nexthdr(u8 nexthdr); + +/** + * lowpan_nhc_check_compression - checks if we support compression format. If + * we support the nhc by nexthdr field, the 6LoWPAN iphc NHC bit will be + * set. If we don't support nexthdr will be added as inline data to the + * 6LoWPAN header. + * + * @skb: skb of 6LoWPAN header to read nhc and replace header. + * @hdr: ipv6hdr to check the nexthdr value + * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of + * replaced header. + * @iphc0: iphc0 pointer to set the 6LoWPAN NHC bit + */ +int lowpan_nhc_check_compression(struct sk_buff *skb, + const struct ipv6hdr *hdr, u8 **hc_ptr, + u8 *iphc0); + +/** + * lowpan_nhc_do_compression - calling compress callback for nhc + * + * @skb: skb of 6LoWPAN header to read nhc and replace header. + * @hdr: ipv6hdr to set the nexthdr value + * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of + * replaced header. + */ +int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, + u8 **hc_ptr); + +/** + * lowpan_nhc_do_uncompression - calling uncompress callback for nhc + * + * @nhc: 6LoWPAN nhc context, get by lowpan_nhc_by_ functions. + * @skb: skb of 6LoWPAN header, skb->data should be pointed to nhc id value. + * @dev: netdevice for print logging information. + * @hdr: ipv6hdr for setting nexthdr value. + */ +int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev, + struct ipv6hdr *hdr); + +/** + * lowpan_nhc_add - register a next header compression to framework + * + * @nhc: nhc which should be add. + */ +int lowpan_nhc_add(struct lowpan_nhc *nhc); + +/** + * lowpan_nhc_del - delete a next header compression from framework + * + * @nhc: nhc which should be delete. + */ +void lowpan_nhc_del(struct lowpan_nhc *nhc); + +/** + * lowpan_nhc_init - adding all default nhcs + */ +void lowpan_nhc_init(void); + +#endif /* __6LOWPAN_NHC_H */ diff --git a/net/6lowpan/nhc_dest.c b/net/6lowpan/nhc_dest.c new file mode 100644 index 000000000000..0b292c9646eb --- /dev/null +++ b/net/6lowpan/nhc_dest.c @@ -0,0 +1,28 @@ +/* + * 6LoWPAN IPv6 Destination Options Header compression according to + * RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_DEST_IDLEN 1 +#define LOWPAN_NHC_DEST_ID_0 0xe6 +#define LOWPAN_NHC_DEST_MASK_0 0xfe + +static void dest_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_DEST_ID_0; + nhc->idmask[0] = LOWPAN_NHC_DEST_MASK_0; +} + +LOWPAN_NHC(nhc_dest, "RFC6282 Destination Options", NEXTHDR_DEST, 0, + dest_nhid_setup, LOWPAN_NHC_DEST_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_dest); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Destination Options compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_fragment.c b/net/6lowpan/nhc_fragment.c new file mode 100644 index 000000000000..473dbc58ef84 --- /dev/null +++ b/net/6lowpan/nhc_fragment.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Fragment Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_FRAGMENT_IDLEN 1 +#define LOWPAN_NHC_FRAGMENT_ID_0 0xe4 +#define LOWPAN_NHC_FRAGMENT_MASK_0 0xfe + +static void fragment_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_FRAGMENT_ID_0; + nhc->idmask[0] = LOWPAN_NHC_FRAGMENT_MASK_0; +} + +LOWPAN_NHC(nhc_fragment, "RFC6282 Fragment", NEXTHDR_FRAGMENT, 0, + fragment_nhid_setup, LOWPAN_NHC_FRAGMENT_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_fragment); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Fragment compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_hop.c b/net/6lowpan/nhc_hop.c new file mode 100644 index 000000000000..1eb66be16f19 --- /dev/null +++ b/net/6lowpan/nhc_hop.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Hop-by-Hop Options Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_HOP_IDLEN 1 +#define LOWPAN_NHC_HOP_ID_0 0xe0 +#define LOWPAN_NHC_HOP_MASK_0 0xfe + +static void hop_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_HOP_ID_0; + nhc->idmask[0] = LOWPAN_NHC_HOP_MASK_0; +} + +LOWPAN_NHC(nhc_hop, "RFC6282 Hop-by-Hop Options", NEXTHDR_HOP, 0, + hop_nhid_setup, LOWPAN_NHC_HOP_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_hop); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Hop-by-Hop Options compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_ipv6.c b/net/6lowpan/nhc_ipv6.c new file mode 100644 index 000000000000..2313d1600af3 --- /dev/null +++ b/net/6lowpan/nhc_ipv6.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_IPV6_IDLEN 1 +#define LOWPAN_NHC_IPV6_ID_0 0xee +#define LOWPAN_NHC_IPV6_MASK_0 0xfe + +static void ipv6_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_IPV6_ID_0; + nhc->idmask[0] = LOWPAN_NHC_IPV6_MASK_0; +} + +LOWPAN_NHC(nhc_ipv6, "RFC6282 IPv6", NEXTHDR_IPV6, 0, ipv6_nhid_setup, + LOWPAN_NHC_IPV6_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_ipv6); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 IPv6 compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_mobility.c b/net/6lowpan/nhc_mobility.c new file mode 100644 index 000000000000..60d3f3886c98 --- /dev/null +++ b/net/6lowpan/nhc_mobility.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Mobility Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_MOBILITY_IDLEN 1 +#define LOWPAN_NHC_MOBILITY_ID_0 0xe8 +#define LOWPAN_NHC_MOBILITY_MASK_0 0xfe + +static void mobility_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_MOBILITY_ID_0; + nhc->idmask[0] = LOWPAN_NHC_MOBILITY_MASK_0; +} + +LOWPAN_NHC(nhc_mobility, "RFC6282 Mobility", NEXTHDR_MOBILITY, 0, + mobility_nhid_setup, LOWPAN_NHC_MOBILITY_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_mobility); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Mobility compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_routing.c b/net/6lowpan/nhc_routing.c new file mode 100644 index 000000000000..c393280f11c4 --- /dev/null +++ b/net/6lowpan/nhc_routing.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Routing Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_ROUTING_IDLEN 1 +#define LOWPAN_NHC_ROUTING_ID_0 0xe2 +#define LOWPAN_NHC_ROUTING_MASK_0 0xfe + +static void routing_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_ROUTING_ID_0; + nhc->idmask[0] = LOWPAN_NHC_ROUTING_MASK_0; +} + +LOWPAN_NHC(nhc_routing, "RFC6282 Routing", NEXTHDR_ROUTING, 0, + routing_nhid_setup, LOWPAN_NHC_ROUTING_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_routing); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Routing compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c new file mode 100644 index 000000000000..c6bcaeb428ae --- /dev/null +++ b/net/6lowpan/nhc_udp.c @@ -0,0 +1,157 @@ +/* + * 6LoWPAN IPv6 UDP compression according to RFC6282 + * + * + * Authors: + * Alexander Aring <aar@pengutronix.de> + * + * Orignal written by: + * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + * Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_UDP_IDLEN 1 + +static int udp_uncompress(struct sk_buff *skb, size_t needed) +{ + u8 tmp = 0, val = 0; + struct udphdr uh; + bool fail; + int err; + + fail = lowpan_fetch_skb(skb, &tmp, sizeof(tmp)); + + pr_debug("UDP header uncompression\n"); + switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { + case LOWPAN_NHC_UDP_CS_P_00: + fail |= lowpan_fetch_skb(skb, &uh.source, sizeof(uh.source)); + fail |= lowpan_fetch_skb(skb, &uh.dest, sizeof(uh.dest)); + break; + case LOWPAN_NHC_UDP_CS_P_01: + fail |= lowpan_fetch_skb(skb, &uh.source, sizeof(uh.source)); + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); + uh.dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); + break; + case LOWPAN_NHC_UDP_CS_P_10: + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); + uh.source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); + fail |= lowpan_fetch_skb(skb, &uh.dest, sizeof(uh.dest)); + break; + case LOWPAN_NHC_UDP_CS_P_11: + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); + uh.source = htons(LOWPAN_NHC_UDP_4BIT_PORT + (val >> 4)); + uh.dest = htons(LOWPAN_NHC_UDP_4BIT_PORT + (val & 0x0f)); + break; + default: + BUG(); + } + + pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", + ntohs(uh.source), ntohs(uh.dest)); + + /* checksum */ + if (tmp & LOWPAN_NHC_UDP_CS_C) { + pr_debug_ratelimited("checksum elided currently not supported\n"); + fail = true; + } else { + fail |= lowpan_fetch_skb(skb, &uh.check, sizeof(uh.check)); + } + + if (fail) + return -EINVAL; + + /* UDP length needs to be infered from the lower layers + * here, we obtain the hint from the remaining size of the + * frame + */ + uh.len = htons(skb->len + sizeof(struct udphdr)); + pr_debug("uncompressed UDP length: src = %d", ntohs(uh.len)); + + /* replace the compressed UDP head by the uncompressed UDP + * header + */ + err = skb_cow(skb, needed); + if (unlikely(err)) + return err; + + skb_push(skb, sizeof(struct udphdr)); + skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); + + return 0; +} + +static int udp_compress(struct sk_buff *skb, u8 **hc_ptr) +{ + const struct udphdr *uh = udp_hdr(skb); + u8 tmp; + + if (((ntohs(uh->source) & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT) && + ((ntohs(uh->dest) & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT)) { + pr_debug("UDP header: both ports compression to 4 bits\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_11; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* source and destination port */ + tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT + + ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("UDP header: remove 8 bits of dest\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_01; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* source port */ + lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); + /* destination port */ + tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("UDP header: remove 8 bits of source\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_10; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* source port */ + tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* destination port */ + lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); + } else { + pr_debug("UDP header: can't compress\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_00; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* source port */ + lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); + /* destination port */ + lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); + } + + /* checksum is always inline */ + lowpan_push_hc_data(hc_ptr, &uh->check, sizeof(uh->check)); + + return 0; +} + +static void udp_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_UDP_ID; + nhc->idmask[0] = LOWPAN_NHC_UDP_MASK; +} + +LOWPAN_NHC(nhc_udp, "RFC6282 UDP", NEXTHDR_UDP, sizeof(struct udphdr), + udp_nhid_setup, LOWPAN_NHC_UDP_IDLEN, udp_uncompress, udp_compress); + +module_lowpan_nhc(nhc_udp); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 UDP compression"); +MODULE_LICENSE("GPL"); diff --git a/net/802/fc.c b/net/802/fc.c index 7c174b6750cd..7b9219022418 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -75,29 +75,8 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, return -hdr_len; } -/* - * A neighbour discovery of some species (eg arp) has completed. We - * can now send the packet. - */ - -static int fc_rebuild_header(struct sk_buff *skb) -{ -#ifdef CONFIG_INET - struct fch_hdr *fch=(struct fch_hdr *)skb->data; - struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); - if(fcllc->ethertype != htons(ETH_P_IP)) { - printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); - return 0; - } - return arp_find(fch->daddr, skb); -#else - return 0; -#endif -} - static const struct header_ops fc_header_ops = { .create = fc_header, - .rebuild = fc_rebuild_header, }; static void fc_setup(struct net_device *dev) diff --git a/net/802/fddi.c b/net/802/fddi.c index 59e7346f1193..7d3a0af954e8 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -87,31 +87,6 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev, return -hl; } - -/* - * Rebuild the FDDI MAC header. This is called after an ARP - * (or in future other address resolution) has completed on - * this sk_buff. We now let ARP fill in the other fields. - */ - -static int fddi_rebuild_header(struct sk_buff *skb) -{ - struct fddihdr *fddi = (struct fddihdr *)skb->data; - -#ifdef CONFIG_INET - if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP)) - /* Try to get ARP to resolve the header and fill destination address */ - return arp_find(fddi->daddr, skb); - else -#endif - { - printk("%s: Don't know how to resolve type %04X addresses.\n", - skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype)); - return 0; - } -} - - /* * Determine the packet's protocol ID and fill in skb fields. * This routine is called before an incoming packet is passed @@ -177,7 +152,6 @@ EXPORT_SYMBOL(fddi_change_mtu); static const struct header_ops fddi_header_ops = { .create = fddi_header, - .rebuild = fddi_rebuild_header, }; diff --git a/net/802/hippi.c b/net/802/hippi.c index 2e03f8259dd5..ade1a52cdcff 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -91,33 +91,6 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, /* - * Rebuild the HIPPI MAC header. This is called after an ARP has - * completed on this sk_buff. We now let ARP fill in the other fields. - */ - -static int hippi_rebuild_header(struct sk_buff *skb) -{ - struct hippi_hdr *hip = (struct hippi_hdr *)skb->data; - - /* - * Only IP is currently supported - */ - - if(hip->snap.ethertype != htons(ETH_P_IP)) - { - printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype)); - return 0; - } - - /* - * We don't support dynamic ARP on HIPPI, but we use the ARP - * static ARP tables to hold the I-FIELDs. - */ - return arp_find(hip->le.daddr, skb); -} - - -/* * Determine the packet's protocol ID. */ @@ -186,7 +159,6 @@ EXPORT_SYMBOL(hippi_neigh_setup_dev); static const struct header_ops hippi_header_ops = { .create = hippi_header, - .rebuild = hippi_rebuild_header, }; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 64c6bed4a3d3..98a30a5b8664 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -413,7 +413,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan_transfer_features(dev, vlandev); break; - case NETDEV_DOWN: + case NETDEV_DOWN: { + struct net_device *tmp; + LIST_HEAD(close_list); + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) vlan_vid_del(dev, htons(ETH_P_8021Q), 0); @@ -425,11 +428,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) - dev_change_flags(vlandev, flgs & ~IFF_UP); + list_add(&vlandev->close_list, &close_list); + } + + dev_close_many(&close_list, false); + + list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) { netif_stacked_transfer_operstate(dev, vlandev); + list_del_init(&vlandev->close_list); } + list_del(&close_list); break; - + } case NETDEV_UP: /* Put all VLANs for this dev in the up state too. */ vlan_group_for_each_dev(grp, i, vlandev) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 118956448cf6..8b5ab9033b41 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -37,39 +37,6 @@ #include <linux/netpoll.h> /* - * Rebuild the Ethernet MAC header. This is called after an ARP - * (or in future other address resolution) has completed on this - * sk_buff. We now let ARP fill in the other fields. - * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. - * - * TODO: This needs a checkup, I'm ignorant here. --BLG - */ -static int vlan_dev_rebuild_header(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); - - switch (veth->h_vlan_encapsulated_proto) { -#ifdef CONFIG_INET - case htons(ETH_P_IP): - - /* TODO: Confirm this will work with VLAN headers... */ - return arp_find(veth->h_dest, skb); -#endif - default: - pr_debug("%s: unable to resolve type %X addresses\n", - dev->name, ntohs(veth->h_vlan_encapsulated_proto)); - - ether_addr_copy(veth->h_source, dev->dev_addr); - break; - } - - return 0; -} - -/* * Create the VLAN header for an arbitrary protocol layer * * saddr=NULL means use device source address @@ -534,7 +501,6 @@ static int vlan_dev_get_lock_subclass(struct net_device *dev) static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, - .rebuild = vlan_dev_rebuild_header, .parse = eth_header_parse, }; @@ -554,7 +520,6 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev static const struct header_ops vlan_passthru_header_ops = { .create = vlan_passthru_hard_header, - .rebuild = dev_rebuild_header, .parse = eth_header_parse, }; @@ -589,6 +554,7 @@ static int vlan_dev_init(struct net_device *dev) if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); + dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE; /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; @@ -827,5 +793,5 @@ void vlan_setup(struct net_device *dev) dev->destructor = vlan_dev_free; dev->ethtool_ops = &vlan_ethtool_ops; - memset(dev->broadcast, 0, ETH_ALEN); + eth_zero_addr(dev->broadcast); } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 80d08f6664cb..3e3d82d8ff70 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -940,7 +940,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, + err = __sock_create(current->nsproxy->net_ns, PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", @@ -988,7 +988,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, + err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { pr_err("%s (%d): problem creating socket\n", diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index daa749c8b3fb..36a1a739ad68 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -524,6 +524,12 @@ static int p9_virtio_probe(struct virtio_device *vdev) int err; struct virtio_chan *chan; + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); if (!chan) { pr_err("Failed to allocate virtio 9P channel\n"); @@ -652,14 +658,30 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) static void p9_virtio_remove(struct virtio_device *vdev) { struct virtio_chan *chan = vdev->priv; - - if (chan->inuse) - p9_virtio_close(chan->client); - vdev->config->del_vqs(vdev); + unsigned long warning_time; mutex_lock(&virtio_9p_lock); + + /* Remove self from list so we don't get new users. */ list_del(&chan->chan_list); + warning_time = jiffies; + + /* Wait for existing users to close. */ + while (chan->inuse) { + mutex_unlock(&virtio_9p_lock); + msleep(250); + if (time_after(jiffies, warning_time + 10 * HZ)) { + dev_emerg(&vdev->dev, + "p9_virtio_remove: waiting for device in use.\n"); + warning_time = jiffies; + } + mutex_lock(&virtio_9p_lock); + } + mutex_unlock(&virtio_9p_lock); + + vdev->config->del_vqs(vdev); + sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); kfree(chan->tag); diff --git a/net/Kconfig b/net/Kconfig index ff9ffc17fa0e..44dd5786ee91 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -231,18 +231,18 @@ source "net/hsr/Kconfig" source "net/switchdev/Kconfig" config RPS - boolean + bool depends on SMP && SYSFS default y config RFS_ACCEL - boolean + bool depends on RPS select CPU_RMAP default y config XPS - boolean + bool depends on SMP default y @@ -254,18 +254,18 @@ config CGROUP_NET_PRIO a per-interface basis. config CGROUP_NET_CLASSID - boolean "Network classid cgroup" + bool "Network classid cgroup" depends on CGROUPS ---help--- Cgroup subsystem for use as general purpose socket classid marker that is being used in cls_cgroup and for netfilter matching. config NET_RX_BUSY_POLL - boolean + bool default y config BQL - boolean + bool depends on SYSFS select DQL default y @@ -282,7 +282,7 @@ config BPF_JIT this feature changing /proc/sys/net/core/bpf_jit_enable config NET_FLOW_LIMIT - boolean + bool depends on RPS default y ---help--- diff --git a/net/Makefile b/net/Makefile index 38704bdf941a..3995613e5510 100644 --- a/net/Makefile +++ b/net/Makefile @@ -69,7 +69,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ -obj-$(CONFIG_NET_MPLS_GSO) += mpls/ +obj-$(CONFIG_MPLS) += mpls/ obj-$(CONFIG_HSR) += hsr/ ifneq ($(CONFIG_NET_SWITCHDEV),) obj-y += switchdev/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index d1c55d8dd0a2..8ad3ec2610b6 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -141,7 +141,7 @@ static void __aarp_send_query(struct aarp_entry *a) eah->pa_src_net = sat->s_net; eah->pa_src_node = sat->s_node; - memset(eah->hw_dst, '\0', ETH_ALEN); + eth_zero_addr(eah->hw_dst); eah->pa_dst_zero = 0; eah->pa_dst_net = a->target_addr.s_net; @@ -189,7 +189,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us, eah->pa_src_node = us->s_node; if (!sha) - memset(eah->hw_dst, '\0', ETH_ALEN); + eth_zero_addr(eah->hw_dst); else ether_addr_copy(eah->hw_dst, sha); @@ -239,7 +239,7 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us) eah->pa_src_net = us->s_net; eah->pa_src_node = us->s_node; - memset(eah->hw_dst, '\0', ETH_ALEN); + eth_zero_addr(eah->hw_dst); eah->pa_dst_zero = 0; eah->pa_dst_net = us->s_net; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 0d0766ea5ab1..3b7ad43c7dad 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1559,8 +1559,7 @@ freeit: return 0; } -static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t len) +static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct atalk_sock *at = at_sk(sk); @@ -1728,8 +1727,8 @@ out: return err ? : len; } -static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct ddpehdr *ddp; diff --git a/net/atm/common.c b/net/atm/common.c index b84057e41bd6..ed0466637e13 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -523,8 +523,8 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) return 0; } -int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct atm_vcc *vcc; @@ -569,8 +569,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, return copied; } -int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t size) +int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) { struct sock *sk = sock->sk; DEFINE_WAIT(wait); diff --git a/net/atm/common.h b/net/atm/common.h index cc3c2dae4d79..4d6f5b2068ac 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -13,10 +13,9 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); -int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags); -int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t total_len); +int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); +int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/net/atm/lec.c b/net/atm/lec.c index 4b98f897044a..cd3b37989057 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -2001,7 +2001,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, if (entry == NULL) goto out; memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - memset(entry->mac_addr, 0, ETH_ALEN); + eth_zero_addr(entry->mac_addr); entry->recv_vcc = vcc; entry->old_recv_push = old_push; entry->status = ESI_UNKNOWN; @@ -2086,7 +2086,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->vcc = vcc; entry->old_push = old_push; memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - memset(entry->mac_addr, 0, ETH_ALEN); + eth_zero_addr(entry->mac_addr); entry->status = ESI_UNKNOWN; hlist_add_head(&entry->next, &priv->lec_arp_empty_ones); entry->timer.expires = jiffies + priv->vcc_timeout_period; diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 523bce72f698..4fd6af47383a 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -19,36 +19,15 @@ #include "resources.h" #include "signaling.h" -#undef WAIT_FOR_DEMON /* #define this if system calls on SVC sockets - should block until the demon runs. - Danger: may cause nasty hangs if the demon - crashes. */ - struct atm_vcc *sigd = NULL; -#ifdef WAIT_FOR_DEMON -static DECLARE_WAIT_QUEUE_HEAD(sigd_sleep); -#endif static void sigd_put_skb(struct sk_buff *skb) { -#ifdef WAIT_FOR_DEMON - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&sigd_sleep, &wait); - while (!sigd) { - set_current_state(TASK_UNINTERRUPTIBLE); - pr_debug("atmsvc: waiting for signaling daemon...\n"); - schedule(); - } - current->state = TASK_RUNNING; - remove_wait_queue(&sigd_sleep, &wait); -#else if (!sigd) { pr_debug("atmsvc: no signaling daemon\n"); kfree_skb(skb); return; } -#endif atm_force_charge(sigd, skb->truesize); skb_queue_tail(&sk_atm(sigd)->sk_receive_queue, skb); sk_atm(sigd)->sk_data_ready(sk_atm(sigd)); @@ -261,8 +240,5 @@ int sigd_attach(struct atm_vcc *vcc) vcc_insert_socket(sk_atm(vcc)); set_bit(ATM_VF_META, &vcc->flags); set_bit(ATM_VF_READY, &vcc->flags); -#ifdef WAIT_FOR_DEMON - wake_up(&sigd_sleep); -#endif return 0; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ca049a7c9287..330c1f4a5a0b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1432,8 +1432,7 @@ out: return err; } -static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name); struct sock *sk = sock->sk; @@ -1599,8 +1598,8 @@ out: return err; } -static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 67de6b33f2c3..7c646bb2c6f7 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -46,9 +46,9 @@ #ifdef CONFIG_INET -int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) +static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) { unsigned char *buff; @@ -100,7 +100,7 @@ int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, return -AX25_HEADER_LEN; /* Unfinished header */ } -int ax25_rebuild_header(struct sk_buff *skb) +netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) { struct sk_buff *ourskb; unsigned char *bp = skb->data; @@ -115,9 +115,6 @@ int ax25_rebuild_header(struct sk_buff *skb) dst = (ax25_address *)(bp + 1); src = (ax25_address *)(bp + 8); - if (arp_find(bp + 1, skb)) - return 1; - route = ax25_get_route(dst, NULL); if (route) { digipeat = route->digipeat; @@ -129,6 +126,7 @@ int ax25_rebuild_header(struct sk_buff *skb) dev = skb->dev; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) { + kfree_skb(skb); goto put; } @@ -212,31 +210,29 @@ put: if (route) ax25_put_route(route); - return 1; + return NETDEV_TX_OK; } #else /* INET */ -int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) +static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) { return -AX25_HEADER_LEN; } -int ax25_rebuild_header(struct sk_buff *skb) +netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) { - return 1; + kfree_skb(skb); + return NETDEV_TX_OK; } - #endif const struct header_ops ax25_header_ops = { .create = ax25_hard_header, - .rebuild = ax25_rebuild_header, }; -EXPORT_SYMBOL(ax25_hard_header); -EXPORT_SYMBOL(ax25_rebuild_header); EXPORT_SYMBOL(ax25_header_ops); +EXPORT_SYMBOL(ax25_ip_xmit); diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 7de74635a110..b8c794b87523 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -91,4 +91,12 @@ config BT_SELFTEST_SMP Run test cases for SMP cryptographic functionality, including both legacy SMP as well as the Secure Connections features. +config BT_DEBUGFS + bool "Export Bluetooth internals in debugfs" + depends on BT && DEBUG_FS + default y + help + Provide extensive information about internal Bluetooth states + in debugfs. + source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 8e96e3072266..9a8ea232d28f 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,8 +13,9 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o ecc.o hci_request.o hci_debugfs.o + a2mp.o amp.o ecc.o hci_request.o mgmt_util.o +bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index cedfbda15dad..5a04eb1a7e57 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -19,9 +19,11 @@ #include "a2mp.h" #include "amp.h" +#define A2MP_FEAT_EXT 0x8000 + /* Global AMP Manager list */ -LIST_HEAD(amp_mgr_list); -DEFINE_MUTEX(amp_mgr_list_lock); +static LIST_HEAD(amp_mgr_list); +static DEFINE_MUTEX(amp_mgr_list_lock); /* A2MP build & send command helper functions */ static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data) @@ -43,7 +45,7 @@ static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data) return cmd; } -void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data) +static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data) { struct l2cap_chan *chan = mgr->a2mp_chan; struct a2mp_cmd *cmd; @@ -67,7 +69,7 @@ void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data) kfree(cmd); } -u8 __next_ident(struct amp_mgr *mgr) +static u8 __next_ident(struct amp_mgr *mgr) { if (++mgr->ident == 0) mgr->ident = 1; @@ -75,6 +77,23 @@ u8 __next_ident(struct amp_mgr *mgr) return mgr->ident; } +static struct amp_mgr *amp_mgr_lookup_by_state(u8 state) +{ + struct amp_mgr *mgr; + + mutex_lock(&_mgr_list_lock); + list_for_each_entry(mgr, &_mgr_list, list) { + if (test_and_clear_bit(state, &mgr->state)) { + amp_mgr_get(mgr); + mutex_unlock(&_mgr_list_lock); + return mgr; + } + } + mutex_unlock(&_mgr_list_lock); + + return NULL; +} + /* hci_dev_list shall be locked */ static void __a2mp_add_cl(struct amp_mgr *mgr, struct a2mp_cl *cl) { @@ -860,23 +879,6 @@ struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, return mgr->a2mp_chan; } -struct amp_mgr *amp_mgr_lookup_by_state(u8 state) -{ - struct amp_mgr *mgr; - - mutex_lock(&_mgr_list_lock); - list_for_each_entry(mgr, &_mgr_list, list) { - if (test_and_clear_bit(state, &mgr->state)) { - amp_mgr_get(mgr); - mutex_unlock(&_mgr_list_lock); - return mgr; - } - } - mutex_unlock(&_mgr_list_lock); - - return NULL; -} - void a2mp_send_getinfo_rsp(struct hci_dev *hdev) { struct amp_mgr *mgr; diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h index 487b54c1308f..296f665adb09 100644 --- a/net/bluetooth/a2mp.h +++ b/net/bluetooth/a2mp.h @@ -17,8 +17,6 @@ #include <net/bluetooth/l2cap.h> -#define A2MP_FEAT_EXT 0x8000 - enum amp_mgr_state { READ_LOC_AMP_INFO, READ_LOC_AMP_ASSOC, @@ -131,16 +129,10 @@ struct a2mp_physlink_rsp { #define A2MP_STATUS_PHYS_LINK_EXISTS 0x05 #define A2MP_STATUS_SECURITY_VIOLATION 0x06 -extern struct list_head amp_mgr_list; -extern struct mutex amp_mgr_list_lock; - struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); -u8 __next_ident(struct amp_mgr *mgr); struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, struct sk_buff *skb); -struct amp_mgr *amp_mgr_lookup_by_state(u8 state); -void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data); void a2mp_discover_amp(struct l2cap_chan *chan); void a2mp_send_getinfo_rsp(struct hci_dev *hdev); void a2mp_send_getampassoc_rsp(struct hci_dev *hdev, u8 status); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ce22e0cfa923..70f9d945faf7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -210,8 +210,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) } EXPORT_SYMBOL(bt_accept_dequeue); -int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -283,8 +283,8 @@ static long bt_sock_data_wait(struct sock *sk, long timeo) return timeo; } -int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; int err = 0; @@ -711,10 +711,9 @@ EXPORT_SYMBOL_GPL(bt_debugfs); static int __init bt_init(void) { - struct sk_buff *skb; int err; - BUILD_BUG_ON(sizeof(struct bt_skb_cb) > sizeof(skb->cb)); + sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); BT_INFO("Core ver %s", VERSION); @@ -750,6 +749,13 @@ static int __init bt_init(void) goto sock_err; } + err = mgmt_init(); + if (err < 0) { + sco_exit(); + l2cap_exit(); + goto sock_err; + } + return 0; sock_err: @@ -764,6 +770,8 @@ error: static void __exit bt_exit(void) { + mgmt_exit(); + sco_exit(); l2cap_exit(); diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 4b488ec26105..6ceb5d36a32b 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -218,7 +218,7 @@ static const struct net_device_ops bnep_netdev_ops = { void bnep_net_setup(struct net_device *dev) { - memset(dev->broadcast, 0xff, ETH_ALEN); + eth_broadcast_addr(dev->broadcast); dev->addr_len = ETH_ALEN; ether_setup(dev); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c9b8fa544785..ee5e59839b02 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -309,7 +309,7 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status) else hci_add_sco(sco, conn->handle); } else { - hci_proto_connect_cfm(sco, status); + hci_connect_cfm(sco, status); hci_conn_del(sco); } } @@ -571,7 +571,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || - test_bit(HCI_USER_CHANNEL, &d->dev_flags) || + hci_dev_test_flag(d, HCI_USER_CHANNEL) || d->dev_type != HCI_BREDR) continue; @@ -618,7 +618,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_del(conn); @@ -700,7 +700,7 @@ static void hci_req_directed_advertising(struct hci_request *req, * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ - clear_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ADV); /* Set require_privacy to false so that the remote device has a * chance of identifying us. @@ -733,6 +733,14 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, struct hci_request req; int err; + /* Let's make sure that le is enabled.*/ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + if (lmp_le_capable(hdev)) + return ERR_PTR(-ECONNREFUSED); + + return ERR_PTR(-EOPNOTSUPP); + } + /* Some devices send ATT messages as soon as the physical link is * established. To be able to handle these ATT messages, the user- * space first establishes the connection and then starts the pairing @@ -791,7 +799,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * anyway have to disable it in order to start directed * advertising. */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { u8 enable = 0x00; hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); @@ -802,7 +810,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, /* If we're active scanning most controllers are unable * to initiate advertising. Simply reject the attempt. */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE) { skb_queue_purge(&req.cmd_q); hci_conn_del(conn); @@ -832,9 +840,9 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * handler for scan disabling knows to set the correct discovery * state. */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { hci_req_add_le_scan_disable(&req); - set_bit(HCI_LE_SCAN_INTERRUPTED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); } hci_req_add_le_create_conn(&req, conn); @@ -856,8 +864,12 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, { struct hci_conn *acl; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + if (lmp_bredr_capable(hdev)) + return ERR_PTR(-ECONNREFUSED); + return ERR_PTR(-EOPNOTSUPP); + } acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { @@ -930,7 +942,7 @@ int hci_conn_check_link_mode(struct hci_conn *conn) * Connections is used and the link is encrypted with AES-CCM * using a P-256 authenticated combination key. */ - if (test_bit(HCI_SC_ONLY, &conn->hdev->flags)) { + if (hci_dev_test_flag(conn->hdev, HCI_SC_ONLY)) { if (!hci_conn_sc_enabled(conn) || !test_bit(HCI_CONN_AES_CCM, &conn->flags) || conn->key_type != HCI_LK_AUTH_COMBINATION_P256) @@ -1139,7 +1151,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev) list_for_each_entry_safe(c, n, &h->list, list) { c->state = BT_CLOSED; - hci_proto_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM); + hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM); hci_conn_del(c); } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3322d3f4c85a..e6bfeb7b4415 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -51,7 +51,7 @@ DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); -DEFINE_RWLOCK(hci_cb_list_lock); +DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); @@ -80,7 +80,7 @@ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_DUT_MODE, &hdev->dbg_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -106,7 +106,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_DUT_MODE, &hdev->dbg_flags)) + if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE)) return -EALREADY; hci_req_lock(hdev); @@ -127,7 +127,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, if (err < 0) return err; - change_bit(HCI_DUT_MODE, &hdev->dbg_flags); + hci_dev_change_flag(hdev, HCI_DUT_MODE); return count; } @@ -390,7 +390,7 @@ static void bredr_init(struct hci_request *req) hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); } -static void amp_init(struct hci_request *req) +static void amp_init1(struct hci_request *req) { req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; @@ -400,9 +400,6 @@ static void amp_init(struct hci_request *req) /* Read Local Supported Commands */ hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); - /* Read Local Supported Features */ - hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); - /* Read Local AMP Info */ hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); @@ -416,6 +413,16 @@ static void amp_init(struct hci_request *req) hci_req_add(req, HCI_OP_READ_LOCATION_DATA, 0, NULL); } +static void amp_init2(struct hci_request *req) +{ + /* Read Local Supported Features. Not all AMP controllers + * support this so it's placed conditionally in the second + * stage init. + */ + if (req->hdev->commands[14] & 0x20) + hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); +} + static void hci_init1_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -432,7 +439,7 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt) break; case HCI_AMP: - amp_init(req); + amp_init1(req); break; default: @@ -494,7 +501,7 @@ static void le_setup(struct hci_request *req) /* LE-only controllers have LE implicitly enabled */ if (!lmp_bredr_capable(hdev)) - set_bit(HCI_LE_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_ENABLED); } static void hci_setup_event_mask(struct hci_request *req) @@ -578,10 +585,13 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; + if (hdev->dev_type == HCI_AMP) + return amp_init2(req); + if (lmp_bredr_capable(hdev)) bredr_setup(req); else - clear_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); if (lmp_le_capable(hdev)) le_setup(req); @@ -607,7 +617,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) */ hdev->max_page = 0x01; - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { u8 mode = 0x01; hci_req_add(req, HCI_OP_WRITE_SSP_MODE, @@ -646,7 +656,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) sizeof(cp), &cp); } - if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) { u8 enable = 1; hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), &enable); @@ -683,7 +693,7 @@ static void hci_set_le_support(struct hci_request *req) memset(&cp, 0, sizeof(cp)); - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { cp.le = 0x01; cp.simul = 0x00; } @@ -871,7 +881,7 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); /* Enable Secure Connections if supported and configured */ - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && bredr_sc_enabled(hdev)) { u8 support = 0x01; @@ -891,22 +901,22 @@ static int __hci_init(struct hci_dev *hdev) /* The Device Under Test (DUT) mode is special and available for * all controller types. So just create it early on. */ - if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SETUP)) { debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev, &dut_mode_fops); } + err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); + if (err < 0) + return err; + /* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode * BR/EDR/LE type controllers. AMP controllers only need the - * first stage init. + * first two stages of init. */ if (hdev->dev_type != HCI_BREDR) return 0; - err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); - if (err < 0) - return err; - err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); if (err < 0) return err; @@ -927,8 +937,8 @@ static int __hci_init(struct hci_dev *hdev) * So only when in setup phase or config phase, create the debugfs * entries and register the SMP channels. */ - if (!test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) return 0; hci_debugfs_create_common(hdev); @@ -1290,12 +1300,12 @@ int hci_inquiry(void __user *arg) if (!hdev) return -ENODEV; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } @@ -1305,7 +1315,7 @@ int hci_inquiry(void __user *arg) goto done; } - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; } @@ -1377,17 +1387,17 @@ static int hci_dev_do_open(struct hci_dev *hdev) hci_req_lock(hdev); - if (test_bit(HCI_UNREGISTER, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { ret = -ENODEV; goto done; } - if (!test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) { /* Check for rfkill but allow the HCI setup stage to * proceed (which in itself doesn't cause any RF activity). */ - if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_RFKILLED)) { ret = -ERFKILL; goto done; } @@ -1404,7 +1414,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) * This check is only valid for BR/EDR controllers * since AMP controllers do not have an address. */ - if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hdev->dev_type == HCI_BREDR && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY)) { @@ -1426,7 +1436,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); - if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SETUP)) { if (hdev->setup) ret = hdev->setup(hdev); @@ -1438,7 +1448,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) */ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks)) - set_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_UNCONFIGURED); /* For an unconfigured controller it is required to * read at least the version information provided by @@ -1448,11 +1458,11 @@ static int hci_dev_do_open(struct hci_dev *hdev) * also the original Bluetooth public device address * will be read using the Read BD Address command. */ - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) ret = __hci_unconf_init(hdev); } - if (test_bit(HCI_CONFIG, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_CONFIG)) { /* If public address change is configured, ensure that * the address gets programmed. If the driver does not * support changing the public address, fail the power @@ -1466,8 +1476,8 @@ static int hci_dev_do_open(struct hci_dev *hdev) } if (!ret) { - if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && - !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) ret = __hci_init(hdev); } @@ -1475,13 +1485,13 @@ static int hci_dev_do_open(struct hci_dev *hdev) if (!ret) { hci_dev_hold(hdev); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - if (!test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags) && - !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && - !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG) && + !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hdev->dev_type == HCI_BREDR) { hci_dev_lock(hdev); mgmt_powered(hdev, 1); @@ -1533,8 +1543,8 @@ int hci_dev_open(__u16 dev) * HCI_USER_CHANNEL will be set first before attempting to * open the device. */ - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && - !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EOPNOTSUPP; goto done; } @@ -1544,7 +1554,7 @@ int hci_dev_open(__u16 dev) * particularly important if the setup procedure has not yet * completed. */ - if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) cancel_delayed_work(&hdev->power_off); /* After this call it is guaranteed that the setup procedure @@ -1559,9 +1569,9 @@ int hci_dev_open(__u16 dev) * is in use this bit will be cleared again and userspace has * to explicitly enable it. */ - if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && - !test_bit(HCI_MGMT, &hdev->dev_flags)) - set_bit(HCI_BONDABLE, &hdev->dev_flags); + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + !hci_dev_test_flag(hdev, HCI_MGMT)) + hci_dev_set_flag(hdev, HCI_BONDABLE); err = hci_dev_do_open(hdev); @@ -1591,6 +1601,12 @@ static int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); + if (!hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + /* Execute vendor specific shutdown routine */ + if (hdev->shutdown) + hdev->shutdown(hdev); + } + cancel_delayed_work(&hdev->power_off); hci_req_cancel(hdev, ENODEV); @@ -1609,17 +1625,17 @@ static int hci_dev_do_close(struct hci_dev *hdev) if (hdev->discov_timeout > 0) { cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = 0; - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); } - if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) + if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) cancel_delayed_work(&hdev->service_cache); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); /* Avoid potential lockdep warnings from the *_flush() calls by @@ -1631,7 +1647,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + if (!hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { if (hdev->dev_type == HCI_BREDR) mgmt_powered(hdev, 0); } @@ -1651,8 +1667,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); - if (!test_bit(HCI_AUTO_OFF, &hdev->dev_flags) && - !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && + !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); @@ -1683,7 +1699,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Clear flags */ hdev->flags &= BIT(HCI_RAW); - hdev->dev_flags &= ~HCI_PERSISTENT_MASK; + hci_dev_clear_volatile_flags(hdev); /* Controller radio is available but is currently powered down */ hdev->amp_status = AMP_STATUS_POWERED_DOWN; @@ -1707,12 +1723,12 @@ int hci_dev_close(__u16 dev) if (!hdev) return -ENODEV; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } - if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) cancel_delayed_work(&hdev->power_off); err = hci_dev_do_close(hdev); @@ -1770,12 +1786,12 @@ int hci_dev_reset(__u16 dev) goto done; } - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } @@ -1796,12 +1812,12 @@ int hci_dev_reset_stat(__u16 dev) if (!hdev) return -ENODEV; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { ret = -EBUSY; goto done; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { ret = -EOPNOTSUPP; goto done; } @@ -1820,29 +1836,29 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) BT_DBG("%s scan 0x%02x", hdev->name, scan); if ((scan & SCAN_PAGE)) - conn_changed = !test_and_set_bit(HCI_CONNECTABLE, - &hdev->dev_flags); + conn_changed = !hci_dev_test_and_set_flag(hdev, + HCI_CONNECTABLE); else - conn_changed = test_and_clear_bit(HCI_CONNECTABLE, - &hdev->dev_flags); + conn_changed = hci_dev_test_and_clear_flag(hdev, + HCI_CONNECTABLE); if ((scan & SCAN_INQUIRY)) { - discov_changed = !test_and_set_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + discov_changed = !hci_dev_test_and_set_flag(hdev, + HCI_DISCOVERABLE); } else { - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); - discov_changed = test_and_clear_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + discov_changed = hci_dev_test_and_clear_flag(hdev, + HCI_DISCOVERABLE); } - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; if (conn_changed || discov_changed) { /* In case this was disabled through mgmt */ - set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) mgmt_update_adv_data(hdev); mgmt_new_settings(hdev); @@ -1862,12 +1878,12 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!hdev) return -ENODEV; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } @@ -1877,7 +1893,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) goto done; } - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; } @@ -1981,7 +1997,7 @@ int hci_get_dev_list(void __user *arg) * is running, but in that case still indicate that the * device is actually down. */ - if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) flags &= ~BIT(HCI_UP); (dr + n)->dev_id = hdev->id; @@ -2019,7 +2035,7 @@ int hci_get_dev_info(void __user *arg) * is running, but in that case still indicate that the * device is actually down. */ - if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) flags = hdev->flags & ~BIT(HCI_UP); else flags = hdev->flags; @@ -2062,16 +2078,16 @@ static int hci_rfkill_set_block(void *data, bool blocked) BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; if (blocked) { - set_bit(HCI_RFKILLED, &hdev->dev_flags); - if (!test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags)) + hci_dev_set_flag(hdev, HCI_RFKILLED); + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) hci_dev_do_close(hdev); } else { - clear_bit(HCI_RFKILLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_RFKILLED); } return 0; @@ -2100,23 +2116,23 @@ static void hci_power_on(struct work_struct *work) * ignored and they need to be checked now. If they are still * valid, it is important to turn the device back off. */ - if (test_bit(HCI_RFKILLED, &hdev->dev_flags) || - test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) || + if (hci_dev_test_flag(hdev, HCI_RFKILLED) || + hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || (hdev->dev_type == HCI_BREDR && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { - clear_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_do_close(hdev); - } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + } else if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) { queue_delayed_work(hdev->req_workqueue, &hdev->power_off, HCI_AUTO_OFF_TIMEOUT); } - if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) { + if (hci_dev_test_and_clear_flag(hdev, HCI_SETUP)) { /* For unconfigured devices, set the HCI_RAW flag * so that userspace can easily identify them. */ - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) set_bit(HCI_RAW, &hdev->flags); /* For fully configured devices, this will send @@ -2127,11 +2143,11 @@ static void hci_power_on(struct work_struct *work) * and no event will be send. */ mgmt_index_added(hdev); - } else if (test_and_clear_bit(HCI_CONFIG, &hdev->dev_flags)) { + } else if (hci_dev_test_and_clear_flag(hdev, HCI_CONFIG)) { /* When the controller is now configured, then it * is important to clear the HCI_RAW flag. */ - if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) clear_bit(HCI_RAW, &hdev->flags); /* Powering on the controller with HCI_CONFIG set only @@ -2500,6 +2516,42 @@ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) } } +bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) +{ + struct smp_ltk *k; + struct smp_irk *irk; + u8 addr_type; + + if (type == BDADDR_BREDR) { + if (hci_find_link_key(hdev, bdaddr)) + return true; + return false; + } + + /* Convert to HCI addr type which struct smp_ltk uses */ + if (type == BDADDR_LE_PUBLIC) + addr_type = ADDR_LE_DEV_PUBLIC; + else + addr_type = ADDR_LE_DEV_RANDOM; + + irk = hci_get_irk(hdev, bdaddr, addr_type); + if (irk) { + bdaddr = &irk->bdaddr; + addr_type = irk->addr_type; + } + + rcu_read_lock(); + list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { + if (k->bdaddr_type == addr_type && !bacmp(bdaddr, &k->bdaddr)) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + /* HCI command timer function */ static void hci_cmd_timeout(struct work_struct *work) { @@ -2822,7 +2874,6 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, { /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_request req; struct hci_cp_inquiry cp; int err; @@ -2841,21 +2892,37 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, break; case DISCOV_TYPE_INTERLEAVED: - hci_req_init(&req, hdev); + hci_dev_lock(hdev); - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; - hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + /* If we were running LE only scan, change discovery + * state. If we were running both LE and BR/EDR inquiry + * simultaneously, and BR/EDR inquiry is already + * finished, stop discovery, otherwise BR/EDR inquiry + * will stop discovery when finished. + */ + if (!test_bit(HCI_INQUIRY, &hdev->flags)) + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } else { + struct hci_request req; - hci_dev_lock(hdev); + hci_inquiry_cache_flush(hdev); - hci_inquiry_cache_flush(hdev); + hci_req_init(&req, hdev); - err = hci_req_run(&req, inquiry_complete); - if (err) { - BT_ERR("Inquiry request failed: err %d", err); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + err = hci_req_run(&req, inquiry_complete); + if (err) { + BT_ERR("Inquiry request failed: err %d", err); + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } } hci_dev_unlock(hdev); @@ -2934,7 +3001,7 @@ static void le_scan_restart_work(struct work_struct *work) BT_DBG("%s", hdev->name); /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return; hci_req_init(&req, hdev); @@ -2967,9 +3034,9 @@ static void le_scan_restart_work(struct work_struct *work) void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type) { - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { bacpy(bdaddr, &hdev->static_addr); *bdaddr_type = ADDR_LE_DEV_RANDOM; @@ -3059,6 +3126,7 @@ struct hci_dev *hci_alloc_dev(void) hci_init_sysfs(hdev); discovery_init(hdev); + adv_info_init(hdev); return hdev; } @@ -3137,16 +3205,16 @@ int hci_register_dev(struct hci_dev *hdev) } if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) - set_bit(HCI_RFKILLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RFKILLED); - set_bit(HCI_SETUP, &hdev->dev_flags); - set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SETUP); + hci_dev_set_flag(hdev, HCI_AUTO_OFF); if (hdev->dev_type == HCI_BREDR) { /* Assume BR/EDR support until proven otherwise (such as * through reading supported features during init. */ - set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); } write_lock(&hci_dev_list_lock); @@ -3157,7 +3225,7 @@ int hci_register_dev(struct hci_dev *hdev) * and should not be included in normal operation. */ if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) - set_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_UNCONFIGURED); hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); @@ -3183,7 +3251,7 @@ void hci_unregister_dev(struct hci_dev *hdev) BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - set_bit(HCI_UNREGISTER, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_UNREGISTER); id = hdev->id; @@ -3199,8 +3267,8 @@ void hci_unregister_dev(struct hci_dev *hdev) cancel_work_sync(&hdev->power_on); if (!test_bit(HCI_INIT, &hdev->flags) && - !test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags)) { + !hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) { hci_dev_lock(hdev); mgmt_index_removed(hdev); hci_dev_unlock(hdev); @@ -3448,9 +3516,9 @@ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - write_lock(&hci_cb_list_lock); - list_add(&cb->list, &hci_cb_list); - write_unlock(&hci_cb_list_lock); + mutex_lock(&hci_cb_list_lock); + list_add_tail(&cb->list, &hci_cb_list); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -3460,9 +3528,9 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - write_lock(&hci_cb_list_lock); + mutex_lock(&hci_cb_list_lock); list_del(&cb->list); - write_unlock(&hci_cb_list_lock); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -3517,7 +3585,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->req.start = true; + bt_cb(skb)->req_start = 1; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -3874,7 +3942,7 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) static void __check_timeout(struct hci_dev *hdev, unsigned int cnt) { - if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { /* ACL tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!cnt && time_after(jiffies, hdev->acl_last_tx + @@ -4057,7 +4125,7 @@ static void hci_sched_le(struct hci_dev *hdev) if (!hci_conn_num(hdev, LE_LINK)) return; - if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { /* LE tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!hdev->le_cnt && hdev->le_pkts && @@ -4105,7 +4173,7 @@ static void hci_tx_work(struct work_struct *work) BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt, hdev->le_cnt); - if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { /* Schedule queues and send stuff to HCI driver */ hci_sched_acl(hdev); hci_sched_sco(hdev); @@ -4195,7 +4263,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev) if (!skb) return true; - return bt_cb(skb)->req.start; + return bt_cb(skb)->req_start; } static void hci_resend_last(struct hci_dev *hdev) @@ -4255,14 +4323,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) * command queue (hdev->cmd_q). */ if (hdev->sent_cmd) { - req_complete = bt_cb(hdev->sent_cmd)->req.complete; + req_complete = bt_cb(hdev->sent_cmd)->req_complete; if (req_complete) { /* We must set the complete callback to NULL to * avoid calling the callback more than once if * this function gets called again. */ - bt_cb(hdev->sent_cmd)->req.complete = NULL; + bt_cb(hdev->sent_cmd)->req_complete = NULL; goto call_complete; } @@ -4271,12 +4339,12 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) /* Remove all pending commands belonging to this request */ spin_lock_irqsave(&hdev->cmd_q.lock, flags); while ((skb = __skb_dequeue(&hdev->cmd_q))) { - if (bt_cb(skb)->req.start) { + if (bt_cb(skb)->req_start) { __skb_queue_head(&hdev->cmd_q, skb); break; } - req_complete = bt_cb(skb)->req.complete; + req_complete = bt_cb(skb)->req_complete; kfree_skb(skb); } spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); @@ -4302,7 +4370,7 @@ static void hci_rx_work(struct work_struct *work) hci_send_to_sock(hdev, skb); } - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { kfree_skb(skb); continue; } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 65261e5d4b84..e6255833a258 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -28,6 +28,54 @@ #include "hci_debugfs.h" +#define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \ +static ssize_t __name ## _read(struct file *file, \ + char __user *user_buf, \ + size_t count, loff_t *ppos) \ +{ \ + struct hci_dev *hdev = file->private_data; \ + char buf[3]; \ + \ + buf[0] = test_bit(__quirk, &hdev->quirks) ? 'Y' : 'N'; \ + buf[1] = '\n'; \ + buf[2] = '\0'; \ + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); \ +} \ + \ +static ssize_t __name ## _write(struct file *file, \ + const char __user *user_buf, \ + size_t count, loff_t *ppos) \ +{ \ + struct hci_dev *hdev = file->private_data; \ + char buf[32]; \ + size_t buf_size = min(count, (sizeof(buf) - 1)); \ + bool enable; \ + \ + if (test_bit(HCI_UP, &hdev->flags)) \ + return -EBUSY; \ + \ + if (copy_from_user(buf, user_buf, buf_size)) \ + return -EFAULT; \ + \ + buf[buf_size] = '\0'; \ + if (strtobool(buf, &enable)) \ + return -EINVAL; \ + \ + if (enable == test_bit(__quirk, &hdev->quirks)) \ + return -EALREADY; \ + \ + change_bit(__quirk, &hdev->quirks); \ + \ + return count; \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .open = simple_open, \ + .read = __name ## _read, \ + .write = __name ## _write, \ + .llseek = default_llseek, \ +} \ + static int features_show(struct seq_file *f, void *ptr) { struct hci_dev *hdev = f->private; @@ -166,7 +214,7 @@ static int remote_oob_show(struct seq_file *f, void *ptr) seq_printf(f, "%pMR (type %u) %u %*phN %*phN %*phN %*phN\n", &data->bdaddr, data->bdaddr_type, data->present, 16, data->hash192, 16, data->rand192, - 16, data->hash256, 19, data->rand256); + 16, data->hash256, 16, data->rand256); } hci_dev_unlock(hdev); @@ -247,7 +295,7 @@ static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -265,7 +313,7 @@ static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_SC_ONLY) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -679,7 +727,7 @@ static ssize_t force_static_address_read(struct file *file, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -704,10 +752,10 @@ static ssize_t force_static_address_write(struct file *file, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) + if (enable == hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR)) return -EALREADY; - change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); + hci_dev_change_flag(hdev, HCI_FORCE_STATIC_ADDR); return count; } @@ -997,6 +1045,11 @@ static int adv_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, adv_max_interval_set, "%llu\n"); +DEFINE_QUIRK_ATTRIBUTE(quirk_strict_duplicate_filter, + HCI_QUIRK_STRICT_DUPLICATE_FILTER); +DEFINE_QUIRK_ATTRIBUTE(quirk_simultaneous_discovery, + HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + void hci_debugfs_create_le(struct hci_dev *hdev) { debugfs_create_file("identity", 0400, hdev->debugfs, hdev, @@ -1041,6 +1094,13 @@ void hci_debugfs_create_le(struct hci_dev *hdev) &adv_max_interval_fops); debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs, &hdev->discov_interleaved_timeout); + + debugfs_create_file("quirk_strict_duplicate_filter", 0644, + hdev->debugfs, hdev, + &quirk_strict_duplicate_filter_fops); + debugfs_create_file("quirk_simultaneous_discovery", 0644, + hdev->debugfs, hdev, + &quirk_simultaneous_discovery_fops); } void hci_debugfs_create_conn(struct hci_conn *conn) diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h index fb68efe083c5..4444dc8cedc2 100644 --- a/net/bluetooth/hci_debugfs.h +++ b/net/bluetooth/hci_debugfs.h @@ -20,7 +20,29 @@ SOFTWARE IS DISCLAIMED. */ +#if IS_ENABLED(CONFIG_BT_DEBUGFS) + void hci_debugfs_create_common(struct hci_dev *hdev); void hci_debugfs_create_bredr(struct hci_dev *hdev); void hci_debugfs_create_le(struct hci_dev *hdev); void hci_debugfs_create_conn(struct hci_conn *conn); + +#else + +static inline void hci_debugfs_create_common(struct hci_dev *hdev) +{ +} + +static inline void hci_debugfs_create_bredr(struct hci_dev *hdev) +{ +} + +static inline void hci_debugfs_create_le(struct hci_dev *hdev) +{ +} + +static inline void hci_debugfs_create_conn(struct hci_conn *conn) +{ +} + +#endif diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a3fb094822b6..62f92a508961 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -70,7 +70,7 @@ static void hci_cc_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - set_bit(HCI_PERIODIC_INQ, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_PERIODIC_INQ); } static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) @@ -82,7 +82,7 @@ static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - clear_bit(HCI_PERIODIC_INQ, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); hci_conn_check_pending(hdev); } @@ -198,7 +198,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) return; /* Reset all non-persistent flags */ - hdev->dev_flags &= ~HCI_PERSISTENT_MASK; + hci_dev_clear_volatile_flags(hdev); hci_discovery_set_state(hdev, DISCOVERY_STOPPED); @@ -265,7 +265,7 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_set_local_name_complete(hdev, sent, status); else if (!status) memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); @@ -282,8 +282,8 @@ static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG)) memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH); } @@ -309,7 +309,7 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_AUTH, &hdev->flags); } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_auth_enable_complete(hdev, status); hci_dev_unlock(hdev); @@ -404,7 +404,7 @@ static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) if (status == 0) memcpy(hdev->dev_class, sent, 3); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_set_class_of_dev_complete(hdev, sent, status); hci_dev_unlock(hdev); @@ -497,13 +497,13 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SSP; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_ssp_enable_complete(hdev, sent->mode, status); else if (!status) { if (sent->mode) - set_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SSP_ENABLED); else - clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_SSP_ENABLED); } hci_dev_unlock(hdev); @@ -529,11 +529,11 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SC; } - if (!test_bit(HCI_MGMT, &hdev->dev_flags) && !status) { + if (!hci_dev_test_flag(hdev, HCI_MGMT) && !status) { if (sent->support) - set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SC_ENABLED); else - clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_SC_ENABLED); } hci_dev_unlock(hdev); @@ -548,8 +548,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG)) { hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); hdev->lmp_ver = rp->lmp_ver; @@ -568,8 +568,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG)) memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); } @@ -691,7 +691,7 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_INIT, &hdev->flags)) bacpy(&hdev->bdaddr, &rp->bdaddr); - if (test_bit(HCI_SETUP, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SETUP)) bacpy(&hdev->setup_addr, &rp->bdaddr); } @@ -900,7 +900,7 @@ static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_pin_code_reply_complete(hdev, &rp->bdaddr, rp->status); if (rp->status) @@ -926,7 +926,7 @@ static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_pin_code_neg_reply_complete(hdev, &rp->bdaddr, rp->status); @@ -985,7 +985,7 @@ static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_confirm_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0, rp->status); @@ -1001,7 +1001,7 @@ static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_confirm_neg_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0, rp->status); @@ -1016,7 +1016,7 @@ static void hci_cc_user_passkey_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0, rp->status); @@ -1032,7 +1032,7 @@ static void hci_cc_user_passkey_neg_reply(struct hci_dev *hdev, hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_neg_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0, rp->status); @@ -1109,7 +1109,7 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) if (*sent) { struct hci_conn *conn; - set_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_ADV); conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); if (conn) @@ -1117,7 +1117,7 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) &conn->le_conn_timeout, conn->conn_timeout); } else { - clear_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ADV); } hci_dev_unlock(hdev); @@ -1192,7 +1192,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, switch (cp->enable) { case LE_SCAN_ENABLE: - set_bit(HCI_LE_SCAN, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_SCAN); if (hdev->le_scan_type == LE_SCAN_ACTIVE) clear_pending_adv_report(hdev); break; @@ -1217,7 +1217,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, */ cancel_delayed_work(&hdev->le_scan_disable); - clear_bit(HCI_LE_SCAN, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_SCAN); /* The HCI_LE_SCAN_INTERRUPTED flag indicates that we * interrupted scanning due to a connect request. Mark @@ -1226,10 +1226,9 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, * been disabled because of active scanning, so * re-enable it again if necessary. */ - if (test_and_clear_bit(HCI_LE_SCAN_INTERRUPTED, - &hdev->dev_flags)) + if (hci_dev_test_and_clear_flag(hdev, HCI_LE_SCAN_INTERRUPTED)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - else if (!test_bit(HCI_LE_ADV, &hdev->dev_flags) && + else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) && hdev->discovery.state == DISCOVERY_FINDING) mgmt_reenable_advertising(hdev); @@ -1388,11 +1387,11 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (sent->le) { hdev->features[1][0] |= LMP_HOST_LE; - set_bit(HCI_LE_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_ENABLED); } else { hdev->features[1][0] &= ~LMP_HOST_LE; - clear_bit(HCI_LE_ENABLED, &hdev->dev_flags); - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ENABLED); + hci_dev_clear_flag(hdev, HCI_ADVERTISING); } if (sent->simul) @@ -1537,7 +1536,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) if (conn && conn->state == BT_CONNECT) { if (status != 0x0c || conn->attempt > 2) { conn->state = BT_CLOSED; - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_del(conn); } else conn->state = BT_CONNECT2; @@ -1581,7 +1580,7 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) if (sco) { sco->state = BT_CLOSED; - hci_proto_connect_cfm(sco, status); + hci_connect_cfm(sco, status); hci_conn_del(sco); } } @@ -1608,7 +1607,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { if (conn->state == BT_CONFIG) { - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_drop(conn); } } @@ -1635,7 +1634,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { if (conn->state == BT_CONFIG) { - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_drop(conn); } } @@ -1769,7 +1768,7 @@ static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) hci_check_pending_name(hdev, conn, &cp->bdaddr, NULL, 0); if (!conn) @@ -1811,7 +1810,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { if (conn->state == BT_CONFIG) { - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_drop(conn); } } @@ -1838,7 +1837,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { if (conn->state == BT_CONFIG) { - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_drop(conn); } } @@ -1873,7 +1872,7 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) if (sco) { sco->state = BT_CLOSED; - hci_proto_connect_cfm(sco, status); + hci_connect_cfm(sco, status); hci_conn_del(sco); } } @@ -2118,7 +2117,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */ wake_up_bit(&hdev->flags, HCI_INQUIRY); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; hci_dev_lock(hdev); @@ -2127,7 +2126,16 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) goto unlock; if (list_empty(&discov->resolve)) { - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + /* When BR/EDR inquiry is active and no LE scanning is in + * progress, then change discovery state to indicate completion. + * + * When running LE scanning and BR/EDR inquiry simultaneously + * and the LE scan already finished, then change the discovery + * state to indicate completion. + */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || + !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); goto unlock; } @@ -2136,7 +2144,16 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) e->name_state = NAME_PENDING; hci_discovery_set_state(hdev, DISCOVERY_RESOLVING); } else { - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + /* When BR/EDR inquiry is active and no LE scanning is in + * progress, then change discovery state to indicate completion. + * + * When running LE scanning and BR/EDR inquiry simultaneously + * and the LE scan already finished, then change the discovery + * state to indicate completion. + */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || + !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); } unlock: @@ -2154,7 +2171,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!num_rsp) return; - if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) return; hci_dev_lock(hdev); @@ -2255,10 +2272,10 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_sco_setup(conn, ev->status); if (ev->status) { - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_del(conn); } else if (ev->link_type != ACL_LINK) - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); unlock: hci_dev_unlock(hdev); @@ -2304,8 +2321,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) * connection. These features are only touched through mgmt so * only do the checks if HCI_MGMT is set. */ - if (test_bit(HCI_MGMT, &hdev->dev_flags) && - !test_bit(HCI_CONNECTABLE, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_MGMT) && + !hci_dev_test_flag(hdev, HCI_CONNECTABLE) && !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr, BDADDR_BREDR)) { hci_reject_conn(hdev, &ev->bdaddr); @@ -2366,7 +2383,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) &cp); } else { conn->state = BT_CONNECT2; - hci_proto_connect_cfm(conn, 0); + hci_connect_cfm(conn, 0); } } @@ -2444,7 +2461,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) type = conn->type; - hci_proto_disconn_cfm(conn, ev->reason); + hci_disconn_cfm(conn, ev->reason); hci_conn_del(conn); /* Re-enable advertising if necessary, since it might @@ -2501,7 +2518,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) &cp); } else { conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } } else { @@ -2542,7 +2559,7 @@ static void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto check_auth; if (ev->status == 0) @@ -2608,7 +2625,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) * whenever the encryption procedure fails. */ if (ev->status && conn->type == LE_LINK) - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); @@ -2626,15 +2643,15 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) * connections that are not encrypted with AES-CCM * using a P-256 authenticated combination key. */ - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && (!test_bit(HCI_CONN_AES_CCM, &conn->flags) || conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) { - hci_proto_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE); + hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_drop(conn); goto unlock; } - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } else hci_encrypt_cfm(conn, ev->status, ev->encrypt); @@ -2707,7 +2724,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } @@ -3106,7 +3123,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) cancel_delayed_work(&hdev->cmd_timer); if (ev->status || - (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) + (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req_event)) hci_req_cmd_complete(hdev, opcode, ev->status); if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { @@ -3331,11 +3348,11 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_drop(conn); } - if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_BONDABLE) && !test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags)) { hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(ev->bdaddr), &ev->bdaddr); - } else if (test_bit(HCI_MGMT, &hdev->dev_flags)) { + } else if (hci_dev_test_flag(hdev, HCI_MGMT)) { u8 secure; if (conn->pending_sec_level == BT_SECURITY_HIGH) @@ -3391,7 +3408,7 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s", hdev->name); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; hci_dev_lock(hdev); @@ -3465,7 +3482,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) set_bit(HCI_CONN_NEW_LINK_KEY, &conn->flags); conn_set_key(conn, ev->key_type, conn->pin_length); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto unlock; key = hci_add_link_key(hdev, conn, &ev->bdaddr, ev->link_key, @@ -3487,7 +3504,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) * store_hint being 0). */ if (key->type == HCI_LK_DEBUG_COMBINATION && - !test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags)) { + !hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS)) { list_del_rcu(&key->list); kfree_rcu(key, rcu); goto unlock; @@ -3570,7 +3587,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, if (!num_rsp) return; - if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) return; hci_dev_lock(hdev); @@ -3679,7 +3696,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } @@ -3738,7 +3755,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, break; } - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); if (ev->status) hci_conn_del(conn); @@ -3776,7 +3793,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, if (!num_rsp) return; - if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) return; hci_dev_lock(hdev); @@ -3794,7 +3811,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, data.rssi = info->rssi; data.ssp_mode = 0x01; - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) name_known = eir_has_data_type(info->data, sizeof(info->data), EIR_NAME_COMPLETE); @@ -3849,7 +3866,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, if (!ev->status) conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } else { hci_auth_cfm(conn, ev->status); @@ -3890,41 +3907,37 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) if (!data) return 0x00; - if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) { - if (bredr_sc_enabled(hdev)) { - /* When Secure Connections is enabled, then just - * return the present value stored with the OOB - * data. The stored value contains the right present - * information. However it can only be trusted when - * not in Secure Connection Only mode. - */ - if (!test_bit(HCI_SC_ONLY, &hdev->dev_flags)) - return data->present; - - /* When Secure Connections Only mode is enabled, then - * the P-256 values are required. If they are not - * available, then do not declare that OOB data is - * present. - */ - if (!memcmp(data->rand256, ZERO_KEY, 16) || - !memcmp(data->hash256, ZERO_KEY, 16)) - return 0x00; - - return 0x02; - } + if (bredr_sc_enabled(hdev)) { + /* When Secure Connections is enabled, then just + * return the present value stored with the OOB + * data. The stored value contains the right present + * information. However it can only be trusted when + * not in Secure Connection Only mode. + */ + if (!hci_dev_test_flag(hdev, HCI_SC_ONLY)) + return data->present; - /* When Secure Connections is not enabled or actually - * not supported by the hardware, then check that if - * P-192 data values are present. + /* When Secure Connections Only mode is enabled, then + * the P-256 values are required. If they are not + * available, then do not declare that OOB data is + * present. */ - if (!memcmp(data->rand192, ZERO_KEY, 16) || - !memcmp(data->hash192, ZERO_KEY, 16)) + if (!memcmp(data->rand256, ZERO_KEY, 16) || + !memcmp(data->hash256, ZERO_KEY, 16)) return 0x00; - return 0x01; + return 0x02; } - return 0x00; + /* When Secure Connections is not enabled or actually + * not supported by the hardware, then check that if + * P-192 data values are present. + */ + if (!memcmp(data->rand192, ZERO_KEY, 16) || + !memcmp(data->hash192, ZERO_KEY, 16)) + return 0x00; + + return 0x01; } static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -3942,13 +3955,13 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_hold(conn); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto unlock; /* Allow pairing if we're pairable, the initiators of the * pairing or if the remote is not requesting bonding. */ - if (test_bit(HCI_BONDABLE, &hdev->dev_flags) || + if (hci_dev_test_flag(hdev, HCI_BONDABLE) || test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags) || (conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) { struct hci_cp_io_capability_reply cp; @@ -3974,7 +3987,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) /* If we're not bondable, force one of the non-bondable * authentication requirement values. */ - if (!test_bit(HCI_BONDABLE, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BONDABLE)) conn->auth_type &= HCI_AT_NO_BONDING_MITM; cp.authentication = conn->auth_type; @@ -4011,8 +4024,6 @@ static void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->remote_cap = ev->capability; conn->remote_auth = ev->authentication; - if (ev->oob_data) - set_bit(HCI_CONN_REMOTE_OOB, &conn->flags); unlock: hci_dev_unlock(hdev); @@ -4029,7 +4040,7 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, hci_dev_lock(hdev); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto unlock; conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); @@ -4100,7 +4111,7 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev, BT_DBG("%s", hdev->name); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0); } @@ -4119,7 +4130,7 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev, conn->passkey_notify = __le32_to_cpu(ev->passkey); conn->passkey_entered = 0; - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); @@ -4157,7 +4168,7 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); @@ -4226,7 +4237,7 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, hci_dev_lock(hdev); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto unlock; data = hci_find_remote_oob_data(hdev, &ev->bdaddr, BDADDR_BREDR); @@ -4243,7 +4254,7 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, struct hci_cp_remote_oob_ext_data_reply cp; bacpy(&cp.bdaddr, &ev->bdaddr); - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { memset(cp.hash192, 0, sizeof(cp.hash192)); memset(cp.rand192, 0, sizeof(cp.rand192)); } else { @@ -4409,7 +4420,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) /* All controllers implicitly stop advertising in the event of a * connection, so ensure that the state bit is cleared. */ - clear_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ADV); conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); if (!conn) { @@ -4432,7 +4443,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->out) { conn->resp_addr_type = ev->bdaddr_type; bacpy(&conn->resp_addr, &ev->bdaddr); - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { conn->init_addr_type = ADDR_LE_DEV_RANDOM; bacpy(&conn->init_addr, &hdev->rpa); } else { @@ -4512,7 +4523,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst, conn->dst_type); @@ -4658,7 +4669,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, /* If the controller is not using resolvable random * addresses, then this report can be ignored. */ - if (!test_bit(HCI_PRIVACY, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_PRIVACY)) return; /* If the local IRK of the controller does not match @@ -5039,7 +5050,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) skb_pull(skb, HCI_EVENT_HDR_SIZE); - if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req_event == event) { struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; u16 opcode = __le16_to_cpu(cmd_hdr->opcode); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b59f92c6df0c..55e096d20a0f 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -55,7 +55,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete) return -ENODATA; skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->req.complete = complete; + bt_cb(skb)->req_complete = complete; spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); @@ -116,9 +116,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, } if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->req.start = true; + bt_cb(skb)->req_start = 1; - bt_cb(skb)->req.event = event; + bt_cb(skb)->req_event = event; skb_queue_tail(&req->cmd_q, skb); } @@ -270,7 +270,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req) * and 0x01 (whitelist enabled) use the new filter policies * 0x02 (no whitelist) and 0x03 (whitelist enabled). */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_PRIVACY) && (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) filter_policy |= 0x02; @@ -304,10 +304,10 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) * In this kind of scenario skip the update and let the random * address be updated at the next cycle. */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || + if (hci_dev_test_flag(hdev, HCI_LE_ADV) || hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { BT_DBG("Deferring random address update"); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); return; } @@ -324,12 +324,12 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * current RPA has expired or there is something else than * the current RPA in use, then generate a new one. */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { int to; *own_addr_type = ADDR_LE_DEV_RANDOM; - if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && + if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && !bacmp(&hdev->random_addr, &hdev->rpa)) return 0; @@ -383,9 +383,9 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * and a static address has been configured, then use that * address instead of the public BR/EDR address. */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { *own_addr_type = ADDR_LE_DEV_RANDOM; if (bacmp(&hdev->static_addr, &hdev->random_addr)) @@ -425,7 +425,7 @@ void __hci_update_page_scan(struct hci_request *req) struct hci_dev *hdev = req->hdev; u8 scan; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return; if (!hdev_is_powered(hdev)) @@ -434,7 +434,7 @@ void __hci_update_page_scan(struct hci_request *req) if (mgmt_powering_down(hdev)) return; - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || + if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) || disconnected_whitelist_entries(hdev)) scan = SCAN_PAGE; else @@ -443,7 +443,7 @@ void __hci_update_page_scan(struct hci_request *req) if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) return; - if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) scan |= SCAN_INQUIRY; hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); @@ -471,14 +471,14 @@ void __hci_update_background_scan(struct hci_request *req) if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || - test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG) || + hci_dev_test_flag(hdev, HCI_AUTO_OFF) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) return; /* No point in doing scanning if LE support hasn't been enabled */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return; /* If discovery is active don't interfere with it */ @@ -502,7 +502,7 @@ void __hci_update_background_scan(struct hci_request *req) */ /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return; hci_req_add_le_scan_disable(req); @@ -524,7 +524,7 @@ void __hci_update_background_scan(struct hci_request *req) /* If controller is currently scanning, we stop it to ensure we * don't miss any advertising (due to duplicates filter). */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) hci_req_add_le_scan_disable(req); hci_req_add_le_passive_scan(req); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1d65c5be7c82..85a44a7dc150 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -30,6 +30,12 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/hci_mon.h> +#include <net/bluetooth/mgmt.h> + +#include "mgmt_util.h" + +static LIST_HEAD(mgmt_chan_list); +static DEFINE_MUTEX(mgmt_chan_list_lock); static atomic_t monitor_promisc = ATOMIC_INIT(0); @@ -44,11 +50,32 @@ struct hci_pinfo { struct hci_filter filter; __u32 cmsg_mask; unsigned short channel; + unsigned long flags; }; -static inline int hci_test_bit(int nr, void *addr) +void hci_sock_set_flag(struct sock *sk, int nr) +{ + set_bit(nr, &hci_pi(sk)->flags); +} + +void hci_sock_clear_flag(struct sock *sk, int nr) { - return *((__u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); + clear_bit(nr, &hci_pi(sk)->flags); +} + +int hci_sock_test_flag(struct sock *sk, int nr) +{ + return test_bit(nr, &hci_pi(sk)->flags); +} + +unsigned short hci_sock_get_channel(struct sock *sk) +{ + return hci_pi(sk)->channel; +} + +static inline int hci_test_bit(int nr, const void *addr) +{ + return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); } /* Security filter */ @@ -183,54 +210,31 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb_copy); } -/* Send frame to control socket */ -void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) +/* Send frame to sockets with specific channel */ +void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, + int flag, struct sock *skip_sk) { struct sock *sk; - BT_DBG("len %d", skb->len); + BT_DBG("channel %u len %d", channel, skb->len); read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; - /* Skip the original socket */ - if (sk == skip_sk) - continue; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + /* Ignore socket without the flag set */ + if (!hci_sock_test_flag(sk, flag)) continue; - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) + /* Skip the original socket */ + if (sk == skip_sk) continue; - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } - - read_unlock(&hci_sk_list.lock); -} - -static void queue_monitor_skb(struct sk_buff *skb) -{ - struct sock *sk; - - BT_DBG("len %d", skb->len); - - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - if (sk->sk_state != BT_BOUND) continue; - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) + if (hci_pi(sk)->channel != channel) continue; nskb = skb_clone(skb, GFP_ATOMIC); @@ -290,7 +294,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); - queue_monitor_skb(skb_copy); + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb_copy, + HCI_SOCK_TRUSTED, NULL); kfree_skb(skb_copy); } @@ -397,7 +402,8 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) skb = create_monitor_event(hdev, event); if (skb) { - queue_monitor_skb(skb); + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } @@ -428,6 +434,56 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) } } +static struct hci_mgmt_chan *__hci_mgmt_chan_find(unsigned short channel) +{ + struct hci_mgmt_chan *c; + + list_for_each_entry(c, &mgmt_chan_list, list) { + if (c->channel == channel) + return c; + } + + return NULL; +} + +static struct hci_mgmt_chan *hci_mgmt_chan_find(unsigned short channel) +{ + struct hci_mgmt_chan *c; + + mutex_lock(&mgmt_chan_list_lock); + c = __hci_mgmt_chan_find(channel); + mutex_unlock(&mgmt_chan_list_lock); + + return c; +} + +int hci_mgmt_chan_register(struct hci_mgmt_chan *c) +{ + if (c->channel < HCI_CHANNEL_CONTROL) + return -EINVAL; + + mutex_lock(&mgmt_chan_list_lock); + if (__hci_mgmt_chan_find(c->channel)) { + mutex_unlock(&mgmt_chan_list_lock); + return -EALREADY; + } + + list_add_tail(&c->list, &mgmt_chan_list); + + mutex_unlock(&mgmt_chan_list_lock); + + return 0; +} +EXPORT_SYMBOL(hci_mgmt_chan_register); + +void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c) +{ + mutex_lock(&mgmt_chan_list_lock); + list_del(&c->list); + mutex_unlock(&mgmt_chan_list_lock); +} +EXPORT_SYMBOL(hci_mgmt_chan_unregister); + static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -448,7 +504,7 @@ static int hci_sock_release(struct socket *sock) if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { mgmt_index_added(hdev); - clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); hci_dev_close(hdev->id); } @@ -508,10 +564,10 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, if (!hdev) return -EBADFD; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return -EOPNOTSUPP; if (hdev->dev_type != HCI_BREDR) @@ -687,14 +743,14 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags)) { + hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG)) { err = -EBUSY; hci_dev_put(hdev); goto done; } - if (test_and_set_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_and_set_flag(hdev, HCI_USER_CHANNEL)) { err = -EUSERS; hci_dev_put(hdev); goto done; @@ -704,7 +760,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = hci_dev_open(hdev->id); if (err) { - clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); hci_dev_put(hdev); goto done; @@ -715,38 +771,62 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->hdev = hdev; break; - case HCI_CHANNEL_CONTROL: + case HCI_CHANNEL_MONITOR: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } - if (!capable(CAP_NET_ADMIN)) { + if (!capable(CAP_NET_RAW)) { err = -EPERM; goto done; } + /* The monitor interface is restricted to CAP_NET_RAW + * capabilities and with that implicitly trusted. + */ + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + send_monitor_replay(sk); + + atomic_inc(&monitor_promisc); break; - case HCI_CHANNEL_MONITOR: - if (haddr.hci_dev != HCI_DEV_NONE) { + default: + if (!hci_mgmt_chan_find(haddr.hci_channel)) { err = -EINVAL; goto done; } - if (!capable(CAP_NET_RAW)) { - err = -EPERM; + if (haddr.hci_dev != HCI_DEV_NONE) { + err = -EINVAL; goto done; } - send_monitor_replay(sk); - - atomic_inc(&monitor_promisc); + /* Users with CAP_NET_ADMIN capabilities are allowed + * access to all management commands and events. For + * untrusted users the interface is restricted and + * also only untrusted events are sent. + */ + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* At the moment the index and unconfigured index events + * are enabled unconditionally. Setting them on each + * socket when binding keeps this functionality. They + * however might be cleared later and then sending of these + * events will be disabled, but that is then intentional. + * + * This also enables generic events that are safe to be + * received by untrusted users. Example for such events + * are changes to settings, class of device, name etc. + */ + if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); + } break; - - default: - err = -EINVAL; - goto done; } @@ -826,8 +906,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, } } -static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -860,10 +940,13 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, hci_sock_cmsg(sk, msg, skb); break; case HCI_CHANNEL_USER: - case HCI_CHANNEL_CONTROL: case HCI_CHANNEL_MONITOR: sock_recv_timestamp(msg, sk, skb); break; + default: + if (hci_mgmt_chan_find(hci_pi(sk)->channel)) + sock_recv_timestamp(msg, sk, skb); + break; } skb_free_datagram(sk, skb); @@ -871,10 +954,122 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err ? : copied; } -static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, + struct msghdr *msg, size_t msglen) +{ + void *buf; + u8 *cp; + struct mgmt_hdr *hdr; + u16 opcode, index, len; + struct hci_dev *hdev = NULL; + const struct hci_mgmt_handler *handler; + bool var_len, no_hdev; + int err; + + BT_DBG("got %zu bytes", msglen); + + if (msglen < sizeof(*hdr)) + return -EINVAL; + + buf = kmalloc(msglen, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (memcpy_from_msg(buf, msg, msglen)) { + err = -EFAULT; + goto done; + } + + hdr = buf; + opcode = __le16_to_cpu(hdr->opcode); + index = __le16_to_cpu(hdr->index); + len = __le16_to_cpu(hdr->len); + + if (len != msglen - sizeof(*hdr)) { + err = -EINVAL; + goto done; + } + + if (opcode >= chan->handler_count || + chan->handlers[opcode].func == NULL) { + BT_DBG("Unknown op %u", opcode); + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_UNKNOWN_COMMAND); + goto done; + } + + handler = &chan->handlers[opcode]; + + if (!hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) && + !(handler->flags & HCI_MGMT_UNTRUSTED)) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_PERMISSION_DENIED); + goto done; + } + + if (index != MGMT_INDEX_NONE) { + hdev = hci_dev_get(index); + if (!hdev) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + + if (hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG) || + hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !(handler->flags & HCI_MGMT_UNCONFIGURED)) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + } + + no_hdev = (handler->flags & HCI_MGMT_NO_HDEV); + if (no_hdev != !hdev) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + + var_len = (handler->flags & HCI_MGMT_VAR_LEN); + if ((var_len && len < handler->data_len) || + (!var_len && len != handler->data_len)) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_PARAMS); + goto done; + } + + if (hdev && chan->hdev_init) + chan->hdev_init(sk, hdev); + + cp = buf + sizeof(*hdr); + + err = handler->func(sk, hdev, cp, len); + if (err < 0) + goto done; + + err = msglen; + +done: + if (hdev) + hci_dev_put(hdev); + + kfree(buf); + return err; +} + +static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; + struct hci_mgmt_chan *chan; struct hci_dev *hdev; struct sk_buff *skb; int err; @@ -896,14 +1091,18 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: break; - case HCI_CHANNEL_CONTROL: - err = mgmt_control(sk, msg, len); - goto done; case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; goto done; default: - err = -EINVAL; + mutex_lock(&mgmt_chan_list_lock); + chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); + if (chan) + err = hci_mgmt_cmd(chan, sk, msg, len); + else + err = -EINVAL; + + mutex_unlock(&mgmt_chan_list_lock); goto done; } @@ -965,7 +1164,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->req.start = true; + bt_cb(skb)->req_start = 1; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 6ba33f9631e8..d69861c89bb5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1244,6 +1244,13 @@ static void l2cap_move_done(struct l2cap_chan *chan) static void l2cap_chan_ready(struct l2cap_chan *chan) { + /* The channel may have already been flagged as connected in + * case of receiving data before the L2CAP info req/rsp + * procedure is complete. + */ + if (chan->state == BT_CONNECTED) + return; + /* This clears all conf flags, including CONF_NOT_COMPLETE */ chan->conf_state = 0; __clear_chan_timer(chan); @@ -3893,7 +3900,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn, return -EPROTO; hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_MGMT) && !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags)) mgmt_device_connected(hdev, hcon, 0, NULL, 0); hci_dev_unlock(hdev); @@ -6785,6 +6792,13 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, BT_DBG("chan %p, len %d", chan, skb->len); + /* If we receive data on a fixed channel before the info req/rsp + * procdure is done simply assume that the channel is supported + * and mark it as ready. + */ + if (chan->chan_type == L2CAP_CHAN_FIXED) + l2cap_chan_ready(chan); + if (chan->state != BT_CONNECTED) goto drop; @@ -6973,12 +6987,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS; if (hcon->type == ACL_LINK && - test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags)) + hci_dev_test_flag(hcon->hdev, HCI_HS_ENABLED)) conn->local_fixed_chan |= L2CAP_FC_A2MP; - if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) && + if (hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED) && (bredr_sc_enabled(hcon->hdev) || - test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags))) + hci_dev_test_flag(hcon->hdev, HCI_FORCE_BREDR_SMP))) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); @@ -7098,7 +7112,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, else dst_type = ADDR_LE_DEV_RANDOM; - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) role = HCI_ROLE_SLAVE; else role = HCI_ROLE_MASTER; @@ -7238,13 +7252,16 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, return NULL; } -void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) +static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; struct l2cap_conn *conn; struct l2cap_chan *pchan; u8 dst_type; + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { @@ -7307,8 +7324,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon) return conn->disc_reason; } -void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) +static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); @@ -7331,13 +7351,13 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) } } -int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) +static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) { struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_chan *chan; if (!conn) - return 0; + return; BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt); @@ -7420,8 +7440,6 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } mutex_unlock(&conn->chan_lock); - - return 0; } int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) @@ -7529,6 +7547,13 @@ drop: return 0; } +static struct hci_cb l2cap_cb = { + .name = "L2CAP", + .connect_cfm = l2cap_connect_cfm, + .disconn_cfm = l2cap_disconn_cfm, + .security_cfm = l2cap_security_cfm, +}; + static int l2cap_debugfs_show(struct seq_file *f, void *p) { struct l2cap_chan *c; @@ -7570,6 +7595,8 @@ int __init l2cap_init(void) if (err < 0) return err; + hci_register_cb(&l2cap_cb); + if (IS_ERR_OR_NULL(bt_debugfs)) return 0; @@ -7587,6 +7614,7 @@ int __init l2cap_init(void) void l2cap_exit(void) { debugfs_remove(l2cap_debugfs); + hci_unregister_cb(&l2cap_cb); l2cap_cleanup_sockets(); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 60694f0f4c73..9070720eedc8 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -944,8 +944,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, return err; } -static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; @@ -976,8 +976,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err; } -static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { struct sock *sk = sock->sk; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1004,9 +1004,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); if (sock->type == SOCK_STREAM) - err = bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); + err = bt_sock_stream_recvmsg(sock, msg, len, flags); else - err = bt_sock_recvmsg(iocb, sock, msg, len, flags); + err = bt_sock_recvmsg(sock, msg, len, flags); if (pi->chan->mode != L2CAP_MODE_ERTM) return err; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9ec5390c85eb..fb2e764c6211 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -29,14 +29,16 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/hci_sock.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> #include "hci_request.h" #include "smp.h" +#include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 8 +#define MGMT_REVISION 9 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -95,6 +97,11 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_OP_SET_PUBLIC_ADDRESS, MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + MGMT_OP_READ_EXT_INDEX_LIST, + MGMT_OP_READ_ADV_FEATURES, + MGMT_OP_ADD_ADVERTISING, + MGMT_OP_REMOVE_ADVERTISING, }; static const u16 mgmt_events[] = { @@ -127,6 +134,32 @@ static const u16 mgmt_events[] = { MGMT_EV_UNCONF_INDEX_ADDED, MGMT_EV_UNCONF_INDEX_REMOVED, MGMT_EV_NEW_CONFIG_OPTIONS, + MGMT_EV_EXT_INDEX_ADDED, + MGMT_EV_EXT_INDEX_REMOVED, + MGMT_EV_LOCAL_OOB_DATA_UPDATED, + MGMT_EV_ADVERTISING_ADDED, + MGMT_EV_ADVERTISING_REMOVED, +}; + +static const u16 mgmt_untrusted_commands[] = { + MGMT_OP_READ_INDEX_LIST, + MGMT_OP_READ_INFO, + MGMT_OP_READ_UNCONF_INDEX_LIST, + MGMT_OP_READ_CONFIG_INFO, + MGMT_OP_READ_EXT_INDEX_LIST, +}; + +static const u16 mgmt_untrusted_events[] = { + MGMT_EV_INDEX_ADDED, + MGMT_EV_INDEX_REMOVED, + MGMT_EV_NEW_SETTINGS, + MGMT_EV_CLASS_OF_DEV_CHANGED, + MGMT_EV_LOCAL_NAME_CHANGED, + MGMT_EV_UNCONF_INDEX_ADDED, + MGMT_EV_UNCONF_INDEX_REMOVED, + MGMT_EV_NEW_CONFIG_OPTIONS, + MGMT_EV_EXT_INDEX_ADDED, + MGMT_EV_EXT_INDEX_REMOVED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -134,17 +167,6 @@ static const u16 mgmt_events[] = { #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" -struct pending_cmd { - struct list_head list; - u16 opcode; - int index; - void *param; - size_t param_len; - struct sock *sk; - void *user_data; - int (*cmd_complete)(struct pending_cmd *cmd, u8 status); -}; - /* HCI to MGMT error code conversion table */ static u8 mgmt_status_table[] = { MGMT_STATUS_SUCCESS, @@ -218,98 +240,32 @@ static u8 mgmt_status(u8 hci_status) return MGMT_STATUS_FAILED; } -static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len, - struct sock *skip_sk) +static int mgmt_index_event(u16 event, struct hci_dev *hdev, void *data, + u16 len, int flag) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - - skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(event); - if (hdev) - hdr->index = cpu_to_le16(hdev->id); - else - hdr->index = cpu_to_le16(MGMT_INDEX_NONE); - hdr->len = cpu_to_le16(data_len); - - if (data) - memcpy(skb_put(skb, data_len), data, data_len); - - /* Time stamp */ - __net_timestamp(skb); - - hci_send_to_control(skb, skip_sk); - kfree_skb(skb); - - return 0; + return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, + flag, NULL); } -static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) +static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data, + u16 len, int flag, struct sock *skip_sk) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_status *ev; - int err; - - BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); - - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); - hdr->index = cpu_to_le16(index); - hdr->len = cpu_to_le16(sizeof(*ev)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - ev->status = status; - ev->opcode = cpu_to_le16(cmd); - - err = sock_queue_rcv_skb(sk, skb); - if (err < 0) - kfree_skb(skb); - - return err; + return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, + flag, skip_sk); } -static int cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, - void *rp, size_t rp_len) +static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data, + u16 len, struct sock *skip_sk) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - int err; - - BT_DBG("sock %p", sk); - - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->index = cpu_to_le16(index); - hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); - - ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); - ev->opcode = cpu_to_le16(cmd); - ev->status = status; - - if (rp) - memcpy(ev->data, rp, rp_len); - - err = sock_queue_rcv_skb(sk, skb); - if (err < 0) - kfree_skb(skb); + return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, + HCI_MGMT_GENERIC_EVENTS, skip_sk); +} - return err; +static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, + struct sock *skip_sk) +{ + return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, + HCI_SOCK_TRUSTED, skip_sk); } static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, @@ -322,22 +278,28 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, rp.version = MGMT_VERSION; rp.revision = cpu_to_le16(MGMT_REVISION); - return cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp, - sizeof(rp)); + return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, + &rp, sizeof(rp)); } static int read_commands(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_commands *rp; - const u16 num_commands = ARRAY_SIZE(mgmt_commands); - const u16 num_events = ARRAY_SIZE(mgmt_events); - __le16 *opcode; + u16 num_commands, num_events; size_t rp_size; int i, err; BT_DBG("sock %p", sk); + if (hci_sock_test_flag(sk, HCI_SOCK_TRUSTED)) { + num_commands = ARRAY_SIZE(mgmt_commands); + num_events = ARRAY_SIZE(mgmt_events); + } else { + num_commands = ARRAY_SIZE(mgmt_untrusted_commands); + num_events = ARRAY_SIZE(mgmt_untrusted_events); + } + rp_size = sizeof(*rp) + ((num_commands + num_events) * sizeof(u16)); rp = kmalloc(rp_size, GFP_KERNEL); @@ -347,14 +309,26 @@ static int read_commands(struct sock *sk, struct hci_dev *hdev, void *data, rp->num_commands = cpu_to_le16(num_commands); rp->num_events = cpu_to_le16(num_events); - for (i = 0, opcode = rp->opcodes; i < num_commands; i++, opcode++) - put_unaligned_le16(mgmt_commands[i], opcode); + if (hci_sock_test_flag(sk, HCI_SOCK_TRUSTED)) { + __le16 *opcode = rp->opcodes; + + for (i = 0; i < num_commands; i++, opcode++) + put_unaligned_le16(mgmt_commands[i], opcode); + + for (i = 0; i < num_events; i++, opcode++) + put_unaligned_le16(mgmt_events[i], opcode); + } else { + __le16 *opcode = rp->opcodes; + + for (i = 0; i < num_commands; i++, opcode++) + put_unaligned_le16(mgmt_untrusted_commands[i], opcode); - for (i = 0; i < num_events; i++, opcode++) - put_unaligned_le16(mgmt_events[i], opcode); + for (i = 0; i < num_events; i++, opcode++) + put_unaligned_le16(mgmt_untrusted_events[i], opcode); + } - err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_COMMANDS, 0, rp, - rp_size); + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_COMMANDS, 0, + rp, rp_size); kfree(rp); return err; @@ -376,7 +350,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (d->dev_type == HCI_BREDR && - !test_bit(HCI_UNCONFIGURED, &d->dev_flags)) + !hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -389,9 +363,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (test_bit(HCI_SETUP, &d->dev_flags) || - test_bit(HCI_CONFIG, &d->dev_flags) || - test_bit(HCI_USER_CHANNEL, &d->dev_flags)) + if (hci_dev_test_flag(d, HCI_SETUP) || + hci_dev_test_flag(d, HCI_CONFIG) || + hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Devices marked as raw-only are neither configured @@ -401,7 +375,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, continue; if (d->dev_type == HCI_BREDR && - !test_bit(HCI_UNCONFIGURED, &d->dev_flags)) { + !hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); BT_DBG("Added hci%u", d->id); } @@ -412,8 +386,8 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, read_unlock(&hci_dev_list_lock); - err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, 0, rp, - rp_len); + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, + 0, rp, rp_len); kfree(rp); @@ -436,7 +410,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (d->dev_type == HCI_BREDR && - test_bit(HCI_UNCONFIGURED, &d->dev_flags)) + hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -449,9 +423,9 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (test_bit(HCI_SETUP, &d->dev_flags) || - test_bit(HCI_CONFIG, &d->dev_flags) || - test_bit(HCI_USER_CHANNEL, &d->dev_flags)) + if (hci_dev_test_flag(d, HCI_SETUP) || + hci_dev_test_flag(d, HCI_CONFIG) || + hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Devices marked as raw-only are neither configured @@ -461,7 +435,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, continue; if (d->dev_type == HCI_BREDR && - test_bit(HCI_UNCONFIGURED, &d->dev_flags)) { + hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); BT_DBG("Added hci%u", d->id); } @@ -472,8 +446,84 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, read_unlock(&hci_dev_list_lock); - err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_UNCONF_INDEX_LIST, - 0, rp, rp_len); + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, + MGMT_OP_READ_UNCONF_INDEX_LIST, 0, rp, rp_len); + + kfree(rp); + + return err; +} + +static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_ext_index_list *rp; + struct hci_dev *d; + size_t rp_len; + u16 count; + int err; + + BT_DBG("sock %p", sk); + + read_lock(&hci_dev_list_lock); + + count = 0; + list_for_each_entry(d, &hci_dev_list, list) { + if (d->dev_type == HCI_BREDR || d->dev_type == HCI_AMP) + count++; + } + + rp_len = sizeof(*rp) + (sizeof(rp->entry[0]) * count); + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { + read_unlock(&hci_dev_list_lock); + return -ENOMEM; + } + + count = 0; + list_for_each_entry(d, &hci_dev_list, list) { + if (hci_dev_test_flag(d, HCI_SETUP) || + hci_dev_test_flag(d, HCI_CONFIG) || + hci_dev_test_flag(d, HCI_USER_CHANNEL)) + continue; + + /* Devices marked as raw-only are neither configured + * nor unconfigured controllers. + */ + if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + continue; + + if (d->dev_type == HCI_BREDR) { + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) + rp->entry[count].type = 0x01; + else + rp->entry[count].type = 0x00; + } else if (d->dev_type == HCI_AMP) { + rp->entry[count].type = 0x02; + } else { + continue; + } + + rp->entry[count].bus = d->bus; + rp->entry[count++].index = cpu_to_le16(d->id); + BT_DBG("Added hci%u", d->id); + } + + rp->num_controllers = cpu_to_le16(count); + rp_len = sizeof(*rp) + (sizeof(rp->entry[0]) * count); + + read_unlock(&hci_dev_list_lock); + + /* If this command is called at least once, then all the + * default index and unconfigured index events are disabled + * and from now on only extended index events are used. + */ + hci_sock_set_flag(sk, HCI_MGMT_EXT_INDEX_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_INDEX_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); + + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, + MGMT_OP_READ_EXT_INDEX_LIST, 0, rp, rp_len); kfree(rp); @@ -483,7 +533,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, static bool is_configured(struct hci_dev *hdev) { if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && - !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags)) + !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) return false; if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) && @@ -498,7 +548,7 @@ static __le32 get_missing_options(struct hci_dev *hdev) u32 options = 0; if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && - !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags)) + !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) options |= MGMT_OPTION_EXTERNAL_CONFIG; if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) && @@ -512,16 +562,16 @@ static int new_options(struct hci_dev *hdev, struct sock *skip) { __le32 options = get_missing_options(hdev); - return mgmt_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, - sizeof(options), skip); + return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, + sizeof(options), skip); } static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 options = get_missing_options(hdev); - return cmd_complete(sk, hdev->id, opcode, 0, &options, - sizeof(options)); + return mgmt_cmd_complete(sk, hdev->id, opcode, 0, &options, + sizeof(options)); } static int read_config_info(struct sock *sk, struct hci_dev *hdev, @@ -548,8 +598,8 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev, hci_dev_unlock(hdev); - return cmd_complete(sk, hdev->id, MGMT_OP_READ_CONFIG_INFO, 0, &rp, - sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_CONFIG_INFO, 0, + &rp, sizeof(rp)); } static u32 get_supported_settings(struct hci_dev *hdev) @@ -582,6 +632,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_ADVERTISING; settings |= MGMT_SETTING_SECURE_CONN; settings |= MGMT_SETTING_PRIVACY; + settings |= MGMT_SETTING_STATIC_ADDRESS; } if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || @@ -598,45 +649,64 @@ static u32 get_current_settings(struct hci_dev *hdev) if (hdev_is_powered(hdev)) settings |= MGMT_SETTING_POWERED; - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_CONNECTABLE)) settings |= MGMT_SETTING_CONNECTABLE; - if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) settings |= MGMT_SETTING_FAST_CONNECTABLE; - if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) settings |= MGMT_SETTING_DISCOVERABLE; - if (test_bit(HCI_BONDABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_BONDABLE)) settings |= MGMT_SETTING_BONDABLE; - if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) settings |= MGMT_SETTING_BREDR; - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) settings |= MGMT_SETTING_LE; - if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) settings |= MGMT_SETTING_LINK_SECURITY; - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) settings |= MGMT_SETTING_SSP; - if (test_bit(HCI_HS_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_HS_ENABLED)) settings |= MGMT_SETTING_HS; - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) settings |= MGMT_SETTING_ADVERTISING; - if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SC_ENABLED)) settings |= MGMT_SETTING_SECURE_CONN; - if (test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS)) settings |= MGMT_SETTING_DEBUG_KEYS; - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) settings |= MGMT_SETTING_PRIVACY; + /* The current setting for static address has two purposes. The + * first is to indicate if the static address will be used and + * the second is to indicate if it is actually set. + * + * This means if the static address is not configured, this flag + * will never be set. If the address is configured, then if the + * address is actually used decides if the flag is set or not. + * + * For single mode LE only controllers and dual-mode controllers + * with BR/EDR disabled, the existence of the static address will + * be evaluated. + */ + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || + !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) || + !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + if (bacmp(&hdev->static_addr, BDADDR_ANY)) + settings |= MGMT_SETTING_STATIC_ADDRESS; + } + return settings; } @@ -750,35 +820,19 @@ static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) return ptr; } -static struct pending_cmd *mgmt_pending_find(u16 opcode, struct hci_dev *hdev) +static struct mgmt_pending_cmd *pending_find(u16 opcode, struct hci_dev *hdev) { - struct pending_cmd *cmd; - - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { - if (cmd->opcode == opcode) - return cmd; - } - - return NULL; + return mgmt_pending_find(HCI_CHANNEL_CONTROL, opcode, hdev); } -static struct pending_cmd *mgmt_pending_find_data(u16 opcode, +static struct mgmt_pending_cmd *pending_find_data(u16 opcode, struct hci_dev *hdev, const void *data) { - struct pending_cmd *cmd; - - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { - if (cmd->user_data != data) - continue; - if (cmd->opcode == opcode) - return cmd; - } - - return NULL; + return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data); } -static u8 create_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { u8 ad_len = 0; size_t name_len; @@ -804,21 +858,36 @@ static u8 create_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) return ad_len; } -static void update_scan_rsp_data(struct hci_request *req) +static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + /* TODO: Set the appropriate entries based on advertising instance flags + * here once flags other than 0 are supported. + */ + memcpy(ptr, hdev->adv_instance.scan_rsp_data, + hdev->adv_instance.scan_rsp_len); + + return hdev->adv_instance.scan_rsp_len; +} + +static void update_scan_rsp_data_for_instance(struct hci_request *req, + u8 instance) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_scan_rsp_data cp; u8 len; - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return; memset(&cp, 0, sizeof(cp)); - len = create_scan_rsp_data(hdev, cp.data); + if (instance) + len = create_instance_scan_rsp_data(hdev, cp.data); + else + len = create_default_scan_rsp_data(hdev, cp.data); if (hdev->scan_rsp_data_len == len && - memcmp(cp.data, hdev->scan_rsp_data, len) == 0) + !memcmp(cp.data, hdev->scan_rsp_data, len)) return; memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); @@ -829,14 +898,33 @@ static void update_scan_rsp_data(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); } +static void update_scan_rsp_data(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 instance; + + /* The "Set Advertising" setting supersedes the "Add Advertising" + * setting. Here we set the scan response data based on which + * setting was set. When neither apply, default to the global settings, + * represented by instance "0". + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING)) + instance = 0x01; + else + instance = 0x00; + + update_scan_rsp_data_for_instance(req, instance); +} + static u8 get_adv_discov_flags(struct hci_dev *hdev) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; /* If there's a pending mgmt command the flags will not yet have * their final values, so check for this first. */ - cmd = mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); + cmd = pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); if (cmd) { struct mgmt_mode *cp = cmd->param; if (cp->val == 0x01) @@ -844,60 +932,138 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev) else if (cp->val == 0x02) return LE_AD_LIMITED; } else { - if (test_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) return LE_AD_LIMITED; - else if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) return LE_AD_GENERAL; } return 0; } -static u8 create_adv_data(struct hci_dev *hdev, u8 *ptr) +static u8 get_current_adv_instance(struct hci_dev *hdev) +{ + /* The "Set Advertising" setting supersedes the "Add Advertising" + * setting. Here we set the advertising data based on which + * setting was set. When neither apply, default to the global settings, + * represented by instance "0". + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return 0x01; + + return 0x00; +} + +static bool get_connectable(struct hci_dev *hdev) +{ + struct mgmt_pending_cmd *cmd; + + /* If there's a pending mgmt command the flag will not yet have + * it's final value, so check for this first. + */ + cmd = pending_find(MGMT_OP_SET_CONNECTABLE, hdev); + if (cmd) { + struct mgmt_mode *cp = cmd->param; + + return cp->val; + } + + return hci_dev_test_flag(hdev, HCI_CONNECTABLE); +} + +static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) +{ + u32 flags; + + if (instance > 0x01) + return 0; + + if (instance == 0x01) + return hdev->adv_instance.flags; + + /* Instance 0 always manages the "Tx Power" and "Flags" fields */ + flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; + + /* For instance 0, assemble the flags from global settings */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE) || + get_connectable(hdev)) + flags |= MGMT_ADV_FLAG_CONNECTABLE; + + return flags; +} + +static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { u8 ad_len = 0, flags = 0; + u32 instance_flags = get_adv_instance_flags(hdev, instance); + + /* The Add Advertising command allows userspace to set both the general + * and limited discoverable flags. + */ + if (instance_flags & MGMT_ADV_FLAG_DISCOV) + flags |= LE_AD_GENERAL; - flags |= get_adv_discov_flags(hdev); + if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) + flags |= LE_AD_LIMITED; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - flags |= LE_AD_NO_BREDR; + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { + /* If a discovery flag wasn't provided, simply use the global + * settings. + */ + if (!flags) + flags |= get_adv_discov_flags(hdev); - if (flags) { - BT_DBG("adv flags 0x%02x", flags); + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; - ptr[0] = 2; - ptr[1] = EIR_FLAGS; - ptr[2] = flags; + /* If flags would still be empty, then there is no need to + * include the "Flags" AD field". + */ + if (flags) { + ptr[0] = 0x02; + ptr[1] = EIR_FLAGS; + ptr[2] = flags; - ad_len += 3; - ptr += 3; + ad_len += 3; + ptr += 3; + } } - if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) { - ptr[0] = 2; + /* Provide Tx Power only if we can provide a valid value for it */ + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID && + (instance_flags & MGMT_ADV_FLAG_TX_POWER)) { + ptr[0] = 0x02; ptr[1] = EIR_TX_POWER; - ptr[2] = (u8) hdev->adv_tx_power; + ptr[2] = (u8)hdev->adv_tx_power; ad_len += 3; ptr += 3; } + if (instance) { + memcpy(ptr, hdev->adv_instance.adv_data, + hdev->adv_instance.adv_data_len); + ad_len += hdev->adv_instance.adv_data_len; + } + return ad_len; } -static void update_adv_data(struct hci_request *req) +static void update_adv_data_for_instance(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_adv_data cp; u8 len; - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return; memset(&cp, 0, sizeof(cp)); - len = create_adv_data(hdev, cp.data); + len = create_instance_adv_data(hdev, instance, cp.data); + /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && memcmp(cp.data, hdev->adv_data, len) == 0) return; @@ -910,6 +1076,14 @@ static void update_adv_data(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } +static void update_adv_data(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 instance = get_current_adv_instance(hdev); + + update_adv_data_for_instance(req, instance); +} + int mgmt_update_adv_data(struct hci_dev *hdev) { struct hci_request req; @@ -979,10 +1153,10 @@ static void update_eir(struct hci_request *req) if (!lmp_ext_inq_capable(hdev)) return; - if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return; - if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) return; memset(&cp, 0, sizeof(cp)); @@ -1018,17 +1192,17 @@ static void update_class(struct hci_request *req) if (!hdev_is_powered(hdev)) return; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return; - if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) return; cod[0] = hdev->minor_class; cod[1] = hdev->major_class; cod[2] = get_service_classes(hdev); - if (test_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) cod[1] |= 0x20; if (memcmp(cod, hdev->dev_class, 3) == 0) @@ -1037,22 +1211,6 @@ static void update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } -static bool get_connectable(struct hci_dev *hdev) -{ - struct pending_cmd *cmd; - - /* If there's a pending mgmt command the flag will not yet have - * it's final value, so check for this first. - */ - cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); - if (cmd) { - struct mgmt_mode *cp = cmd->param; - return cp->val; - } - - return test_bit(HCI_CONNECTABLE, &hdev->dev_flags); -} - static void disable_advertising(struct hci_request *req) { u8 enable = 0x00; @@ -1066,11 +1224,13 @@ static void enable_advertising(struct hci_request *req) struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; + u8 instance; + u32 flags; if (hci_conn_num(hdev, LE_LINK) > 0) return; - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(req); /* Clear the HCI_LE_ADV bit temporarily so that the @@ -1078,9 +1238,11 @@ static void enable_advertising(struct hci_request *req) * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ - clear_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ADV); - connectable = get_connectable(hdev); + instance = get_current_adv_instance(hdev); + flags = get_adv_instance_flags(hdev, instance); + connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE); /* Set require_privacy to true only when non-connectable * advertising is used. In that case it is fine to use a @@ -1107,7 +1269,7 @@ static void service_cache_off(struct work_struct *work) service_cache.work); struct hci_request req; - if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) + if (!hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) return; hci_req_init(&req, hdev); @@ -1130,9 +1292,9 @@ static void rpa_expired(struct work_struct *work) BT_DBG(""); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); - if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING)) return; /* The generation of a new RPA and programming it into the @@ -1145,7 +1307,7 @@ static void rpa_expired(struct work_struct *work) static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) { - if (test_and_set_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_and_set_flag(hdev, HCI_MGMT)) return; INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off); @@ -1156,7 +1318,7 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) * for mgmt we require user-space to explicitly enable * it */ - clear_bit(HCI_BONDABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_BONDABLE); } static int read_controller_info(struct sock *sk, struct hci_dev *hdev, @@ -1185,73 +1347,16 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, hci_dev_unlock(hdev); - return cmd_complete(sk, hdev->id, MGMT_OP_READ_INFO, 0, &rp, - sizeof(rp)); -} - -static void mgmt_pending_free(struct pending_cmd *cmd) -{ - sock_put(cmd->sk); - kfree(cmd->param); - kfree(cmd); -} - -static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, - struct hci_dev *hdev, void *data, - u16 len) -{ - struct pending_cmd *cmd; - - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); - if (!cmd) - return NULL; - - cmd->opcode = opcode; - cmd->index = hdev->id; - - cmd->param = kmemdup(data, len, GFP_KERNEL); - if (!cmd->param) { - kfree(cmd); - return NULL; - } - - cmd->param_len = len; - - cmd->sk = sk; - sock_hold(sk); - - list_add(&cmd->list, &hdev->mgmt_pending); - - return cmd; -} - -static void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, - void (*cb)(struct pending_cmd *cmd, - void *data), - void *data) -{ - struct pending_cmd *cmd, *tmp; - - list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { - if (opcode > 0 && cmd->opcode != opcode) - continue; - - cb(cmd, data); - } -} - -static void mgmt_pending_remove(struct pending_cmd *cmd) -{ - list_del(&cmd->list); - mgmt_pending_free(cmd); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_INFO, 0, &rp, + sizeof(rp)); } static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 settings = cpu_to_le32(get_current_settings(hdev)); - return cmd_complete(sk, hdev->id, opcode, 0, &settings, - sizeof(settings)); + return mgmt_cmd_complete(sk, hdev->id, opcode, 0, &settings, + sizeof(settings)); } static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) @@ -1272,9 +1377,10 @@ static bool hci_stop_discovery(struct hci_request *req) switch (hdev->discovery.state) { case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) { + if (test_bit(HCI_INQUIRY, &hdev->flags)) hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); - } else { + + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { cancel_delayed_work(&hdev->le_scan_disable); hci_req_add_le_scan_disable(req); } @@ -1295,7 +1401,7 @@ static bool hci_stop_discovery(struct hci_request *req) default: /* Passive scanning */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { hci_req_add_le_scan_disable(req); return true; } @@ -1306,6 +1412,49 @@ static bool hci_stop_discovery(struct hci_request *req) return false; } +static void advertising_added(struct sock *sk, struct hci_dev *hdev, + u8 instance) +{ + struct mgmt_ev_advertising_added ev; + + ev.instance = instance; + + mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk); +} + +static void advertising_removed(struct sock *sk, struct hci_dev *hdev, + u8 instance) +{ + struct mgmt_ev_advertising_removed ev; + + ev.instance = instance; + + mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk); +} + +static void clear_adv_instance(struct hci_dev *hdev) +{ + struct hci_request req; + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + return; + + if (hdev->adv_instance.timeout) + cancel_delayed_work(&hdev->adv_instance.timeout_exp); + + memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); + advertising_removed(NULL, hdev, 1); + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return; + + hci_req_init(&req, hdev); + disable_advertising(&req); + hci_req_run(&req, NULL); +} + static int clean_up_hci_state(struct hci_dev *hdev) { struct hci_request req; @@ -1321,7 +1470,10 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) + if (hdev->adv_instance.timeout) + clear_adv_instance(hdev); + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(&req); discov_stopped = hci_stop_discovery(&req); @@ -1369,24 +1521,24 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; int err; BT_DBG("request for %s", hdev->name); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_POWERED, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_BUSY); goto failed; } - if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { cancel_delayed_work(&hdev->power_off); if (cp->val) { @@ -1433,11 +1585,10 @@ failed: static int new_settings(struct hci_dev *hdev, struct sock *skip) { - __le32 ev; - - ev = cpu_to_le32(get_current_settings(hdev)); + __le32 ev = cpu_to_le32(get_current_settings(hdev)); - return mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), skip); + return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, + sizeof(ev), skip); } int mgmt_new_settings(struct hci_dev *hdev) @@ -1451,7 +1602,7 @@ struct cmd_lookup { u8 mgmt_status; }; -static void settings_rsp(struct pending_cmd *cmd, void *data) +static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; @@ -1467,15 +1618,15 @@ static void settings_rsp(struct pending_cmd *cmd, void *data) mgmt_pending_free(cmd); } -static void cmd_status_rsp(struct pending_cmd *cmd, void *data) +static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) { u8 *status = data; - cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); mgmt_pending_remove(cmd); } -static void cmd_complete_rsp(struct pending_cmd *cmd, void *data) +static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) { if (cmd->cmd_complete) { u8 *status = data; @@ -1489,23 +1640,23 @@ static void cmd_complete_rsp(struct pending_cmd *cmd, void *data) cmd_status_rsp(cmd, data); } -static int generic_cmd_complete(struct pending_cmd *cmd, u8 status) +static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, - cmd->param, cmd->param_len); + return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, cmd->param_len); } -static int addr_cmd_complete(struct pending_cmd *cmd, u8 status) +static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - sizeof(struct mgmt_addr_info)); + return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, sizeof(struct mgmt_addr_info)); } static u8 mgmt_bredr_support(struct hci_dev *hdev) { if (!lmp_bredr_capable(hdev)) return MGMT_STATUS_NOT_SUPPORTED; - else if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + else if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return MGMT_STATUS_REJECTED; else return MGMT_STATUS_SUCCESS; @@ -1515,7 +1666,7 @@ static u8 mgmt_le_support(struct hci_dev *hdev) { if (!lmp_le_capable(hdev)) return MGMT_STATUS_NOT_SUPPORTED; - else if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + else if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return MGMT_STATUS_REJECTED; else return MGMT_STATUS_SUCCESS; @@ -1524,7 +1675,7 @@ static u8 mgmt_le_support(struct hci_dev *hdev) static void set_discoverable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; struct hci_request req; bool changed; @@ -1533,21 +1684,20 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); + cmd = pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); if (!cmd) goto unlock; if (status) { u8 mgmt_err = mgmt_status(status); - cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); goto remove_cmd; } cp = cmd->param; if (cp->val) { - changed = !test_and_set_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE); if (hdev->discov_timeout > 0) { int to = msecs_to_jiffies(hdev->discov_timeout * 1000); @@ -1555,8 +1705,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, to); } } else { - changed = test_and_clear_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE); } send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev); @@ -1585,7 +1734,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_discoverable *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u16 timeout; u8 scan; @@ -1593,14 +1742,14 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); timeout = __le16_to_cpu(cp->timeout); @@ -1609,27 +1758,27 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, */ if ((cp->val == 0x00 && timeout > 0) || (cp->val == 0x02 && timeout == 0)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev) && timeout > 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_NOT_POWERED); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || + pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_BUSY); goto failed; } - if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_CONNECTABLE)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_REJECTED); goto failed; } @@ -1640,8 +1789,8 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, * not a valid operation since it requires a timeout * and so no need to check HCI_LIMITED_DISCOVERABLE. */ - if (!!cp->val != test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) { - change_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + if (!!cp->val != hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) { + hci_dev_change_flag(hdev, HCI_DISCOVERABLE); changed = true; } @@ -1659,9 +1808,9 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, * value with the new value. And if only the timeout gets updated, * then no need for any HCI transactions. */ - if (!!cp->val == test_bit(HCI_DISCOVERABLE, &hdev->dev_flags) && - (cp->val == 0x02) == test_bit(HCI_LIMITED_DISCOVERABLE, - &hdev->dev_flags)) { + if (!!cp->val == hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && + (cp->val == 0x02) == hci_dev_test_flag(hdev, + HCI_LIMITED_DISCOVERABLE)) { cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = timeout; @@ -1690,16 +1839,16 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, /* Limited discoverable mode */ if (cp->val == 0x02) - set_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE); else - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_req_init(&req, hdev); /* The procedure for LE-only controllers is much simpler - just * update the advertising data. */ - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) goto update_ad; scan = SCAN_PAGE; @@ -1729,7 +1878,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, scan |= SCAN_INQUIRY; } else { - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); } hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); @@ -1752,7 +1901,7 @@ static void write_fast_connectable(struct hci_request *req, bool enable) struct hci_cp_write_page_scan_activity acp; u8 type; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return; if (hdev->hci_ver < BLUETOOTH_VER_1_2) @@ -1784,7 +1933,7 @@ static void write_fast_connectable(struct hci_request *req, bool enable) static void set_connectable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; bool conn_changed, discov_changed; @@ -1792,26 +1941,26 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); + cmd = pending_find(MGMT_OP_SET_CONNECTABLE, hdev); if (!cmd) goto unlock; if (status) { u8 mgmt_err = mgmt_status(status); - cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); goto remove_cmd; } cp = cmd->param; if (cp->val) { - conn_changed = !test_and_set_bit(HCI_CONNECTABLE, - &hdev->dev_flags); + conn_changed = !hci_dev_test_and_set_flag(hdev, + HCI_CONNECTABLE); discov_changed = false; } else { - conn_changed = test_and_clear_bit(HCI_CONNECTABLE, - &hdev->dev_flags); - discov_changed = test_and_clear_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + conn_changed = hci_dev_test_and_clear_flag(hdev, + HCI_CONNECTABLE); + discov_changed = hci_dev_test_and_clear_flag(hdev, + HCI_DISCOVERABLE); } send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); @@ -1837,14 +1986,14 @@ static int set_connectable_update_settings(struct hci_dev *hdev, bool changed = false; int err; - if (!!val != test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) + if (!!val != hci_dev_test_flag(hdev, HCI_CONNECTABLE)) changed = true; if (val) { - set_bit(HCI_CONNECTABLE, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_CONNECTABLE); } else { - clear_bit(HCI_CONNECTABLE, &hdev->dev_flags); - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_CONNECTABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); } err = send_settings_rsp(sk, MGMT_OP_SET_CONNECTABLE, hdev); @@ -1864,21 +2013,21 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u8 scan; int err; BT_DBG("request for %s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); @@ -1887,10 +2036,10 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || + pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_BUSY); goto failed; } @@ -1906,10 +2055,10 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, * by-product of disabling connectable, we need to update the * advertising flags. */ - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { if (!cp->val) { - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); } update_adv_data(&req); } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) { @@ -1938,17 +2087,8 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, } no_scan_update: - /* If we're going from non-connectable to connectable or - * vice-versa when fast connectable is enabled ensure that fast - * connectable gets disabled. write_fast_connectable won't do - * anything if the page scan parameters are already what they - * should be. - */ - if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) - write_fast_connectable(&req, false); - /* Update the advertising parameters if necessary */ - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) enable_advertising(&req); err = hci_req_run(&req, set_connectable_complete); @@ -1975,15 +2115,15 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val) - changed = !test_and_set_bit(HCI_BONDABLE, &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_BONDABLE); else - changed = test_and_clear_bit(HCI_BONDABLE, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_BONDABLE); err = send_settings_rsp(sk, MGMT_OP_SET_BONDABLE, hdev); if (err < 0) @@ -2001,7 +2141,7 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 val, status; int err; @@ -2009,21 +2149,20 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, status = mgmt_bredr_support(hdev); if (status) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, - status); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + status); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { bool changed = false; - if (!!cp->val != test_bit(HCI_LINK_SECURITY, - &hdev->dev_flags)) { - change_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + if (!!cp->val != hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) { + hci_dev_change_flag(hdev, HCI_LINK_SECURITY); changed = true; } @@ -2037,9 +2176,9 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_LINK_SECURITY, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_LINK_SECURITY, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + MGMT_STATUS_BUSY); goto failed; } @@ -2070,7 +2209,7 @@ failed: static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 status; int err; @@ -2078,15 +2217,15 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) status = mgmt_bredr_support(hdev); if (status) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, status); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, status); if (!lmp_ssp_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); @@ -2094,16 +2233,16 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) bool changed; if (cp->val) { - changed = !test_and_set_bit(HCI_SSP_ENABLED, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, + HCI_SSP_ENABLED); } else { - changed = test_and_clear_bit(HCI_SSP_ENABLED, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_SSP_ENABLED); if (!changed) - changed = test_and_clear_bit(HCI_HS_ENABLED, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_HS_ENABLED); else - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev); @@ -2116,14 +2255,13 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_SSP, hdev) || - mgmt_pending_find(MGMT_OP_SET_HS, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_SSP, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_BUSY); goto failed; } - if (!!cp->val == test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + if (!!cp->val == hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev); goto failed; } @@ -2134,7 +2272,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } - if (!cp->val && test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) + if (!cp->val && hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(cp->val), &cp->val); @@ -2160,32 +2298,38 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) status = mgmt_bredr_support(hdev); if (status) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status); if (!lmp_ssp_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_NOT_SUPPORTED); - if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); + if (pending_find(MGMT_OP_SET_SSP, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_BUSY); + goto unlock; + } + if (cp->val) { - changed = !test_and_set_bit(HCI_HS_ENABLED, &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_HS_ENABLED); } else { if (hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_REJECTED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_REJECTED); goto unlock; } - changed = test_and_clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_HS_ENABLED); } err = send_settings_rsp(sk, MGMT_OP_SET_HS, hdev); @@ -2226,7 +2370,7 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) * has actually been enabled. During power on, the * update in powered_update_hci will take care of it. */ - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { struct hci_request req; hci_req_init(&req, hdev); @@ -2244,7 +2388,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct hci_cp_write_le_host_supported hci_cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; u8 val, enabled; @@ -2252,17 +2396,29 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_INVALID_PARAMS); + + /* Bluetooth single mode LE only controllers or dual-mode + * controllers configured as LE only devices, do not allow + * switching LE off. These have either LE enabled explicitly + * or BR/EDR has been previously switched off. + * + * When trying to enable an already enabled LE, then gracefully + * send a positive response. Trying to disable it however will + * result into rejection. + */ + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + if (cp->val == 0x01) + return send_settings_rsp(sk, MGMT_OP_SET_LE, hdev); - /* LE-only devices do not allow toggling LE on/off */ - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_REJECTED); + } hci_dev_lock(hdev); @@ -2272,13 +2428,13 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; - if (val != test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { - change_bit(HCI_LE_ENABLED, &hdev->dev_flags); + if (val != hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + hci_dev_change_flag(hdev, HCI_LE_ENABLED); changed = true; } - if (!val && test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + if (!val && hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + hci_dev_clear_flag(hdev, HCI_ADVERTISING); changed = true; } @@ -2292,10 +2448,10 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto unlock; } - if (mgmt_pending_find(MGMT_OP_SET_LE, hdev) || - mgmt_pending_find(MGMT_OP_SET_ADVERTISING, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_LE, hdev) || + pending_find(MGMT_OP_SET_ADVERTISING, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_BUSY); goto unlock; } @@ -2313,7 +2469,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_cp.le = val; hci_cp.simul = 0x00; } else { - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(&req); } @@ -2337,7 +2493,7 @@ unlock: */ static bool pending_eir_or_class(struct hci_dev *hdev) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; list_for_each_entry(cmd, &hdev->mgmt_pending, list) { switch (cmd->opcode) { @@ -2373,16 +2529,16 @@ static u8 get_uuid_size(const u8 *uuid) static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; hci_dev_lock(hdev); - cmd = mgmt_pending_find(mgmt_op, hdev); + cmd = pending_find(mgmt_op, hdev); if (!cmd) goto unlock; - cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), - hdev->dev_class, 3); + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), hdev->dev_class, 3); mgmt_pending_remove(cmd); @@ -2400,7 +2556,7 @@ static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; struct bt_uuid *uuid; int err; @@ -2410,8 +2566,8 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID, - MGMT_STATUS_BUSY); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID, + MGMT_STATUS_BUSY); goto failed; } @@ -2437,8 +2593,8 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) if (err != -ENODATA) goto failed; - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0, - hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0, + hdev->dev_class, 3); goto failed; } @@ -2460,7 +2616,7 @@ static bool enable_service_cache(struct hci_dev *hdev) if (!hdev_is_powered(hdev)) return false; - if (!test_and_set_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { + if (!hci_dev_test_and_set_flag(hdev, HCI_SERVICE_CACHE)) { queue_delayed_work(hdev->workqueue, &hdev->service_cache, CACHE_TIMEOUT); return true; @@ -2480,7 +2636,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_uuid *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct bt_uuid *match, *tmp; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; struct hci_request req; @@ -2491,8 +2647,8 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, - MGMT_STATUS_BUSY); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, + MGMT_STATUS_BUSY); goto unlock; } @@ -2500,8 +2656,9 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, hci_uuids_clear(hdev); if (enable_service_cache(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, - 0, hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_UUID, + 0, hdev->dev_class, 3); goto unlock; } @@ -2520,8 +2677,8 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, } if (found == 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } @@ -2536,8 +2693,8 @@ update_class: if (err != -ENODATA) goto unlock; - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, - hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, + hdev->dev_class, 3); goto unlock; } @@ -2565,27 +2722,27 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_dev_class *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; BT_DBG("request for %s", hdev->name); if (!lmp_bredr_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_BUSY); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_BUSY); goto unlock; } if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } @@ -2593,14 +2750,14 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, hdev->minor_class = cp->minor; if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, - hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, + hdev->dev_class, 3); goto unlock; } hci_req_init(&req, hdev); - if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { + if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) { hci_dev_unlock(hdev); cancel_delayed_work_sync(&hdev->service_cache); hci_dev_lock(hdev); @@ -2614,8 +2771,8 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, if (err != -ENODATA) goto unlock; - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, - hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, + hdev->dev_class, 3); goto unlock; } @@ -2645,15 +2802,15 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); if (!lmp_bredr_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_NOT_SUPPORTED); key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { BT_ERR("load_link_keys: too big key_count value %u", key_count); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + key_count * @@ -2661,13 +2818,13 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, if (expected_len != len) { BT_ERR("load_link_keys: expected %u bytes, got %u bytes", expected_len, len); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); } if (cp->debug_keys != 0x00 && cp->debug_keys != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); BT_DBG("%s debug_keys %u key_count %u", hdev->name, cp->debug_keys, key_count); @@ -2676,8 +2833,9 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_link_key_info *key = &cp->keys[i]; if (key->addr.type != BDADDR_BREDR || key->type > 0x08) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); @@ -2685,11 +2843,10 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, hci_link_keys_clear(hdev); if (cp->debug_keys) - changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_KEEP_DEBUG_KEYS); else - changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_KEEP_DEBUG_KEYS); if (changed) new_settings(hdev, NULL); @@ -2707,7 +2864,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, key->type, key->pin_len, NULL); } - cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 0, NULL, 0); + mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 0, NULL, 0); hci_dev_unlock(hdev); @@ -2732,7 +2889,7 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_cp_unpair_device *cp = data; struct mgmt_rp_unpair_device rp; struct hci_cp_disconnect dc; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; @@ -2741,20 +2898,21 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); if (cp->disconnect != 0x00 && cp->disconnect != 0x01) - return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); goto unlock; } @@ -2804,8 +2962,9 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, } if (err < 0) { - err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, - MGMT_STATUS_NOT_PAIRED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_NOT_PAIRED, &rp, + sizeof(rp)); goto unlock; } @@ -2813,8 +2972,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, * link is requested. */ if (!conn) { - err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, 0, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, 0, + &rp, sizeof(rp)); device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, sk); goto unlock; } @@ -2844,7 +3003,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_disconnect *cp = data; struct mgmt_rp_disconnect rp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; @@ -2855,21 +3014,22 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); goto failed; } - if (mgmt_pending_find(MGMT_OP_DISCONNECT, hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_BUSY, &rp, sizeof(rp)); + if (pending_find(MGMT_OP_DISCONNECT, hdev)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto failed; } @@ -2880,8 +3040,9 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { - err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_CONNECTED, &rp, + sizeof(rp)); goto failed; } @@ -2935,8 +3096,8 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, + MGMT_STATUS_NOT_POWERED); goto unlock; } @@ -2969,8 +3130,8 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, /* Recalculate length in case of filtered SCO connections, etc */ rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info)); - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp, - rp_len); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp, + rp_len); kfree(rp); @@ -2982,7 +3143,7 @@ unlock: static int send_pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_pin_code_neg_reply *cp) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; int err; cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, hdev, cp, @@ -3004,7 +3165,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; struct mgmt_cp_pin_code_reply *cp = data; struct hci_cp_pin_code_reply reply; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; int err; BT_DBG(""); @@ -3012,15 +3173,15 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_NOT_POWERED); goto failed; } conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); if (!conn) { - err = cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, - MGMT_STATUS_NOT_CONNECTED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_NOT_CONNECTED); goto failed; } @@ -3033,8 +3194,8 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, err = send_pin_code_neg_reply(sk, hdev, &ncp); if (err >= 0) - err = cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_INVALID_PARAMS); goto failed; } @@ -3068,8 +3229,8 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) - return cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, - MGMT_STATUS_INVALID_PARAMS, NULL, 0); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS, NULL, 0); hci_dev_lock(hdev); @@ -3080,14 +3241,14 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); - return cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, 0, NULL, - 0); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, 0, + NULL, 0); } -static struct pending_cmd *find_pairing(struct hci_conn *conn) +static struct mgmt_pending_cmd *find_pairing(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; list_for_each_entry(cmd, &hdev->mgmt_pending, list) { if (cmd->opcode != MGMT_OP_PAIR_DEVICE) @@ -3102,7 +3263,7 @@ static struct pending_cmd *find_pairing(struct hci_conn *conn) return NULL; } -static int pairing_complete(struct pending_cmd *cmd, u8 status) +static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status) { struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; @@ -3111,8 +3272,8 @@ static int pairing_complete(struct pending_cmd *cmd, u8 status) bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, + status, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ conn->connect_cfm_cb = NULL; @@ -3134,7 +3295,7 @@ static int pairing_complete(struct pending_cmd *cmd, u8 status) void mgmt_smp_complete(struct hci_conn *conn, bool complete) { u8 status = complete ? MGMT_STATUS_SUCCESS : MGMT_STATUS_FAILED; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; cmd = find_pairing(conn); if (cmd) { @@ -3145,7 +3306,7 @@ void mgmt_smp_complete(struct hci_conn *conn, bool complete) static void pairing_complete_cb(struct hci_conn *conn, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status %u", status); @@ -3161,7 +3322,7 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status) static void le_pairing_complete_cb(struct hci_conn *conn, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status %u", status); @@ -3183,7 +3344,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_pair_device *cp = data; struct mgmt_rp_pair_device rp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 sec_level, auth_type; struct hci_conn *conn; int err; @@ -3195,20 +3356,28 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); if (cp->io_cap > SMP_IO_KEYBOARD_DISPLAY) - return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); + goto unlock; + } + + if (hci_bdaddr_is_paired(hdev, &cp->addr.bdaddr, cp->addr.type)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_ALREADY_PAIRED, &rp, + sizeof(rp)); goto unlock; } @@ -3249,19 +3418,22 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, if (PTR_ERR(conn) == -EBUSY) status = MGMT_STATUS_BUSY; + else if (PTR_ERR(conn) == -EOPNOTSUPP) + status = MGMT_STATUS_NOT_SUPPORTED; + else if (PTR_ERR(conn) == -ECONNREFUSED) + status = MGMT_STATUS_REJECTED; else status = MGMT_STATUS_CONNECT_FAILED; - err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - status, &rp, - sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + status, &rp, sizeof(rp)); goto unlock; } if (conn->connect_cfm_cb) { hci_conn_drop(conn); - err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_BUSY, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto unlock; } @@ -3305,7 +3477,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_addr_info *addr = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; @@ -3314,31 +3486,31 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, + MGMT_STATUS_NOT_POWERED); goto unlock; } - cmd = mgmt_pending_find(MGMT_OP_PAIR_DEVICE, hdev); + cmd = pending_find(MGMT_OP_PAIR_DEVICE, hdev); if (!cmd) { - err = cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } conn = cmd->user_data; if (bacmp(&addr->bdaddr, &conn->dst) != 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } cmd->cmd_complete(cmd, MGMT_STATUS_CANCELLED); mgmt_pending_remove(cmd); - err = cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0, - addr, sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0, + addr, sizeof(*addr)); unlock: hci_dev_unlock(hdev); return err; @@ -3348,16 +3520,16 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, struct mgmt_addr_info *addr, u16 mgmt_op, u16 hci_op, __le32 passkey) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, mgmt_op, - MGMT_STATUS_NOT_POWERED, addr, - sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_NOT_POWERED, addr, + sizeof(*addr)); goto done; } @@ -3367,22 +3539,22 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr); if (!conn) { - err = cmd_complete(sk, hdev->id, mgmt_op, - MGMT_STATUS_NOT_CONNECTED, addr, - sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_NOT_CONNECTED, addr, + sizeof(*addr)); goto done; } if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) { err = smp_user_confirm_reply(conn, mgmt_op, passkey); if (!err) - err = cmd_complete(sk, hdev->id, mgmt_op, - MGMT_STATUS_SUCCESS, addr, - sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_SUCCESS, addr, + sizeof(*addr)); else - err = cmd_complete(sk, hdev->id, mgmt_op, - MGMT_STATUS_FAILED, addr, - sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_FAILED, addr, + sizeof(*addr)); goto done; } @@ -3434,8 +3606,8 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); if (len != sizeof(*cp)) - return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, + MGMT_STATUS_INVALID_PARAMS); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_REPLY, @@ -3491,24 +3663,24 @@ static void update_name(struct hci_request *req) static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); + cmd = pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); if (!cmd) goto unlock; cp = cmd->param; if (status) - cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, - mgmt_status(status)); + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, + mgmt_status(status)); else - cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, - cp, sizeof(*cp)); + mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + cp, sizeof(*cp)); mgmt_pending_remove(cmd); @@ -3520,7 +3692,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_local_name *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; @@ -3534,8 +3706,8 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) && !memcmp(hdev->short_name, cp->short_name, sizeof(hdev->short_name))) { - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, - data, len); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + data, len); goto failed; } @@ -3544,13 +3716,13 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (!hdev_is_powered(hdev)) { memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name)); - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, - data, len); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + data, len); if (err < 0) goto failed; - err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, len, - sk); + err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, + data, len, sk); goto failed; } @@ -3588,7 +3760,7 @@ failed: static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; int err; BT_DBG("%s", hdev->name); @@ -3596,20 +3768,20 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_NOT_POWERED); goto unlock; } if (!lmp_ssp_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - MGMT_STATUS_NOT_SUPPORTED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_NOT_SUPPORTED); goto unlock; } - if (mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_BUSY); goto unlock; } @@ -3642,9 +3814,10 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s ", hdev->name); if (!bdaddr_type_is_valid(addr->type)) - return cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, addr, - sizeof(*addr)); + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + addr, sizeof(*addr)); hci_dev_lock(hdev); @@ -3653,10 +3826,10 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, u8 status; if (cp->addr.type != BDADDR_BREDR) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } @@ -3668,8 +3841,9 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, else status = MGMT_STATUS_SUCCESS; - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - status, &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, status, + &cp->addr, sizeof(cp->addr)); } else if (len == MGMT_ADD_REMOTE_OOB_EXT_DATA_SIZE) { struct mgmt_cp_add_remote_oob_ext_data *cp = data; u8 *rand192, *hash192, *rand256, *hash256; @@ -3681,10 +3855,10 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, */ if (memcmp(cp->rand192, ZERO_KEY, 16) || memcmp(cp->hash192, ZERO_KEY, 16)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - addr, sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + addr, sizeof(*addr)); goto unlock; } @@ -3724,12 +3898,13 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, else status = MGMT_STATUS_SUCCESS; - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - status, &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + status, &cp->addr, sizeof(cp->addr)); } else { BT_ERR("add_remote_oob_data: invalid length of %u bytes", len); - err = cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS); } unlock: @@ -3747,9 +3922,10 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (cp->addr.type != BDADDR_BREDR) - return cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); @@ -3766,100 +3942,136 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, status = MGMT_STATUS_SUCCESS; done: - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, - status, &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, + status, &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); return err; } -static bool trigger_discovery(struct hci_request *req, u8 *status) +static bool trigger_bredr_inquiry(struct hci_request *req, u8 *status) { struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - struct hci_cp_inquiry inq_cp; + struct hci_cp_inquiry cp; /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; + + *status = mgmt_bredr_support(hdev); + if (*status) + return false; + + if (hci_dev_test_flag(hdev, HCI_INQUIRY)) { + *status = MGMT_STATUS_BUSY; + return false; + } + + hci_inquiry_cache_flush(hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_BREDR_INQUIRY_LEN; + + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + return true; +} + +static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; u8 own_addr_type; int err; - switch (hdev->discovery.type) { - case DISCOV_TYPE_BREDR: - *status = mgmt_bredr_support(hdev); - if (*status) - return false; + *status = mgmt_le_support(hdev); + if (*status) + return false; - if (test_bit(HCI_INQUIRY, &hdev->flags)) { - *status = MGMT_STATUS_BUSY; + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { + /* Don't let discovery abort an outgoing connection attempt + * that's using directed advertising. + */ + if (hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + *status = MGMT_STATUS_REJECTED; return false; } - hci_inquiry_cache_flush(hdev); + disable_advertising(req); + } - memset(&inq_cp, 0, sizeof(inq_cp)); - memcpy(&inq_cp.lap, lap, sizeof(inq_cp.lap)); - inq_cp.length = DISCOV_BREDR_INQUIRY_LEN; - hci_req_add(req, HCI_OP_INQUIRY, sizeof(inq_cp), &inq_cp); - break; + /* If controller is scanning, it means the background scanning is + * running. Thus, we should temporarily stop it in order to set the + * discovery scanning parameters. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req); - case DISCOV_TYPE_LE: - case DISCOV_TYPE_INTERLEAVED: - *status = mgmt_le_support(hdev); - if (*status) - return false; + /* All active scans will be done with either a resolvable private + * address (when privacy feature has been enabled) or non-resolvable + * private address. + */ + err = hci_update_random_address(req, true, &own_addr_type); + if (err < 0) { + *status = MGMT_STATUS_FAILED; + return false; + } - if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && - !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { - *status = MGMT_STATUS_NOT_SUPPORTED; + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_ACTIVE; + param_cp.interval = cpu_to_le16(interval); + param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); + param_cp.own_address_type = own_addr_type; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); + + return true; +} + +static bool trigger_discovery(struct hci_request *req, u8 *status) +{ + struct hci_dev *hdev = req->hdev; + + switch (hdev->discovery.type) { + case DISCOV_TYPE_BREDR: + if (!trigger_bredr_inquiry(req, status)) return false; - } + break; - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) { - /* Don't let discovery abort an outgoing - * connection attempt that's using directed - * advertising. + case DISCOV_TYPE_INTERLEAVED: + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + /* During simultaneous discovery, we double LE scan + * interval. We must leave some time for the controller + * to do BR/EDR inquiry. */ - if (hci_conn_hash_lookup_state(hdev, LE_LINK, - BT_CONNECT)) { - *status = MGMT_STATUS_REJECTED; + if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT * 2, + status)) return false; - } - - disable_advertising(req); - } - /* If controller is scanning, it means the background scanning - * is running. Thus, we should temporarily stop it in order to - * set the discovery scanning parameters. - */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - hci_req_add_le_scan_disable(req); + if (!trigger_bredr_inquiry(req, status)) + return false; - memset(¶m_cp, 0, sizeof(param_cp)); + return true; + } - /* All active scans will be done with either a resolvable - * private address (when privacy feature has been enabled) - * or non-resolvable private address. - */ - err = hci_update_random_address(req, true, &own_addr_type); - if (err < 0) { - *status = MGMT_STATUS_FAILED; + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + *status = MGMT_STATUS_NOT_SUPPORTED; return false; } + /* fall through */ - param_cp.type = LE_SCAN_ACTIVE; - param_cp.interval = cpu_to_le16(DISCOV_LE_SCAN_INT); - param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); - param_cp.own_address_type = own_addr_type; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); + case DISCOV_TYPE_LE: + if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT, status)) + return false; break; default: @@ -3873,16 +4085,16 @@ static bool trigger_discovery(struct hci_request *req, u8 *status) static void start_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; unsigned long timeout; BT_DBG("status %d", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); + cmd = pending_find(MGMT_OP_START_DISCOVERY, hdev); if (!cmd) - cmd = mgmt_pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); + cmd = pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); @@ -3904,7 +4116,18 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); break; case DISCOV_TYPE_INTERLEAVED: - timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); + /* When running simultaneous discovery, the LE scanning time + * should occupy the whole discovery time sine BR/EDR inquiry + * and LE scanning are scheduled by the controller. + * + * For interleaving discovery in comparison, BR/EDR inquiry + * and LE scanning are done sequentially with separate + * timeouts. + */ + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); + else + timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); break; case DISCOV_TYPE_BREDR: timeout = 0; @@ -3923,8 +4146,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, */ if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && - (hdev->discovery.uuid_count > 0 || - hdev->discovery.rssi != HCI_RSSI_INVALID)) { + hdev->discovery.result_filtering) { hdev->discovery.scan_start = jiffies; hdev->discovery.scan_duration = timeout; } @@ -3941,7 +4163,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u8 status; int err; @@ -3951,17 +4173,17 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_NOT_POWERED, - &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_POWERED, + &cp->type, sizeof(cp->type)); goto failed; } if (hdev->discovery.state != DISCOVERY_STOPPED || - test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_BUSY, &cp->type, - sizeof(cp->type)); + hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_BUSY, &cp->type, + sizeof(cp->type)); goto failed; } @@ -3984,8 +4206,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); if (!trigger_discovery(&req, &status)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - status, &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + status, &cp->type, sizeof(cp->type)); mgmt_pending_remove(cmd); goto failed; } @@ -4003,17 +4225,18 @@ failed: return err; } -static int service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) +static int service_discovery_cmd_complete(struct mgmt_pending_cmd *cmd, + u8 status) { - return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, - cmd->param, 1); + return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, 1); } static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_start_service_discovery *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; const u16 max_uuid_count = ((U16_MAX - sizeof(*cp)) / 16); u16 uuid_count, expected_len; @@ -4025,19 +4248,19 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_NOT_POWERED, - &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_NOT_POWERED, + &cp->type, sizeof(cp->type)); goto failed; } if (hdev->discovery.state != DISCOVERY_STOPPED || - test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_BUSY, &cp->type, - sizeof(cp->type)); + hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_BUSY, &cp->type, + sizeof(cp->type)); goto failed; } @@ -4045,10 +4268,10 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, if (uuid_count > max_uuid_count) { BT_ERR("service_discovery: too big uuid_count value %u", uuid_count); - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_INVALID_PARAMS, &cp->type, - sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS, &cp->type, + sizeof(cp->type)); goto failed; } @@ -4056,10 +4279,10 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, if (expected_len != len) { BT_ERR("service_discovery: expected %u bytes, got %u bytes", expected_len, len); - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_INVALID_PARAMS, &cp->type, - sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS, &cp->type, + sizeof(cp->type)); goto failed; } @@ -4077,6 +4300,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, */ hci_discovery_filter_clear(hdev); + hdev->discovery.result_filtering = true; hdev->discovery.type = cp->type; hdev->discovery.rssi = cp->rssi; hdev->discovery.uuid_count = uuid_count; @@ -4085,10 +4309,10 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.uuids = kmemdup(cp->uuids, uuid_count * 16, GFP_KERNEL); if (!hdev->discovery.uuids) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_FAILED, - &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_FAILED, + &cp->type, sizeof(cp->type)); mgmt_pending_remove(cmd); goto failed; } @@ -4097,9 +4321,9 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); if (!trigger_discovery(&req, &status)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - status, &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + status, &cp->type, sizeof(cp->type)); mgmt_pending_remove(cmd); goto failed; } @@ -4119,13 +4343,13 @@ failed: static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status %d", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); + cmd = pending_find(MGMT_OP_STOP_DISCOVERY, hdev); if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); @@ -4141,7 +4365,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_stop_discovery *mgmt_cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; @@ -4150,16 +4374,16 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hci_discovery_active(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, - MGMT_STATUS_REJECTED, &mgmt_cp->type, - sizeof(mgmt_cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, + MGMT_STATUS_REJECTED, &mgmt_cp->type, + sizeof(mgmt_cp->type)); goto unlock; } if (hdev->discovery.type != mgmt_cp->type) { - err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, - MGMT_STATUS_INVALID_PARAMS, &mgmt_cp->type, - sizeof(mgmt_cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS, + &mgmt_cp->type, sizeof(mgmt_cp->type)); goto unlock; } @@ -4185,8 +4409,8 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, /* If no HCI commands were sent we're done */ if (err == -ENODATA) { - err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0, - &mgmt_cp->type, sizeof(mgmt_cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0, + &mgmt_cp->type, sizeof(mgmt_cp->type)); hci_discovery_set_state(hdev, DISCOVERY_STOPPED); } @@ -4207,17 +4431,17 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hci_discovery_active(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, - MGMT_STATUS_FAILED, &cp->addr, - sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, + MGMT_STATUS_FAILED, &cp->addr, + sizeof(cp->addr)); goto failed; } e = hci_inquiry_cache_lookup_unknown(hdev, &cp->addr.bdaddr); if (!e) { - err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, - MGMT_STATUS_INVALID_PARAMS, &cp->addr, - sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, + MGMT_STATUS_INVALID_PARAMS, &cp->addr, + sizeof(cp->addr)); goto failed; } @@ -4229,8 +4453,8 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_inquiry_cache_update_resolve(hdev, e); } - err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, &cp->addr, - sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, + &cp->addr, sizeof(cp->addr)); failed: hci_dev_unlock(hdev); @@ -4247,9 +4471,9 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); @@ -4265,8 +4489,8 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, status = MGMT_STATUS_SUCCESS; done: - err = cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, - &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, + &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); @@ -4283,9 +4507,9 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); @@ -4301,8 +4525,8 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, status = MGMT_STATUS_SUCCESS; done: - err = cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, - &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, + &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); @@ -4322,8 +4546,8 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, source = __le16_to_cpu(cp->source); if (source > 0x0002) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); @@ -4332,7 +4556,8 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, hdev->devid_product = __le16_to_cpu(cp->product); hdev->devid_version = __le16_to_cpu(cp->version); - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, + NULL, 0); hci_req_init(&req, hdev); update_eir(&req); @@ -4343,10 +4568,17 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, return err; } +static void enable_advertising_instance(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + BT_DBG("status %d", status); +} + static void set_advertising_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; + struct hci_request req; hci_dev_lock(hdev); @@ -4358,10 +4590,10 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, goto unlock; } - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) - set_bit(HCI_ADVERTISING, &hdev->dev_flags); + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + hci_dev_set_flag(hdev, HCI_ADVERTISING); else - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_ADVERTISING); mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, &match); @@ -4371,6 +4603,21 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, if (match.sk) sock_put(match.sk); + /* If "Set Advertising" was just disabled and instance advertising was + * set up earlier, then enable the advertising instance. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + goto unlock; + + hci_req_init(&req, hdev); + + update_adv_data(&req); + enable_advertising(&req); + + if (hci_req_run(&req, enable_advertising_instance) < 0) + BT_ERR("Failed to re-configure advertising"); + unlock: hci_dev_unlock(hdev); } @@ -4379,41 +4626,48 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; - u8 val, enabled, status; + u8 val, status; int err; BT_DBG("request for %s", hdev->name); status = mgmt_le_support(hdev); if (status) - return cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, - status); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + status); - if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, - MGMT_STATUS_INVALID_PARAMS); + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); val = !!cp->val; - enabled = test_bit(HCI_ADVERTISING, &hdev->dev_flags); /* The following conditions are ones which mean that we should * not do any HCI communication but directly send a mgmt * response to user space (after toggling the flag if * necessary). */ - if (!hdev_is_powered(hdev) || val == enabled || + if (!hdev_is_powered(hdev) || + (val == hci_dev_test_flag(hdev, HCI_ADVERTISING) && + (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) || hci_conn_num(hdev, LE_LINK) > 0 || - (test_bit(HCI_LE_SCAN, &hdev->dev_flags) && + (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE)) { - bool changed = false; + bool changed; - if (val != test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { - change_bit(HCI_ADVERTISING, &hdev->dev_flags); - changed = true; + if (cp->val) { + changed = !hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING); + if (cp->val == 0x02) + hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + else + hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + } else { + changed = hci_dev_test_and_clear_flag(hdev, HCI_ADVERTISING); + hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); } err = send_settings_rsp(sk, MGMT_OP_SET_ADVERTISING, hdev); @@ -4426,10 +4680,10 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - if (mgmt_pending_find(MGMT_OP_SET_ADVERTISING, hdev) || - mgmt_pending_find(MGMT_OP_SET_LE, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_ADVERTISING, hdev) || + pending_find(MGMT_OP_SET_LE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_BUSY); goto unlock; } @@ -4441,10 +4695,19 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_init(&req, hdev); - if (val) - enable_advertising(&req); + if (cp->val == 0x02) + hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); else + hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + + if (val) { + /* Switch to instance "0" for the Set Advertising setting. */ + update_adv_data_for_instance(&req, 0); + update_scan_rsp_data_for_instance(&req, 0); + enable_advertising(&req); + } else { disable_advertising(&req); + } err = hci_req_run(&req, set_advertising_complete); if (err < 0) @@ -4464,34 +4727,38 @@ static int set_static_address(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_NOT_SUPPORTED); if (hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_REJECTED); if (bacmp(&cp->bdaddr, BDADDR_ANY)) { if (!bacmp(&cp->bdaddr, BDADDR_NONE)) - return cmd_status(sk, hdev->id, - MGMT_OP_SET_STATIC_ADDRESS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); /* Two most significant bits shall be set */ if ((cp->bdaddr.b[5] & 0xc0) != 0xc0) - return cmd_status(sk, hdev->id, - MGMT_OP_SET_STATIC_ADDRESS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); bacpy(&hdev->static_addr, &cp->bdaddr); - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, 0, NULL, 0); + err = send_settings_rsp(sk, MGMT_OP_SET_STATIC_ADDRESS, hdev); + if (err < 0) + goto unlock; - hci_dev_unlock(hdev); + err = new_settings(hdev, sk); +unlock: + hci_dev_unlock(hdev); return err; } @@ -4505,36 +4772,37 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, + MGMT_STATUS_NOT_SUPPORTED); interval = __le16_to_cpu(cp->interval); if (interval < 0x0004 || interval > 0x4000) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, + MGMT_STATUS_INVALID_PARAMS); window = __le16_to_cpu(cp->window); if (window < 0x0004 || window > 0x4000) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, + MGMT_STATUS_INVALID_PARAMS); if (window > interval) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); hdev->le_scan_interval = interval; hdev->le_scan_window = window; - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, 0, NULL, 0); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, 0, + NULL, 0); /* If background scan is running, restart it so new parameters are * loaded. */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->discovery.state == DISCOVERY_STOPPED) { struct hci_request req; @@ -4554,26 +4822,26 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, static void fast_connectable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev); + cmd = pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev); if (!cmd) goto unlock; if (status) { - cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - mgmt_status(status)); + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + mgmt_status(status)); } else { struct mgmt_mode *cp = cmd->param; if (cp->val) - set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_FAST_CONNECTABLE); else - clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE); send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); new_settings(hdev, cmd->sk); @@ -4589,43 +4857,43 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; BT_DBG("%s", hdev->name); - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) || + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) || hdev->hci_ver < BLUETOOTH_VER_1_2) - return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_INVALID_PARAMS); - - if (!hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_NOT_POWERED); - - if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_BUSY); goto unlock; } - if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) { + if (!!cp->val == hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) { err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); goto unlock; } + if (!hdev_is_powered(hdev)) { + hci_dev_change_flag(hdev, HCI_FAST_CONNECTABLE); + err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, + hdev); + new_settings(hdev, sk); + goto unlock; + } + cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev, data, len); if (!cmd) { @@ -4639,8 +4907,8 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, err = hci_req_run(&req, fast_connectable_complete); if (err < 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_FAILED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_FAILED); mgmt_pending_remove(cmd); } @@ -4652,13 +4920,13 @@ unlock: static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_BREDR, hdev); + cmd = pending_find(MGMT_OP_SET_BREDR, hdev); if (!cmd) goto unlock; @@ -4668,9 +4936,9 @@ static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) /* We need to restore the flag if related HCI commands * failed. */ - clear_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); - cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); } else { send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); new_settings(hdev, cmd->sk); @@ -4685,41 +4953,41 @@ unlock: static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; BT_DBG("request for %s", hdev->name); if (!lmp_bredr_capable(hdev) || !lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_NOT_SUPPORTED); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (cp->val == test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (cp->val == hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev); goto unlock; } if (!hdev_is_powered(hdev)) { if (!cp->val) { - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); - clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags); - clear_bit(HCI_LINK_SECURITY, &hdev->dev_flags); - clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_SSP_ENABLED); + hci_dev_clear_flag(hdev, HCI_LINK_SECURITY); + hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE); + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } - change_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_change_flag(hdev, HCI_BREDR_ENABLED); err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev); if (err < 0) @@ -4731,8 +4999,8 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* Reject disabling when powered on */ if (!cp->val) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_REJECTED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); goto unlock; } else { /* When configuring a dual-mode controller to operate @@ -4749,18 +5017,18 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) * switching BR/EDR back on when secure connections has been * enabled is not a supported transaction. */ - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && (bacmp(&hdev->static_addr, BDADDR_ANY) || - test_bit(HCI_SC_ENABLED, &hdev->dev_flags))) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_REJECTED); + hci_dev_test_flag(hdev, HCI_SC_ENABLED))) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); goto unlock; } } - if (mgmt_pending_find(MGMT_OP_SET_BREDR, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_BREDR, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_BUSY); goto unlock; } @@ -4773,7 +5041,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* We need to flip the bit already here so that update_adv_data * generates the correct flags. */ - set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); hci_req_init(&req, hdev); @@ -4796,20 +5064,20 @@ unlock: static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; BT_DBG("%s status %u", hdev->name, status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev); + cmd = pending_find(MGMT_OP_SET_SECURE_CONN, hdev); if (!cmd) goto unlock; if (status) { - cmd_status(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(status)); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); goto remove; } @@ -4817,16 +5085,16 @@ static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) switch (cp->val) { case 0x00: - clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_SC_ENABLED); + hci_dev_clear_flag(hdev, HCI_SC_ONLY); break; case 0x01: - set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SC_ENABLED); + hci_dev_clear_flag(hdev, HCI_SC_ONLY); break; case 0x02: - set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - set_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SC_ENABLED); + hci_dev_set_flag(hdev, HCI_SC_ONLY); break; } @@ -4843,7 +5111,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u8 val; int err; @@ -4851,37 +5119,37 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, BT_DBG("request for %s", hdev->name); if (!lmp_sc_capable(hdev) && - !test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, - MGMT_STATUS_NOT_SUPPORTED); + !hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_NOT_SUPPORTED); - if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && lmp_sc_capable(hdev) && - !test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, - MGMT_STATUS_REJECTED); + !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) || - !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { bool changed; if (cp->val) { - changed = !test_and_set_bit(HCI_SC_ENABLED, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, + HCI_SC_ENABLED); if (cp->val == 0x02) - set_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SC_ONLY); else - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_SC_ONLY); } else { - changed = test_and_clear_bit(HCI_SC_ENABLED, - &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_SC_ENABLED); + hci_dev_clear_flag(hdev, HCI_SC_ONLY); } err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev); @@ -4894,16 +5162,16 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_SECURE_CONN, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_BUSY); goto failed; } val = !!cp->val; - if (val == test_bit(HCI_SC_ENABLED, &hdev->dev_flags) && - (cp->val == 0x02) == test_bit(HCI_SC_ONLY, &hdev->dev_flags)) { + if (val == hci_dev_test_flag(hdev, HCI_SC_ENABLED) && + (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_SC_ONLY)) { err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev); goto failed; } @@ -4937,27 +5205,26 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev, BT_DBG("request for %s", hdev->name); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val) - changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_KEEP_DEBUG_KEYS); else - changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_KEEP_DEBUG_KEYS); if (cp->val == 0x02) - use_changed = !test_and_set_bit(HCI_USE_DEBUG_KEYS, - &hdev->dev_flags); + use_changed = !hci_dev_test_and_set_flag(hdev, + HCI_USE_DEBUG_KEYS); else - use_changed = test_and_clear_bit(HCI_USE_DEBUG_KEYS, - &hdev->dev_flags); + use_changed = hci_dev_test_and_clear_flag(hdev, + HCI_USE_DEBUG_KEYS); if (hdev_is_powered(hdev) && use_changed && - test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { u8 mode = (cp->val == 0x02) ? 0x01 : 0x00; hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(mode), &mode); @@ -4985,32 +5252,32 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data, BT_DBG("request for %s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, + MGMT_STATUS_NOT_SUPPORTED); if (cp->privacy != 0x00 && cp->privacy != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, + MGMT_STATUS_INVALID_PARAMS); if (hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, + MGMT_STATUS_REJECTED); hci_dev_lock(hdev); /* If user space supports this command it is also expected to * handle IRKs. Therefore, set the HCI_RPA_RESOLVING flag. */ - set_bit(HCI_RPA_RESOLVING, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_RESOLVING); if (cp->privacy) { - changed = !test_and_set_bit(HCI_PRIVACY, &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY); memcpy(hdev->irk, cp->irk, sizeof(hdev->irk)); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); } else { - changed = test_and_clear_bit(HCI_PRIVACY, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY); memset(hdev->irk, 0, sizeof(hdev->irk)); - clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED); } err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev); @@ -5053,22 +5320,22 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, BT_DBG("request for %s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, + MGMT_STATUS_NOT_SUPPORTED); irk_count = __le16_to_cpu(cp->irk_count); if (irk_count > max_irk_count) { BT_ERR("load_irks: too big irk_count value %u", irk_count); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, + MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info); if (expected_len != len) { BT_ERR("load_irks: expected %u bytes, got %u bytes", expected_len, len); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s irk_count %u", hdev->name, irk_count); @@ -5077,9 +5344,9 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, struct mgmt_irk_info *key = &cp->irks[i]; if (!irk_is_valid(key)) - return cmd_status(sk, hdev->id, - MGMT_OP_LOAD_IRKS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_LOAD_IRKS, + MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); @@ -5099,9 +5366,9 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, BDADDR_ANY); } - set_bit(HCI_RPA_RESOLVING, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_RESOLVING); - err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_IRKS, 0, NULL, 0); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_IRKS, 0, NULL, 0); hci_dev_unlock(hdev); @@ -5139,14 +5406,14 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, BT_DBG("request for %s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_NOT_SUPPORTED); key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { BT_ERR("load_ltks: too big key_count value %u", key_count); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + key_count * @@ -5154,8 +5421,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, if (expected_len != len) { BT_ERR("load_keys: expected %u bytes, got %u bytes", expected_len, len); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s key_count %u", hdev->name, key_count); @@ -5164,9 +5431,9 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, struct mgmt_ltk_info *key = &cp->keys[i]; if (!ltk_is_valid(key)) - return cmd_status(sk, hdev->id, - MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); @@ -5211,7 +5478,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, key->rand); } - err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, NULL, 0); hci_dev_unlock(hdev); @@ -5219,7 +5486,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } -static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int conn_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_conn_info rp; @@ -5237,8 +5504,8 @@ static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) rp.max_tx_power = HCI_TX_POWER_INVALID; } - err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, + status, &rp, sizeof(rp)); hci_conn_drop(conn); hci_conn_put(conn); @@ -5250,7 +5517,7 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, u16 opcode) { struct hci_cp_read_rssi *cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; u16 handle; u8 status; @@ -5288,7 +5555,7 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, goto unlock; } - cmd = mgmt_pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn); + cmd = pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn); if (!cmd) goto unlock; @@ -5315,15 +5582,16 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); goto unlock; } @@ -5334,14 +5602,15 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state != BT_CONNECTED) { - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_CONNECTED, &rp, + sizeof(rp)); goto unlock; } - if (mgmt_pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_BUSY, &rp, sizeof(rp)); + if (pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto unlock; } @@ -5361,7 +5630,7 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_request req; struct hci_cp_read_tx_power req_txp_cp; struct hci_cp_read_rssi req_rssi_cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; hci_req_init(&req, hdev); req_rssi_cp.handle = cpu_to_le16(conn->handle); @@ -5409,8 +5678,8 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, rp.tx_power = conn->tx_power; rp.max_tx_power = conn->max_tx_power; - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); } unlock: @@ -5418,7 +5687,7 @@ unlock: return err; } -static int clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int clock_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_clock_info rp; @@ -5443,8 +5712,8 @@ static int clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) } complete: - err = cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, - sizeof(rp)); + err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + sizeof(rp)); if (conn) { hci_conn_drop(conn); @@ -5457,7 +5726,7 @@ complete: static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_cp_read_clock *hci_cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; BT_DBG("%s status %u", hdev->name, status); @@ -5475,7 +5744,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) conn = NULL; } - cmd = mgmt_pending_find_data(MGMT_OP_GET_CLOCK_INFO, hdev, conn); + cmd = pending_find_data(MGMT_OP_GET_CLOCK_INFO, hdev, conn); if (!cmd) goto unlock; @@ -5492,7 +5761,7 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_cp_get_clock_info *cp = data; struct mgmt_rp_get_clock_info rp; struct hci_cp_read_clock hci_cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; struct hci_conn *conn; int err; @@ -5504,15 +5773,16 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (cp->addr.type != BDADDR_BREDR) - return cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); goto unlock; } @@ -5520,10 +5790,10 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); if (!conn || conn->state != BT_CONNECTED) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_GET_CLOCK_INFO, - MGMT_STATUS_NOT_CONNECTED, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_NOT_CONNECTED, + &rp, sizeof(rp)); goto unlock; } } else { @@ -5634,13 +5904,13 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_ADD_DEVICE, hdev); + cmd = pending_find(MGMT_OP_ADD_DEVICE, hdev); if (!cmd) goto unlock; @@ -5655,7 +5925,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5664,14 +5934,14 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (!bdaddr_type_is_valid(cp->addr.type) || !bacmp(&cp->addr.bdaddr, BDADDR_ANY)) - return cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); if (cp->action != 0x00 && cp->action != 0x01 && cp->action != 0x02) - return cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); hci_req_init(&req, hdev); @@ -5757,13 +6027,13 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_REMOVE_DEVICE, hdev); + cmd = pending_find(MGMT_OP_REMOVE_DEVICE, hdev); if (!cmd) goto unlock; @@ -5778,7 +6048,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; @@ -5911,15 +6181,15 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, int i; if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_NOT_SUPPORTED); param_count = __le16_to_cpu(cp->param_count); if (param_count > max_param_count) { BT_ERR("load_conn_param: too big param_count value %u", param_count); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + param_count * @@ -5927,8 +6197,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, if (expected_len != len) { BT_ERR("load_conn_param: expected %u bytes, got %u bytes", expected_len, len); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s param_count %u", hdev->name, param_count); @@ -5983,7 +6253,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); - return cmd_complete(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 0, NULL, 0); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 0, + NULL, 0); } static int set_external_config(struct sock *sk, struct hci_dev *hdev, @@ -5996,25 +6267,23 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_REJECTED); if (cp->config != 0x00 && cp->config != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_INVALID_PARAMS); if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); if (cp->config) - changed = !test_and_set_bit(HCI_EXT_CONFIGURED, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_EXT_CONFIGURED); else - changed = test_and_clear_bit(HCI_EXT_CONFIGURED, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_EXT_CONFIGURED); err = send_options_rsp(sk, MGMT_OP_SET_EXTERNAL_CONFIG, hdev); if (err < 0) @@ -6025,12 +6294,12 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev, err = new_options(hdev, sk); - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) == is_configured(hdev)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) == is_configured(hdev)) { mgmt_index_removed(hdev); - if (test_and_change_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { - set_bit(HCI_CONFIG, &hdev->dev_flags); - set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + if (hci_dev_test_and_change_flag(hdev, HCI_UNCONFIGURED)) { + hci_dev_set_flag(hdev, HCI_CONFIG); + hci_dev_set_flag(hdev, HCI_AUTO_OFF); queue_work(hdev->req_workqueue, &hdev->power_on); } else { @@ -6054,16 +6323,16 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_REJECTED); if (!bacmp(&cp->bdaddr, BDADDR_ANY)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); if (!hdev->set_bdaddr) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); @@ -6077,16 +6346,16 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev, if (!changed) goto unlock; - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) err = new_options(hdev, sk); if (is_configured(hdev)) { mgmt_index_removed(hdev); - clear_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_UNCONFIGURED); - set_bit(HCI_CONFIG, &hdev->dev_flags); - set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_CONFIG); + hci_dev_set_flag(hdev, HCI_AUTO_OFF); queue_work(hdev->req_workqueue, &hdev->power_on); } @@ -6096,213 +6365,694 @@ unlock: return err; } -static const struct mgmt_handler { - int (*func) (struct sock *sk, struct hci_dev *hdev, void *data, - u16 data_len); - bool var_len; - size_t data_len; -} mgmt_handlers[] = { - { NULL }, /* 0x0000 (no command) */ - { read_version, false, MGMT_READ_VERSION_SIZE }, - { read_commands, false, MGMT_READ_COMMANDS_SIZE }, - { read_index_list, false, MGMT_READ_INDEX_LIST_SIZE }, - { read_controller_info, false, MGMT_READ_INFO_SIZE }, - { set_powered, false, MGMT_SETTING_SIZE }, - { set_discoverable, false, MGMT_SET_DISCOVERABLE_SIZE }, - { set_connectable, false, MGMT_SETTING_SIZE }, - { set_fast_connectable, false, MGMT_SETTING_SIZE }, - { set_bondable, false, MGMT_SETTING_SIZE }, - { set_link_security, false, MGMT_SETTING_SIZE }, - { set_ssp, false, MGMT_SETTING_SIZE }, - { set_hs, false, MGMT_SETTING_SIZE }, - { set_le, false, MGMT_SETTING_SIZE }, - { set_dev_class, false, MGMT_SET_DEV_CLASS_SIZE }, - { set_local_name, false, MGMT_SET_LOCAL_NAME_SIZE }, - { add_uuid, false, MGMT_ADD_UUID_SIZE }, - { remove_uuid, false, MGMT_REMOVE_UUID_SIZE }, - { load_link_keys, true, MGMT_LOAD_LINK_KEYS_SIZE }, - { load_long_term_keys, true, MGMT_LOAD_LONG_TERM_KEYS_SIZE }, - { disconnect, false, MGMT_DISCONNECT_SIZE }, - { get_connections, false, MGMT_GET_CONNECTIONS_SIZE }, - { pin_code_reply, false, MGMT_PIN_CODE_REPLY_SIZE }, - { pin_code_neg_reply, false, MGMT_PIN_CODE_NEG_REPLY_SIZE }, - { set_io_capability, false, MGMT_SET_IO_CAPABILITY_SIZE }, - { pair_device, false, MGMT_PAIR_DEVICE_SIZE }, - { cancel_pair_device, false, MGMT_CANCEL_PAIR_DEVICE_SIZE }, - { unpair_device, false, MGMT_UNPAIR_DEVICE_SIZE }, - { user_confirm_reply, false, MGMT_USER_CONFIRM_REPLY_SIZE }, - { user_confirm_neg_reply, false, MGMT_USER_CONFIRM_NEG_REPLY_SIZE }, - { user_passkey_reply, false, MGMT_USER_PASSKEY_REPLY_SIZE }, - { user_passkey_neg_reply, false, MGMT_USER_PASSKEY_NEG_REPLY_SIZE }, - { read_local_oob_data, false, MGMT_READ_LOCAL_OOB_DATA_SIZE }, - { add_remote_oob_data, true, MGMT_ADD_REMOTE_OOB_DATA_SIZE }, - { remove_remote_oob_data, false, MGMT_REMOVE_REMOTE_OOB_DATA_SIZE }, - { start_discovery, false, MGMT_START_DISCOVERY_SIZE }, - { stop_discovery, false, MGMT_STOP_DISCOVERY_SIZE }, - { confirm_name, false, MGMT_CONFIRM_NAME_SIZE }, - { block_device, false, MGMT_BLOCK_DEVICE_SIZE }, - { unblock_device, false, MGMT_UNBLOCK_DEVICE_SIZE }, - { set_device_id, false, MGMT_SET_DEVICE_ID_SIZE }, - { set_advertising, false, MGMT_SETTING_SIZE }, - { set_bredr, false, MGMT_SETTING_SIZE }, - { set_static_address, false, MGMT_SET_STATIC_ADDRESS_SIZE }, - { set_scan_params, false, MGMT_SET_SCAN_PARAMS_SIZE }, - { set_secure_conn, false, MGMT_SETTING_SIZE }, - { set_debug_keys, false, MGMT_SETTING_SIZE }, - { set_privacy, false, MGMT_SET_PRIVACY_SIZE }, - { load_irks, true, MGMT_LOAD_IRKS_SIZE }, - { get_conn_info, false, MGMT_GET_CONN_INFO_SIZE }, - { get_clock_info, false, MGMT_GET_CLOCK_INFO_SIZE }, - { add_device, false, MGMT_ADD_DEVICE_SIZE }, - { remove_device, false, MGMT_REMOVE_DEVICE_SIZE }, - { load_conn_param, true, MGMT_LOAD_CONN_PARAM_SIZE }, - { read_unconf_index_list, false, MGMT_READ_UNCONF_INDEX_LIST_SIZE }, - { read_config_info, false, MGMT_READ_CONFIG_INFO_SIZE }, - { set_external_config, false, MGMT_SET_EXTERNAL_CONFIG_SIZE }, - { set_public_address, false, MGMT_SET_PUBLIC_ADDRESS_SIZE }, - { start_service_discovery,true, MGMT_START_SERVICE_DISCOVERY_SIZE }, -}; - -int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, + u8 data_len) { - void *buf; - u8 *cp; - struct mgmt_hdr *hdr; - u16 opcode, index, len; - struct hci_dev *hdev = NULL; - const struct mgmt_handler *handler; - int err; + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; - BT_DBG("got %zu bytes", msglen); + return eir_len; +} - if (msglen < sizeof(*hdr)) - return -EINVAL; +static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_read_local_oob_ext_data *cp = data; + struct mgmt_rp_read_local_oob_ext_data *rp; + size_t rp_len; + u16 eir_len; + u8 status, flags, role, addr[7], hash[16], rand[16]; + int err; - buf = kmalloc(msglen, GFP_KERNEL); - if (!buf) - return -ENOMEM; + BT_DBG("%s", hdev->name); - if (memcpy_from_msg(buf, msg, msglen)) { - err = -EFAULT; - goto done; + if (!hdev_is_powered(hdev)) + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + MGMT_STATUS_NOT_POWERED, + &cp->type, sizeof(cp->type)); + + switch (cp->type) { + case BIT(BDADDR_BREDR): + status = mgmt_bredr_support(hdev); + if (status) + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + status, &cp->type, + sizeof(cp->type)); + eir_len = 5; + break; + case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)): + status = mgmt_le_support(hdev); + if (status) + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + status, &cp->type, + sizeof(cp->type)); + eir_len = 9 + 3 + 18 + 18 + 3; + break; + default: + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + MGMT_STATUS_INVALID_PARAMS, + &cp->type, sizeof(cp->type)); } - hdr = buf; - opcode = __le16_to_cpu(hdr->opcode); - index = __le16_to_cpu(hdr->index); - len = __le16_to_cpu(hdr->len); + hci_dev_lock(hdev); - if (len != msglen - sizeof(*hdr)) { - err = -EINVAL; - goto done; + rp_len = sizeof(*rp) + eir_len; + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { + hci_dev_unlock(hdev); + return -ENOMEM; } - if (index != MGMT_INDEX_NONE) { - hdev = hci_dev_get(index); - if (!hdev) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); + eir_len = 0; + switch (cp->type) { + case BIT(BDADDR_BREDR): + eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + break; + case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)): + if (hci_dev_test_flag(hdev, HCI_SC_ENABLED) && + smp_generate_oob(hdev, hash, rand) < 0) { + hci_dev_unlock(hdev); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + MGMT_STATUS_FAILED, + &cp->type, sizeof(cp->type)); goto done; } - if (test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { + memcpy(addr, &hdev->rpa, 6); + addr[6] = 0x01; + } else if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + memcpy(addr, &hdev->static_addr, 6); + addr[6] = 0x01; + } else { + memcpy(addr, &hdev->bdaddr, 6); + addr[6] = 0x00; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && - opcode != MGMT_OP_READ_CONFIG_INFO && - opcode != MGMT_OP_SET_EXTERNAL_CONFIG && - opcode != MGMT_OP_SET_PUBLIC_ADDRESS) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_BDADDR, + addr, sizeof(addr)); + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + role = 0x02; + else + role = 0x01; + + eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_ROLE, + &role, sizeof(role)); + + if (hci_dev_test_flag(hdev, HCI_SC_ENABLED)) { + eir_len = eir_append_data(rp->eir, eir_len, + EIR_LE_SC_CONFIRM, + hash, sizeof(hash)); + + eir_len = eir_append_data(rp->eir, eir_len, + EIR_LE_SC_RANDOM, + rand, sizeof(rand)); } + + flags = get_adv_discov_flags(hdev); + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + + eir_len = eir_append_data(rp->eir, eir_len, EIR_FLAGS, + &flags, sizeof(flags)); + break; } - if (opcode >= ARRAY_SIZE(mgmt_handlers) || - mgmt_handlers[opcode].func == NULL) { - BT_DBG("Unknown op %u", opcode); - err = cmd_status(sk, index, opcode, - MGMT_STATUS_UNKNOWN_COMMAND); + rp->type = cp->type; + rp->eir_len = cpu_to_le16(eir_len); + + hci_dev_unlock(hdev); + + hci_sock_set_flag(sk, HCI_MGMT_OOB_DATA_EVENTS); + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + MGMT_STATUS_SUCCESS, rp, sizeof(*rp) + eir_len); + if (err < 0) goto done; + + err = mgmt_limited_event(MGMT_EV_LOCAL_OOB_DATA_UPDATED, hdev, + rp, sizeof(*rp) + eir_len, + HCI_MGMT_OOB_DATA_EVENTS, sk); + +done: + kfree(rp); + + return err; +} + +static u32 get_supported_adv_flags(struct hci_dev *hdev) +{ + u32 flags = 0; + + flags |= MGMT_ADV_FLAG_CONNECTABLE; + flags |= MGMT_ADV_FLAG_DISCOV; + flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; + flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) + flags |= MGMT_ADV_FLAG_TX_POWER; + + return flags; +} + +static int read_adv_features(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_adv_features *rp; + size_t rp_len; + int err; + bool instance; + u32 supported_flags; + + BT_DBG("%s", hdev->name); + + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, + MGMT_STATUS_REJECTED); + + hci_dev_lock(hdev); + + rp_len = sizeof(*rp); + + /* Currently only one instance is supported, so just add 1 to the + * response length. + */ + instance = hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE); + if (instance) + rp_len++; + + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { + hci_dev_unlock(hdev); + return -ENOMEM; } - if (hdev && (opcode <= MGMT_OP_READ_INDEX_LIST || - opcode == MGMT_OP_READ_UNCONF_INDEX_LIST)) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + supported_flags = get_supported_adv_flags(hdev); + + rp->supported_flags = cpu_to_le32(supported_flags); + rp->max_adv_data_len = HCI_MAX_AD_LENGTH; + rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH; + rp->max_instances = 1; + + /* Currently only one instance is supported, so simply return the + * current instance number. + */ + if (instance) { + rp->num_instances = 1; + rp->instance[0] = 1; + } else { + rp->num_instances = 0; } - if (!hdev && (opcode > MGMT_OP_READ_INDEX_LIST && - opcode != MGMT_OP_READ_UNCONF_INDEX_LIST)) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + hci_dev_unlock(hdev); + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, + MGMT_STATUS_SUCCESS, rp, rp_len); + + kfree(rp); + + return err; +} + +static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, + u8 len, bool is_adv_data) +{ + u8 max_len = HCI_MAX_AD_LENGTH; + int i, cur_len; + bool flags_managed = false; + bool tx_power_managed = false; + u32 flags_params = MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS; + + if (is_adv_data && (adv_flags & flags_params)) { + flags_managed = true; + max_len -= 3; } - handler = &mgmt_handlers[opcode]; + if (is_adv_data && (adv_flags & MGMT_ADV_FLAG_TX_POWER)) { + tx_power_managed = true; + max_len -= 3; + } - if ((handler->var_len && len < handler->data_len) || - (!handler->var_len && len != handler->data_len)) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_PARAMS); - goto done; + if (len > max_len) + return false; + + /* Make sure that the data is correctly formatted. */ + for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { + cur_len = data[i]; + + if (flags_managed && data[i + 1] == EIR_FLAGS) + return false; + + if (tx_power_managed && data[i + 1] == EIR_TX_POWER) + return false; + + /* If the current field length would exceed the total data + * length, then it's invalid. + */ + if (i + cur_len >= len) + return false; + } + + return true; +} + +static void add_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + struct mgmt_pending_cmd *cmd; + struct mgmt_rp_add_advertising rp; + + BT_DBG("status %d", status); + + hci_dev_lock(hdev); + + cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev); + + if (status) { + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); + advertising_removed(cmd ? cmd->sk : NULL, hdev, 1); + } + + if (!cmd) + goto unlock; + + rp.instance = 0x01; + + if (status) + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + else + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), &rp, sizeof(rp)); + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + +static void adv_timeout_expired(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + adv_instance.timeout_exp.work); + + hdev->adv_instance.timeout = 0; + + hci_dev_lock(hdev); + clear_adv_instance(hdev); + hci_dev_unlock(hdev); +} + +static int add_advertising(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_add_advertising *cp = data; + struct mgmt_rp_add_advertising rp; + u32 flags; + u32 supported_flags; + u8 status; + u16 timeout; + int err; + struct mgmt_pending_cmd *cmd; + struct hci_request req; + + BT_DBG("%s", hdev->name); + + status = mgmt_le_support(hdev); + if (status) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + status); + + flags = __le32_to_cpu(cp->flags); + timeout = __le16_to_cpu(cp->timeout); + + /* The current implementation only supports adding one instance and only + * a subset of the specified flags. + */ + supported_flags = get_supported_adv_flags(hdev); + if (cp->instance != 0x01 || (flags & ~supported_flags)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (timeout && !hdev_is_powered(hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_REJECTED); + goto unlock; + } + + if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) || + pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) || + pending_find(MGMT_OP_SET_LE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_BUSY); + goto unlock; } - if (hdev) - mgmt_init_hdev(sk, hdev); + if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, + cp->scan_rsp_len, false)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + goto unlock; + } - cp = buf + sizeof(*hdr); + INIT_DELAYED_WORK(&hdev->adv_instance.timeout_exp, adv_timeout_expired); - err = handler->func(sk, hdev, cp, len); + hdev->adv_instance.flags = flags; + hdev->adv_instance.adv_data_len = cp->adv_data_len; + hdev->adv_instance.scan_rsp_len = cp->scan_rsp_len; + + if (cp->adv_data_len) + memcpy(hdev->adv_instance.adv_data, cp->data, cp->adv_data_len); + + if (cp->scan_rsp_len) + memcpy(hdev->adv_instance.scan_rsp_data, + cp->data + cp->adv_data_len, cp->scan_rsp_len); + + if (hdev->adv_instance.timeout) + cancel_delayed_work(&hdev->adv_instance.timeout_exp); + + hdev->adv_instance.timeout = timeout; + + if (timeout) + queue_delayed_work(hdev->workqueue, + &hdev->adv_instance.timeout_exp, + msecs_to_jiffies(timeout * 1000)); + + if (!hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING_INSTANCE)) + advertising_added(sk, hdev, 1); + + /* If the HCI_ADVERTISING flag is set or the device isn't powered then + * we have no HCI communication to make. Simply return. + */ + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + rp.instance = 0x01; + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + goto unlock; + } + + /* We're good to go, update advertising data, parameters, and start + * advertising. + */ + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_ADVERTISING, hdev, data, + data_len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_req_init(&req, hdev); + + update_adv_data(&req); + update_scan_rsp_data(&req); + enable_advertising(&req); + + err = hci_req_run(&req, add_advertising_complete); if (err < 0) - goto done; + mgmt_pending_remove(cmd); - err = msglen; +unlock: + hci_dev_unlock(hdev); -done: - if (hdev) - hci_dev_put(hdev); + return err; +} + +static void remove_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + struct mgmt_pending_cmd *cmd; + struct mgmt_rp_remove_advertising rp; + + BT_DBG("status %d", status); + + hci_dev_lock(hdev); + + /* A failure status here only means that we failed to disable + * advertising. Otherwise, the advertising instance has been removed, + * so report success. + */ + cmd = pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev); + if (!cmd) + goto unlock; + + rp.instance = 1; + + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, MGMT_STATUS_SUCCESS, + &rp, sizeof(rp)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + +static int remove_advertising(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_remove_advertising *cp = data; + struct mgmt_rp_remove_advertising rp; + int err; + struct mgmt_pending_cmd *cmd; + struct hci_request req; + + BT_DBG("%s", hdev->name); + + /* The current implementation only allows modifying instance no 1. A + * value of 0 indicates that all instances should be cleared. + */ + if (cp->instance > 1) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) || + pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) || + pending_find(MGMT_OP_SET_LE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_BUSY); + goto unlock; + } + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + goto unlock; + } + + if (hdev->adv_instance.timeout) + cancel_delayed_work(&hdev->adv_instance.timeout_exp); + + memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); + + advertising_removed(sk, hdev, 1); + + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + + /* If the HCI_ADVERTISING flag is set or the device isn't powered then + * we have no HCI communication to make. Simply return. + */ + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + rp.instance = 1; + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADVERTISING, hdev, data, + data_len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_req_init(&req, hdev); + disable_advertising(&req); + + err = hci_req_run(&req, remove_advertising_complete); + if (err < 0) + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); - kfree(buf); return err; } +static const struct hci_mgmt_handler mgmt_handlers[] = { + { NULL }, /* 0x0000 (no command) */ + { read_version, MGMT_READ_VERSION_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_commands, MGMT_READ_COMMANDS_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_index_list, MGMT_READ_INDEX_LIST_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_controller_info, MGMT_READ_INFO_SIZE, + HCI_MGMT_UNTRUSTED }, + { set_powered, MGMT_SETTING_SIZE }, + { set_discoverable, MGMT_SET_DISCOVERABLE_SIZE }, + { set_connectable, MGMT_SETTING_SIZE }, + { set_fast_connectable, MGMT_SETTING_SIZE }, + { set_bondable, MGMT_SETTING_SIZE }, + { set_link_security, MGMT_SETTING_SIZE }, + { set_ssp, MGMT_SETTING_SIZE }, + { set_hs, MGMT_SETTING_SIZE }, + { set_le, MGMT_SETTING_SIZE }, + { set_dev_class, MGMT_SET_DEV_CLASS_SIZE }, + { set_local_name, MGMT_SET_LOCAL_NAME_SIZE }, + { add_uuid, MGMT_ADD_UUID_SIZE }, + { remove_uuid, MGMT_REMOVE_UUID_SIZE }, + { load_link_keys, MGMT_LOAD_LINK_KEYS_SIZE, + HCI_MGMT_VAR_LEN }, + { load_long_term_keys, MGMT_LOAD_LONG_TERM_KEYS_SIZE, + HCI_MGMT_VAR_LEN }, + { disconnect, MGMT_DISCONNECT_SIZE }, + { get_connections, MGMT_GET_CONNECTIONS_SIZE }, + { pin_code_reply, MGMT_PIN_CODE_REPLY_SIZE }, + { pin_code_neg_reply, MGMT_PIN_CODE_NEG_REPLY_SIZE }, + { set_io_capability, MGMT_SET_IO_CAPABILITY_SIZE }, + { pair_device, MGMT_PAIR_DEVICE_SIZE }, + { cancel_pair_device, MGMT_CANCEL_PAIR_DEVICE_SIZE }, + { unpair_device, MGMT_UNPAIR_DEVICE_SIZE }, + { user_confirm_reply, MGMT_USER_CONFIRM_REPLY_SIZE }, + { user_confirm_neg_reply, MGMT_USER_CONFIRM_NEG_REPLY_SIZE }, + { user_passkey_reply, MGMT_USER_PASSKEY_REPLY_SIZE }, + { user_passkey_neg_reply, MGMT_USER_PASSKEY_NEG_REPLY_SIZE }, + { read_local_oob_data, MGMT_READ_LOCAL_OOB_DATA_SIZE }, + { add_remote_oob_data, MGMT_ADD_REMOTE_OOB_DATA_SIZE, + HCI_MGMT_VAR_LEN }, + { remove_remote_oob_data, MGMT_REMOVE_REMOTE_OOB_DATA_SIZE }, + { start_discovery, MGMT_START_DISCOVERY_SIZE }, + { stop_discovery, MGMT_STOP_DISCOVERY_SIZE }, + { confirm_name, MGMT_CONFIRM_NAME_SIZE }, + { block_device, MGMT_BLOCK_DEVICE_SIZE }, + { unblock_device, MGMT_UNBLOCK_DEVICE_SIZE }, + { set_device_id, MGMT_SET_DEVICE_ID_SIZE }, + { set_advertising, MGMT_SETTING_SIZE }, + { set_bredr, MGMT_SETTING_SIZE }, + { set_static_address, MGMT_SET_STATIC_ADDRESS_SIZE }, + { set_scan_params, MGMT_SET_SCAN_PARAMS_SIZE }, + { set_secure_conn, MGMT_SETTING_SIZE }, + { set_debug_keys, MGMT_SETTING_SIZE }, + { set_privacy, MGMT_SET_PRIVACY_SIZE }, + { load_irks, MGMT_LOAD_IRKS_SIZE, + HCI_MGMT_VAR_LEN }, + { get_conn_info, MGMT_GET_CONN_INFO_SIZE }, + { get_clock_info, MGMT_GET_CLOCK_INFO_SIZE }, + { add_device, MGMT_ADD_DEVICE_SIZE }, + { remove_device, MGMT_REMOVE_DEVICE_SIZE }, + { load_conn_param, MGMT_LOAD_CONN_PARAM_SIZE, + HCI_MGMT_VAR_LEN }, + { read_unconf_index_list, MGMT_READ_UNCONF_INDEX_LIST_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_config_info, MGMT_READ_CONFIG_INFO_SIZE, + HCI_MGMT_UNCONFIGURED | + HCI_MGMT_UNTRUSTED }, + { set_external_config, MGMT_SET_EXTERNAL_CONFIG_SIZE, + HCI_MGMT_UNCONFIGURED }, + { set_public_address, MGMT_SET_PUBLIC_ADDRESS_SIZE, + HCI_MGMT_UNCONFIGURED }, + { start_service_discovery, MGMT_START_SERVICE_DISCOVERY_SIZE, + HCI_MGMT_VAR_LEN }, + { read_local_oob_ext_data, MGMT_READ_LOCAL_OOB_EXT_DATA_SIZE }, + { read_ext_index_list, MGMT_READ_EXT_INDEX_LIST_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_adv_features, MGMT_READ_ADV_FEATURES_SIZE }, + { add_advertising, MGMT_ADD_ADVERTISING_SIZE, + HCI_MGMT_VAR_LEN }, + { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, +}; + void mgmt_index_added(struct hci_dev *hdev) { - if (hdev->dev_type != HCI_BREDR) - return; + struct mgmt_ev_ext_index ev; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) - mgmt_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, NULL); - else - mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL); + switch (hdev->dev_type) { + case HCI_BREDR: + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, + NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; + } + break; + case HCI_AMP: + ev.type = 0x02; + break; + default: + return; + } + + ev.bus = hdev->bus; + + mgmt_index_event(MGMT_EV_EXT_INDEX_ADDED, hdev, &ev, sizeof(ev), + HCI_MGMT_EXT_INDEX_EVENTS); } void mgmt_index_removed(struct hci_dev *hdev) { + struct mgmt_ev_ext_index ev; u8 status = MGMT_STATUS_INVALID_INDEX; - if (hdev->dev_type != HCI_BREDR) + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + switch (hdev->dev_type) { + case HCI_BREDR: + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, + NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; + } + break; + case HCI_AMP: + ev.type = 0x02; + break; + default: return; + } - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + ev.bus = hdev->bus; - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) - mgmt_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, NULL); - else - mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL); + mgmt_index_event(MGMT_EV_EXT_INDEX_REMOVED, hdev, &ev, sizeof(ev), + HCI_MGMT_EXT_INDEX_EVENTS); } /* This function requires the caller holds hdev->lock */ @@ -6367,7 +7117,7 @@ static int powered_update_hci(struct hci_dev *hdev) hci_req_init(&req, hdev); - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && !lmp_host_ssp_capable(hdev)) { u8 mode = 0x01; @@ -6381,7 +7131,7 @@ static int powered_update_hci(struct hci_dev *hdev) } } - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && lmp_bredr_capable(hdev)) { struct hci_cp_write_le_host_supported cp; @@ -6402,24 +7152,28 @@ static int powered_update_hci(struct hci_dev *hdev) * advertising data. This also applies to the case * where BR/EDR was toggled during the AUTO_OFF phase. */ - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { update_adv_data(&req); update_scan_rsp_data(&req); } - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) enable_advertising(&req); restart_le_actions(&req); } - link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(link_sec), &link_sec); if (lmp_bredr_capable(hdev)) { - write_fast_connectable(&req, false); + if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) + write_fast_connectable(&req, true); + else + write_fast_connectable(&req, false); __hci_update_page_scan(&req); update_class(&req); update_name(&req); @@ -6435,7 +7189,7 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) u8 status, zero_cod[] = { 0, 0, 0 }; int err; - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) return 0; if (powered) { @@ -6456,7 +7210,7 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) * been triggered, potentially causing misleading DISCONNECTED * status responses. */ - if (test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) status = MGMT_STATUS_INVALID_INDEX; else status = MGMT_STATUS_NOT_POWERED; @@ -6464,8 +7218,8 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) - mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - zero_cod, sizeof(zero_cod), NULL); + mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), NULL); new_settings: err = new_settings(hdev, match.sk); @@ -6478,10 +7232,10 @@ new_settings: void mgmt_set_powered_failed(struct hci_dev *hdev, int err) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 status; - cmd = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev); + cmd = pending_find(MGMT_OP_SET_POWERED, hdev); if (!cmd) return; @@ -6490,7 +7244,7 @@ void mgmt_set_powered_failed(struct hci_dev *hdev, int err) else status = MGMT_STATUS_FAILED; - cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, status); + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, status); mgmt_pending_remove(cmd); } @@ -6506,17 +7260,23 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev) * of a timeout triggered from general discoverable, it is * safe to unconditionally clear the flag. */ - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_req_init(&req, hdev); - if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { u8 scan = SCAN_PAGE; hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); } update_class(&req); - update_adv_data(&req); + + /* Advertising instances don't use the global discoverable setting, so + * only update AD if advertising was enabled using Set Advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + update_adv_data(&req); + hci_req_run(&req, NULL); hdev->discov_timeout = 0; @@ -6654,7 +7414,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); - ev.key.master = csrk->master; + ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); mgmt_event(MGMT_EV_NEW_CSRK, hdev, &ev, sizeof(ev), NULL); @@ -6681,17 +7441,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, mgmt_event(MGMT_EV_NEW_CONN_PARAM, hdev, &ev, sizeof(ev), NULL); } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u32 flags, u8 *name, u8 name_len) { @@ -6729,7 +7478,7 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, sizeof(*ev) + eir_len, NULL); } -static void disconnect_rsp(struct pending_cmd *cmd, void *data) +static void disconnect_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct sock **sk = data; @@ -6741,7 +7490,7 @@ static void disconnect_rsp(struct pending_cmd *cmd, void *data) mgmt_pending_remove(cmd); } -static void unpair_device_rsp(struct pending_cmd *cmd, void *data) +static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct hci_dev *hdev = data; struct mgmt_cp_unpair_device *cp = cmd->param; @@ -6754,10 +7503,10 @@ static void unpair_device_rsp(struct pending_cmd *cmd, void *data) bool mgmt_powering_down(struct hci_dev *hdev) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; - cmd = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev); + cmd = pending_find(MGMT_OP_SET_POWERED, hdev); if (!cmd) return false; @@ -6809,12 +7558,12 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, { u8 bdaddr_type = link_to_bdaddr(link_type, addr_type); struct mgmt_cp_disconnect *cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, hdev); - cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, hdev); + cmd = pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) return; @@ -6864,9 +7613,9 @@ void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure) void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; - cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, hdev); + cmd = pending_find(MGMT_OP_PIN_CODE_REPLY, hdev); if (!cmd) return; @@ -6877,9 +7626,9 @@ void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_pin_code_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; - cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, hdev); + cmd = pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, hdev); if (!cmd) return; @@ -6922,9 +7671,9 @@ static int user_pairing_resp_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status, u8 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; - cmd = mgmt_pending_find(opcode, hdev); + cmd = pending_find(opcode, hdev); if (!cmd) return -ENOENT; @@ -6983,7 +7732,7 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_auth_failed(struct hci_conn *conn, u8 hci_status) { struct mgmt_ev_auth_failed ev; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 status = mgmt_status(hci_status); bacpy(&ev.addr.bdaddr, &conn->dst); @@ -7014,11 +7763,9 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) } if (test_bit(HCI_AUTH, &hdev->flags)) - changed = !test_and_set_bit(HCI_LINK_SECURITY, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_LINK_SECURITY); else - changed = test_and_clear_bit(HCI_LINK_SECURITY, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY); mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp, &match); @@ -7054,9 +7801,9 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) if (status) { u8 mgmt_err = mgmt_status(status); - if (enable && test_and_clear_bit(HCI_SSP_ENABLED, - &hdev->dev_flags)) { - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + if (enable && hci_dev_test_and_clear_flag(hdev, + HCI_SSP_ENABLED)) { + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); new_settings(hdev, NULL); } @@ -7066,14 +7813,14 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) } if (enable) { - changed = !test_and_set_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); } else { - changed = test_and_clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); if (!changed) - changed = test_and_clear_bit(HCI_HS_ENABLED, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_HS_ENABLED); else - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); @@ -7086,8 +7833,8 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_init(&req, hdev); - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { - if (test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { + if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(enable), &enable); update_eir(&req); @@ -7098,7 +7845,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_run(&req, NULL); } -static void sk_lookup(struct pending_cmd *cmd, void *data) +static void sk_lookup(struct mgmt_pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; @@ -7118,8 +7865,8 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) - mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3, - NULL); + mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + dev_class, 3, NULL); if (match.sk) sock_put(match.sk); @@ -7128,7 +7875,7 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) { struct mgmt_cp_set_local_name ev; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; if (status) return; @@ -7137,36 +7884,36 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) memcpy(ev.name, name, HCI_MAX_NAME_LENGTH); memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH); - cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); + cmd = pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); if (!cmd) { memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); /* If this is a HCI command related to powering on the * HCI dev don't send any mgmt signals. */ - if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) + if (pending_find(MGMT_OP_SET_POWERED, hdev)) return; } - mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); + mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + cmd ? cmd->sk : NULL); } void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, u8 *rand192, u8 *hash256, u8 *rand256, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("%s status %u", hdev->name, status); - cmd = mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev); + cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev); if (!cmd) return; if (status) { - cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - mgmt_status(status)); + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + mgmt_status(status)); } else { struct mgmt_rp_read_local_oob_data rp; size_t rp_size = sizeof(rp); @@ -7181,8 +7928,9 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, rp_size -= sizeof(rp.hash256) + sizeof(rp.rand256); } - cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, rp_size); + mgmt_cmd_complete(cmd->sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_DATA, 0, + &rp, rp_size); } mgmt_pending_remove(cmd); @@ -7258,7 +8006,7 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) static void restart_le_scan(struct hci_dev *hdev) { /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return; if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, @@ -7270,14 +8018,58 @@ static void restart_le_scan(struct hci_dev *hdev) DISCOV_LE_RESTART_DELAY); } +static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, + u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) +{ + /* If a RSSI threshold has been specified, and + * HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, then all results with + * a RSSI smaller than the RSSI threshold will be dropped. If the quirk + * is set, let it through for further processing, as we might need to + * restart the scan. + * + * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, + * the results are also dropped. + */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + (rssi == HCI_RSSI_INVALID || + (rssi < hdev->discovery.rssi && + !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) + return false; + + if (hdev->discovery.uuid_count != 0) { + /* If a list of UUIDs is provided in filter, results with no + * matching UUID should be dropped. + */ + if (!eir_has_uuids(eir, eir_len, hdev->discovery.uuid_count, + hdev->discovery.uuids) && + !eir_has_uuids(scan_rsp, scan_rsp_len, + hdev->discovery.uuid_count, + hdev->discovery.uuids)) + return false; + } + + /* If duplicate filtering does not report RSSI changes, then restart + * scanning to ensure updated result with updated RSSI values. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) { + restart_le_scan(hdev); + + /* Validate RSSI value against the RSSI threshold once more. */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + rssi < hdev->discovery.rssi) + return false; + } + + return true; +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { char buf[512]; - struct mgmt_ev_device_found *ev = (void *) buf; + struct mgmt_ev_device_found *ev = (void *)buf; size_t ev_size; - bool match; /* Don't send events for a non-kernel initiated discovery. With * LE one exception is if we have pend_le_reports > 0 in which @@ -7290,21 +8082,12 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } - /* When using service discovery with a RSSI threshold, then check - * if such a RSSI threshold is specified. If a RSSI threshold has - * been specified, and HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, - * then all results with a RSSI smaller than the RSSI threshold will be - * dropped. If the quirk is set, let it through for further processing, - * as we might need to restart the scan. - * - * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, - * the results are also dropped. - */ - if (hdev->discovery.rssi != HCI_RSSI_INVALID && - (rssi == HCI_RSSI_INVALID || - (rssi < hdev->discovery.rssi && - !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) - return; + if (hdev->discovery.result_filtering) { + /* We are using service discovery */ + if (!is_filter_match(hdev, rssi, eir, eir_len, scan_rsp, + scan_rsp_len)) + return; + } /* Make sure that the buffer is big enough. The 5 extra bytes * are for the potential CoD field. @@ -7331,87 +8114,17 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, ev->rssi = rssi; ev->flags = cpu_to_le32(flags); - if (eir_len > 0) { - /* When using service discovery and a list of UUID is - * provided, results with no matching UUID should be - * dropped. In case there is a match the result is - * kept and checking possible scan response data - * will be skipped. - */ - if (hdev->discovery.uuid_count > 0) { - match = eir_has_uuids(eir, eir_len, - hdev->discovery.uuid_count, - hdev->discovery.uuids); - /* If duplicate filtering does not report RSSI changes, - * then restart scanning to ensure updated result with - * updated RSSI values. - */ - if (match && test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, - &hdev->quirks)) - restart_le_scan(hdev); - } else { - match = true; - } - - if (!match && !scan_rsp_len) - return; - + if (eir_len > 0) /* Copy EIR or advertising data into event */ memcpy(ev->eir, eir, eir_len); - } else { - /* When using service discovery and a list of UUID is - * provided, results with empty EIR or advertising data - * should be dropped since they do not match any UUID. - */ - if (hdev->discovery.uuid_count > 0 && !scan_rsp_len) - return; - - match = false; - } if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV)) eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, dev_class, 3); - if (scan_rsp_len > 0) { - /* When using service discovery and a list of UUID is - * provided, results with no matching UUID should be - * dropped if there is no previous match from the - * advertising data. - */ - if (hdev->discovery.uuid_count > 0) { - if (!match && !eir_has_uuids(scan_rsp, scan_rsp_len, - hdev->discovery.uuid_count, - hdev->discovery.uuids)) - return; - - /* If duplicate filtering does not report RSSI changes, - * then restart scanning to ensure updated result with - * updated RSSI values. - */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, - &hdev->quirks)) - restart_le_scan(hdev); - } - + if (scan_rsp_len > 0) /* Append scan response data to event */ memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len); - } else { - /* When using service discovery and a list of UUID is - * provided, results with empty scan response and no - * previous matched advertising data should be dropped. - */ - if (hdev->discovery.uuid_count > 0 && !match) - return; - } - - /* Validate the reported RSSI value against the RSSI threshold once more - * incase HCI_QUIRK_STRICT_DUPLICATE_FILTER forced a restart of LE - * scanning. - */ - if (hdev->discovery.rssi != HCI_RSSI_INVALID && - rssi < hdev->discovery.rssi) - return; ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); ev_size = sizeof(*ev) + eir_len + scan_rsp_len; @@ -7464,10 +8177,28 @@ void mgmt_reenable_advertising(struct hci_dev *hdev) { struct hci_request req; - if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) return; hci_req_init(&req, hdev); enable_advertising(&req); hci_req_run(&req, adv_enable_complete); } + +static struct hci_mgmt_chan chan = { + .channel = HCI_CHANNEL_CONTROL, + .handler_count = ARRAY_SIZE(mgmt_handlers), + .handlers = mgmt_handlers, + .hdev_init = mgmt_init_hdev, +}; + +int mgmt_init(void) +{ + return hci_mgmt_chan_register(&chan); +} + +void mgmt_exit(void) +{ + hci_mgmt_chan_unregister(&chan); +} diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c new file mode 100644 index 000000000000..8c30c7eb8bef --- /dev/null +++ b/net/bluetooth/mgmt_util.c @@ -0,0 +1,210 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2015 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include <net/bluetooth/mgmt.h> + +#include "mgmt_util.h" + +int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, + void *data, u16 data_len, int flag, struct sock *skip_sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + + skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(event); + if (hdev) + hdr->index = cpu_to_le16(hdev->id); + else + hdr->index = cpu_to_le16(MGMT_INDEX_NONE); + hdr->len = cpu_to_le16(data_len); + + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + /* Time stamp */ + __net_timestamp(skb); + + hci_send_to_channel(channel, skb, flag, skip_sk); + kfree_skb(skb); + + return 0; +} + +int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_status *ev; + int err; + + BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(sizeof(*ev)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + ev->status = status; + ev->opcode = cpu_to_le16(cmd); + + err = sock_queue_rcv_skb(sk, skb); + if (err < 0) + kfree_skb(skb); + + return err; +} + +int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, + void *rp, size_t rp_len) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + int err; + + BT_DBG("sock %p", sk); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); + + ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); + ev->opcode = cpu_to_le16(cmd); + ev->status = status; + + if (rp) + memcpy(ev->data, rp, rp_len); + + err = sock_queue_rcv_skb(sk, skb); + if (err < 0) + kfree_skb(skb); + + return err; +} + +struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, + struct hci_dev *hdev) +{ + struct mgmt_pending_cmd *cmd; + + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + if (hci_sock_get_channel(cmd->sk) != channel) + continue; + if (cmd->opcode == opcode) + return cmd; + } + + return NULL; +} + +struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel, + u16 opcode, + struct hci_dev *hdev, + const void *data) +{ + struct mgmt_pending_cmd *cmd; + + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + if (cmd->user_data != data) + continue; + if (cmd->opcode == opcode) + return cmd; + } + + return NULL; +} + +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, + void (*cb)(struct mgmt_pending_cmd *cmd, void *data), + void *data) +{ + struct mgmt_pending_cmd *cmd, *tmp; + + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { + if (opcode > 0 && cmd->opcode != opcode) + continue; + + cb(cmd, data); + } +} + +struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, + struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_pending_cmd *cmd; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->opcode = opcode; + cmd->index = hdev->id; + + cmd->param = kmemdup(data, len, GFP_KERNEL); + if (!cmd->param) { + kfree(cmd); + return NULL; + } + + cmd->param_len = len; + + cmd->sk = sk; + sock_hold(sk); + + list_add(&cmd->list, &hdev->mgmt_pending); + + return cmd; +} + +void mgmt_pending_free(struct mgmt_pending_cmd *cmd) +{ + sock_put(cmd->sk); + kfree(cmd->param); + kfree(cmd); +} + +void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) +{ + list_del(&cmd->list); + mgmt_pending_free(cmd); +} diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h new file mode 100644 index 000000000000..6559f189213c --- /dev/null +++ b/net/bluetooth/mgmt_util.h @@ -0,0 +1,53 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2015 Intel Coropration + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +struct mgmt_pending_cmd { + struct list_head list; + u16 opcode; + int index; + void *param; + size_t param_len; + struct sock *sk; + void *user_data; + int (*cmd_complete)(struct mgmt_pending_cmd *cmd, u8 status); +}; + +int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, + void *data, u16 data_len, int flag, struct sock *skip_sk); +int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status); +int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, + void *rp, size_t rp_len); + +struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, + struct hci_dev *hdev); +struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel, + u16 opcode, + struct hci_dev *hdev, + const void *data); +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, + void (*cb)(struct mgmt_pending_cmd *cmd, void *data), + void *data); +struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, + struct hci_dev *hdev, + void *data, u16 len); +void mgmt_pending_free(struct mgmt_pending_cmd *cmd); +void mgmt_pending_remove(struct mgmt_pending_cmd *cmd); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 3c6d2c8ac1a4..825e8fb5114b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -549,8 +549,8 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * return 0; } -static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; @@ -615,8 +615,8 @@ done: return sent; } -static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; @@ -627,7 +627,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return 0; } - len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags); + len = bt_sock_stream_recvmsg(sock, msg, size, flags); lock_sock(sk); if (!(flags & MSG_PEEK) && len > 0) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 76321b546e84..4322c833e748 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -688,8 +688,8 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len return 0; } -static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; int err; @@ -758,8 +758,8 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting) } } -static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { struct sock *sk = sock->sk; struct sco_pinfo *pi = sco_pi(sk); @@ -777,7 +777,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); - return bt_sock_recvmsg(iocb, sock, msg, len, flags); + return bt_sock_recvmsg(sock, msg, len, flags); } static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) @@ -1083,9 +1083,13 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } -void sco_connect_cfm(struct hci_conn *hcon, __u8 status) +static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); + if (!status) { struct sco_conn *conn; @@ -1096,8 +1100,11 @@ void sco_connect_cfm(struct hci_conn *hcon, __u8 status) sco_conn_del(hcon, bt_to_errno(status)); } -void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) +static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); @@ -1122,6 +1129,12 @@ drop: return 0; } +static struct hci_cb sco_cb = { + .name = "SCO", + .connect_cfm = sco_connect_cfm, + .disconn_cfm = sco_disconn_cfm, +}; + static int sco_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; @@ -1203,6 +1216,8 @@ int __init sco_init(void) BT_INFO("SCO socket layer initialized"); + hci_register_cb(&sco_cb); + if (IS_ERR_OR_NULL(bt_debugfs)) return 0; @@ -1216,12 +1231,14 @@ error: return err; } -void __exit sco_exit(void) +void sco_exit(void) { bt_procfs_cleanup(&init_net, "sco"); debugfs_remove(sco_debugfs); + hci_unregister_cb(&sco_cb); + bt_sock_unregister(BTPROTO_SCO); proto_unregister(&sco_proto); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index c09a821f381d..1ec3f66b5a74 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -52,7 +52,7 @@ #define SMP_TIMEOUT msecs_to_jiffies(30000) -#define AUTH_REQ_MASK(dev) (test_bit(HCI_SC_ENABLED, &(dev)->dev_flags) ? \ +#define AUTH_REQ_MASK(dev) (hci_dev_test_flag(dev, HCI_SC_ENABLED) ? \ 0x1f : 0x07) #define KEY_DIST_MASK 0x07 @@ -70,7 +70,19 @@ enum { SMP_FLAG_DEBUG_KEY, SMP_FLAG_WAIT_USER, SMP_FLAG_DHKEY_PENDING, - SMP_FLAG_OOB, + SMP_FLAG_REMOTE_OOB, + SMP_FLAG_LOCAL_OOB, +}; + +struct smp_dev { + /* Secure Connections OOB data */ + u8 local_pk[64]; + u8 local_sk[32]; + u8 local_rand[16]; + bool debug_key; + + struct crypto_blkcipher *tfm_aes; + struct crypto_hash *tfm_cmac; }; struct smp_chan { @@ -84,7 +96,8 @@ struct smp_chan { u8 rrnd[16]; /* SMP Pairing Random (remote) */ u8 pcnf[16]; /* SMP Pairing Confirm */ u8 tk[16]; /* SMP Temporary Key */ - u8 rr[16]; + u8 rr[16]; /* Remote OOB ra/rb value */ + u8 lr[16]; /* Local OOB ra/rb value */ u8 enc_key_size; u8 remote_key_dist; bdaddr_t id_addr; @@ -478,18 +491,18 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], const bdaddr_t *bdaddr) { struct l2cap_chan *chan = hdev->smp_data; - struct crypto_blkcipher *tfm; + struct smp_dev *smp; u8 hash[3]; int err; if (!chan || !chan->data) return false; - tfm = chan->data; + smp = chan->data; BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk); - err = smp_ah(tfm, irk, &bdaddr->b[3], hash); + err = smp_ah(smp->tfm_aes, irk, &bdaddr->b[3], hash); if (err) return false; @@ -499,20 +512,20 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) { struct l2cap_chan *chan = hdev->smp_data; - struct crypto_blkcipher *tfm; + struct smp_dev *smp; int err; if (!chan || !chan->data) return -EOPNOTSUPP; - tfm = chan->data; + smp = chan->data; get_random_bytes(&rpa->b[3], 3); rpa->b[5] &= 0x3f; /* Clear two most significant bits */ rpa->b[5] |= 0x40; /* Set second most significant bit */ - err = smp_ah(tfm, irk, &rpa->b[3], rpa->b); + err = smp_ah(smp->tfm_aes, irk, &rpa->b[3], rpa->b); if (err < 0) return err; @@ -521,6 +534,53 @@ int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) return 0; } +int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) +{ + struct l2cap_chan *chan = hdev->smp_data; + struct smp_dev *smp; + int err; + + if (!chan || !chan->data) + return -EOPNOTSUPP; + + smp = chan->data; + + if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { + BT_DBG("Using debug keys"); + memcpy(smp->local_pk, debug_pk, 64); + memcpy(smp->local_sk, debug_sk, 32); + smp->debug_key = true; + } else { + while (true) { + /* Generate local key pair for Secure Connections */ + if (!ecc_make_key(smp->local_pk, smp->local_sk)) + return -EIO; + + /* This is unlikely, but we need to check that + * we didn't accidentially generate a debug key. + */ + if (memcmp(smp->local_sk, debug_sk, 32)) + break; + } + smp->debug_key = false; + } + + SMP_DBG("OOB Public Key X: %32phN", smp->local_pk); + SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32); + SMP_DBG("OOB Private Key: %32phN", smp->local_sk); + + get_random_bytes(smp->local_rand, 16); + + err = smp_f4(smp->tfm_cmac, smp->local_pk, smp->local_pk, + smp->local_rand, 0, hash); + if (err < 0) + return err; + + memcpy(rand, smp->local_rand, 16); + + return 0; +} + static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data) { struct l2cap_chan *chan = conn->smp; @@ -589,7 +649,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, struct hci_dev *hdev = hcon->hdev; u8 local_dist = 0, remote_dist = 0, oob_flag = SMP_OOB_NOT_PRESENT; - if (test_bit(HCI_BONDABLE, &conn->hcon->hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_BONDABLE)) { local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; authreq |= SMP_AUTH_BONDING; @@ -597,18 +657,18 @@ static void build_pairing_cmd(struct l2cap_conn *conn, authreq &= ~SMP_AUTH_BONDING; } - if (test_bit(HCI_RPA_RESOLVING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_RPA_RESOLVING)) remote_dist |= SMP_DIST_ID_KEY; - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) local_dist |= SMP_DIST_ID_KEY; - if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_SC_ENABLED) && (authreq & SMP_AUTH_SC)) { struct oob_data *oob_data; u8 bdaddr_type; - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { local_dist |= SMP_DIST_LINK_KEY; remote_dist |= SMP_DIST_LINK_KEY; } @@ -621,10 +681,12 @@ static void build_pairing_cmd(struct l2cap_conn *conn, oob_data = hci_find_remote_oob_data(hdev, &hcon->dst, bdaddr_type); if (oob_data && oob_data->present) { - set_bit(SMP_FLAG_OOB, &smp->flags); + set_bit(SMP_FLAG_REMOTE_OOB, &smp->flags); oob_flag = SMP_OOB_PRESENT; memcpy(smp->rr, oob_data->rand256, 16); memcpy(smp->pcnf, oob_data->hash256, 16); + SMP_DBG("OOB Remote Confirmation: %16phN", smp->pcnf); + SMP_DBG("OOB Remote Random: %16phN", smp->rr); } } else { @@ -681,9 +743,9 @@ static void smp_chan_destroy(struct l2cap_conn *conn) complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags); mgmt_smp_complete(hcon, complete); - kfree(smp->csrk); - kfree(smp->slave_csrk); - kfree(smp->link_key); + kzfree(smp->csrk); + kzfree(smp->slave_csrk); + kzfree(smp->link_key); crypto_free_blkcipher(smp->tfm_aes); crypto_free_hash(smp->tfm_cmac); @@ -692,7 +754,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn) * support hasn't been explicitly enabled. */ if (smp->ltk && smp->ltk->type == SMP_LTK_P256_DEBUG && - !test_bit(HCI_KEEP_DEBUG_KEYS, &hcon->hdev->dev_flags)) { + !hci_dev_test_flag(hcon->hdev, HCI_KEEP_DEBUG_KEYS)) { list_del_rcu(&smp->ltk->list); kfree_rcu(smp->ltk, rcu); smp->ltk = NULL; @@ -717,7 +779,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn) } chan->data = NULL; - kfree(smp); + kzfree(smp); hci_conn_drop(hcon); } @@ -818,6 +880,12 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, return 0; } + /* If this function is used for SC -> legacy fallback we + * can only recover the just-works case. + */ + if (test_bit(SMP_FLAG_SC, &smp->flags)) + return -EINVAL; + /* Not Just Works/Confirm results in MITM Authentication */ if (smp->method != JUST_CFM) { set_bit(SMP_FLAG_MITM_AUTH, &smp->flags); @@ -1052,7 +1120,7 @@ static void smp_notify_keys(struct l2cap_conn *conn) /* Don't keep debug keys around if the relevant * flag is not set. */ - if (!test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS) && key->type == HCI_LK_DEBUG_COMBINATION) { list_del_rcu(&key->list); kfree_rcu(key, rcu); @@ -1097,13 +1165,13 @@ static void sc_generate_link_key(struct smp_chan *smp) return; if (smp_h6(smp->tfm_cmac, smp->tk, tmp1, smp->link_key)) { - kfree(smp->link_key); + kzfree(smp->link_key); smp->link_key = NULL; return; } if (smp_h6(smp->tfm_cmac, smp->link_key, lebr, smp->link_key)) { - kfree(smp->link_key); + kzfree(smp->link_key); smp->link_key = NULL; return; } @@ -1252,7 +1320,10 @@ static void smp_distribute_keys(struct smp_chan *smp) csrk = kzalloc(sizeof(*csrk), GFP_KERNEL); if (csrk) { - csrk->master = 0x00; + if (hcon->sec_level > BT_SECURITY_MEDIUM) + csrk->type = MGMT_CSRK_LOCAL_AUTHENTICATED; + else + csrk->type = MGMT_CSRK_LOCAL_UNAUTHENTICATED; memcpy(csrk->val, sign.csrk, sizeof(csrk->val)); } smp->slave_csrk = csrk; @@ -1297,7 +1368,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) smp->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(smp->tfm_aes)) { BT_ERR("Unable to create ECB crypto context"); - kfree(smp); + kzfree(smp); return NULL; } @@ -1305,7 +1376,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) if (IS_ERR(smp->tfm_cmac)) { BT_ERR("Unable to create CMAC crypto context"); crypto_free_blkcipher(smp->tfm_aes); - kfree(smp); + kzfree(smp); return NULL; } @@ -1601,15 +1672,15 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp, struct hci_dev *hdev = conn->hcon->hdev; u8 local_dist = 0, remote_dist = 0; - if (test_bit(HCI_BONDABLE, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_BONDABLE)) { local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; } - if (test_bit(HCI_RPA_RESOLVING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_RPA_RESOLVING)) remote_dist |= SMP_DIST_ID_KEY; - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) local_dist |= SMP_DIST_ID_KEY; if (!rsp) { @@ -1661,22 +1732,29 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) /* We didn't start the pairing, so match remote */ auth = req->auth_req & AUTH_REQ_MASK(hdev); - if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_BONDABLE) && (auth & SMP_AUTH_BONDING)) return SMP_PAIRING_NOTSUPP; - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC)) + if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC)) return SMP_AUTH_REQUIREMENTS; smp->preq[0] = SMP_CMD_PAIRING_REQ; memcpy(&smp->preq[1], req, sizeof(*req)); skb_pull(skb, sizeof(*req)); + /* If the remote side's OOB flag is set it means it has + * successfully received our local OOB data - therefore set the + * flag to indicate that local OOB is in use. + */ + if (req->oob_flag == SMP_OOB_PRESENT) + set_bit(SMP_FLAG_LOCAL_OOB, &smp->flags); + /* SMP over BR/EDR requires special treatment */ if (conn->hcon->type == ACL_LINK) { /* We must have a BR/EDR SC link */ if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) && - !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + !hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) return SMP_CROSS_TRANSP_NOT_ALLOWED; set_bit(SMP_FLAG_SC, &smp->flags); @@ -1734,14 +1812,19 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) clear_bit(SMP_FLAG_INITIATOR, &smp->flags); + /* Strictly speaking we shouldn't allow Pairing Confirm for the + * SC case, however some implementations incorrectly copy RFU auth + * req bits from our security request, which may create a false + * positive SC enablement. + */ + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); + if (test_bit(SMP_FLAG_SC, &smp->flags)) { SMP_ALLOW_CMD(smp, SMP_CMD_PUBLIC_KEY); /* Clear bits which are generated but not distributed */ smp->remote_key_dist &= ~SMP_SC_NO_DIST; /* Wait for Public Key from Initiating Device */ return 0; - } else { - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); } /* Request setup of TK */ @@ -1758,7 +1841,26 @@ static u8 sc_send_public_key(struct smp_chan *smp) BT_DBG(""); - if (test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) { + if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) { + struct l2cap_chan *chan = hdev->smp_data; + struct smp_dev *smp_dev; + + if (!chan || !chan->data) + return SMP_UNSPECIFIED; + + smp_dev = chan->data; + + memcpy(smp->local_pk, smp_dev->local_pk, 64); + memcpy(smp->local_sk, smp_dev->local_sk, 32); + memcpy(smp->lr, smp_dev->local_rand, 16); + + if (smp_dev->debug_key) + set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); + + goto done; + } + + if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { BT_DBG("Using debug keys"); memcpy(smp->local_pk, debug_pk, 64); memcpy(smp->local_sk, debug_sk, 32); @@ -1777,8 +1879,9 @@ static u8 sc_send_public_key(struct smp_chan *smp) } } +done: SMP_DBG("Local Public Key X: %32phN", smp->local_pk); - SMP_DBG("Local Public Key Y: %32phN", &smp->local_pk[32]); + SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32); SMP_DBG("Local Private Key: %32phN", smp->local_sk); smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk); @@ -1813,9 +1916,16 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) auth = rsp->auth_req & AUTH_REQ_MASK(hdev); - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC)) + if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC)) return SMP_AUTH_REQUIREMENTS; + /* If the remote side's OOB flag is set it means it has + * successfully received our local OOB data - therefore set the + * flag to indicate that local OOB is in use. + */ + if (rsp->oob_flag == SMP_OOB_PRESENT) + set_bit(SMP_FLAG_LOCAL_OOB, &smp->flags); + smp->prsp[0] = SMP_CMD_PAIRING_RSP; memcpy(&smp->prsp[1], rsp, sizeof(*rsp)); @@ -1882,10 +1992,6 @@ static u8 sc_check_confirm(struct smp_chan *smp) BT_DBG(""); - /* Public Key exchange must happen before any other steps */ - if (!test_bit(SMP_FLAG_REMOTE_PK, &smp->flags)) - return SMP_UNSPECIFIED; - if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM); @@ -1898,6 +2004,47 @@ static u8 sc_check_confirm(struct smp_chan *smp) return 0; } +/* Work-around for some implementations that incorrectly copy RFU bits + * from our security request and thereby create the impression that + * we're doing SC when in fact the remote doesn't support it. + */ +static int fixup_sc_false_positive(struct smp_chan *smp) +{ + struct l2cap_conn *conn = smp->conn; + struct hci_conn *hcon = conn->hcon; + struct hci_dev *hdev = hcon->hdev; + struct smp_cmd_pairing *req, *rsp; + u8 auth; + + /* The issue is only observed when we're in slave role */ + if (hcon->out) + return SMP_UNSPECIFIED; + + if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { + BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode"); + return SMP_UNSPECIFIED; + } + + BT_ERR("Trying to fall back to legacy SMP"); + + req = (void *) &smp->preq[1]; + rsp = (void *) &smp->prsp[1]; + + /* Rebuild key dist flags which may have been cleared for SC */ + smp->remote_key_dist = (req->init_key_dist & rsp->resp_key_dist); + + auth = req->auth_req & AUTH_REQ_MASK(hdev); + + if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) { + BT_ERR("Failed to fall back to legacy SMP"); + return SMP_UNSPECIFIED; + } + + clear_bit(SMP_FLAG_SC, &smp->flags); + + return 0; +} + static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) { struct l2cap_chan *chan = conn->smp; @@ -1911,8 +2058,19 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf)); skb_pull(skb, sizeof(smp->pcnf)); - if (test_bit(SMP_FLAG_SC, &smp->flags)) - return sc_check_confirm(smp); + if (test_bit(SMP_FLAG_SC, &smp->flags)) { + int ret; + + /* Public Key exchange must happen before any other steps */ + if (test_bit(SMP_FLAG_REMOTE_PK, &smp->flags)) + return sc_check_confirm(smp); + + BT_ERR("Unexpected SMP Pairing Confirm"); + + ret = fixup_sc_false_positive(smp); + if (ret) + return ret; + } if (conn->hcon->out) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), @@ -1923,8 +2081,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) if (test_bit(SMP_FLAG_TK_VALID, &smp->flags)) return smp_confirm(smp); - else - set_bit(SMP_FLAG_CFM_PENDING, &smp->flags); + + set_bit(SMP_FLAG_CFM_PENDING, &smp->flags); return 0; } @@ -2083,7 +2241,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) auth = rp->auth_req & AUTH_REQ_MASK(hdev); - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC)) + if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC)) return SMP_AUTH_REQUIREMENTS; if (hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT) @@ -2104,7 +2262,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) if (!smp) return SMP_UNSPECIFIED; - if (!test_bit(HCI_BONDABLE, &hcon->hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_BONDABLE) && (auth & SMP_AUTH_BONDING)) return SMP_PAIRING_NOTSUPP; @@ -2138,7 +2296,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) chan = conn->smp; - if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) + if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) @@ -2167,7 +2325,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) authreq = seclevel_to_authreq(sec_level); - if (test_bit(HCI_SC_ENABLED, &hcon->hdev->dev_flags)) + if (hci_dev_test_flag(hcon->hdev, HCI_SC_ENABLED)) authreq |= SMP_AUTH_SC; /* Require MITM if IO Capability allows or the security level @@ -2352,7 +2510,10 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) csrk = kzalloc(sizeof(*csrk), GFP_KERNEL); if (csrk) { - csrk->master = 0x01; + if (conn->hcon->sec_level > BT_SECURITY_MEDIUM) + csrk->type = MGMT_CSRK_REMOTE_AUTHENTICATED; + else + csrk->type = MGMT_CSRK_REMOTE_UNAUTHENTICATED; memcpy(csrk->val, rp->csrk, sizeof(csrk->val)); } smp->csrk = csrk; @@ -2368,7 +2529,8 @@ static u8 sc_select_method(struct smp_chan *smp) struct smp_cmd_pairing *local, *remote; u8 local_mitm, remote_mitm, local_io, remote_io, method; - if (test_bit(SMP_FLAG_OOB, &smp->flags)) + if (test_bit(SMP_FLAG_REMOTE_OOB, &smp->flags) || + test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) return REQ_OOB; /* The preq/prsp contain the raw Pairing Request/Response PDUs @@ -2422,6 +2584,16 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) memcpy(smp->remote_pk, key, 64); + if (test_bit(SMP_FLAG_REMOTE_OOB, &smp->flags)) { + err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->remote_pk, + smp->rr, 0, cfm.confirm_val); + if (err) + return SMP_UNSPECIFIED; + + if (memcmp(cfm.confirm_val, smp->pcnf, 16)) + return SMP_CONFIRM_FAILED; + } + /* Non-initiating device sends its public key after receiving * the key from the initiating device. */ @@ -2432,7 +2604,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) } SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk); - SMP_DBG("Remote Public Key Y: %32phN", &smp->remote_pk[32]); + SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32); if (!ecdh_shared_secret(smp->remote_pk, smp->local_sk, smp->dhkey)) return SMP_UNSPECIFIED; @@ -2470,14 +2642,6 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->remote_pk, - smp->rr, 0, cfm.confirm_val); - if (err) - return SMP_UNSPECIFIED; - - if (memcmp(cfm.confirm_val, smp->pcnf, 16)) - return SMP_CONFIRM_FAILED; - if (hcon->out) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); @@ -2550,6 +2714,8 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) put_unaligned_le32(hcon->passkey_notify, r); + else if (smp->method == REQ_OOB) + memcpy(r, smp->lr, 16); err = smp_f6(smp->tfm_cmac, smp->mackey, smp->rrnd, smp->prnd, r, io_cap, remote_addr, local_addr, e); @@ -2600,7 +2766,7 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb) if (skb->len < 1) return -EILSEQ; - if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) { + if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) { reason = SMP_PAIRING_NOTSUPP; goto done; } @@ -2738,16 +2904,16 @@ static void bredr_pairing(struct l2cap_chan *chan) return; /* Secure Connections support must be enabled */ - if (!test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_SC_ENABLED)) return; /* BR/EDR must use Secure Connections for SMP */ if (!test_bit(HCI_CONN_AES_CCM, &hcon->flags) && - !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + !hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) return; /* If our LE support is not enabled don't do anything */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return; /* Don't bother if remote LE support is not enabled */ @@ -2924,51 +3090,63 @@ static const struct l2cap_ops smp_root_chan_ops = { static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) { struct l2cap_chan *chan; - struct crypto_blkcipher *tfm_aes; + struct smp_dev *smp; + struct crypto_blkcipher *tfm_aes; + struct crypto_hash *tfm_cmac; if (cid == L2CAP_CID_SMP_BREDR) { - tfm_aes = NULL; + smp = NULL; goto create_chan; } - tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, 0); + smp = kzalloc(sizeof(*smp), GFP_KERNEL); + if (!smp) + return ERR_PTR(-ENOMEM); + + tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm_aes)) { - BT_ERR("Unable to create crypto context"); + BT_ERR("Unable to create ECB crypto context"); + kzfree(smp); return ERR_CAST(tfm_aes); } + tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_cmac)) { + BT_ERR("Unable to create CMAC crypto context"); + crypto_free_blkcipher(tfm_aes); + kzfree(smp); + return ERR_CAST(tfm_cmac); + } + + smp->tfm_aes = tfm_aes; + smp->tfm_cmac = tfm_cmac; + create_chan: chan = l2cap_chan_create(); if (!chan) { - crypto_free_blkcipher(tfm_aes); + if (smp) { + crypto_free_blkcipher(smp->tfm_aes); + crypto_free_hash(smp->tfm_cmac); + kzfree(smp); + } return ERR_PTR(-ENOMEM); } - chan->data = tfm_aes; + chan->data = smp; l2cap_add_scid(chan, cid); l2cap_chan_set_defaults(chan); if (cid == L2CAP_CID_SMP) { - /* If usage of static address is forced or if the devices - * does not have a public address, then listen on the static - * address. - * - * In case BR/EDR has been disabled on a dual-mode controller - * and a static address has been configued, then listen on - * the static address instead. - */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && - bacmp(&hdev->static_addr, BDADDR_ANY))) { - bacpy(&chan->src, &hdev->static_addr); - chan->src_type = BDADDR_LE_RANDOM; - } else { - bacpy(&chan->src, &hdev->bdaddr); + u8 bdaddr_type; + + hci_copy_identity_address(hdev, &chan->src, &bdaddr_type); + + if (bdaddr_type == ADDR_LE_DEV_PUBLIC) chan->src_type = BDADDR_LE_PUBLIC; - } + else + chan->src_type = BDADDR_LE_RANDOM; } else { bacpy(&chan->src, &hdev->bdaddr); chan->src_type = BDADDR_BREDR; @@ -2987,14 +3165,18 @@ create_chan: static void smp_del_chan(struct l2cap_chan *chan) { - struct crypto_blkcipher *tfm_aes; + struct smp_dev *smp; BT_DBG("chan %p", chan); - tfm_aes = chan->data; - if (tfm_aes) { + smp = chan->data; + if (smp) { chan->data = NULL; - crypto_free_blkcipher(tfm_aes); + if (smp->tfm_aes) + crypto_free_blkcipher(smp->tfm_aes); + if (smp->tfm_cmac) + crypto_free_hash(smp->tfm_cmac); + kzfree(smp); } l2cap_chan_put(chan); @@ -3007,7 +3189,7 @@ static ssize_t force_bredr_smp_read(struct file *file, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -3029,7 +3211,7 @@ static ssize_t force_bredr_smp_write(struct file *file, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + if (enable == hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) return -EALREADY; if (enable) { @@ -3048,7 +3230,7 @@ static ssize_t force_bredr_smp_write(struct file *file, smp_del_chan(chan); } - change_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags); + hci_dev_change_flag(hdev, HCI_FORCE_BREDR_SMP); return count; } diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 60c5b73fcb4b..6cf872563ea7 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -188,6 +188,7 @@ int smp_user_confirm_reply(struct hci_conn *conn, u16 mgmt_op, __le32 passkey); bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], const bdaddr_t *bdaddr); int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa); +int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]); int smp_register(struct hci_dev *hdev); void smp_unregister(struct hci_dev *hdev); diff --git a/net/bridge/br.c b/net/bridge/br.c index fb57ab6b24f9..02c24cf63c34 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -190,6 +190,8 @@ static int __init br_init(void) { int err; + BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + err = stp_proto_register(&br_stp_proto); if (err < 0) { pr_err("bridge: can't register sap for STP\n"); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ffd379db5938..4ff77a16956c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -25,6 +25,9 @@ #define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) +const struct nf_br_ops __rcu *nf_br_ops __read_mostly; +EXPORT_SYMBOL_GPL(nf_br_ops); + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -33,16 +36,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + const struct nf_br_ops *nf_ops; u16 vid = 0; rcu_read_lock(); -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { - br_nf_pre_routing_finish_bridge_slow(skb); + nf_ops = rcu_dereference(nf_br_ops); + if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) { rcu_read_unlock(); return NETDEV_TX_OK; } -#endif u64_stats_update_begin(&brstats->syncp); brstats->tx_packets++; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index f96933a823e3..3304a5442331 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -37,9 +37,7 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sk_buff *skb) { - /* ip_fragment doesn't copy the MAC header */ - if (nf_bridge_maybe_copy_header(skb) || - !is_skb_forwardable(skb->dev, skb)) { + if (!is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); @@ -188,6 +186,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) continue; + if ((p->flags & BR_PROXYARP_WIFI) && + BR_INPUT_SKB_CB(skb)->proxyarp_replied) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b087d278c679..1849d96b3c91 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -563,6 +563,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) */ del_nbp(p); + dev_set_mtu(br->dev, br_min_mtu(br)); + spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e2aa7be3a847..052c5ebbc947 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,7 +60,7 @@ static int br_pass_frame_up(struct sk_buff *skb) } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, - u16 vid) + u16 vid, struct net_bridge_port *p) { struct net_device *dev = br->dev; struct neighbour *n; @@ -68,6 +68,8 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, u8 *arpptr, *sha; __be32 sip, tip; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + if (dev->flags & IFF_NOARP) return; @@ -105,9 +107,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } f = __br_fdb_get(br, n->ha, vid); - if (f) + if (f && ((p->flags & BR_PROXYARP) || + (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, sha, n->ha, sha); + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + } neigh_release(n); } @@ -153,12 +158,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) { - if (IS_ENABLED(CONFIG_INET) && - p->flags & BR_PROXYARP && - skb->protocol == htons(ETH_P_ARP)) - br_do_proxy_arp(skb, br, vid); + if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) + br_do_proxy_arp(skb, br, vid, p); + if (is_broadcast_ether_addr(dest)) { skb2 = skb; unicast = false; } else if (is_multicast_ether_addr(dest)) { diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 0ee453fad3de..f3884a1b942f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -37,17 +37,16 @@ #include <net/route.h> #include <net/netfilter/br_netfilter.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + #include <asm/uaccess.h> #include "br_private.h" #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif -#define skb_origaddr(skb) (((struct bridge_skb_cb *) \ - (skb->nf_bridge->data))->daddr.ipv4) -#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr) -#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr) - #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables __read_mostly = 1; @@ -154,6 +153,18 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) return nf_bridge; } +static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __cpu_to_be16(ETH_P_8021Q): + return VLAN_HLEN; + case __cpu_to_be16(ETH_P_PPP_SES): + return PPPOE_SES_HLEN; + default: + return 0; + } +} + static inline void nf_bridge_push_encap_header(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); @@ -239,6 +250,14 @@ drop: return -1; } +static void nf_bridge_update_protocol(struct sk_buff *skb) +{ + if (skb->nf_bridge->mask & BRNF_8021Q) + skb->protocol = htons(ETH_P_8021Q); + else if (skb->nf_bridge->mask & BRNF_PPPoE) + skb->protocol = htons(ETH_P_PPP_SES); +} + /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ @@ -314,6 +333,22 @@ free_skb: return 0; } +static bool dnat_took_place(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_untracked(ct)) + return false; + + return test_bit(IPS_DST_NAT_BIT, &ct->status); +#else + return false; +#endif +} + /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. * @@ -617,7 +652,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_DROP; if (!setup_pre_routing(skb)) return NF_DROP; - store_orig_dstaddr(skb); + skb->protocol = htons(ETH_P_IP); NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, @@ -713,8 +748,6 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) return NF_DROP; - /* The physdev module checks on this */ - nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); @@ -764,23 +797,53 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +static bool nf_bridge_copy_header(struct sk_buff *skb) +{ + int err; + unsigned int header_size; + + nf_bridge_update_protocol(skb); + header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); + err = skb_cow_head(skb, header_size); + if (err) + return false; + + skb_copy_to_linear_data_offset(skb, -header_size, + skb->nf_bridge->data, header_size); + __skb_push(skb, nf_bridge_encap_header_len(skb)); + return true; +} + +static int br_nf_push_frag_xmit(struct sk_buff *skb) +{ + if (!nf_bridge_copy_header(skb)) { + kfree_skb(skb); + return 0; + } + + return br_dev_queue_push_xmit(skb); +} + static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; int frag_max_size; + unsigned int mtu_reserved; + + if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) + return br_dev_queue_push_xmit(skb); + mtu_reserved = nf_bridge_mtu_reduction(skb); /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ - if (skb->protocol == htons(ETH_P_IP) && - skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && - !skb_is_gso(skb)) { + if (skb->len + mtu_reserved > skb->dev->mtu) { frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; - ret = ip_fragment(skb, br_dev_queue_push_xmit); + ret = ip_fragment(skb, br_nf_push_frag_xmit); } else ret = br_dev_queue_push_xmit(skb); @@ -804,7 +867,12 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; - if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) + /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in + * on a bridge, but was delivered locally and is now being routed: + * + * POST_ROUTING was already invoked from the ip stack. + */ + if (!nf_bridge || !nf_bridge->physoutdev) return NF_ACCEPT; if (!realoutdev) @@ -854,6 +922,41 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, return NF_ACCEPT; } +/* This is called when br_netfilter has called into iptables/netfilter, + * and DNAT has taken place on a bridge-forwarded packet. + * + * neigh->output has created a new MAC header, with local br0 MAC + * as saddr. + * + * This restores the original MAC saddr of the bridged packet + * before invoking bridge forward logic to transmit the packet. + */ +static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + skb_pull(skb, ETH_HLEN); + nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), + skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + skb->dev = nf_bridge->physindev; + br_handle_frame_finish(skb); +} + +static int br_nf_dev_xmit(struct sk_buff *skb) +{ + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + return 1; + } + return 0; +} + +static const struct nf_br_ops br_ops = { + .br_dev_xmit_hook = br_nf_dev_xmit, +}; + void br_netfilter_enable(void) { } @@ -991,12 +1094,14 @@ static int __init br_netfilter_init(void) return -ENOMEM; } #endif + RCU_INIT_POINTER(nf_br_ops, &br_ops); printk(KERN_NOTICE "Bridge firewalling registered\n"); return 0; } static void __exit br_netfilter_fini(void) { + RCU_INIT_POINTER(nf_br_ops, NULL); nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); #ifdef CONFIG_SYSCTL unregister_net_sysctl_table(brnf_sysctl_header); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 17e0177467f5..e1115a224a95 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -22,22 +22,83 @@ #include "br_private.h" #include "br_private_stp.h" -static size_t br_get_link_af_size(const struct net_device *dev) +static int br_get_num_vlan_infos(const struct net_port_vlans *pv, + u32 filter_mask) +{ + u16 vid_range_start = 0, vid_range_end = 0; + u16 vid_range_flags = 0; + u16 pvid, vid, flags; + int num_vlans = 0; + + if (filter_mask & RTEXT_FILTER_BRVLAN) + return pv->num_vlans; + + if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) + return 0; + + /* Count number of vlan info's + */ + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + flags = 0; + if (vid == pvid) + flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_range_start == 0) { + goto initvars; + } else if ((vid - vid_range_end) == 1 && + flags == vid_range_flags) { + vid_range_end = vid; + continue; + } else { + if ((vid_range_end - vid_range_start) > 0) + num_vlans += 2; + else + num_vlans += 1; + } +initvars: + vid_range_start = vid; + vid_range_end = vid; + vid_range_flags = flags; + } + + if (vid_range_start != 0) { + if ((vid_range_end - vid_range_start) > 0) + num_vlans += 2; + else + num_vlans += 1; + } + + return num_vlans; +} + +static size_t br_get_link_af_size_filtered(const struct net_device *dev, + u32 filter_mask) { struct net_port_vlans *pv; + int num_vlan_infos; + rcu_read_lock(); if (br_port_exists(dev)) - pv = nbp_get_vlan_info(br_port_get_rtnl(dev)); + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); else if (dev->priv_flags & IFF_EBRIDGE) pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); else - return 0; + pv = NULL; + if (pv) + num_vlan_infos = br_get_num_vlan_infos(pv, filter_mask); + else + num_vlan_infos = 0; + rcu_read_unlock(); - if (!pv) + if (!num_vlan_infos) return 0; /* Each VLAN is returned in bridge_vlan_info along with flags */ - return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); + return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); } static inline size_t br_port_info_size(void) @@ -54,7 +115,7 @@ static inline size_t br_port_info_size(void) + 0; } -static inline size_t br_nlmsg_size(struct net_device *dev) +static inline size_t br_nlmsg_size(struct net_device *dev, u32 filter_mask) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -64,7 +125,8 @@ static inline size_t br_nlmsg_size(struct net_device *dev) + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(br_port_info_size()) /* IFLA_PROTINFO */ - + nla_total_size(br_get_link_af_size(dev)); /* IFLA_AF_SPEC */ + + nla_total_size(br_get_link_af_size_filtered(dev, + filter_mask)); /* IFLA_AF_SPEC */ } static int br_port_fill_attrs(struct sk_buff *skb, @@ -81,7 +143,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || - nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP))) + nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || + nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, + !!(p->flags & BR_PROXYARP_WIFI))) return -EMSGSIZE; return 0; @@ -299,6 +363,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; if (!port) return; @@ -307,12 +372,11 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); - skb = nlmsg_new(br_nlmsg_size(port->dev), GFP_ATOMIC); + skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC); if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0, - RTEXT_FILTER_BRVLAN_COMPRESSED, port->dev); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -491,6 +555,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); + br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); @@ -668,6 +733,9 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 }, [IFLA_BR_HELLO_TIME] = { .type = NLA_U32 }, [IFLA_BR_MAX_AGE] = { .type = NLA_U32 }, + [IFLA_BR_AGEING_TIME] = { .type = NLA_U32 }, + [IFLA_BR_STP_STATE] = { .type = NLA_U32 }, + [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -697,6 +765,24 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } + if (data[IFLA_BR_AGEING_TIME]) { + u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]); + + br->ageing_time = clock_t_to_jiffies(ageing_time); + } + + if (data[IFLA_BR_STP_STATE]) { + u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]); + + br_stp_set_enabled(br, stp_enabled); + } + + if (data[IFLA_BR_PRIORITY]) { + u32 priority = nla_get_u16(data[IFLA_BR_PRIORITY]); + + br_stp_set_bridge_priority(br, priority); + } + return 0; } @@ -705,6 +791,9 @@ static size_t br_get_size(const struct net_device *brdev) return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ nla_total_size(sizeof(u32)) + /* IFLA_BR_HELLO_TIME */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MAX_AGE */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_AGEING_TIME */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */ + nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */ 0; } @@ -714,15 +803,39 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) u32 forward_delay = jiffies_to_clock_t(br->forward_delay); u32 hello_time = jiffies_to_clock_t(br->hello_time); u32 age_time = jiffies_to_clock_t(br->max_age); + u32 ageing_time = jiffies_to_clock_t(br->ageing_time); + u32 stp_enabled = br->stp_enabled; + u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) || - nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time)) + nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) || + nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) || + nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) || + nla_put_u16(skb, IFLA_BR_PRIORITY, priority)) return -EMSGSIZE; return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rtnl(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size, diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 387cb3bd017c..20cbb727df4d 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -54,7 +54,6 @@ static unsigned int fake_mtu(const struct dst_entry *dst) static struct dst_ops fake_dst_ops = { .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, .redirect = fake_redirect, .cow_metrics = fake_cow_metrics, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index de0919975a25..b46fa0c5b8ec 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -305,6 +305,7 @@ struct br_input_skb_cb { #endif u16 frag_max_size; + bool proxyarp_replied; #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; @@ -762,6 +763,11 @@ static inline int br_vlan_enabled(struct net_bridge *br) } #endif +struct nf_br_ops { + int (*br_dev_xmit_hook)(struct sk_buff *skb); +}; +extern const struct nf_br_ops __rcu *nf_br_ops; + /* br_netfilter.c */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) int br_nf_core_init(void); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 2de5d91199e8..4905845a94e9 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); BRPORT_ATTR_FLAG(learning, BR_LEARNING); BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); +BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -215,6 +216,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_fast_leave, #endif &brport_attr_proxyarp, + &brport_attr_proxyarp_wifi, NULL }; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 3244aead0926..54a2fdf0f457 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -21,6 +21,7 @@ #include <net/ip.h> #include <net/ip6_checksum.h> #include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv6.h> #include "../br_private.h" static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, @@ -36,7 +37,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, skb_pull(nskb, ETH_HLEN); } -static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) + * or the bridge port (NF_BRIDGE PREROUTING). + */ +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, + const struct net_device *dev, + int hook) { struct sk_buff *nskb; struct iphdr *niph; @@ -65,11 +71,12 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } -static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, - u8 code) +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) { struct sk_buff *nskb; struct iphdr *niph; @@ -77,8 +84,9 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, unsigned int len; void *payload; __wsum csum; + u8 proto; - if (!nft_bridge_iphdr_validate(oldskb)) + if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; /* IP header checks: fragment. */ @@ -91,7 +99,17 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, if (!pskb_may_pull(oldskb, len)) return; - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) + return; + + if (ip_hdr(oldskb)->protocol == IPPROTO_TCP || + ip_hdr(oldskb)->protocol == IPPROTO_UDP) + proto = ip_hdr(oldskb)->protocol; + else + proto = 0; + + if (!skb_csum_unnecessary(oldskb) && + nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) return; nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + @@ -120,11 +138,13 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } static void nft_reject_br_send_v6_tcp_reset(struct net *net, - struct sk_buff *oldskb, int hook) + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) { struct sk_buff *nskb; const struct tcphdr *oth; @@ -152,12 +172,37 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); +} + +static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto = ip6h->nexthdr; + + if (skb->csum_bad) + return false; + + if (skb_csum_unnecessary(skb)) + return true; + + if (ip6h->payload_len && + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } static void nft_reject_br_send_v6_unreach(struct net *net, - struct sk_buff *oldskb, int hook, - u8 code) + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) { struct sk_buff *nskb; struct ipv6hdr *nip6h; @@ -176,6 +221,9 @@ static void nft_reject_br_send_v6_unreach(struct net *net, if (!pskb_may_pull(oldskb, len)) return; + if (!reject6_br_csum_ok(oldskb, hook)) + return; + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + LL_MAX_HEADER + len, GFP_ATOMIC); if (!nskb) @@ -205,7 +253,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } static void nft_reject_bridge_eval(const struct nft_expr *expr, @@ -224,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, + nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, pkt->ops->hooknum, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->skb, + nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, + nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, pkt->ops->hooknum, nft_reject_icmp_code(priv->icmp_code)); break; @@ -242,16 +290,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, + nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, pkt->ops->hooknum, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, + nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, pkt->ops->hooknum, nft_reject_icmpv6_code(priv->icmp_code)); break; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 769b185fefbd..4ec0c803aef1 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -271,8 +271,8 @@ static void caif_check_flow_release(struct sock *sk) * Copied from unix_dgram_recvmsg, but removed credit checks, * changed locking, address handling and added MSG_TRUNC. */ -static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) +static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, + size_t len, int flags) { struct sock *sk = sock->sk; @@ -281,7 +281,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, int copylen; ret = -EOPNOTSUPP; - if (m->msg_flags&MSG_OOB) + if (flags & MSG_OOB) goto read_error; skb = skb_recv_datagram(sk, flags, 0 , &ret); @@ -343,9 +343,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) * Copied from unix_stream_recvmsg, but removed credit checks, * changed locking calls, changed address handling. */ -static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; int copied = 0; @@ -511,8 +510,8 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, } /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ -static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -586,8 +585,8 @@ err: * Changed removed permission handling and added waiting for flow on * and other minor adaptations. */ -static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 8bc7caa28e64..434ba8557826 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -84,7 +84,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) u16 tmp; u16 len; u16 hdrchks; - u16 pktchks; + int pktchks; struct cffrml *this; this = container_obj(layr); diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 1be0b521ac49..f6c3b2137eea 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -255,9 +255,9 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt) return skb->len; } -inline u16 cfpkt_iterate(struct cfpkt *pkt, - u16 (*iter_func)(u16, void *, u16), - u16 data) +int cfpkt_iterate(struct cfpkt *pkt, + u16 (*iter_func)(u16, void *, u16), + u16 data) { /* * Don't care about the performance hit of linearizing, diff --git a/net/can/af_can.c b/net/can/af_can.c index 66e08040ced7..32d710eaf1fc 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -259,6 +259,9 @@ int can_send(struct sk_buff *skb, int loop) goto inval_skb; } + skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); diff --git a/net/can/bcm.c b/net/can/bcm.c index ee9ffd956552..b523453585be 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -328,7 +328,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, * containing the interface index. */ - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can)); + sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; @@ -1231,8 +1231,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) /* * bcm_sendmsg - process BCM commands (opcodes) from the userspace */ -static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); @@ -1535,8 +1534,8 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, return 0; } -static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/can/raw.c b/net/can/raw.c index 00c13ef23661..63ffdb0f3a23 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -95,8 +95,8 @@ struct raw_sock { */ static inline unsigned int *raw_flags(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) + - sizeof(unsigned int))); + sock_skb_cb_check_size(sizeof(struct sockaddr_can) + + sizeof(unsigned int)); /* return pointer after struct sockaddr_can */ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]); @@ -135,7 +135,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) * containing the interface index. */ - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can)); + sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; @@ -658,8 +658,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -728,8 +727,8 @@ send_failed: return err; } -static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 5d5ab67f516d..ec565508e904 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -239,6 +239,8 @@ enum { Opt_nocrc, Opt_cephx_require_signatures, Opt_nocephx_require_signatures, + Opt_tcp_nodelay, + Opt_notcp_nodelay, }; static match_table_t opt_tokens = { @@ -259,6 +261,8 @@ static match_table_t opt_tokens = { {Opt_nocrc, "nocrc"}, {Opt_cephx_require_signatures, "cephx_require_signatures"}, {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, + {Opt_tcp_nodelay, "tcp_nodelay"}, + {Opt_notcp_nodelay, "notcp_nodelay"}, {-1, NULL} }; @@ -457,6 +461,7 @@ ceph_parse_options(char *options, const char *dev_name, case Opt_nocrc: opt->flags |= CEPH_OPT_NOCRC; break; + case Opt_cephx_require_signatures: opt->flags &= ~CEPH_OPT_NOMSGAUTH; break; @@ -464,6 +469,13 @@ ceph_parse_options(char *options, const char *dev_name, opt->flags |= CEPH_OPT_NOMSGAUTH; break; + case Opt_tcp_nodelay: + opt->flags |= CEPH_OPT_TCP_NODELAY; + break; + case Opt_notcp_nodelay: + opt->flags &= ~CEPH_OPT_TCP_NODELAY; + break; + default: BUG_ON(token); } @@ -518,10 +530,12 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, /* msgr */ if (ceph_test_opt(client, MYIP)) myaddr = &client->options->my_addr; + ceph_messenger_init(&client->msgr, myaddr, client->supported_features, client->required_features, - ceph_test_opt(client, NOCRC)); + ceph_test_opt(client, NOCRC), + ceph_test_opt(client, TCP_NODELAY)); /* subsystems */ err = ceph_monc_init(&client->monc, client); diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 30560202f57b..139a9cb19b0c 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -42,17 +42,3 @@ const char *ceph_osd_state_name(int s) return "???"; } } - -const char *ceph_pool_op_name(int op) -{ - switch (op) { - case POOL_OP_CREATE: return "create"; - case POOL_OP_DELETE: return "delete"; - case POOL_OP_AUID_CHANGE: return "auid change"; - case POOL_OP_CREATE_SNAP: return "create snap"; - case POOL_OP_DELETE_SNAP: return "delete snap"; - case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; - case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; - } - return "???"; -} diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index d2d525529f87..14d9995097cc 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -127,8 +127,6 @@ static int monc_show(struct seq_file *s, void *p) op = le16_to_cpu(req->request->hdr.type); if (op == CEPH_MSG_STATFS) seq_printf(s, "%llu statfs\n", req->tid); - else if (op == CEPH_MSG_POOLOP) - seq_printf(s, "%llu poolop\n", req->tid); else if (op == CEPH_MSG_MON_GET_VERSION) seq_printf(s, "%llu mon_get_version", req->tid); else diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 33a2f201e460..6b3f54ed65ba 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -510,6 +510,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) return ret; } + if (con->msgr->tcp_nodelay) { + int optval = 1; + + ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, + (char *)&optval, sizeof(optval)); + if (ret) + pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", + ret); + } + sk_set_memalloc(sock->sk); con->sock = sock; @@ -2922,7 +2932,8 @@ void ceph_messenger_init(struct ceph_messenger *msgr, struct ceph_entity_addr *myaddr, u64 supported_features, u64 required_features, - bool nocrc) + bool nocrc, + bool tcp_nodelay) { msgr->supported_features = supported_features; msgr->required_features = required_features; @@ -2937,6 +2948,7 @@ void ceph_messenger_init(struct ceph_messenger *msgr, get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); encode_my_addr(msgr); msgr->nocrc = nocrc; + msgr->tcp_nodelay = tcp_nodelay; atomic_set(&msgr->stopping, 0); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index f2148e22b148..2b3cf05e87b0 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -410,7 +410,7 @@ out_unlocked: } /* - * generic requests (e.g., statfs, poolop) + * generic requests (currently statfs, mon_get_version) */ static struct ceph_mon_generic_request *__lookup_generic_req( struct ceph_mon_client *monc, u64 tid) @@ -569,7 +569,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, return; bad: - pr_err("corrupt generic reply, tid %llu\n", tid); + pr_err("corrupt statfs reply, tid %llu\n", tid); ceph_msg_dump(msg); } @@ -588,7 +588,6 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) kref_init(&req->kref); req->buf = buf; - req->buf_len = sizeof(*buf); init_completion(&req->completion); err = -ENOMEM; @@ -611,7 +610,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) err = do_generic_request(monc, req); out: - kref_put(&req->kref, release_generic_request); + put_generic_request(req); return err; } EXPORT_SYMBOL(ceph_monc_do_statfs); @@ -647,7 +646,7 @@ static void handle_get_version_reply(struct ceph_mon_client *monc, return; bad: - pr_err("corrupt mon_get_version reply\n"); + pr_err("corrupt mon_get_version reply, tid %llu\n", tid); ceph_msg_dump(msg); } @@ -670,7 +669,6 @@ int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what, kref_init(&req->kref); req->buf = newest; - req->buf_len = sizeof(*newest); init_completion(&req->completion); req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION, @@ -701,134 +699,12 @@ int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what, mutex_unlock(&monc->mutex); out: - kref_put(&req->kref, release_generic_request); + put_generic_request(req); return err; } EXPORT_SYMBOL(ceph_monc_do_get_version); /* - * pool ops - */ -static int get_poolop_reply_buf(const char *src, size_t src_len, - char *dst, size_t dst_len) -{ - u32 buf_len; - - if (src_len != sizeof(u32) + dst_len) - return -EINVAL; - - buf_len = le32_to_cpu(*(__le32 *)src); - if (buf_len != dst_len) - return -EINVAL; - - memcpy(dst, src + sizeof(u32), dst_len); - return 0; -} - -static void handle_poolop_reply(struct ceph_mon_client *monc, - struct ceph_msg *msg) -{ - struct ceph_mon_generic_request *req; - struct ceph_mon_poolop_reply *reply = msg->front.iov_base; - u64 tid = le64_to_cpu(msg->hdr.tid); - - if (msg->front.iov_len < sizeof(*reply)) - goto bad; - dout("handle_poolop_reply %p tid %llu\n", msg, tid); - - mutex_lock(&monc->mutex); - req = __lookup_generic_req(monc, tid); - if (req) { - if (req->buf_len && - get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply), - msg->front.iov_len - sizeof(*reply), - req->buf, req->buf_len) < 0) { - mutex_unlock(&monc->mutex); - goto bad; - } - req->result = le32_to_cpu(reply->reply_code); - get_generic_request(req); - } - mutex_unlock(&monc->mutex); - if (req) { - complete(&req->completion); - put_generic_request(req); - } - return; - -bad: - pr_err("corrupt generic reply, tid %llu\n", tid); - ceph_msg_dump(msg); -} - -/* - * Do a synchronous pool op. - */ -static int do_poolop(struct ceph_mon_client *monc, u32 op, - u32 pool, u64 snapid, - char *buf, int len) -{ - struct ceph_mon_generic_request *req; - struct ceph_mon_poolop *h; - int err; - - req = kzalloc(sizeof(*req), GFP_NOFS); - if (!req) - return -ENOMEM; - - kref_init(&req->kref); - req->buf = buf; - req->buf_len = len; - init_completion(&req->completion); - - err = -ENOMEM; - req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS, - true); - if (!req->request) - goto out; - req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS, - true); - if (!req->reply) - goto out; - - /* fill out request */ - req->request->hdr.version = cpu_to_le16(2); - h = req->request->front.iov_base; - h->monhdr.have_version = 0; - h->monhdr.session_mon = cpu_to_le16(-1); - h->monhdr.session_mon_tid = 0; - h->fsid = monc->monmap->fsid; - h->pool = cpu_to_le32(pool); - h->op = cpu_to_le32(op); - h->auid = 0; - h->snapid = cpu_to_le64(snapid); - h->name_len = 0; - - err = do_generic_request(monc, req); - -out: - kref_put(&req->kref, release_generic_request); - return err; -} - -int ceph_monc_create_snapid(struct ceph_mon_client *monc, - u32 pool, u64 *snapid) -{ - return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, - pool, 0, (char *)snapid, sizeof(*snapid)); - -} -EXPORT_SYMBOL(ceph_monc_create_snapid); - -int ceph_monc_delete_snapid(struct ceph_mon_client *monc, - u32 pool, u64 snapid) -{ - return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, - pool, snapid, NULL, 0); - -} - -/* * Resend pending generic requests. */ static void __resend_generic_request(struct ceph_mon_client *monc) @@ -1112,10 +988,6 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) handle_get_version_reply(monc, msg); break; - case CEPH_MSG_POOLOP_REPLY: - handle_poolop_reply(monc, msg); - break; - case CEPH_MSG_MON_MAP: ceph_monc_handle_map(monc, msg); break; @@ -1154,7 +1026,6 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_MON_SUBSCRIBE_ACK: m = ceph_msg_get(monc->m_subscribe_ack); break; - case CEPH_MSG_POOLOP_REPLY: case CEPH_MSG_STATFS_REPLY: return get_generic_reply(con, hdr, skip); case CEPH_MSG_AUTH_REPLY: diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 53299c7b0ca4..41a4abc7e98e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1035,10 +1035,11 @@ static void put_osd(struct ceph_osd *osd) { dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), atomic_read(&osd->o_ref) - 1); - if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) { + if (atomic_dec_and_test(&osd->o_ref)) { struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; - ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); + if (osd->o_auth.authorizer) + ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); kfree(osd); } } @@ -1048,14 +1049,24 @@ static void put_osd(struct ceph_osd *osd) */ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { - dout("__remove_osd %p\n", osd); + dout("%s %p osd%d\n", __func__, osd, osd->o_osd); WARN_ON(!list_empty(&osd->o_requests)); WARN_ON(!list_empty(&osd->o_linger_requests)); - rb_erase(&osd->o_node, &osdc->osds); list_del_init(&osd->o_osd_lru); - ceph_con_close(&osd->o_con); - put_osd(osd); + rb_erase(&osd->o_node, &osdc->osds); + RB_CLEAR_NODE(&osd->o_node); +} + +static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) +{ + dout("%s %p osd%d\n", __func__, osd, osd->o_osd); + + if (!RB_EMPTY_NODE(&osd->o_node)) { + ceph_con_close(&osd->o_con); + __remove_osd(osdc, osd); + put_osd(osd); + } } static void remove_all_osds(struct ceph_osd_client *osdc) @@ -1065,7 +1076,7 @@ static void remove_all_osds(struct ceph_osd_client *osdc) while (!RB_EMPTY_ROOT(&osdc->osds)) { struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), struct ceph_osd, o_node); - __remove_osd(osdc, osd); + remove_osd(osdc, osd); } mutex_unlock(&osdc->request_mutex); } @@ -1106,7 +1117,7 @@ static void remove_old_osds(struct ceph_osd_client *osdc) list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { if (time_before(jiffies, osd->lru_ttl)) break; - __remove_osd(osdc, osd); + remove_osd(osdc, osd); } mutex_unlock(&osdc->request_mutex); } @@ -1121,8 +1132,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { - __remove_osd(osdc, osd); - + remove_osd(osdc, osd); return -ENODEV; } @@ -1926,6 +1936,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) { struct rb_node *p, *n; + dout("%s %p\n", __func__, osdc); for (p = rb_first(&osdc->osds); p; p = n) { struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); diff --git a/net/compat.c b/net/compat.c index 49c6a8fb9f09..c4b6b0f43d5d 100644 --- a/net/compat.c +++ b/net/compat.c @@ -49,6 +49,13 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg, __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + + if (!uaddr) + kmsg->msg_namelen = 0; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control = compat_ptr(tmp3); @@ -72,6 +79,8 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg, if (nr_segs > UIO_MAXIOV) return -EMSGSIZE; + kmsg->msg_iocb = NULL; + err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE, compat_ptr(uiov), nr_segs, UIO_FASTIOV, *iov, iov); @@ -711,24 +720,18 @@ static unsigned char nas[21] = { COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT); } COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } @@ -751,9 +754,6 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, int datagrams; struct timespec ktspec; - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - if (timeout == NULL) return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, NULL); diff --git a/net/core/dev.c b/net/core/dev.c index 8f9710c62e20..65492b0354c0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -946,7 +946,7 @@ bool dev_valid_name(const char *name) return false; while (*name) { - if (*name == '/' || isspace(*name)) + if (*name == '/' || *name == ':' || isspace(*name)) return false; name++; } @@ -1385,7 +1385,7 @@ static int __dev_close(struct net_device *dev) return retval; } -static int dev_close_many(struct list_head *head) +int dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1399,11 +1399,13 @@ static int dev_close_many(struct list_head *head) list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); - list_del_init(&dev->close_list); + if (unlink) + list_del_init(&dev->close_list); } return 0; } +EXPORT_SYMBOL(dev_close_many); /** * dev_close - shutdown an interface. @@ -1420,7 +1422,7 @@ int dev_close(struct net_device *dev) LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_close_many(&single); + dev_close_many(&single, true); list_del(&single); } return 0; @@ -1694,6 +1696,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_scrub_packet(skb, true); + skb->priority = 0; skb->protocol = eth_type_trans(skb, dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -1737,7 +1740,8 @@ static inline int deliver_skb(struct sk_buff *skb, static inline void deliver_ptype_list_skb(struct sk_buff *skb, struct packet_type **pt, - struct net_device *dev, __be16 type, + struct net_device *orig_dev, + __be16 type, struct list_head *ptype_list) { struct packet_type *ptype, *pt_prev = *pt; @@ -1746,7 +1750,7 @@ static inline void deliver_ptype_list_skb(struct sk_buff *skb, if (ptype->type != type) continue; if (pt_prev) - deliver_skb(skb, pt_prev, dev); + deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } *pt = pt_prev; @@ -2559,12 +2563,26 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return features; +} +EXPORT_SYMBOL(passthru_features_check); + +static netdev_features_t dflt_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return vlan_features_check(skb, features); +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; - __be16 protocol = skb->protocol; if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; @@ -2576,34 +2594,17 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!skb_vlan_tag_present(skb)) { - if (unlikely(protocol == htons(ETH_P_8021Q) || - protocol == htons(ETH_P_8021AD))) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else { - goto finalize; - } - } - - features = netdev_intersect_features(features, - dev->vlan_features | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) + if (skb_vlan_tagged(skb)) features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | + dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); -finalize: if (dev->netdev_ops->ndo_features_check) features &= dev->netdev_ops->ndo_features_check(skb, dev, features); + else + features &= dflt_features_check(skb, dev, features); return harmonize_features(skb, features); } @@ -5912,6 +5913,24 @@ int dev_get_phys_port_id(struct net_device *dev, EXPORT_SYMBOL(dev_get_phys_port_id); /** + * dev_get_phys_port_name - Get device physical port name + * @dev: device + * @name: port name + * + * Get device physical port name + */ +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_phys_port_name) + return -EOPNOTSUPP; + return ops->ndo_get_phys_port_name(dev, name, len); +} +EXPORT_SYMBOL(dev_get_phys_port_name); + +/** * dev_new_index - allocate an ifindex * @net: the applicable net namespace * @@ -5968,7 +5987,7 @@ static void rollback_registered_many(struct list_head *head) /* If device is running, close it first. */ list_for_each_entry(dev, head, unreg_list) list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head); + dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ @@ -6841,8 +6860,6 @@ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; - release_net(dev_net(dev)); - netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index eb0c3ace7458..1d00b8922902 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", + [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload", }; static const char diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 44706e81b2e0..68ea6950cad1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -31,7 +31,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; - r->fr_net = hold_net(ops->fro_net); + r->fr_net = ops->fro_net; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -116,7 +116,6 @@ static int __fib_rules_register(struct fib_rules_ops *ops) if (ops->family == o->family) goto errout; - hold_net(net); list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: @@ -160,15 +159,6 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) } } -static void fib_rules_put_rcu(struct rcu_head *head) -{ - struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu); - struct net *net = ops->fro_net; - - release_net(net); - kfree(ops); -} - void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; @@ -178,7 +168,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops) fib_rules_cleanup_ops(ops); spin_unlock(&net->rules_mod_lock); - call_rcu(&ops->rcu, fib_rules_put_rcu); + kfree_rcu(ops, rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); @@ -303,7 +293,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) err = -ENOMEM; goto errout; } - rule->fr_net = hold_net(net); + rule->fr_net = net; if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); @@ -423,7 +413,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) return 0; errout_free: - release_net(rule->fr_net); kfree(rule); errout: rules_ops_put(ops); @@ -492,6 +481,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) goto errout; } + if (ops->delete) { + err = ops->delete(rule); + if (err) + goto errout; + } + list_del_rcu(&rule->list); if (rule->action == FR_ACT_GOTO) { @@ -517,8 +512,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); - if (ops->delete) - ops->delete(rule); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); diff --git a/net/core/filter.c b/net/core/filter.c index f6bdc2b1ba01..444a07e4f68d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -150,10 +150,62 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return prandom_u32(); } +static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (skb_field) { + case SKF_AD_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; + + case SKF_AD_PKTTYPE: + *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5); +#endif + break; + + case SKF_AD_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, queue_mapping)); + break; + + case SKF_AD_VLAN_TAG: + case SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_tci)); + if (skb_field == SKF_AD_VLAN_TAG) { + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, + ~VLAN_TAG_PRESENT); + } else { + /* dst_reg >>= 12 */ + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12); + /* dst_reg &= 1 */ + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1); + } + break; + } + + return insn - insn_buf; +} + static bool convert_bpf_extensions(struct sock_filter *fp, struct bpf_insn **insnp) { struct bpf_insn *insn = *insnp; + u32 cnt; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: @@ -167,13 +219,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - PKT_TYPE_OFFSET()); - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); -#ifdef __BIG_ENDIAN_BITFIELD - insn++; - *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5); -#endif + cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_IFINDEX: @@ -197,10 +244,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - - *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, mark)); + cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_RXHASH: @@ -211,29 +256,30 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, queue_mapping)); + cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: + cnt = convert_skb_access(SKF_AD_VLAN_TAG, + BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; + break; + case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, + BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; + break; + + case SKF_AD_OFF + SKF_AD_VLAN_TPID: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); - /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */ + /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, vlan_tci)); - if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, - ~VLAN_TAG_PRESENT); - } else { - /* A >>= 12 */ - *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); - /* A &= 1 */ - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); - } + offsetof(struct sk_buff, vlan_proto)); + /* A = ntohs(A) [emitting a nop or swap16] */ + *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); break; case SKF_AD_OFF + SKF_AD_PAY_OFFSET: @@ -814,7 +860,7 @@ static void bpf_release_orig_filter(struct bpf_prog *fp) static void __bpf_prog_release(struct bpf_prog *prog) { - if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { + if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); } else { bpf_release_orig_filter(prog); @@ -1019,6 +1065,32 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +{ + struct sk_filter *fp, *old_fp; + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) + return -ENOMEM; + + fp->prog = prog; + atomic_set(&fp->refcnt, 0); + + if (!sk_filter_charge(sk, fp)) { + kfree(fp); + return -ENOMEM; + } + + old_fp = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + rcu_assign_pointer(sk->sk_filter, fp); + + if (old_fp) + sk_filter_uncharge(sk, old_fp); + + return 0; +} + /** * sk_attach_filter - attach a socket filter * @fprog: the filter program @@ -1031,7 +1103,6 @@ EXPORT_SYMBOL_GPL(bpf_prog_destroy); */ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct sk_filter *fp, *old_fp; unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); struct bpf_prog *prog; @@ -1068,36 +1139,20 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - fp = kmalloc(sizeof(*fp), GFP_KERNEL); - if (!fp) { + err = __sk_attach_prog(prog, sk); + if (err < 0) { __bpf_prog_release(prog); - return -ENOMEM; - } - fp->prog = prog; - - atomic_set(&fp->refcnt, 0); - - if (!sk_filter_charge(sk, fp)) { - __sk_filter_release(fp); - return -ENOMEM; + return err; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); - rcu_assign_pointer(sk->sk_filter, fp); - - if (old_fp) - sk_filter_uncharge(sk, old_fp); - return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); -#ifdef CONFIG_BPF_SYSCALL int sk_attach_bpf(u32 ufd, struct sock *sk) { - struct sk_filter *fp, *old_fp; struct bpf_prog *prog; + int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; @@ -1106,40 +1161,72 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { - /* valid fd, but invalid program type */ + if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); return -EINVAL; } - fp = kmalloc(sizeof(*fp), GFP_KERNEL); - if (!fp) { + err = __sk_attach_prog(prog, sk); + if (err < 0) { bpf_prog_put(prog); - return -ENOMEM; + return err; } - fp->prog = prog; - atomic_set(&fp->refcnt, 0); + return 0; +} - if (!sk_filter_charge(sk, fp)) { - __sk_filter_release(fp); - return -ENOMEM; - } +static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + unsigned int offset = (unsigned int) r2; + void *from = (void *) (long) r3; + unsigned int len = (unsigned int) r4; + char buf[16]; + void *ptr; + + /* bpf verifier guarantees that: + * 'from' pointer points to bpf program stack + * 'len' bytes of it were initialized + * 'len' > 0 + * 'skb' is a valid pointer to 'struct sk_buff' + * + * so check for invalid 'offset' and too large 'len' + */ + if (offset > 0xffff || len > sizeof(buf)) + return -EFAULT; - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); - rcu_assign_pointer(sk->sk_filter, fp); + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) + return -EFAULT; - if (old_fp) - sk_filter_uncharge(sk, old_fp); + ptr = skb_header_pointer(skb, offset, len, buf); + if (unlikely(!ptr)) + return -EFAULT; + + skb_postpull_rcsum(skb, ptr, len); + + memcpy(ptr, from, len); + if (ptr == buf) + /* skb_store_bits cannot return -EFAULT here */ + skb_store_bits(skb, offset, ptr, len); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); return 0; } -/* allow socket filters to call - * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() - */ -static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id) +const struct bpf_func_proto bpf_skb_store_bytes_proto = { + .func = bpf_skb_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE, +}; + +static const struct bpf_func_proto * +sk_filter_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -1148,39 +1235,133 @@ static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; default: return NULL; } } -static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type) +static const struct bpf_func_proto * +tc_cls_act_func_proto(enum bpf_func_id func_id) { - /* skb fields cannot be accessed yet */ - return false; + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + default: + return sk_filter_func_proto(func_id); + } +} + +static bool sk_filter_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + /* only read is allowed */ + if (type != BPF_READ) + return false; + + /* check bounds */ + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + + /* disallow misaligned access */ + if (off % size != 0) + return false; + + /* all __sk_buff fields are __u32 */ + if (size != 4) + return false; + + return true; } -static struct bpf_verifier_ops sock_filter_ops = { - .get_func_proto = sock_filter_func_proto, - .is_valid_access = sock_filter_is_valid_access, +static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct __sk_buff, len): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, len)); + break; + + case offsetof(struct __sk_buff, protocol): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, protocol)); + break; + + case offsetof(struct __sk_buff, vlan_proto): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_proto)); + break; + + case offsetof(struct __sk_buff, mark): + return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, pkt_type): + return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, queue_mapping): + return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, vlan_present): + return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, + dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, vlan_tci): + return convert_skb_access(SKF_AD_VLAN_TAG, + dst_reg, src_reg, insn); + } + + return insn - insn_buf; +} + +static const struct bpf_verifier_ops sk_filter_ops = { + .get_func_proto = sk_filter_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, +}; + +static const struct bpf_verifier_ops tc_cls_act_ops = { + .get_func_proto = tc_cls_act_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, }; -static struct bpf_prog_type_list tl = { - .ops = &sock_filter_ops, +static struct bpf_prog_type_list sk_filter_type __read_mostly = { + .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; -static int __init register_sock_filter_ops(void) +static struct bpf_prog_type_list sched_cls_type __read_mostly = { + .ops = &tc_cls_act_ops, + .type = BPF_PROG_TYPE_SCHED_CLS, +}; + +static struct bpf_prog_type_list sched_act_type __read_mostly = { + .ops = &tc_cls_act_ops, + .type = BPF_PROG_TYPE_SCHED_ACT, +}; + +static int __init register_sk_filter_ops(void) { - bpf_register_prog_type(&tl); + bpf_register_prog_type(&sk_filter_type); + bpf_register_prog_type(&sched_cls_type); + bpf_register_prog_type(&sched_act_type); + return 0; } -late_initcall(register_sock_filter_ops); -#else -int sk_attach_bpf(u32 ufd, struct sock *sk) -{ - return -EOPNOTSUPP; -} -#endif +late_initcall(register_sk_filter_ops); + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 0c08062d1796..1e2f46a69d50 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -32,6 +32,9 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) return 0; nla_put_failure: + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return -1; } @@ -305,7 +308,9 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) { if (d->compat_xstats) { - d->xstats = st; + d->xstats = kmemdup(st, len, GFP_ATOMIC); + if (!d->xstats) + goto err_out; d->xstats_len = len; } @@ -313,6 +318,11 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) return gnet_stats_copy(d, TCA_STATS_APP, st, len); return 0; + +err_out: + d->xstats_len = 0; + spin_unlock_bh(d->lock); + return -1; } EXPORT_SYMBOL(gnet_stats_copy_app); @@ -345,6 +355,9 @@ gnet_stats_finish_copy(struct gnet_dump *d) return -1; } + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return 0; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 70fe9e10ac86..3de654256028 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -397,25 +397,15 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n; - int key_len = tbl->key_len; - u32 hash_val; - struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, lookups); rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); - - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference_bh(n->next)) { - if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { - if (!atomic_inc_not_zero(&n->refcnt)) - n = NULL; - NEIGH_CACHE_STAT_INC(tbl, hits); - break; - } + n = __neigh_lookup_noref(tbl, pkey, dev); + if (n) { + if (!atomic_inc_not_zero(&n->refcnt)) + n = NULL; + NEIGH_CACHE_STAT_INC(tbl, hits); } rcu_read_unlock_bh(); @@ -601,7 +591,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; - write_pnet(&n->net, hold_net(net)); + write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -610,7 +600,6 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (tbl->pconstructor && tbl->pconstructor(n)) { if (dev) dev_put(dev); - release_net(net); kfree(n); n = NULL; goto out; @@ -644,7 +633,6 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(pneigh_net(n)); kfree(n); return 0; } @@ -667,7 +655,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(pneigh_net(n)); kfree(n); continue; } @@ -830,10 +817,9 @@ out: static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; - int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES); - if (!(n->nud_state & NUD_PROBE)) - max_probes += NEIGH_VAR(p, MCAST_PROBES); - return max_probes; + return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + + (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) : + NEIGH_VAR(p, MCAST_PROBES)); } static void neigh_invalidate(struct neighbour *neigh) @@ -1263,10 +1249,10 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, EXPORT_SYMBOL(neigh_event_ns); /* called with read_lock_bh(&n->lock); */ -static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) +static void neigh_hh_init(struct neighbour *n) { - struct net_device *dev = dst->dev; - __be16 prot = dst->ops->protocol; + struct net_device *dev = n->dev; + __be16 prot = n->tbl->protocol; struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); @@ -1280,43 +1266,19 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) write_unlock_bh(&n->lock); } -/* This function can be used in contexts, where only old dev_queue_xmit - * worked, f.e. if you want to override normal output path (eql, shaper), - * but resolution is not made yet. - */ - -int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - - __skb_pull(skb, skb_network_offset(skb)); - - if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, - skb->len) < 0 && - dev_rebuild_header(skb)) - return 0; - - return dev_queue_xmit(skb); -} -EXPORT_SYMBOL(neigh_compat_output); - /* Slow and careful. */ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); int rc = 0; - if (!dst) - goto discard; - if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; unsigned int seq; if (dev->header_ops->cache && !neigh->hh.hh_len) - neigh_hh_init(neigh, dst); + neigh_hh_init(neigh); do { __skb_pull(skb, skb_network_offset(skb)); @@ -1332,8 +1294,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) } out: return rc; -discard: - neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1464,11 +1424,10 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); p->dev = dev; - write_pnet(&p->net, hold_net(net)); + write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - release_net(net); dev_put(dev); kfree(p); return NULL; @@ -1508,7 +1467,6 @@ EXPORT_SYMBOL(neigh_parms_release); static void neigh_parms_destroy(struct neigh_parms *parms) { - release_net(neigh_parms_net(parms)); kfree(parms); } @@ -1783,6 +1741,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) NEIGH_VAR(parms, UCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_PROBES, NEIGH_VAR(parms, MCAST_PROBES)) || + nla_put_u32(skb, NDTPA_MCAST_REPROBES, + NEIGH_VAR(parms, MCAST_REPROBES)) || nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || @@ -1942,6 +1902,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_APP_PROBES] = { .type = NLA_U32 }, [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 }, [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, @@ -2042,6 +2003,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) NEIGH_VAR_SET(p, MCAST_PROBES, nla_get_u32(tbp[i])); break; + case NDTPA_MCAST_REPROBES: + NEIGH_VAR_SET(p, MCAST_REPROBES, + nla_get_u32(tbp[i])); + break; case NDTPA_BASE_REACHABLE_TIME: NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, nla_get_msecs(tbp[i])); @@ -2427,6 +2392,40 @@ void __neigh_for_each_release(struct neigh_table *tbl, } EXPORT_SYMBOL(__neigh_for_each_release); +int neigh_xmit(int index, struct net_device *dev, + const void *addr, struct sk_buff *skb) +{ + int err = -EAFNOSUPPORT; + if (likely(index < NEIGH_NR_TABLES)) { + struct neigh_table *tbl; + struct neighbour *neigh; + + tbl = neigh_tables[index]; + if (!tbl) + goto out; + neigh = __neigh_lookup_noref(tbl, addr, dev); + if (!neigh) + neigh = __neigh_create(tbl, addr, dev, false); + err = PTR_ERR(neigh); + if (IS_ERR(neigh)) + goto out_kfree_skb; + err = neigh->output(neigh, skb); + } + else if (index == NEIGH_LINK_TABLE) { + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + addr, NULL, skb->len); + if (err < 0) + goto out_kfree_skb; + err = dev_queue_xmit(skb); + } +out: + return err; +out_kfree_skb: + kfree_skb(skb); + goto out; +} +EXPORT_SYMBOL(neigh_xmit); + #ifdef CONFIG_PROC_FS static struct neighbour *neigh_get_first(struct seq_file *seq) @@ -2994,6 +2993,7 @@ static struct neigh_sysctl_table { NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f2aa73bfb0e4..cc5cf689809c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -23,6 +23,7 @@ #include <linux/export.h> #include <linux/jiffies.h> #include <linux/pm_runtime.h> +#include <linux/of.h> #include "net-sysfs.h" @@ -417,6 +418,28 @@ static ssize_t phys_port_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_port_id); +static ssize_t phys_port_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (dev_isalive(netdev)) { + char name[IFNAMSIZ]; + + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sprintf(buf, "%s\n", name); + } + rtnl_unlock(); + + return ret; +} +static DEVICE_ATTR_RO(phys_port_name); + static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -464,6 +487,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_tx_queue_len.attr, &dev_attr_gro_flush_timeout.attr, &dev_attr_phys_port_id.attr, + &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, NULL, }; @@ -950,6 +974,60 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, return sprintf(buf, "%lu", trans_timeout); } +#ifdef CONFIG_XPS +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +{ + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + +static ssize_t show_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + return sprintf(buf, "%lu\n", queue->tx_maxrate); +} + +static ssize_t set_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + int err, index = get_netdev_queue_index(queue); + u32 rate = 0; + + err = kstrtou32(buf, 10, &rate); + if (err < 0) + return err; + + if (!rtnl_trylock()) + return restart_syscall(); + + err = -EOPNOTSUPP; + if (dev->netdev_ops->ndo_set_tx_maxrate) + err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); + + rtnl_unlock(); + if (!err) { + queue->tx_maxrate = rate; + return len; + } + return err; +} + +static struct netdev_queue_attribute queue_tx_maxrate = + __ATTR(tx_maxrate, S_IRUGO | S_IWUSR, + show_tx_maxrate, set_tx_maxrate); +#endif + static struct netdev_queue_attribute queue_trans_timeout = __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); @@ -1064,18 +1142,6 @@ static struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static unsigned int get_netdev_queue_index(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - unsigned int i; - - i = queue - dev->_tx; - BUG_ON(i >= dev->num_tx_queues); - - return i; -} - - static ssize_t show_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) { @@ -1152,6 +1218,7 @@ static struct attribute *netdev_queue_default_attrs[] = { &queue_trans_timeout.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, + &queue_tx_maxrate.attr, #endif NULL }; @@ -1374,6 +1441,30 @@ static struct class net_class = { .namespace = net_namespace, }; +#ifdef CONFIG_OF_NET +static int of_dev_node_match(struct device *dev, const void *data) +{ + int ret = 0; + + if (dev->parent) + ret = dev->parent->of_node == data; + + return ret == 0 ? dev->of_node == data : ret; +} + +struct net_device *of_find_net_device_by_node(struct device_node *np) +{ + struct device *dev; + + dev = class_find_device(&net_class, NULL, np, of_dev_node_match); + if (!dev) + return NULL; + + return to_net_dev(dev); +} +EXPORT_SYMBOL(of_find_net_device_by_node); +#endif + /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cb5290b8c428..e5e96b0f6717 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -236,10 +236,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->user_ns = user_ns; idr_init(&net->netns_ids); -#ifdef NETNS_REFCNT_DEBUG - atomic_set(&net->use_count, 0); -#endif - list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); if (error < 0) @@ -294,13 +290,6 @@ out_free: static void net_free(struct net *net) { -#ifdef NETNS_REFCNT_DEBUG - if (unlikely(atomic_read(&net->use_count) != 0)) { - pr_emerg("network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } -#endif kfree(rcu_access_pointer(net->gen)); kmem_cache_free(net_cachep, net); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b4899f5b7388..508155b283dd 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1134,6 +1134,9 @@ static ssize_t pktgen_if_write(struct file *file, return len; i += len; + if ((value > 1) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + return -ENOTSUPP; pkt_dev->burst = value < 1 ? 1 : value; sprintf(pg_result, "OK: burst=%d", pkt_dev->burst); return count; diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 04db318e6218..87b22c0bc08c 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -58,14 +58,14 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, return -ENOMEM; get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); - rwlock_init(&queue->syn_wait_lock); + spin_lock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; lopt->max_qlen_log = ilog2(nr_table_entries); - write_lock_bh(&queue->syn_wait_lock); + spin_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; - write_unlock_bh(&queue->syn_wait_lock); + spin_unlock_bh(&queue->syn_wait_lock); return 0; } @@ -81,10 +81,10 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk( { struct listen_sock *lopt; - write_lock_bh(&queue->syn_wait_lock); + spin_lock_bh(&queue->syn_wait_lock); lopt = queue->listen_opt; queue->listen_opt = NULL; - write_unlock_bh(&queue->syn_wait_lock); + spin_unlock_bh(&queue->syn_wait_lock); return lopt; } @@ -94,21 +94,26 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - if (lopt->qlen != 0) { + if (listen_sock_qlen(lopt) != 0) { unsigned int i; for (i = 0; i < lopt->nr_table_entries; i++) { struct request_sock *req; + spin_lock_bh(&queue->syn_wait_lock); while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; - lopt->qlen--; - reqsk_free(req); + atomic_inc(&lopt->qlen_dec); + if (del_timer(&req->rsk_timer)) + reqsk_put(req); + reqsk_put(req); } + spin_unlock_bh(&queue->syn_wait_lock); } } - WARN_ON(lopt->qlen != 0); + if (WARN_ON(listen_sock_qlen(lopt) != 0)) + pr_err("qlen %u\n", listen_sock_qlen(lopt)); kvfree(lopt); } @@ -153,24 +158,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) * case might also exist in tcp_v4_hnd_req() that will trigger this locking * order. * - * When a TFO req is created, it needs to sock_hold its listener to prevent - * the latter data structure from going away. - * - * This function also sets "treq->listener" to NULL and unreference listener - * socket. treq->listener is used by the listener so it is protected by the + * This function also sets "treq->tfo_listener" to false. + * treq->tfo_listener is used by the listener so it is protected by the * fastopenq->lock in this function. */ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset) { - struct sock *lsk = tcp_rsk(req)->listener; - struct fastopen_queue *fastopenq = - inet_csk(lsk)->icsk_accept_queue.fastopenq; + struct sock *lsk = req->rsk_listener; + struct fastopen_queue *fastopenq; + + fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); fastopenq->qlen--; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; if (req->sk) /* the child socket hasn't been accepted yet */ goto out; @@ -179,8 +182,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * special RST handling below. */ spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); - reqsk_free(req); + reqsk_put(req); return; } /* Wait for 60secs before removing a req that has triggered RST. @@ -190,7 +192,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * * For more details see CoNext'11 "TCP Fast Open" paper. */ - req->expires = jiffies + 60*HZ; + req->rsk_timer.expires = jiffies + 60*HZ; if (fastopenq->rskq_rst_head == NULL) fastopenq->rskq_rst_head = req; else @@ -201,5 +203,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, fastopenq->qlen++; out: spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab293a3066b3..b96ac2109c82 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -982,6 +982,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) +{ + char name[IFNAMSIZ]; + int err; + + err = dev_get_phys_port_name(dev, name, sizeof(name)); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + return -EMSGSIZE; + + return 0; +} + static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; @@ -1072,6 +1090,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_phys_port_id_fill(skb, dev)) goto nla_put_failure; + if (rtnl_phys_port_name_fill(skb, dev)) + goto nla_put_failure; + if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; @@ -1300,7 +1321,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_idx = cb->args[1]; - rcu_read_lock(); cb->seq = net->dev_base_seq; /* A hack to preserve kernel<->userspace interface. @@ -1322,7 +1342,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { + hlist_for_each_entry(dev, head, index_hlist) { if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1344,7 +1364,6 @@ cont: } } out: - rcu_read_unlock(); cb->args[1] = idx; cb->args[0] = h; @@ -1817,6 +1836,42 @@ errout: return err; } +static int rtnl_group_dellink(const struct net *net, int group) +{ + struct net_device *dev, *aux; + LIST_HEAD(list_kill); + bool found = false; + + if (!group) + return -EPERM; + + for_each_netdev(net, dev) { + if (dev->group == group) { + const struct rtnl_link_ops *ops; + + found = true; + ops = dev->rtnl_link_ops; + if (!ops || !ops->dellink) + return -EOPNOTSUPP; + } + } + + if (!found) + return -ENODEV; + + for_each_netdev_safe(net, dev, aux) { + if (dev->group == group) { + const struct rtnl_link_ops *ops; + + ops = dev->rtnl_link_ops; + ops->dellink(dev, &list_kill); + } + } + unregister_netdevice_many(&list_kill); + + return 0; +} + static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); @@ -1840,6 +1895,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) dev = __dev_get_by_name(net, ifname); + else if (tb[IFLA_GROUP]) + return rtnl_group_dellink(net, nla_get_u32(tb[IFLA_GROUP])); else return -EINVAL; @@ -2012,8 +2069,8 @@ replay: } if (1) { - struct nlattr *attr[ops ? ops->maxtype + 1 : 0]; - struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; + struct nlattr *attr[ops ? ops->maxtype + 1 : 1]; + struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 1]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; struct net *dest_net, *link_net = NULL; @@ -2122,6 +2179,10 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); + err = -EPERM; + if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN)) + goto out; + if (tb[IFLA_LINK_NETNSID]) { int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); @@ -2130,6 +2191,9 @@ replay: err = -EINVAL; goto out; } + err = -EPERM; + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) + goto out; } dev = rtnl_create_link(link_net ? : dest_net, ifname, @@ -2161,28 +2225,28 @@ replay: } } err = rtnl_configure_link(dev, ifm); - if (err < 0) { - if (ops->newlink) { - LIST_HEAD(list_kill); - - ops->dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); - } else { - unregister_netdevice(dev); - } - goto out; - } - + if (err < 0) + goto out_unregister; if (link_net) { err = dev_change_net_namespace(dev, dest_net, ifname); if (err < 0) - unregister_netdevice(dev); + goto out_unregister; } out: if (link_net) put_net(link_net); put_net(dest_net); return err; +out_unregister: + if (ops->newlink) { + LIST_HEAD(list_kill); + + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + } else { + unregister_netdevice(dev); + } + goto out; } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 374e43bc6b80..cdb939b731aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3206,10 +3206,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); - struct sk_buff *nskb, *lp, *p = *head; unsigned int len = skb_gro_len(skb); + struct sk_buff *lp, *p = *head; unsigned int delta_truesize; - unsigned int headroom; if (unlikely(p->len + len >= 65536)) return -E2BIG; @@ -3276,48 +3275,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } - /* switch back to head shinfo */ - pinfo = skb_shinfo(p); - - if (pinfo->frag_list) - goto merge; - if (skb_gro_len(p) != pinfo->gso_size) - return -E2BIG; - - headroom = skb_headroom(p); - nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); - if (unlikely(!nskb)) - return -ENOMEM; - - __copy_skb_header(nskb, p); - nskb->mac_len = p->mac_len; - - skb_reserve(nskb, headroom); - __skb_put(nskb, skb_gro_offset(p)); - - skb_set_mac_header(nskb, skb_mac_header(p) - p->data); - skb_set_network_header(nskb, skb_network_offset(p)); - skb_set_transport_header(nskb, skb_transport_offset(p)); - - __skb_pull(p, skb_gro_offset(p)); - memcpy(skb_mac_header(nskb), skb_mac_header(p), - p->data - skb_mac_header(p)); - - skb_shinfo(nskb)->frag_list = p; - skb_shinfo(nskb)->gso_size = pinfo->gso_size; - pinfo->gso_size = 0; - __skb_header_release(p); - NAPI_GRO_CB(nskb)->last = p; - - nskb->data_len += p->len; - nskb->truesize += p->truesize; - nskb->len += p->len; - - *head = nskb; - nskb->next = p->next; - p->next = NULL; - - p = nskb; merge: delta_truesize = skb->truesize; @@ -3620,13 +3577,14 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) { struct sk_buff_head *q = &sk->sk_error_queue; struct sk_buff *skb, *skb_next; + unsigned long flags; int err = 0; - spin_lock_bh(&q->lock); + spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) err = SKB_EXT_ERR(skb_next)->ee.ee_errno; - spin_unlock_bh(&q->lock); + spin_unlock_irqrestore(&q->lock, flags); sk->sk_err = err; if (err) @@ -3731,9 +3689,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; - bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; + bool tsonly; + + if (!sk) + return; - if (!sk || !skb_may_tx_timestamp(sk, tsonly)) + tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; + if (!skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) @@ -4171,7 +4133,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; - skb->sender_cpu = 0; + skb_sender_cpu_clear(skb); skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 93c8b20c91e4..119ae464b44a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -466,7 +466,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb_dst_force(skb); spin_lock_irqsave(&list->lock, flags); - skb->dropcount = atomic_read(&sk->sk_drops); + sock_skb_set_dropcount(sk, skb); __skb_queue_tail(list, skb); spin_unlock_irqrestore(&list->lock, flags); @@ -928,8 +928,6 @@ set_rcvbuf: sk->sk_mark = val; break; - /* We implement the SO_SNDLOWAT etc to - not be settable (1003.1g 5.3) */ case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; @@ -1234,6 +1232,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ return -ENOPROTOOPT; } @@ -1454,9 +1455,8 @@ void sk_release_kernel(struct sock *sk) return; sock_hold(sk); - sock_release(sk->sk_socket); - release_net(sock_net(sk)); sock_net_set(sk, get_net(&init_net)); + sock_release(sk->sk_socket); sock_put(sk); } EXPORT_SYMBOL(sk_release_kernel); @@ -1538,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); + atomic64_set(&newsk->sk_cookie, 0); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -1655,25 +1656,16 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +/* + * Buffer destructor for skbs that are not used directly in read or write + * path, e.g. for error handler skbs. Automatically called from kfree_skb. + */ void sock_efree(struct sk_buff *skb) { sock_put(skb->sk); } EXPORT_SYMBOL(sock_efree); -#ifdef CONFIG_INET -void sock_edemux(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); -} -EXPORT_SYMBOL(sock_edemux); -#endif - kuid_t sock_i_uid(struct sock *sk) { kuid_t uid; @@ -2163,15 +2155,14 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(sock_no_getsockopt); -int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len) +int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg); -int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len, int flags) +int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, + int flags) { return -EOPNOTSUPP; } @@ -2543,14 +2534,14 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, EXPORT_SYMBOL(compat_sock_common_getsockopt); #endif -int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; @@ -2727,6 +2718,42 @@ static inline void release_proto_idx(struct proto *prot) } #endif +static void req_prot_cleanup(struct request_sock_ops *rsk_prot) +{ + if (!rsk_prot) + return; + kfree(rsk_prot->slab_name); + rsk_prot->slab_name = NULL; + if (rsk_prot->slab) { + kmem_cache_destroy(rsk_prot->slab); + rsk_prot->slab = NULL; + } +} + +static int req_prot_init(const struct proto *prot) +{ + struct request_sock_ops *rsk_prot = prot->rsk_prot; + + if (!rsk_prot) + return 0; + + rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", + prot->name); + if (!rsk_prot->slab_name) + return -ENOMEM; + + rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, + rsk_prot->obj_size, 0, + 0, NULL); + + if (!rsk_prot->slab) { + pr_crit("%s: Can't create request sock SLAB cache!\n", + prot->name); + return -ENOMEM; + } + return 0; +} + int proto_register(struct proto *prot, int alloc_slab) { if (alloc_slab) { @@ -2740,21 +2767,8 @@ int proto_register(struct proto *prot, int alloc_slab) goto out; } - if (prot->rsk_prot != NULL) { - prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); - if (prot->rsk_prot->slab_name == NULL) - goto out_free_sock_slab; - - prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, - prot->rsk_prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); - - if (prot->rsk_prot->slab == NULL) { - pr_crit("%s: Can't create request sock SLAB cache!\n", - prot->name); - goto out_free_request_sock_slab_name; - } - } + if (req_prot_init(prot)) + goto out_free_request_sock_slab; if (prot->twsk_prot != NULL) { prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); @@ -2783,14 +2797,8 @@ int proto_register(struct proto *prot, int alloc_slab) out_free_timewait_sock_slab_name: kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: - if (prot->rsk_prot && prot->rsk_prot->slab) { - kmem_cache_destroy(prot->rsk_prot->slab); - prot->rsk_prot->slab = NULL; - } -out_free_request_sock_slab_name: - if (prot->rsk_prot) - kfree(prot->rsk_prot->slab_name); -out_free_sock_slab: + req_prot_cleanup(prot->rsk_prot); + kmem_cache_destroy(prot->slab); prot->slab = NULL; out: @@ -2810,11 +2818,7 @@ void proto_unregister(struct proto *prot) prot->slab = NULL; } - if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { - kmem_cache_destroy(prot->rsk_prot->slab); - kfree(prot->rsk_prot->slab_name); - prot->rsk_prot->slab = NULL; - } + req_prot_cleanup(prot->rsk_prot); if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { kmem_cache_destroy(prot->twsk_prot->twsk_slab); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index ad704c757bb4..74dddf84adcd 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); -int sock_diag_check_cookie(void *sk, __u32 *cookie) +static u64 sock_gen_cookie(struct sock *sk) { - if ((cookie[0] != INET_DIAG_NOCOOKIE || - cookie[1] != INET_DIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) - return -ESTALE; - else + while (1) { + u64 res = atomic64_read(&sk->sk_cookie); + + if (res) + return res; + res = atomic64_inc_return(&sock_net(sk)->cookie_gen); + atomic64_cmpxchg(&sk->sk_cookie, 0, res); + } +} + +int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie) +{ + u64 res; + + if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE) return 0; + + res = sock_gen_cookie(sk); + if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1]) + return -ESTALE; + + return 0; } EXPORT_SYMBOL_GPL(sock_diag_check_cookie); -void sock_diag_save_cookie(void *sk, __u32 *cookie) +void sock_diag_save_cookie(struct sock *sk, __u32 *cookie) { - cookie[0] = (u32)(unsigned long)sk; - cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + u64 res = sock_gen_cookie(sk); + + cookie[0] = (u32)res; + cookie[1] = (u32)(res >> 32); } EXPORT_SYMBOL_GPL(sock_diag_save_cookie); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 433424804284..95b6139d710c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,7 +24,8 @@ static int zero = 0; static int one = 1; -static int ushort_max = USHRT_MAX; +static int min_sndbuf = SOCK_MIN_SNDBUF; +static int min_rcvbuf = SOCK_MIN_RCVBUF; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -237,7 +238,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_sndbuf, }, { .procname = "rmem_max", @@ -245,7 +246,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_rcvbuf, }, { .procname = "wmem_default", @@ -253,7 +254,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_sndbuf, }, { .procname = "rmem_default", @@ -261,7 +262,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_rcvbuf, }, { .procname = "dev_weight", @@ -401,7 +402,6 @@ static struct ctl_table netns_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .extra1 = &zero, - .extra2 = &ushort_max, .proc_handler = proc_dointvec_minmax }, { } diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 93ea80196f0e..5b21f6f88e97 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -177,6 +177,8 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = { [DCB_ATTR_IEEE_PFC] = {.len = sizeof(struct ieee_pfc)}, [DCB_ATTR_IEEE_APP_TABLE] = {.type = NLA_NESTED}, [DCB_ATTR_IEEE_MAXRATE] = {.len = sizeof(struct ieee_maxrate)}, + [DCB_ATTR_IEEE_QCN] = {.len = sizeof(struct ieee_qcn)}, + [DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)}, }; static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { @@ -1030,7 +1032,7 @@ nla_put_failure: return err; } -/* Handle IEEE 802.1Qaz GET commands. */ +/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb GET commands. */ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) { struct nlattr *ieee, *app; @@ -1067,6 +1069,32 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) } } + if (ops->ieee_getqcn) { + struct ieee_qcn qcn; + + memset(&qcn, 0, sizeof(qcn)); + err = ops->ieee_getqcn(netdev, &qcn); + if (!err) { + err = nla_put(skb, DCB_ATTR_IEEE_QCN, + sizeof(qcn), &qcn); + if (err) + return -EMSGSIZE; + } + } + + if (ops->ieee_getqcnstats) { + struct ieee_qcn_stats qcn_stats; + + memset(&qcn_stats, 0, sizeof(qcn_stats)); + err = ops->ieee_getqcnstats(netdev, &qcn_stats); + if (!err) { + err = nla_put(skb, DCB_ATTR_IEEE_QCN_STATS, + sizeof(qcn_stats), &qcn_stats); + if (err) + return -EMSGSIZE; + } + } + if (ops->ieee_getpfc) { struct ieee_pfc pfc; memset(&pfc, 0, sizeof(pfc)); @@ -1379,8 +1407,9 @@ int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, } EXPORT_SYMBOL(dcbnl_cee_notify); -/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not - * be completed the entire msg is aborted and error value is returned. +/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb SET commands. + * If any requested operation can not be completed + * the entire msg is aborted and error value is returned. * No attempt is made to reconcile the case where only part of the * cmd can be completed. */ @@ -1417,6 +1446,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, goto err; } + if (ieee[DCB_ATTR_IEEE_QCN] && ops->ieee_setqcn) { + struct ieee_qcn *qcn = + nla_data(ieee[DCB_ATTR_IEEE_QCN]); + + err = ops->ieee_setqcn(netdev, qcn); + if (err) + goto err; + } + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); err = ops->ieee_setpfc(netdev, pfc); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e4c144fa706f..bebc735f5afc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -280,8 +280,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev); + struct request_sock *req); int dccp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); @@ -310,16 +309,15 @@ int compat_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); #endif int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); -int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); -int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int nonblock, int flags, - int *addr_len); +int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); unsigned int dccp_poll(struct file *file, struct socket *sock, poll_table *wait); int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +void dccp_req_err(struct sock *sk, u64 seq); struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb); int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 028fc43aacbd..5a45f8de5d99 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -49,13 +49,14 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); } -static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) +static int dccp_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e45b968613a4..2b4f21d34df6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -89,10 +89,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet->inet_saddr == 0) inet->inet_saddr = fl4->saddr; - inet->inet_rcv_saddr = inet->inet_saddr; - + sk_rcv_saddr_set(sk, inet->inet_saddr); inet->inet_dport = usin->sin_port; - inet->inet_daddr = daddr; + sk_daddr_set(sk, daddr); inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) @@ -196,6 +195,32 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) dst->ops->redirect(dst, sk, skb); } +void dccp_req_err(struct sock *sk, u64 seq) + { + struct request_sock *req = inet_reqsk(sk); + struct net *net = sock_net(sk); + + /* + * ICMPs are not backlogged, hence we cannot get an established + * socket here. + */ + WARN_ON(req->sk); + + if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); + } else { + /* + * Still in RESPOND, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + inet_csk_reqsk_queue_drop(req->rsk_listener, req); + } +} +EXPORT_SYMBOL(dccp_req_err); + /* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error @@ -228,10 +253,11 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = inet_lookup(net, &dccp_hashinfo, - iph->daddr, dh->dccph_dport, - iph->saddr, dh->dccph_sport, inet_iif(skb)); - if (sk == NULL) { + sk = __inet_lookup_established(net, &dccp_hashinfo, + iph->daddr, dh->dccph_dport, + iph->saddr, ntohs(dh->dccph_sport), + inet_iif(skb)); + if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -240,6 +266,9 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) inet_twsk_put(inet_twsk(sk)); return; } + seq = dccp_hdr_seq(dh); + if (sk->sk_state == DCCP_NEW_SYN_RECV) + return dccp_req_err(sk, seq); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -252,7 +281,6 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); @@ -289,35 +317,6 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req , **prev; - case DCCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - req = inet_csk_search_req(sk, &prev, dh->dccph_dport, - iph->daddr, iph->saddr); - if (!req) - goto out; - - /* - * ICMPs are not backlogged, hence we cannot get an established - * socket here. - */ - WARN_ON(req->sk); - - if (!between48(seq, dccp_rsk(req)->dreq_iss, - dccp_rsk(req)->dreq_gss)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - /* - * Still in RESPOND, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - inet_csk_reqsk_queue_drop(sk, req, prev); - goto out; - case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { @@ -408,8 +407,8 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->ir_rmt_addr; - newinet->inet_rcv_saddr = ireq->ir_loc_addr; + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; newinet->inet_opt = ireq->opt; ireq->opt = NULL; @@ -449,14 +448,14 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) const struct dccp_hdr *dh = dccp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); struct sock *nsk; - struct request_sock **prev; /* Find possible connection requests. */ - struct request_sock *req = inet_csk_search_req(sk, &prev, - dh->dccph_sport, + struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, iph->saddr, iph->daddr); - if (req != NULL) - return dccp_check_req(sk, skb, req, prev); - + if (req) { + nsk = dccp_check_req(sk, skb, req); + reqsk_put(req); + return nsk; + } nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, @@ -575,7 +574,7 @@ static void dccp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +void dccp_syn_ack_timeout(const struct request_sock *req) { } EXPORT_SYMBOL(dccp_syn_ack_timeout); @@ -624,7 +623,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet_reqsk_alloc(&dccp_request_sock_ops); + req = inet_reqsk_alloc(&dccp_request_sock_ops, sk); if (req == NULL) goto drop; @@ -639,8 +638,10 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq = inet_rsk(req); - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); + ireq->ireq_family = AF_INET; + ireq->ir_iif = sk->sk_bound_dev_if; /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6bcaa33cd804..9d0551092c6c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -40,19 +40,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped; static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops; -static void dccp_v6_hash(struct sock *sk) -{ - if (sk->sk_state != DCCP_CLOSED) { - if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) { - inet_hash(sk); - return; - } - local_bh_disable(); - __inet6_hash(sk, NULL); - local_bh_enable(); - } -} - /* add pseudo-header to DCCP checksum stored in skb->csum */ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, const struct in6_addr *saddr, @@ -98,11 +85,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } - sk = inet6_lookup(net, &dccp_hashinfo, - &hdr->daddr, dh->dccph_dport, - &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); + sk = __inet6_lookup_established(net, &dccp_hashinfo, + &hdr->daddr, dh->dccph_dport, + &hdr->saddr, ntohs(dh->dccph_sport), + inet6_iif(skb)); - if (sk == NULL) { + if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; @@ -112,6 +100,9 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet_twsk_put(inet_twsk(sk)); return; } + seq = dccp_hdr_seq(dh); + if (sk->sk_state == DCCP_NEW_SYN_RECV) + return dccp_req_err(sk, seq); bh_lock_sock(sk); if (sock_owned_by_user(sk)) @@ -121,7 +112,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); @@ -162,32 +152,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Might be for an request_sock */ switch (sk->sk_state) { - struct request_sock *req, **prev; - case DCCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, - &hdr->daddr, &hdr->saddr, - inet6_iif(skb)); - if (req == NULL) - goto out; - - /* - * ICMPs are not backlogged, hence we cannot get an established - * socket here. - */ - WARN_ON(req->sk != NULL); - - if (!between48(seq, dccp_rsk(req)->dreq_iss, - dccp_rsk(req)->dreq_gss)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - inet_csk_reqsk_queue_drop(sk, req, prev); - goto out; - case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ @@ -330,17 +294,16 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); const struct ipv6hdr *iph = ipv6_hdr(skb); + struct request_sock *req; struct sock *nsk; - struct request_sock **prev; - /* Find possible connection requests. */ - struct request_sock *req = inet6_csk_search_req(sk, &prev, - dh->dccph_sport, - &iph->saddr, - &iph->daddr, - inet6_iif(skb)); - if (req != NULL) - return dccp_check_req(sk, skb, req, prev); + req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, + &iph->daddr, inet6_iif(skb)); + if (req) { + nsk = dccp_check_req(sk, skb, req); + reqsk_put(req); + return nsk; + } nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, &iph->saddr, dh->dccph_sport, &iph->daddr, ntohs(dh->dccph_dport), @@ -386,7 +349,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet_reqsk_alloc(&dccp6_request_sock_ops); + req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk); if (req == NULL) goto drop; @@ -403,6 +366,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ireq->ireq_family = AF_INET6; if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || @@ -469,11 +433,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); - - ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - - newsk->sk_v6_rcv_saddr = newnp->saddr; + newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; @@ -591,7 +551,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, dccp_done(newsk); goto out; } - __inet6_hash(newsk, NULL); + __inet_hash(newsk, NULL); return newsk; @@ -916,9 +876,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } - ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr); - + np->saddr = sk->sk_v6_rcv_saddr; return err; } @@ -1061,7 +1019,7 @@ static struct proto dccp_v6_prot = { .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v6_do_rcv, - .hash = dccp_v6_hash, + .hash = inet_hash, .unhash = inet_unhash, .accept = inet_csk_accept, .get_port = inet_csk_get_port, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index b50dc436db1f..332f7d6d9942 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -152,8 +152,7 @@ EXPORT_SYMBOL_GPL(dccp_create_openreq_child); * as an request_sock. */ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev) + struct request_sock *req) { struct sock *child = NULL; struct dccp_request_sock *dreq = dccp_rsk(req); @@ -200,7 +199,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, if (child == NULL) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_unlink(sk, req); inet_csk_reqsk_queue_removed(sk, req); inet_csk_reqsk_queue_add(sk, req, child); out: @@ -212,7 +211,7 @@ drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); goto out; } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 595ddf0459db..d8346d0eadeb 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -72,8 +72,7 @@ static void printl(const char *fmt, ...) wake_up(&dccpw.wait); } -static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { const struct inet_sock *inet = inet_sk(sk); struct ccid3_hc_tx_sock *hc = NULL; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e171b780b499..52a94016526d 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -741,8 +741,7 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) return 0; } -int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { const struct dccp_sock *dp = dccp_sk(sk); const int flags = msg->msg_flags; @@ -806,8 +805,8 @@ out_discard: EXPORT_SYMBOL_GPL(dccp_sendmsg); -int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len) +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) { const struct dccp_hdr *dh; long timeo; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1cd46a345cb0..3ef7acef3ce8 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -161,33 +161,11 @@ out: sock_put(sk); } -/* - * Timer for listening sockets - */ -static void dccp_response_timer(struct sock *sk) -{ - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, - DCCP_RTO_MAX); -} - static void dccp_keepalive_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - /* Only process if socket is not in use. */ - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - inet_csk_reset_keepalive_timer(sk, HZ / 20); - goto out; - } - - if (sk->sk_state == DCCP_LISTEN) { - dccp_response_timer(sk); - goto out; - } -out: - bh_unlock_sock(sk); + pr_err("dccp should not use a keepalive timer !\n"); sock_put(sk); } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 810228646de3..754484b3cd0e 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1669,8 +1669,8 @@ static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int } -static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); @@ -1905,8 +1905,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk, return skb; } -static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 7ca7c3143da3..be1f08cdad29 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -49,41 +49,17 @@ #include <net/dn_route.h> static int dn_neigh_construct(struct neighbour *); -static void dn_long_error_report(struct neighbour *, struct sk_buff *); -static void dn_short_error_report(struct neighbour *, struct sk_buff *); -static int dn_long_output(struct neighbour *, struct sk_buff *); -static int dn_short_output(struct neighbour *, struct sk_buff *); -static int dn_phase3_output(struct neighbour *, struct sk_buff *); - - -/* - * For talking to broadcast devices: Ethernet & PPP - */ -static const struct neigh_ops dn_long_ops = { - .family = AF_DECnet, - .error_report = dn_long_error_report, - .output = dn_long_output, - .connected_output = dn_long_output, -}; +static void dn_neigh_error_report(struct neighbour *, struct sk_buff *); +static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb); /* - * For talking to pointopoint and multidrop devices: DDCMP and X.25 + * Operations for adding the link layer header. */ -static const struct neigh_ops dn_short_ops = { +static const struct neigh_ops dn_neigh_ops = { .family = AF_DECnet, - .error_report = dn_short_error_report, - .output = dn_short_output, - .connected_output = dn_short_output, -}; - -/* - * For talking to DECnet phase III nodes - */ -static const struct neigh_ops dn_phase3_ops = { - .family = AF_DECnet, - .error_report = dn_short_error_report, /* Can use short version here */ - .output = dn_phase3_output, - .connected_output = dn_phase3_output, + .error_report = dn_neigh_error_report, + .output = dn_neigh_output, + .connected_output = dn_neigh_output, }; static u32 dn_neigh_hash(const void *pkey, @@ -93,11 +69,18 @@ static u32 dn_neigh_hash(const void *pkey, return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]); } +static bool dn_key_eq(const struct neighbour *neigh, const void *pkey) +{ + return neigh_key_eq16(neigh, pkey); +} + struct neigh_table dn_neigh_table = { .family = PF_DECnet, .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)), .key_len = sizeof(__le16), + .protocol = cpu_to_be16(ETH_P_DNA_RT), .hash = dn_neigh_hash, + .key_eq = dn_key_eq, .constructor = dn_neigh_construct, .id = "dn_neigh_cache", .parms ={ @@ -146,16 +129,9 @@ static int dn_neigh_construct(struct neighbour *neigh) __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); - - if (dn_db->use_long) - neigh->ops = &dn_long_ops; - else - neigh->ops = &dn_short_ops; rcu_read_unlock(); - if (dn->flags & DN_NDFLAG_P3) - neigh->ops = &dn_phase3_ops; - + neigh->ops = &dn_neigh_ops; neigh->nud_state = NUD_NOARP; neigh->output = neigh->ops->connected_output; @@ -187,24 +163,16 @@ static int dn_neigh_construct(struct neighbour *neigh) return 0; } -static void dn_long_error_report(struct neighbour *neigh, struct sk_buff *skb) +static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb) { - printk(KERN_DEBUG "dn_long_error_report: called\n"); + printk(KERN_DEBUG "dn_neigh_error_report: called\n"); kfree_skb(skb); } - -static void dn_short_error_report(struct neighbour *neigh, struct sk_buff *skb) -{ - printk(KERN_DEBUG "dn_short_error_report: called\n"); - kfree_skb(skb); -} - -static int dn_neigh_output_packet(struct sk_buff *skb) +static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = rt->n; struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; unsigned int seq; @@ -226,6 +194,18 @@ static int dn_neigh_output_packet(struct sk_buff *skb) return err; } +static int dn_neigh_output_packet(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct dn_route *rt = (struct dn_route *)dst; + struct neighbour *neigh = rt->n; + + return neigh->output(neigh, skb); +} + +/* + * For talking to broadcast devices: Ethernet & PPP + */ static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) { struct net_device *dev = neigh->dev; @@ -269,6 +249,9 @@ static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) neigh->dev, dn_neigh_output_packet); } +/* + * For talking to pointopoint and multidrop devices: DDCMP and X.25 + */ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) { struct net_device *dev = neigh->dev; @@ -306,7 +289,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) } /* - * Phase 3 output is the same is short output, execpt that + * For talking to DECnet phase III nodes + * Phase 3 output is the same as short output, execpt that * it clears the area bits before transmission. */ static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) @@ -344,6 +328,32 @@ static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) neigh->dev, dn_neigh_output_packet); } +int dn_to_neigh_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *neigh = rt->n; + struct dn_neigh *dn = (struct dn_neigh *)neigh; + struct dn_dev *dn_db; + bool use_long; + + rcu_read_lock(); + dn_db = rcu_dereference(neigh->dev->dn_ptr); + if (dn_db == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + use_long = dn_db->use_long; + rcu_read_unlock(); + + if (dn->flags & DN_NDFLAG_P3) + return dn_phase3_output(neigh, skb); + if (use_long) + return dn_long_output(neigh, skb); + else + return dn_short_output(neigh, skb); +} + /* * Unfortunately, the neighbour code uses the device in its hash * function, so we don't get any advantage from it. This function diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1d7c1256e845..9ab0c4ba297f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -136,7 +136,6 @@ int decnet_dst_gc_interval = 2; static struct dst_ops dn_dst_ops = { .family = PF_DECnet, - .protocol = cpu_to_be16(ETH_P_DNA_RT), .gc_thresh = 128, .gc = dn_dst_gc, .check = dn_dst_check, @@ -743,15 +742,6 @@ out: return NET_RX_DROP; } -static int dn_to_neigh_output(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *) dst; - struct neighbour *n = rt->n; - - return n->output(n, skb); -} - static int dn_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -1062,7 +1052,7 @@ source_ok: if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." - " dst=%o4x src=%04x oif=%d try_hard=%d\n", + " dst=%04x src=%04x oif=%d try_hard=%d\n", le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr), fld.flowidn_oif, try_hard); diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 5f8ac404535b..ff7736f7ff42 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -5,9 +5,12 @@ config HAVE_NET_DSA # Drivers must select NET_DSA and the appropriate tagging format config NET_DSA - tristate - depends on HAVE_NET_DSA + tristate "Distributed Switch Architecture" + depends on HAVE_NET_DSA && NET_SWITCHDEV select PHYLIB + ---help--- + Say Y if you want to enable support for the hardware switches supported + by the Distributed Switch Architecture. if NET_DSA diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 2173402d87e0..899772108ee3 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_platform.h> +#include <linux/of_net.h> #include <linux/sysfs.h> #include "dsa_priv.h" @@ -175,43 +176,14 @@ __ATTRIBUTE_GROUPS(dsa_hwmon); #endif /* CONFIG_NET_DSA_HWMON */ /* basic switch operations **************************************************/ -static struct dsa_switch * -dsa_switch_setup(struct dsa_switch_tree *dst, int index, - struct device *parent, struct device *host_dev) +static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { - struct dsa_chip_data *pd = dst->pd->chip + index; - struct dsa_switch_driver *drv; - struct dsa_switch *ds; - int ret; - char *name; - int i; + struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_tree *dst = ds->dst; + struct dsa_chip_data *pd = ds->pd; bool valid_name_found = false; - - /* - * Probe for switch model. - */ - drv = dsa_switch_probe(host_dev, pd->sw_addr, &name); - if (drv == NULL) { - netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", - index); - return ERR_PTR(-EINVAL); - } - netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", - index, name); - - - /* - * Allocate and initialise switch state. - */ - ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); - if (ds == NULL) - return ERR_PTR(-ENOMEM); - - ds->dst = dst; - ds->index = index; - ds->pd = dst->pd->chip + index; - ds->drv = drv; - ds->master_dev = host_dev; + int index = ds->index; + int i, ret; /* * Validate supplied switch configuration. @@ -256,7 +228,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, * switch. */ if (dst->cpu_switch == index) { - switch (drv->tag_protocol) { + switch (ds->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case DSA_TAG_PROTO_DSA: dst->rcv = dsa_netdev_ops.rcv; @@ -284,7 +256,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, goto out; } - dst->tag_protocol = drv->tag_protocol; + dst->tag_protocol = ds->tag_protocol; } /* @@ -314,19 +286,15 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, * Create network devices for physical switch ports. */ for (i = 0; i < DSA_MAX_PORTS; i++) { - struct net_device *slave_dev; - if (!(ds->phys_port_mask & (1 << i))) continue; - slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); - if (slave_dev == NULL) { + ret = dsa_slave_create(ds, parent, i, pd->port_names[i]); + if (ret < 0) { netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n", index, i, pd->port_names[i]); - continue; + ret = 0; } - - ds->ports[i] = slave_dev; } #ifdef CONFIG_NET_DSA_HWMON @@ -354,13 +322,57 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, } #endif /* CONFIG_NET_DSA_HWMON */ - return ds; + return ret; out_free: mdiobus_free(ds->slave_mii_bus); out: kfree(ds); - return ERR_PTR(ret); + return ret; +} + +static struct dsa_switch * +dsa_switch_setup(struct dsa_switch_tree *dst, int index, + struct device *parent, struct device *host_dev) +{ + struct dsa_chip_data *pd = dst->pd->chip + index; + struct dsa_switch_driver *drv; + struct dsa_switch *ds; + int ret; + char *name; + + /* + * Probe for switch model. + */ + drv = dsa_switch_probe(host_dev, pd->sw_addr, &name); + if (drv == NULL) { + netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", + index); + return ERR_PTR(-EINVAL); + } + netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", + index, name); + + + /* + * Allocate and initialise switch state. + */ + ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); + if (ds == NULL) + return NULL; + + ds->dst = dst; + ds->index = index; + ds->pd = pd; + ds->drv = drv; + ds->tag_protocol = drv->tag_protocol; + ds->master_dev = host_dev; + + ret = dsa_switch_setup_one(ds, parent); + if (ret) + return NULL; + + return ds; } static void dsa_switch_destroy(struct dsa_switch *ds) @@ -378,7 +390,7 @@ static int dsa_switch_suspend(struct dsa_switch *ds) /* Suspend slave network devices */ for (i = 0; i < DSA_MAX_PORTS; i++) { - if (!(ds->phys_port_mask & (1 << i))) + if (!dsa_is_port_initialized(ds, i)) continue; ret = dsa_slave_suspend(ds->ports[i]); @@ -404,7 +416,7 @@ static int dsa_switch_resume(struct dsa_switch *ds) /* Resume slave network devices */ for (i = 0; i < DSA_MAX_PORTS; i++) { - if (!(ds->phys_port_mask & (1 << i))) + if (!dsa_is_port_initialized(ds, i)) continue; ret = dsa_slave_resume(ds->ports[i]); @@ -567,12 +579,12 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd) kfree(pd->chip); } -static int dsa_of_probe(struct platform_device *pdev) +static int dsa_of_probe(struct device *dev) { - struct device_node *np = pdev->dev.of_node; + struct device_node *np = dev->of_node; struct device_node *child, *mdio, *ethernet, *port, *link; struct mii_bus *mdio_bus; - struct platform_device *ethernet_dev; + struct net_device *ethernet_dev; struct dsa_platform_data *pd; struct dsa_chip_data *cd; const char *port_name; @@ -587,22 +599,22 @@ static int dsa_of_probe(struct platform_device *pdev) mdio_bus = of_mdio_find_bus(mdio); if (!mdio_bus) - return -EINVAL; + return -EPROBE_DEFER; ethernet = of_parse_phandle(np, "dsa,ethernet", 0); if (!ethernet) return -EINVAL; - ethernet_dev = of_find_device_by_node(ethernet); + ethernet_dev = of_find_net_device_by_node(ethernet); if (!ethernet_dev) - return -ENODEV; + return -EPROBE_DEFER; pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return -ENOMEM; - pdev->dev.platform_data = pd; - pd->netdev = ðernet_dev->dev; + dev->platform_data = pd; + pd->of_netdev = ethernet_dev; pd->nr_chips = of_get_available_child_count(np); if (pd->nr_chips > DSA_MAX_SWITCHES) pd->nr_chips = DSA_MAX_SWITCHES; @@ -674,72 +686,35 @@ out_free_chip: dsa_of_free_platform_data(pd); out_free: kfree(pd); - pdev->dev.platform_data = NULL; + dev->platform_data = NULL; return ret; } -static void dsa_of_remove(struct platform_device *pdev) +static void dsa_of_remove(struct device *dev) { - struct dsa_platform_data *pd = pdev->dev.platform_data; + struct dsa_platform_data *pd = dev->platform_data; - if (!pdev->dev.of_node) + if (!dev->of_node) return; dsa_of_free_platform_data(pd); kfree(pd); } #else -static inline int dsa_of_probe(struct platform_device *pdev) +static inline int dsa_of_probe(struct device *dev) { return 0; } -static inline void dsa_of_remove(struct platform_device *pdev) +static inline void dsa_of_remove(struct device *dev) { } #endif -static int dsa_probe(struct platform_device *pdev) +static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, + struct device *parent, struct dsa_platform_data *pd) { - struct dsa_platform_data *pd = pdev->dev.platform_data; - struct net_device *dev; - struct dsa_switch_tree *dst; - int i, ret; - - pr_notice_once("Distributed Switch Architecture driver version %s\n", - dsa_driver_version); - - if (pdev->dev.of_node) { - ret = dsa_of_probe(pdev); - if (ret) - return ret; - - pd = pdev->dev.platform_data; - } - - if (pd == NULL || pd->netdev == NULL) - return -EINVAL; - - dev = dev_to_net_device(pd->netdev); - if (dev == NULL) { - ret = -EINVAL; - goto out; - } - - if (dev->dsa_ptr != NULL) { - dev_put(dev); - ret = -EEXIST; - goto out; - } - - dst = kzalloc(sizeof(*dst), GFP_KERNEL); - if (dst == NULL) { - dev_put(dev); - ret = -ENOMEM; - goto out; - } - - platform_set_drvdata(pdev, dst); + int i; dst->pd = pd; dst->master_netdev = dev; @@ -749,7 +724,7 @@ static int dsa_probe(struct platform_device *pdev) for (i = 0; i < pd->nr_chips; i++) { struct dsa_switch *ds; - ds = dsa_switch_setup(dst, i, &pdev->dev, pd->chip[i].host_dev); + ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev); if (IS_ERR(ds)) { netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", i, PTR_ERR(ds)); @@ -777,18 +752,67 @@ static int dsa_probe(struct platform_device *pdev) dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); add_timer(&dst->link_poll_timer); } +} + +static int dsa_probe(struct platform_device *pdev) +{ + struct dsa_platform_data *pd = pdev->dev.platform_data; + struct net_device *dev; + struct dsa_switch_tree *dst; + int ret; + + pr_notice_once("Distributed Switch Architecture driver version %s\n", + dsa_driver_version); + + if (pdev->dev.of_node) { + ret = dsa_of_probe(&pdev->dev); + if (ret) + return ret; + + pd = pdev->dev.platform_data; + } + + if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL)) + return -EINVAL; + + if (pd->of_netdev) { + dev = pd->of_netdev; + dev_hold(dev); + } else { + dev = dev_to_net_device(pd->netdev); + } + if (dev == NULL) { + ret = -EPROBE_DEFER; + goto out; + } + + if (dev->dsa_ptr != NULL) { + dev_put(dev); + ret = -EEXIST; + goto out; + } + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (dst == NULL) { + dev_put(dev); + ret = -ENOMEM; + goto out; + } + + platform_set_drvdata(pdev, dst); + + dsa_setup_dst(dst, dev, &pdev->dev, pd); return 0; out: - dsa_of_remove(pdev); + dsa_of_remove(&pdev->dev); return ret; } -static int dsa_remove(struct platform_device *pdev) +static void dsa_remove_dst(struct dsa_switch_tree *dst) { - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); int i; if (dst->link_poll_needed) @@ -802,8 +826,14 @@ static int dsa_remove(struct platform_device *pdev) if (ds != NULL) dsa_switch_destroy(ds); } +} + +static int dsa_remove(struct platform_device *pdev) +{ + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - dsa_of_remove(pdev); + dsa_remove_dst(dst); + dsa_of_remove(&pdev->dev); return 0; } @@ -830,6 +860,10 @@ static struct packet_type dsa_pack_type __read_mostly = { .func = dsa_switch_rcv, }; +static struct notifier_block dsa_netdevice_nb __read_mostly = { + .notifier_call = dsa_slave_netdevice_event, +}; + #ifdef CONFIG_PM_SLEEP static int dsa_suspend(struct device *d) { @@ -888,6 +922,8 @@ static int __init dsa_init_module(void) { int rc; + register_netdevice_notifier(&dsa_netdevice_nb); + rc = platform_driver_register(&dsa_driver); if (rc) return rc; @@ -900,6 +936,7 @@ module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { + unregister_netdevice_notifier(&dsa_netdevice_nb); dev_remove_pack(&dsa_pack_type); platform_driver_unregister(&dsa_driver); } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index dc9756d3154c..d5f1f9b862ea 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -45,6 +45,8 @@ struct dsa_slave_priv { int old_link; int old_pause; int old_duplex; + + struct net_device *bridge_dev; }; /* dsa.c */ @@ -53,11 +55,12 @@ extern char dsa_driver_version[]; /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); -struct net_device *dsa_slave_create(struct dsa_switch *ds, - struct device *parent, - int port, char *name); +int dsa_slave_create(struct dsa_switch *ds, struct device *parent, + int port, char *name); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); +int dsa_slave_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr); /* tag_dsa.c */ extern const struct dsa_device_ops dsa_netdev_ops; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f23deadf42a0..3597724ec3d8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -10,10 +10,14 @@ #include <linux/list.h> #include <linux/etherdevice.h> +#include <linux/netdevice.h> #include <linux/phy.h> #include <linux/phy_fixed.h> #include <linux/of_net.h> #include <linux/of_mdio.h> +#include <net/rtnetlink.h> +#include <net/switchdev.h> +#include <linux/if_bridge.h> #include "dsa_priv.h" /* slave mii_bus handling ***************************************************/ @@ -60,11 +64,18 @@ static int dsa_slave_init(struct net_device *dev) return 0; } +static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) +{ + return !!p->bridge_dev; +} + static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = p->parent->dst->master_netdev; struct dsa_switch *ds = p->parent; + u8 stp_state = dsa_port_is_bridged(p) ? + BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; if (!(master->flags & IFF_UP)) @@ -93,6 +104,9 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } + if (ds->drv->port_stp_update) + ds->drv->port_stp_update(ds, p->port, stp_state); + if (p->phy) phy_start(p->phy); @@ -133,6 +147,9 @@ static int dsa_slave_close(struct net_device *dev) if (ds->drv->port_disable) ds->drv->port_disable(ds, p->port, p->phy); + if (ds->drv->port_stp_update) + ds->drv->port_stp_update(ds, p->port, BR_STATE_DISABLED); + return 0; } @@ -184,6 +201,105 @@ out: return 0; } +static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, u16 nlm_flags) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->fdb_add) + ret = ds->drv->fdb_add(ds, p->port, addr, vid); + + return ret; +} + +static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->fdb_del) + ret = ds->drv->fdb_del(ds, p->port, addr, vid); + + return ret; +} + +static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb, + const unsigned char *addr, u16 vid, + bool is_static, + u32 portid, u32 seq, int type, + unsigned int flags) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_EXT_LEARNED; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dev->ifindex; + ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + goto nla_put_failure; + + if (vid && nla_put_u16(skb, NDA_VLAN, vid)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* Dump information about entries, in response to GETNEIGH */ +static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + unsigned char addr[ETH_ALEN] = { 0 }; + int ret; + + if (!ds->drv->fdb_getnext) + return -EOPNOTSUPP; + + for (; ; idx++) { + bool is_static; + + ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static); + if (ret < 0) + break; + + if (idx < cb->args[0]) + continue; + + ret = dsa_slave_fill_info(dev, skb, addr, 0, + is_static, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI); + if (ret < 0) + break; + } + + return idx; +} + static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -194,6 +310,92 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } +/* Return a bitmask of all ports being currently bridged within a given bridge + * device. Note that on leave, the mask will still return the bitmask of ports + * currently bridged, prior to port removal, and this is exactly what we want. + */ +static u32 dsa_slave_br_port_mask(struct dsa_switch *ds, + struct net_device *bridge) +{ + struct dsa_slave_priv *p; + unsigned int port; + u32 mask = 0; + + for (port = 0; port < DSA_MAX_PORTS; port++) { + if (!dsa_is_port_initialized(ds, port)) + continue; + + p = netdev_priv(ds->ports[port]); + + if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT && + p->bridge_dev == bridge) + mask |= 1 << port; + } + + return mask; +} + +static int dsa_slave_stp_update(struct net_device *dev, u8 state) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->port_stp_update) + ret = ds->drv->port_stp_update(ds, p->port, state); + + return ret; +} + +static int dsa_slave_bridge_port_join(struct net_device *dev, + struct net_device *br) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + p->bridge_dev = br; + + if (ds->drv->port_join_bridge) + ret = ds->drv->port_join_bridge(ds, p->port, + dsa_slave_br_port_mask(ds, br)); + + return ret; +} + +static int dsa_slave_bridge_port_leave(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + + if (ds->drv->port_leave_bridge) + ret = ds->drv->port_leave_bridge(ds, p->port, + dsa_slave_br_port_mask(ds, p->bridge_dev)); + + p->bridge_dev = NULL; + + /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, + * so allow it to be in BR_STATE_FORWARDING to be kept functional + */ + dsa_slave_stp_update(dev, BR_STATE_FORWARDING); + + return ret; +} + +static int dsa_slave_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + psid->id_len = sizeof(ds->index); + memcpy(&psid->id, &ds->index, psid->id_len); + + return 0; +} + static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -469,9 +671,17 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, + .ndo_fdb_add = dsa_slave_fdb_add, + .ndo_fdb_del = dsa_slave_fdb_del, + .ndo_fdb_dump = dsa_slave_fdb_dump, .ndo_do_ioctl = dsa_slave_ioctl, }; +static const struct swdev_ops dsa_slave_swdev_ops = { + .swdev_parent_id_get = dsa_slave_parent_id_get, + .swdev_port_stp_update = dsa_slave_stp_update, +}; + static void dsa_slave_adjust_link(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -513,6 +723,24 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, } /* slave device setup *******************************************************/ +static int dsa_slave_phy_connect(struct dsa_slave_priv *p, + struct net_device *slave_dev, + int addr) +{ + struct dsa_switch *ds = p->parent; + + p->phy = ds->slave_mii_bus->phy_map[addr]; + if (!p->phy) + return -ENODEV; + + /* Use already configured phy mode */ + p->phy_interface = p->phy->interface; + phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, + p->phy_interface); + + return 0; +} + static int dsa_slave_phy_setup(struct dsa_slave_priv *p, struct net_device *slave_dev) { @@ -546,10 +774,25 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, if (ds->drv->get_phy_flags) phy_flags = ds->drv->get_phy_flags(ds, p->port); - if (phy_dn) - p->phy = of_phy_connect(slave_dev, phy_dn, - dsa_slave_adjust_link, phy_flags, - p->phy_interface); + if (phy_dn) { + ret = of_mdio_parse_addr(&slave_dev->dev, phy_dn); + /* If this PHY address is part of phys_mii_mask, which means + * that we need to divert reads and writes to/from it, then we + * want to bind this device using the slave MII bus created by + * DSA to make that happen. + */ + if (!phy_is_fixed && ret >= 0 && + (ds->phys_mii_mask & (1 << ret))) { + ret = dsa_slave_phy_connect(p, slave_dev, ret); + if (ret) + return ret; + } else { + p->phy = of_phy_connect(slave_dev, phy_dn, + dsa_slave_adjust_link, + phy_flags, + p->phy_interface); + } + } if (p->phy && phy_is_fixed) fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update); @@ -558,14 +801,9 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, * MDIO bus instead */ if (!p->phy) { - p->phy = ds->slave_mii_bus->phy_map[p->port]; - if (!p->phy) - return -ENODEV; - - /* Use already configured phy mode */ - p->phy_interface = p->phy->interface; - phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, - p->phy_interface); + ret = dsa_slave_phy_connect(p, slave_dev, p->port); + if (ret) + return ret; } else { netdev_info(slave_dev, "attached PHY at address %d [%s]\n", p->phy->addr, p->phy->drv->name); @@ -605,9 +843,8 @@ int dsa_slave_resume(struct net_device *slave_dev) return 0; } -struct net_device * -dsa_slave_create(struct dsa_switch *ds, struct device *parent, - int port, char *name) +int dsa_slave_create(struct dsa_switch *ds, struct device *parent, + int port, char *name) { struct net_device *master = ds->dst->master_netdev; struct net_device *slave_dev; @@ -617,13 +854,14 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name, NET_NAME_UNKNOWN, ether_setup); if (slave_dev == NULL) - return slave_dev; + return -ENOMEM; slave_dev->features = master->vlan_features; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; slave_dev->netdev_ops = &dsa_slave_netdev_ops; + slave_dev->swdev_ops = &dsa_slave_swdev_ops; SET_NETDEV_DEV(slave_dev, parent); slave_dev->dev.of_node = ds->pd->port_dn[port]; @@ -667,19 +905,64 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, ret = dsa_slave_phy_setup(p, slave_dev); if (ret) { free_netdev(slave_dev); - return NULL; + return ret; } + ds->ports[port] = slave_dev; ret = register_netdev(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", ret, slave_dev->name); phy_disconnect(p->phy); + ds->ports[port] = NULL; free_netdev(slave_dev); - return NULL; + return ret; } netif_carrier_off(slave_dev); - return slave_dev; + return 0; +} + +static bool dsa_slave_dev_check(struct net_device *dev) +{ + return dev->netdev_ops == &dsa_slave_netdev_ops; +} + +static int dsa_slave_master_changed(struct net_device *dev) +{ + struct net_device *master = netdev_master_upper_dev_get(dev); + struct dsa_slave_priv *p = netdev_priv(dev); + int err = 0; + + if (master && master->rtnl_link_ops && + !strcmp(master->rtnl_link_ops->kind, "bridge")) + err = dsa_slave_bridge_port_join(dev, master); + else if (dsa_port_is_bridged(p)) + err = dsa_slave_bridge_port_leave(dev); + + return err; +} + +int dsa_slave_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev; + int err = 0; + + switch (event) { + case NETDEV_CHANGEUPPER: + dev = netdev_notifier_info_to_dev(ptr); + if (!dsa_slave_dev_check(dev)) + goto out; + + err = dsa_slave_master_changed(dev); + if (err) + netdev_warn(dev, "failed to reflect master change\n"); + + break; + } + +out: + return NOTIFY_DONE; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 238f38d21641..f3bad41d725f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -104,7 +104,7 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { - memset(eth->h_dest, 0, ETH_ALEN); + eth_zero_addr(eth->h_dest); return ETH_HLEN; } @@ -113,39 +113,6 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, EXPORT_SYMBOL(eth_header); /** - * eth_rebuild_header- rebuild the Ethernet MAC header. - * @skb: socket buffer to update - * - * This is called after an ARP or IPV6 ndisc it's resolution on this - * sk_buff. We now let protocol (ARP) fill in the other fields. - * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. - */ -int eth_rebuild_header(struct sk_buff *skb) -{ - struct ethhdr *eth = (struct ethhdr *)skb->data; - struct net_device *dev = skb->dev; - - switch (eth->h_proto) { -#ifdef CONFIG_INET - case htons(ETH_P_IP): - return arp_find(eth->h_dest, skb); -#endif - default: - netdev_dbg(dev, - "%s: unable to resolve type %X addresses.\n", - dev->name, ntohs(eth->h_proto)); - - memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); - break; - } - - return 0; -} -EXPORT_SYMBOL(eth_rebuild_header); - -/** * eth_get_headlen - determine the the length of header for an ethernet frame * @data: pointer to start of frame * @len: total length of frame @@ -369,7 +336,6 @@ EXPORT_SYMBOL(eth_validate_addr); const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, .parse = eth_header_parse, - .rebuild = eth_rebuild_header, .cache = eth_header_cache, .cache_update = eth_header_cache_update, }; @@ -391,7 +357,7 @@ void ether_setup(struct net_device *dev) dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; - memset(dev->broadcast, 0xFF, ETH_ALEN); + eth_broadcast_addr(dev->broadcast); } EXPORT_SYMBOL(ether_setup); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index a138d75751df..44d27469ae55 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -359,8 +359,11 @@ static void hsr_dev_destroy(struct net_device *hsr_dev) struct hsr_port *port; hsr = netdev_priv(hsr_dev); + + rtnl_lock(); hsr_for_each_port(hsr, port) hsr_del_port(port); + rtnl_unlock(); del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 779d28b65417..cd37d0011b42 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -36,6 +36,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; /* Not an HSR device */ hsr = netdev_priv(dev); port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + if (port == NULL) { + /* Resend of notification concerning removed device? */ + return NOTIFY_DONE; + } } else { hsr = port->hsr; } diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index a348dcbcd683..7d37366cc695 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -181,8 +181,10 @@ void hsr_del_port(struct hsr_port *port) list_del_rcu(&port->port_list); if (port != master) { - netdev_update_features(master->dev); - dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + if (master != NULL) { + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + } netdev_rx_handler_unregister(port->dev); dev_set_promiscuity(port->dev, -1); } @@ -192,5 +194,7 @@ void hsr_del_port(struct hsr_port *port) */ synchronize_rcu(); - dev_put(port->dev); + + if (port != master) + dev_put(port->dev); } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 055fbb71ba6f..0ae5822ef944 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -113,7 +113,7 @@ static void lowpan_setup(struct net_device *dev) { dev->addr_len = IEEE802154_ADDR_LEN; memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_IEEE802154; + dev->type = ARPHRD_6LOWPAN; /* Frame Control + Sequence Number + Address fields + Security Header */ dev->hard_header_len = 2 + 1 + 20 + 14; dev->needed_tailroom = 2; /* FCS */ @@ -126,6 +126,7 @@ static void lowpan_setup(struct net_device *dev) dev->header_ops = &lowpan_header_ops; dev->ml_priv = &lowpan_mlme; dev->destructor = free_netdev; + dev->features |= NETIF_F_NETNS_LOCAL; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -148,10 +149,11 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, pr_debug("adding new link\n"); - if (!tb[IFLA_LINK]) + if (!tb[IFLA_LINK] || + !net_eq(dev_net(dev), &init_net)) return -EINVAL; /* find and hold real wpan device */ - real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + real_dev = dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; if (real_dev->type != ARPHRD_IEEE802154) { diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index 18bc7e738507..2ee00e8a0308 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -25,6 +25,9 @@ #include "sysfs.h" #include "core.h" +/* name for sysfs, %d is appended */ +#define PHY_NAME "phy" + /* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg802154_rdev_list); int cfg802154_rdev_list_generation; @@ -122,7 +125,7 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) INIT_LIST_HEAD(&rdev->wpan_dev_list); device_initialize(&rdev->wpan_phy.dev); - dev_set_name(&rdev->wpan_phy.dev, "wpan-phy%d", rdev->wpan_phy_idx); + dev_set_name(&rdev->wpan_phy.dev, PHY_NAME "%d", rdev->wpan_phy_idx); rdev->wpan_phy.dev.class = &wpan_phy_class; rdev->wpan_phy.dev.platform_data = rdev; @@ -225,6 +228,7 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb, switch (state) { /* TODO NETDEV_DEVTYPE */ case NETDEV_REGISTER: + dev->features |= NETIF_F_NETNS_LOCAL; wpan_dev->identifier = ++rdev->wpan_dev_id; list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list); rdev->devlist_generation++; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 9105265920fe..2b4955d7aae5 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -76,7 +76,6 @@ nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } -EXPORT_SYMBOL(ieee802154_nl_start_confirm); static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 2878d8ca6d3b..b60c65f70346 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -98,12 +98,12 @@ static int ieee802154_sock_release(struct socket *sock) return 0; } -static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ieee802154_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; - return sk->sk_prot->sendmsg(iocb, sk, msg, len); + return sk->sk_prot->sendmsg(sk, msg, len); } static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, @@ -255,8 +255,7 @@ static int raw_disconnect(struct sock *sk, int flags) return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -327,8 +326,8 @@ out: return err; } -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -615,8 +614,7 @@ static int dgram_disconnect(struct sock *sk, int flags) return 0; } -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -715,9 +713,8 @@ out: return err; } -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c index dff55c2d87f3..133b4280660c 100644 --- a/net/ieee802154/sysfs.c +++ b/net/ieee802154/sysfs.c @@ -48,49 +48,6 @@ static ssize_t name_show(struct device *dev, } static DEVICE_ATTR_RO(name); -#define MASTER_SHOW_COMPLEX(name, format_string, args...) \ -static ssize_t name ## _show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); \ - int ret; \ - \ - mutex_lock(&phy->pib_lock); \ - ret = snprintf(buf, PAGE_SIZE, format_string "\n", args); \ - mutex_unlock(&phy->pib_lock); \ - return ret; \ -} \ -static DEVICE_ATTR_RO(name) - -#define MASTER_SHOW(field, format_string) \ - MASTER_SHOW_COMPLEX(field, format_string, phy->field) - -MASTER_SHOW(current_channel, "%d"); -MASTER_SHOW(current_page, "%d"); -MASTER_SHOW(transmit_power, "%d +- 1 dB"); -MASTER_SHOW_COMPLEX(cca_mode, "%d", phy->cca.mode); - -static ssize_t channels_supported_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); - int ret; - int i, len = 0; - - mutex_lock(&phy->pib_lock); - for (i = 0; i < 32; i++) { - ret = snprintf(buf + len, PAGE_SIZE - len, - "%#09x\n", phy->channels_supported[i]); - if (ret < 0) - break; - len += ret; - } - mutex_unlock(&phy->pib_lock); - return len; -} -static DEVICE_ATTR_RO(channels_supported); - static void wpan_phy_release(struct device *dev) { struct cfg802154_registered_device *rdev = dev_to_rdev(dev); @@ -101,12 +58,6 @@ static void wpan_phy_release(struct device *dev) static struct attribute *pmib_attrs[] = { &dev_attr_index.attr, &dev_attr_name.attr, - /* below will be removed soon */ - &dev_attr_current_channel.attr, - &dev_attr_current_page.attr, - &dev_attr_channels_supported.attr, - &dev_attr_transmit_power.attr, - &dev_attr_cca_mode.attr, NULL, }; ATTRIBUTE_GROUPS(pmib); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d2e49baaff63..64a9c0fdc4aa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -716,8 +716,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } EXPORT_SYMBOL(inet_getname); -int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size) +int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; @@ -728,7 +727,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->sendmsg(iocb, sk, msg, size); + return sk->sk_prot->sendmsg(sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); @@ -750,8 +749,8 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(inet_sendpage); -int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; @@ -759,7 +758,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, sock_rps_record_flow(sk); - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; @@ -1675,7 +1674,7 @@ static int __init inet_init(void) struct list_head *r; int rc = -EINVAL; - BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); + sock_skb_cb_check_size(sizeof(struct inet_skb_parm)); rc = proto_register(&tcp_prot, 1); if (rc) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 205e1472aa78..5f5c674e130a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -122,6 +122,7 @@ * Interface to generic neighbour cache. */ static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); +static bool arp_key_eq(const struct neighbour *n, const void *pkey); static int arp_constructor(struct neighbour *neigh); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -149,18 +150,12 @@ static const struct neigh_ops arp_direct_ops = { .connected_output = neigh_direct_output, }; -static const struct neigh_ops arp_broken_ops = { - .family = AF_INET, - .solicit = arp_solicit, - .error_report = arp_error_report, - .output = neigh_compat_output, - .connected_output = neigh_compat_output, -}; - struct neigh_table arp_tbl = { .family = AF_INET, .key_len = 4, + .protocol = cpu_to_be16(ETH_P_IP), .hash = arp_hash, + .key_eq = arp_key_eq, .constructor = arp_constructor, .proxy_redo = parp_redo, .id = "arp_cache", @@ -216,7 +211,12 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { - return arp_hashfn(*(u32 *)pkey, dev, *hash_rnd); + return arp_hashfn(pkey, dev, hash_rnd); +} + +static bool arp_key_eq(const struct neighbour *neigh, const void *pkey) +{ + return neigh_key_eq32(neigh, pkey); } static int arp_constructor(struct neighbour *neigh) @@ -260,35 +260,6 @@ static int arp_constructor(struct neighbour *neigh) in old paradigm. */ -#if 1 - /* So... these "amateur" devices are hopeless. - The only thing, that I can say now: - It is very sad that we need to keep ugly obsolete - code to make them happy. - - They should be moved to more reasonable state, now - they use rebuild_header INSTEAD OF hard_start_xmit!!! - Besides that, they are sort of out of date - (a lot of redundant clones/copies, useless in 2.1), - I wonder why people believe that they work. - */ - switch (dev->type) { - default: - break; - case ARPHRD_ROSE: -#if IS_ENABLED(CONFIG_AX25) - case ARPHRD_AX25: -#if IS_ENABLED(CONFIG_NETROM) - case ARPHRD_NETROM: -#endif - neigh->ops = &arp_broken_ops; - neigh->output = neigh->ops->output; - return 0; -#else - break; -#endif - } -#endif if (neigh->type == RTN_MULTICAST) { neigh->nud_state = NUD_NOARP; arp_mc_map(addr, neigh->ha, dev, 1); @@ -433,71 +404,6 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) return flag; } -/* OBSOLETE FUNCTIONS */ - -/* - * Find an arp mapping in the cache. If not found, post a request. - * - * It is very UGLY routine: it DOES NOT use skb->dst->neighbour, - * even if it exists. It is supposed that skb->dev was mangled - * by a virtual device (eql, shaper). Nobody but broken devices - * is allowed to use this function, it is scheduled to be removed. --ANK - */ - -static int arp_set_predefined(int addr_hint, unsigned char *haddr, - __be32 paddr, struct net_device *dev) -{ - switch (addr_hint) { - case RTN_LOCAL: - pr_debug("arp called for own IP address\n"); - memcpy(haddr, dev->dev_addr, dev->addr_len); - return 1; - case RTN_MULTICAST: - arp_mc_map(paddr, haddr, dev, 1); - return 1; - case RTN_BROADCAST: - memcpy(haddr, dev->broadcast, dev->addr_len); - return 1; - } - return 0; -} - - -int arp_find(unsigned char *haddr, struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - __be32 paddr; - struct neighbour *n; - - if (!skb_dst(skb)) { - pr_debug("arp_find is called with dst==NULL\n"); - kfree_skb(skb); - return 1; - } - - paddr = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr); - if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, - paddr, dev)) - return 0; - - n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); - - if (n) { - n->used = jiffies; - if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) { - neigh_ha_snapshot(haddr, n, dev); - neigh_release(n); - return 0; - } - neigh_release(n); - } else - kfree_skb(skb); - return 1; -} -EXPORT_SYMBOL(arp_find); - -/* END OF OBSOLETE FUNCTIONS */ - /* * Check if we can use proxy ARP for this path */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3a8985c94581..c6473f365ad1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -107,7 +107,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; -static u32 inet_addr_hash(struct net *net, __be32 addr) +static u32 inet_addr_hash(const struct net *net, __be32 addr) { u32 val = (__force u32) addr ^ net_hash_mix(net); @@ -548,6 +548,26 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } +static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) +{ + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = ifa->ifa_address, + .imr_ifindex = ifa->ifa_dev->dev->ifindex, + }; + int ret; + + ASSERT_RTNL(); + + lock_sock(sk); + if (join) + ret = ip_mc_join_group(sk, &mreq); + else + ret = ip_mc_leave_group(sk, &mreq); + release_sock(sk); + + return ret; +} + static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); @@ -573,7 +593,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && - ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL])) + ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) continue; if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) @@ -581,9 +601,11 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) if (tb[IFA_ADDRESS] && (ifm->ifa_prefixlen != ifa->ifa_prefixlen || - !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) + !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa))) continue; + if (ipv4_is_multicast(ifa->ifa_address)) + ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -769,11 +791,11 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_dev = in_dev; - ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]); - ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]); + ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]); + ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]); if (tb[IFA_BROADCAST]) - ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); + ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]); if (tb[IFA_LABEL]) nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); @@ -838,6 +860,15 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) * userspace already relies on not having to provide this. */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); + if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { + int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, + true, ifa); + + if (ret < 0) { + inet_free_ifa(ifa); + return ret; + } + } return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); } else { inet_free_ifa(ifa); @@ -1510,11 +1541,11 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } if ((ifa->ifa_address && - nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || + nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && - nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) || + nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) || (ifa->ifa_broadcast && - nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || + nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 57be71dd6a9e..718b0a16ea40 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -52,12 +52,12 @@ static int __net_init fib4_rules_init(struct net *net) { struct fib_table *local_table, *main_table; - local_table = fib_trie_table(RT_TABLE_LOCAL); - if (local_table == NULL) + main_table = fib_trie_table(RT_TABLE_MAIN, NULL); + if (main_table == NULL) return -ENOMEM; - main_table = fib_trie_table(RT_TABLE_MAIN); - if (main_table == NULL) + local_table = fib_trie_table(RT_TABLE_LOCAL, main_table); + if (local_table == NULL) goto fail; hlist_add_head_rcu(&local_table->tb_hlist, @@ -67,14 +67,14 @@ static int __net_init fib4_rules_init(struct net *net) return 0; fail: - fib_free_table(local_table); + fib_free_table(main_table); return -ENOMEM; } #else struct fib_table *fib_new_table(struct net *net, u32 id) { - struct fib_table *tb; + struct fib_table *tb, *alias = NULL; unsigned int h; if (id == 0) @@ -83,23 +83,23 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - tb = fib_trie_table(id); + if (id == RT_TABLE_LOCAL) + alias = fib_new_table(net, RT_TABLE_MAIN); + + tb = fib_trie_table(id, alias); if (!tb) return NULL; switch (id) { case RT_TABLE_LOCAL: - net->ipv4.fib_local = tb; + rcu_assign_pointer(net->ipv4.fib_local, tb); break; - case RT_TABLE_MAIN: - net->ipv4.fib_main = tb; + rcu_assign_pointer(net->ipv4.fib_main, tb); break; - case RT_TABLE_DEFAULT: - net->ipv4.fib_default = tb; + rcu_assign_pointer(net->ipv4.fib_default, tb); break; - default: break; } @@ -129,16 +129,62 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ +static void fib_replace_table(struct net *net, struct fib_table *old, + struct fib_table *new) +{ +#ifdef CONFIG_IP_MULTIPLE_TABLES + switch (new->tb_id) { + case RT_TABLE_LOCAL: + rcu_assign_pointer(net->ipv4.fib_local, new); + break; + case RT_TABLE_MAIN: + rcu_assign_pointer(net->ipv4.fib_main, new); + break; + case RT_TABLE_DEFAULT: + rcu_assign_pointer(net->ipv4.fib_default, new); + break; + default: + break; + } + +#endif + /* replace the old table in the hlist */ + hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist); +} + +int fib_unmerge(struct net *net) +{ + struct fib_table *old, *new; + + /* attempt to fetch local table if it has been allocated */ + old = fib_get_table(net, RT_TABLE_LOCAL); + if (!old) + return 0; + + new = fib_trie_unmerge(old); + if (!new) + return -ENOMEM; + + /* replace merged table with clean table */ + if (new != old) { + fib_replace_table(net, old, new); + fib_free_table(old); + } + + return 0; +} + static void fib_flush(struct net *net) { int flushed = 0; - struct fib_table *tb; - struct hlist_head *head; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) + struct hlist_head *head = &net->ipv4.fib_table_hash[h]; + struct hlist_node *tmp; + struct fib_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) flushed += fib_table_flush(tb); } @@ -146,6 +192,19 @@ static void fib_flush(struct net *net) rt_cache_flush(net); } +void fib_flush_external(struct net *net) +{ + struct fib_table *tb; + struct hlist_head *head; + unsigned int h; + + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + head = &net->ipv4.fib_table_hash[h]; + hlist_for_each_entry(tb, head, tb_hlist) + fib_table_flush_external(tb); + } +} + /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. @@ -665,10 +724,12 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_e = cb->args[1]; + rcu_read_lock(); + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -682,6 +743,8 @@ next: } } out: + rcu_read_unlock(); + cb->args[1] = e; cb->args[0] = h; @@ -1111,23 +1174,29 @@ static void ip_fib_net_exit(struct net *net) { unsigned int i; + rtnl_lock(); + #ifdef CONFIG_IP_MULTIPLE_TABLES - fib4_rules_exit(net); + RCU_INIT_POINTER(net->ipv4.fib_local, NULL); + RCU_INIT_POINTER(net->ipv4.fib_main, NULL); + RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif - rtnl_lock(); for (i = 0; i < FIB_TABLE_HASHSZ; i++) { - struct fib_table *tb; - struct hlist_head *head; + struct hlist_head *head = &net->ipv4.fib_table_hash[i]; struct hlist_node *tmp; + struct fib_table *tb; - head = &net->ipv4.fib_table_hash[i]; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); fib_table_flush(tb); fib_free_table(tb); } } + +#ifdef CONFIG_IP_MULTIPLE_TABLES + fib4_rules_exit(net); +#endif rtnl_unlock(); kfree(net->ipv4.fib_table_hash); } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 825981b1049a..c6211ed60b03 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -6,11 +6,13 @@ #include <net/ip_fib.h> struct fib_alias { - struct list_head fa_list; + struct hlist_node fa_list; struct fib_info *fa_info; u8 fa_tos; u8 fa_type; u8 fa_state; + u8 fa_slen; + u32 tb_id; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index d3db718be51d..8162dd8e86d7 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -174,6 +174,11 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (frh->tos & ~IPTOS_TOS_MASK) goto errout; + /* split local/main if they are not already split */ + err = fib_unmerge(net); + if (err) + goto errout; + if (rule->table == RT_TABLE_UNSPEC) { if (rule->action == FR_ACT_TO_TBL) { struct fib_table *table; @@ -189,10 +194,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } if (frh->src_len) - rule4->src = nla_get_be32(tb[FRA_SRC]); + rule4->src = nla_get_in_addr(tb[FRA_SRC]); if (frh->dst_len) - rule4->dst = nla_get_be32(tb[FRA_DST]); + rule4->dst = nla_get_in_addr(tb[FRA_DST]); #ifdef CONFIG_IP_ROUTE_CLASSID if (tb[FRA_FLOW]) { @@ -209,21 +214,31 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; + fib_flush_external(rule->fr_net); + err = 0; errout: return err; } -static void fib4_rule_delete(struct fib_rule *rule) +static int fib4_rule_delete(struct fib_rule *rule) { struct net *net = rule->fr_net; -#ifdef CONFIG_IP_ROUTE_CLASSID - struct fib4_rule *rule4 = (struct fib4_rule *) rule; + int err; - if (rule4->tclassid) + /* split local/main if they are not already split */ + err = fib_unmerge(net); + if (err) + goto errout; + +#ifdef CONFIG_IP_ROUTE_CLASSID + if (((struct fib4_rule *)rule)->tclassid) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; + fib_flush_external(rule->fr_net); +errout: + return err; } static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, @@ -245,10 +260,10 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, return 0; #endif - if (frh->src_len && (rule4->src != nla_get_be32(tb[FRA_SRC]))) + if (frh->src_len && (rule4->src != nla_get_in_addr(tb[FRA_SRC]))) return 0; - if (frh->dst_len && (rule4->dst != nla_get_be32(tb[FRA_DST]))) + if (frh->dst_len && (rule4->dst != nla_get_in_addr(tb[FRA_DST]))) return 0; return 1; @@ -264,9 +279,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->tos = rule4->tos; if ((rule4->dst_len && - nla_put_be32(skb, FRA_DST, rule4->dst)) || + nla_put_in_addr(skb, FRA_DST, rule4->dst)) || (rule4->src_len && - nla_put_be32(skb, FRA_SRC, rule4->src))) + nla_put_in_addr(skb, FRA_SRC, rule4->src))) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (rule4->tclassid && diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1e2090ea663e..eac5aec7772a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -213,7 +213,6 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - release_net(fi->fib_net); if (fi->fib_metrics != (u32 *) dst_default_metrics) kfree(fi->fib_metrics); kfree(fi); @@ -469,7 +468,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; + nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0; #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; @@ -524,7 +523,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - if (nla && nla_get_be32(nla) != nh->nh_gw) + if (nla && nla_get_in_addr(nla) != nh->nh_gw) return 1; #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); @@ -814,7 +813,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } else fi->fib_metrics = (u32 *) dst_default_metrics; - fi->fib_net = hold_net(net); + fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; fi->fib_flags = cfg->fc_flags; @@ -1016,7 +1015,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtm->rtm_protocol = fi->fib_protocol; if (rtm->rtm_dst_len && - nla_put_be32(skb, RTA_DST, dst)) + nla_put_in_addr(skb, RTA_DST, dst)) goto nla_put_failure; if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) @@ -1025,11 +1024,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (fi->fib_prefsrc && - nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc)) + nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { if (fi->fib_nh->nh_gw && - nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) + nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; if (fi->fib_nh->nh_oif && nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) @@ -1059,7 +1058,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtnh->rtnh_ifindex = nh->nh_oif; if (nh->nh_gw && - nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw)) + nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw)) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (nh->nh_tclassid && @@ -1163,12 +1162,12 @@ int fib_sync_down_dev(struct net_device *dev, int force) void fib_select_default(struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; - struct list_head *fa_head = res->fa_head; + struct hlist_head *fa_head = res->fa_head; struct fib_table *tb = res->table; int order = -1, last_idx = -1; struct fib_alias *fa; - list_for_each_entry_rcu(fa, fa_head, fa_list) { + hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; if (next_fi->fib_scope != res->scope || diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3daf0224ff2e..2c7c299ee2b9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -79,6 +79,7 @@ #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> +#include <net/switchdev.h> #include "fib_lookup.h" #define MAX_STAT_DEPTH 32 @@ -88,38 +89,35 @@ typedef unsigned int t_key; -#define IS_TNODE(n) ((n)->bits) -#define IS_LEAF(n) (!(n)->bits) +#define IS_TRIE(n) ((n)->pos >= KEYLENGTH) +#define IS_TNODE(n) ((n)->bits) +#define IS_LEAF(n) (!(n)->bits) -#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos) - -struct tnode { +struct key_vector { t_key key; - unsigned char bits; /* 2log(KEYLENGTH) bits needed */ unsigned char pos; /* 2log(KEYLENGTH) bits needed */ + unsigned char bits; /* 2log(KEYLENGTH) bits needed */ unsigned char slen; - struct tnode __rcu *parent; - struct rcu_head rcu; union { - /* The fields in this struct are valid if bits > 0 (TNODE) */ - struct { - t_key empty_children; /* KEYLENGTH bits needed */ - t_key full_children; /* KEYLENGTH bits needed */ - struct tnode __rcu *child[0]; - }; - /* This list pointer if valid if bits == 0 (LEAF) */ - struct hlist_head list; + /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ + struct hlist_head leaf; + /* This array is valid if (pos | bits) > 0 (TNODE) */ + struct key_vector __rcu *tnode[0]; }; }; -struct leaf_info { - struct hlist_node hlist; - int plen; - u32 mask_plen; /* ntohl(inet_make_mask(plen)) */ - struct list_head falh; +struct tnode { struct rcu_head rcu; + t_key empty_children; /* KEYLENGTH bits needed */ + t_key full_children; /* KEYLENGTH bits needed */ + struct key_vector __rcu *parent; + struct key_vector kv[1]; +#define tn_bits kv[0].bits }; +#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) +#define LEAF_SIZE TNODE_SIZE(1) + #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats { unsigned int gets; @@ -142,13 +140,13 @@ struct trie_stat { }; struct trie { - struct tnode __rcu *trie; + struct key_vector kv[1]; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats; #endif }; -static void resize(struct trie *t, struct tnode *tn); +static struct key_vector *resize(struct trie *t, struct key_vector *tn); static size_t tnode_free_size; /* @@ -161,41 +159,46 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; +static inline struct tnode *tn_info(struct key_vector *kv) +{ + return container_of(kv, struct tnode, kv[0]); +} + /* caller must hold RTNL */ -#define node_parent(n) rtnl_dereference((n)->parent) +#define node_parent(tn) rtnl_dereference(tn_info(tn)->parent) +#define get_child(tn, i) rtnl_dereference((tn)->tnode[i]) /* caller must hold RCU read lock or RTNL */ -#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) +#define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent) +#define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i]) /* wrapper for rcu_assign_pointer */ -static inline void node_set_parent(struct tnode *n, struct tnode *tp) +static inline void node_set_parent(struct key_vector *n, struct key_vector *tp) { if (n) - rcu_assign_pointer(n->parent, tp); + rcu_assign_pointer(tn_info(n)->parent, tp); } -#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) +#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p) /* This provides us with the number of children in this node, in the case of a * leaf this will return 0 meaning none of the children are accessible. */ -static inline unsigned long tnode_child_length(const struct tnode *tn) +static inline unsigned long child_length(const struct key_vector *tn) { return (1ul << tn->bits) & ~(1ul); } -/* caller must hold RTNL */ -static inline struct tnode *tnode_get_child(const struct tnode *tn, - unsigned long i) -{ - return rtnl_dereference(tn->child[i]); -} +#define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos) -/* caller must hold RCU read lock or RTNL */ -static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, - unsigned long i) +static inline unsigned long get_index(t_key key, struct key_vector *kv) { - return rcu_dereference_rtnl(tn->child[i]); + unsigned long index = key ^ kv->key; + + if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos)) + return 0; + + return index >> kv->pos; } /* To understand this stuff, an understanding of keys and all their bits is @@ -274,106 +277,104 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) } #define TNODE_KMALLOC_MAX \ - ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *)) + ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) +#define TNODE_VMALLOC_MAX \ + ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) static void __node_free_rcu(struct rcu_head *head) { struct tnode *n = container_of(head, struct tnode, rcu); - if (IS_LEAF(n)) + if (!n->tn_bits) kmem_cache_free(trie_leaf_kmem, n); - else if (n->bits <= TNODE_KMALLOC_MAX) + else if (n->tn_bits <= TNODE_KMALLOC_MAX) kfree(n); else vfree(n); } -#define node_free(n) call_rcu(&n->rcu, __node_free_rcu) +#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) -static inline void free_leaf_info(struct leaf_info *leaf) +static struct tnode *tnode_alloc(int bits) { - kfree_rcu(leaf, rcu); -} + size_t size; + + /* verify bits is within bounds */ + if (bits > TNODE_VMALLOC_MAX) + return NULL; + + /* determine size and verify it is non-zero and didn't overflow */ + size = TNODE_SIZE(1ul << bits); -static struct tnode *tnode_alloc(size_t size) -{ if (size <= PAGE_SIZE) return kzalloc(size, GFP_KERNEL); else return vzalloc(size); } -static inline void empty_child_inc(struct tnode *n) +static inline void empty_child_inc(struct key_vector *n) { - ++n->empty_children ? : ++n->full_children; + ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; } -static inline void empty_child_dec(struct tnode *n) +static inline void empty_child_dec(struct key_vector *n) { - n->empty_children-- ? : n->full_children--; + tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; } -static struct tnode *leaf_new(t_key key) +static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) { - struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); - if (l) { - l->parent = NULL; - /* set key and pos to reflect full key value - * any trailing zeros in the key should be ignored - * as the nodes are searched - */ - l->key = key; - l->slen = 0; - l->pos = 0; - /* set bits to 0 indicating we are not a tnode */ - l->bits = 0; + struct tnode *kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); + struct key_vector *l = kv->kv; - INIT_HLIST_HEAD(&l->list); - } - return l; -} + if (!kv) + return NULL; -static struct leaf_info *leaf_info_new(int plen) -{ - struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); - if (li) { - li->plen = plen; - li->mask_plen = ntohl(inet_make_mask(plen)); - INIT_LIST_HEAD(&li->falh); - } - return li; + /* initialize key vector */ + l->key = key; + l->pos = 0; + l->bits = 0; + l->slen = fa->fa_slen; + + /* link leaf to fib alias */ + INIT_HLIST_HEAD(&l->leaf); + hlist_add_head(&fa->fa_list, &l->leaf); + + return l; } -static struct tnode *tnode_new(t_key key, int pos, int bits) +static struct key_vector *tnode_new(t_key key, int pos, int bits) { - size_t sz = offsetof(struct tnode, child[1ul << bits]); - struct tnode *tn = tnode_alloc(sz); + struct tnode *tnode = tnode_alloc(bits); unsigned int shift = pos + bits; + struct key_vector *tn = tnode->kv; /* verify bits and pos their msb bits clear and values are valid */ BUG_ON(!bits || (shift > KEYLENGTH)); - if (tn) { - tn->parent = NULL; - tn->slen = pos; - tn->pos = pos; - tn->bits = bits; - tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; - if (bits == KEYLENGTH) - tn->full_children = 1; - else - tn->empty_children = 1ul << bits; - } + pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), + sizeof(struct key_vector *) << bits); + + if (!tnode) + return NULL; + + if (bits == KEYLENGTH) + tnode->full_children = 1; + else + tnode->empty_children = 1ul << bits; + + tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; + tn->pos = pos; + tn->bits = bits; + tn->slen = pos; - pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct tnode *) << bits); return tn; } /* Check whether a tnode 'n' is "full", i.e. it is an internal node * and no bits are skipped. See discussion in dyntree paper p. 6 */ -static inline int tnode_full(const struct tnode *tn, const struct tnode *n) +static inline int tnode_full(struct key_vector *tn, struct key_vector *n) { return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } @@ -381,12 +382,13 @@ static inline int tnode_full(const struct tnode *tn, const struct tnode *n) /* Add a child at position i overwriting the old value. * Update the value of full_children and empty_children. */ -static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) +static void put_child(struct key_vector *tn, unsigned long i, + struct key_vector *n) { - struct tnode *chi = tnode_get_child(tn, i); + struct key_vector *chi = get_child(tn, i); int isfull, wasfull; - BUG_ON(i >= tnode_child_length(tn)); + BUG_ON(i >= child_length(tn)); /* update emptyChildren, overflow into fullChildren */ if (n == NULL && chi != NULL) @@ -399,23 +401,23 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) isfull = tnode_full(tn, n); if (wasfull && !isfull) - tn->full_children--; + tn_info(tn)->full_children--; else if (!wasfull && isfull) - tn->full_children++; + tn_info(tn)->full_children++; if (n && (tn->slen < n->slen)) tn->slen = n->slen; - rcu_assign_pointer(tn->child[i], n); + rcu_assign_pointer(tn->tnode[i], n); } -static void update_children(struct tnode *tn) +static void update_children(struct key_vector *tn) { unsigned long i; /* update all of the child parent pointers */ - for (i = tnode_child_length(tn); i;) { - struct tnode *inode = tnode_get_child(tn, --i); + for (i = child_length(tn); i;) { + struct key_vector *inode = get_child(tn, --i); if (!inode) continue; @@ -431,36 +433,37 @@ static void update_children(struct tnode *tn) } } -static inline void put_child_root(struct tnode *tp, struct trie *t, - t_key key, struct tnode *n) +static inline void put_child_root(struct key_vector *tp, t_key key, + struct key_vector *n) { - if (tp) - put_child(tp, get_index(key, tp), n); + if (IS_TRIE(tp)) + rcu_assign_pointer(tp->tnode[0], n); else - rcu_assign_pointer(t->trie, n); + put_child(tp, get_index(key, tp), n); } -static inline void tnode_free_init(struct tnode *tn) +static inline void tnode_free_init(struct key_vector *tn) { - tn->rcu.next = NULL; + tn_info(tn)->rcu.next = NULL; } -static inline void tnode_free_append(struct tnode *tn, struct tnode *n) +static inline void tnode_free_append(struct key_vector *tn, + struct key_vector *n) { - n->rcu.next = tn->rcu.next; - tn->rcu.next = &n->rcu; + tn_info(n)->rcu.next = tn_info(tn)->rcu.next; + tn_info(tn)->rcu.next = &tn_info(n)->rcu; } -static void tnode_free(struct tnode *tn) +static void tnode_free(struct key_vector *tn) { - struct callback_head *head = &tn->rcu; + struct callback_head *head = &tn_info(tn)->rcu; while (head) { head = head->next; - tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); + tnode_free_size += TNODE_SIZE(1ul << tn->bits); node_free(tn); - tn = container_of(head, struct tnode, rcu); + tn = container_of(head, struct tnode, rcu)->kv; } if (tnode_free_size >= PAGE_SIZE * sync_pages) { @@ -469,14 +472,16 @@ static void tnode_free(struct tnode *tn) } } -static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) +static struct key_vector *replace(struct trie *t, + struct key_vector *oldtnode, + struct key_vector *tn) { - struct tnode *tp = node_parent(oldtnode); + struct key_vector *tp = node_parent(oldtnode); unsigned long i; /* setup the parent pointer out of and back into this node */ NODE_INIT_PARENT(tn, tp); - put_child_root(tp, t, tn->key, tn); + put_child_root(tp, tn->key, tn); /* update all of the child parent pointers */ update_children(tn); @@ -485,18 +490,21 @@ static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) tnode_free(oldtnode); /* resize children now that oldtnode is freed */ - for (i = tnode_child_length(tn); i;) { - struct tnode *inode = tnode_get_child(tn, --i); + for (i = child_length(tn); i;) { + struct key_vector *inode = get_child(tn, --i); /* resize child node */ if (tnode_full(tn, inode)) - resize(t, inode); + tn = resize(t, inode); } + + return tp; } -static int inflate(struct trie *t, struct tnode *oldtnode) +static struct key_vector *inflate(struct trie *t, + struct key_vector *oldtnode) { - struct tnode *tn; + struct key_vector *tn; unsigned long i; t_key m; @@ -504,7 +512,7 @@ static int inflate(struct trie *t, struct tnode *oldtnode) tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); if (!tn) - return -ENOMEM; + goto notnode; /* prepare oldtnode to be freed */ tnode_free_init(oldtnode); @@ -514,9 +522,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) * point to existing tnodes and the links between our allocated * nodes. */ - for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { - struct tnode *inode = tnode_get_child(oldtnode, --i); - struct tnode *node0, *node1; + for (i = child_length(oldtnode), m = 1u << tn->pos; i;) { + struct key_vector *inode = get_child(oldtnode, --i); + struct key_vector *node0, *node1; unsigned long j, k; /* An empty child */ @@ -534,8 +542,8 @@ static int inflate(struct trie *t, struct tnode *oldtnode) /* An internal node with two children */ if (inode->bits == 1) { - put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); - put_child(tn, 2 * i, tnode_get_child(inode, 0)); + put_child(tn, 2 * i + 1, get_child(inode, 1)); + put_child(tn, 2 * i, get_child(inode, 0)); continue; } @@ -564,11 +572,11 @@ static int inflate(struct trie *t, struct tnode *oldtnode) tnode_free_append(tn, node0); /* populate child pointers in new nodes */ - for (k = tnode_child_length(inode), j = k / 2; j;) { - put_child(node1, --j, tnode_get_child(inode, --k)); - put_child(node0, j, tnode_get_child(inode, j)); - put_child(node1, --j, tnode_get_child(inode, --k)); - put_child(node0, j, tnode_get_child(inode, j)); + for (k = child_length(inode), j = k / 2; j;) { + put_child(node1, --j, get_child(inode, --k)); + put_child(node0, j, get_child(inode, j)); + put_child(node1, --j, get_child(inode, --k)); + put_child(node0, j, get_child(inode, j)); } /* link new nodes to parent */ @@ -581,25 +589,25 @@ static int inflate(struct trie *t, struct tnode *oldtnode) } /* setup the parent pointers into and out of this node */ - replace(t, oldtnode, tn); - - return 0; + return replace(t, oldtnode, tn); nomem: /* all pointers should be clean so we are done */ tnode_free(tn); - return -ENOMEM; +notnode: + return NULL; } -static int halve(struct trie *t, struct tnode *oldtnode) +static struct key_vector *halve(struct trie *t, + struct key_vector *oldtnode) { - struct tnode *tn; + struct key_vector *tn; unsigned long i; pr_debug("In halve\n"); tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); if (!tn) - return -ENOMEM; + goto notnode; /* prepare oldtnode to be freed */ tnode_free_init(oldtnode); @@ -609,10 +617,10 @@ static int halve(struct trie *t, struct tnode *oldtnode) * point to existing tnodes and the links between our allocated * nodes. */ - for (i = tnode_child_length(oldtnode); i;) { - struct tnode *node1 = tnode_get_child(oldtnode, --i); - struct tnode *node0 = tnode_get_child(oldtnode, --i); - struct tnode *inode; + for (i = child_length(oldtnode); i;) { + struct key_vector *node1 = get_child(oldtnode, --i); + struct key_vector *node0 = get_child(oldtnode, --i); + struct key_vector *inode; /* At least one of the children is empty */ if (!node1 || !node0) { @@ -622,10 +630,8 @@ static int halve(struct trie *t, struct tnode *oldtnode) /* Two nonempty children */ inode = tnode_new(node0->key, oldtnode->pos, 1); - if (!inode) { - tnode_free(tn); - return -ENOMEM; - } + if (!inode) + goto nomem; tnode_free_append(tn, inode); /* initialize pointers out of node */ @@ -638,30 +644,36 @@ static int halve(struct trie *t, struct tnode *oldtnode) } /* setup the parent pointers into and out of this node */ - replace(t, oldtnode, tn); - - return 0; + return replace(t, oldtnode, tn); +nomem: + /* all pointers should be clean so we are done */ + tnode_free(tn); +notnode: + return NULL; } -static void collapse(struct trie *t, struct tnode *oldtnode) +static struct key_vector *collapse(struct trie *t, + struct key_vector *oldtnode) { - struct tnode *n, *tp; + struct key_vector *n, *tp; unsigned long i; /* scan the tnode looking for that one child that might still exist */ - for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) - n = tnode_get_child(oldtnode, --i); + for (n = NULL, i = child_length(oldtnode); !n && i;) + n = get_child(oldtnode, --i); /* compress one level */ tp = node_parent(oldtnode); - put_child_root(tp, t, oldtnode->key, n); + put_child_root(tp, oldtnode->key, n); node_set_parent(n, tp); /* drop dead node */ node_free(oldtnode); + + return tp; } -static unsigned char update_suffix(struct tnode *tn) +static unsigned char update_suffix(struct key_vector *tn) { unsigned char slen = tn->pos; unsigned long stride, i; @@ -671,8 +683,8 @@ static unsigned char update_suffix(struct tnode *tn) * why we start with a stride of 2 since a stride of 1 would * represent the nodes with suffix length equal to tn->pos */ - for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) { - struct tnode *n = tnode_get_child(tn, i); + for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) { + struct key_vector *n = get_child(tn, i); if (!n || (n->slen <= slen)) continue; @@ -704,12 +716,12 @@ static unsigned char update_suffix(struct tnode *tn) * * 'high' in this instance is the variable 'inflate_threshold'. It * is expressed as a percentage, so we multiply it with - * tnode_child_length() and instead of multiplying by 2 (since the + * child_length() and instead of multiplying by 2 (since the * child array will be doubled by inflate()) and multiplying * the left-hand side by 100 (to handle the percentage thing) we * multiply the left-hand side by 50. * - * The left-hand side may look a bit weird: tnode_child_length(tn) + * The left-hand side may look a bit weird: child_length(tn) * - tn->empty_children is of course the number of non-null children * in the current node. tn->full_children is the number of "full" * children, that is non-null tnodes with a skip value of 0. @@ -719,10 +731,10 @@ static unsigned char update_suffix(struct tnode *tn) * A clearer way to write this would be: * * to_be_doubled = tn->full_children; - * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - + * not_to_be_doubled = child_length(tn) - tn->empty_children - * tn->full_children; * - * new_child_length = tnode_child_length(tn) * 2; + * new_child_length = child_length(tn) * 2; * * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / * new_child_length; @@ -739,57 +751,57 @@ static unsigned char update_suffix(struct tnode *tn) * inflate_threshold * new_child_length * * expand not_to_be_doubled and to_be_doubled, and shorten: - * 100 * (tnode_child_length(tn) - tn->empty_children + + * 100 * (child_length(tn) - tn->empty_children + * tn->full_children) >= inflate_threshold * new_child_length * * expand new_child_length: - * 100 * (tnode_child_length(tn) - tn->empty_children + + * 100 * (child_length(tn) - tn->empty_children + * tn->full_children) >= - * inflate_threshold * tnode_child_length(tn) * 2 + * inflate_threshold * child_length(tn) * 2 * * shorten again: - * 50 * (tn->full_children + tnode_child_length(tn) - + * 50 * (tn->full_children + child_length(tn) - * tn->empty_children) >= inflate_threshold * - * tnode_child_length(tn) + * child_length(tn) * */ -static bool should_inflate(const struct tnode *tp, const struct tnode *tn) +static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn) { - unsigned long used = tnode_child_length(tn); + unsigned long used = child_length(tn); unsigned long threshold = used; /* Keep root node larger */ - threshold *= tp ? inflate_threshold : inflate_threshold_root; - used -= tn->empty_children; - used += tn->full_children; + threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold; + used -= tn_info(tn)->empty_children; + used += tn_info(tn)->full_children; /* if bits == KEYLENGTH then pos = 0, and will fail below */ return (used > 1) && tn->pos && ((50 * used) >= threshold); } -static bool should_halve(const struct tnode *tp, const struct tnode *tn) +static inline bool should_halve(struct key_vector *tp, struct key_vector *tn) { - unsigned long used = tnode_child_length(tn); + unsigned long used = child_length(tn); unsigned long threshold = used; /* Keep root node larger */ - threshold *= tp ? halve_threshold : halve_threshold_root; - used -= tn->empty_children; + threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold; + used -= tn_info(tn)->empty_children; /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); } -static bool should_collapse(const struct tnode *tn) +static inline bool should_collapse(struct key_vector *tn) { - unsigned long used = tnode_child_length(tn); + unsigned long used = child_length(tn); - used -= tn->empty_children; + used -= tn_info(tn)->empty_children; /* account for bits == KEYLENGTH case */ - if ((tn->bits == KEYLENGTH) && tn->full_children) + if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children) used -= KEY_MAX; /* One child or none, time to drop us from the trie */ @@ -797,10 +809,13 @@ static bool should_collapse(const struct tnode *tn) } #define MAX_WORK 10 -static void resize(struct trie *t, struct tnode *tn) +static struct key_vector *resize(struct trie *t, struct key_vector *tn) { - struct tnode *tp = node_parent(tn); - struct tnode __rcu **cptr; +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie_use_stats __percpu *stats = t->stats; +#endif + struct key_vector *tp = node_parent(tn); + unsigned long cindex = get_index(tn->key, tp); int max_work = MAX_WORK; pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", @@ -810,183 +825,128 @@ static void resize(struct trie *t, struct tnode *tn) * doing it ourselves. This way we can let RCU fully do its * thing without us interfering */ - cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; - BUG_ON(tn != rtnl_dereference(*cptr)); + BUG_ON(tn != get_child(tp, cindex)); /* Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. */ while (should_inflate(tp, tn) && max_work) { - if (inflate(t, tn)) { + tp = inflate(t, tn); + if (!tp) { #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->resize_node_skipped); + this_cpu_inc(stats->resize_node_skipped); #endif break; } max_work--; - tn = rtnl_dereference(*cptr); + tn = get_child(tp, cindex); } + /* update parent in case inflate failed */ + tp = node_parent(tn); + /* Return if at least one inflate is run */ if (max_work != MAX_WORK) - return; + return tp; /* Halve as long as the number of empty children in this * node is above threshold. */ while (should_halve(tp, tn) && max_work) { - if (halve(t, tn)) { + tp = halve(t, tn); + if (!tp) { #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->resize_node_skipped); + this_cpu_inc(stats->resize_node_skipped); #endif break; } max_work--; - tn = rtnl_dereference(*cptr); + tn = get_child(tp, cindex); } /* Only one child remains */ - if (should_collapse(tn)) { - collapse(t, tn); - return; - } + if (should_collapse(tn)) + return collapse(t, tn); + + /* update parent in case halve failed */ + tp = node_parent(tn); /* Return if at least one deflate was run */ if (max_work != MAX_WORK) - return; + return tp; /* push the suffix length to the parent node */ if (tn->slen > tn->pos) { unsigned char slen = update_suffix(tn); - if (tp && (slen > tp->slen)) + if (slen > tp->slen) tp->slen = slen; } -} - -/* readside must use rcu_read_lock currently dump routines - via get_fa_head and dump */ - -static struct leaf_info *find_leaf_info(struct tnode *l, int plen) -{ - struct hlist_head *head = &l->list; - struct leaf_info *li; - - hlist_for_each_entry_rcu(li, head, hlist) - if (li->plen == plen) - return li; - - return NULL; -} - -static inline struct list_head *get_fa_head(struct tnode *l, int plen) -{ - struct leaf_info *li = find_leaf_info(l, plen); - - if (!li) - return NULL; - return &li->falh; + return tp; } -static void leaf_pull_suffix(struct tnode *l) +static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) { - struct tnode *tp = node_parent(l); - - while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) { + while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { if (update_suffix(tp) > l->slen) break; tp = node_parent(tp); } } -static void leaf_push_suffix(struct tnode *l) +static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) { - struct tnode *tn = node_parent(l); - /* if this is a new leaf then tn will be NULL and we can sort * out parent suffix lengths as a part of trie_rebalance */ - while (tn && (tn->slen < l->slen)) { + while (tn->slen < l->slen) { tn->slen = l->slen; tn = node_parent(tn); } } -static void remove_leaf_info(struct tnode *l, struct leaf_info *old) -{ - /* record the location of the previous list_info entry */ - struct hlist_node **pprev = old->hlist.pprev; - struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); - - /* remove the leaf info from the list */ - hlist_del_rcu(&old->hlist); - - /* only access li if it is pointing at the last valid hlist_node */ - if (hlist_empty(&l->list) || (*pprev)) - return; - - /* update the trie with the latest suffix length */ - l->slen = KEYLENGTH - li->plen; - leaf_pull_suffix(l); -} - -static void insert_leaf_info(struct tnode *l, struct leaf_info *new) +/* rcu_read_lock needs to be hold by caller from readside */ +static struct key_vector *fib_find_node(struct trie *t, + struct key_vector **tp, u32 key) { - struct hlist_head *head = &l->list; - struct leaf_info *li = NULL, *last = NULL; + struct key_vector *pn, *n = t->kv; + unsigned long index = 0; - if (hlist_empty(head)) { - hlist_add_head_rcu(&new->hlist, head); - } else { - hlist_for_each_entry(li, head, hlist) { - if (new->plen > li->plen) - break; - - last = li; - } - if (last) - hlist_add_behind_rcu(&new->hlist, &last->hlist); - else - hlist_add_before_rcu(&new->hlist, &li->hlist); - } - - /* if we added to the tail node then we need to update slen */ - if (l->slen < (KEYLENGTH - new->plen)) { - l->slen = KEYLENGTH - new->plen; - leaf_push_suffix(l); - } -} + do { + pn = n; + n = get_child_rcu(n, index); -/* rcu_read_lock needs to be hold by caller from readside */ -static struct tnode *fib_find_node(struct trie *t, u32 key) -{ - struct tnode *n = rcu_dereference_rtnl(t->trie); + if (!n) + break; - while (n) { - unsigned long index = get_index(key, n); + index = get_cindex(key, n); /* This bit of code is a bit tricky but it combines multiple * checks into a single check. The prefix consists of the * prefix plus zeros for the bits in the cindex. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if (index & (~0ul << bits)) + * if (index >= (1ul << bits)) * we have a mismatch in skip bits and failed * else * we know the value is cindex + * + * This check is safe even if bits == KEYLENGTH due to the + * fact that we can only allocate a node with 32 bits if a + * long is greater than 32 bits. */ - if (index & (~0ul << n->bits)) - return NULL; - - /* we have found a leaf. Prefixes have already been compared */ - if (IS_LEAF(n)) + if (index >= (1ul << n->bits)) { + n = NULL; break; + } - n = tnode_get_child_rcu(n, index); - } + /* keep searching until we find a perfect match leaf or NULL */ + } while (IS_TNODE(n)); + + *tp = pn; return n; } @@ -994,14 +954,23 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) /* Return the first fib alias matching TOS with * priority less than or equal to PRIO. */ -static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, + u8 tos, u32 prio, u32 tb_id) { struct fib_alias *fa; if (!fah) return NULL; - list_for_each_entry(fa, fah, fa_list) { + hlist_for_each_entry(fa, fah, fa_list) { + if (fa->fa_slen < slen) + continue; + if (fa->fa_slen != slen) + break; + if (fa->tb_id > tb_id) + continue; + if (fa->tb_id != tb_id) + break; if (fa->fa_tos > tos) continue; if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) @@ -1011,77 +980,23 @@ static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) return NULL; } -static void trie_rebalance(struct trie *t, struct tnode *tn) +static void trie_rebalance(struct trie *t, struct key_vector *tn) { - struct tnode *tp; - - while ((tp = node_parent(tn)) != NULL) { - resize(t, tn); - tn = tp; - } - - /* Handle last (top) tnode */ - if (IS_TNODE(tn)) - resize(t, tn); + while (!IS_TRIE(tn)) + tn = resize(t, tn); } -/* only used from updater-side */ - -static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) +static int fib_insert_node(struct trie *t, struct key_vector *tp, + struct fib_alias *new, t_key key) { - struct list_head *fa_head = NULL; - struct tnode *l, *n, *tp = NULL; - struct leaf_info *li; - - li = leaf_info_new(plen); - if (!li) - return NULL; - fa_head = &li->falh; + struct key_vector *n, *l; - n = rtnl_dereference(t->trie); - - /* If we point to NULL, stop. Either the tree is empty and we should - * just put a new leaf in if, or we have reached an empty child slot, - * and we should just put our new leaf in that. - * - * If we hit a node with a key that does't match then we should stop - * and create a new tnode to replace that node and insert ourselves - * and the other node into the new tnode. - */ - while (n) { - unsigned long index = get_index(key, n); - - /* This bit of code is a bit tricky but it combines multiple - * checks into a single check. The prefix consists of the - * prefix plus zeros for the "bits" in the prefix. The index - * is the difference between the key and this value. From - * this we can actually derive several pieces of data. - * if !(index >> bits) - * we know the value is child index - * else - * we have a mismatch in skip bits and failed - */ - if (index >> n->bits) - break; - - /* we have found a leaf. Prefixes have already been compared */ - if (IS_LEAF(n)) { - /* Case 1: n is a leaf, and prefixes match*/ - insert_leaf_info(n, li); - return fa_head; - } - - tp = n; - n = tnode_get_child_rcu(n, index); - } - - l = leaf_new(key); - if (!l) { - free_leaf_info(li); - return NULL; - } + l = leaf_new(key, new); + if (!l) + goto noleaf; - insert_leaf_info(l, li); + /* retrieve child from parent node */ + n = get_child(tp, get_index(key, tp)); /* Case 2: n is a LEAF or a TNODE and the key doesn't match. * @@ -1090,21 +1005,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) * leaves us in position for handling as case 3 */ if (n) { - struct tnode *tn; + struct key_vector *tn; tn = tnode_new(key, __fls(key ^ n->key), 1); - if (!tn) { - free_leaf_info(li); - node_free(l); - return NULL; - } + if (!tn) + goto notnode; /* initialize routes out of node */ NODE_INIT_PARENT(tn, tp); put_child(tn, get_index(key, tn) ^ 1, n); /* start adding routes into the node */ - put_child_root(tp, t, key, tn); + put_child_root(tp, key, tn); node_set_parent(n, tn); /* parent now has a NULL spot where the leaf can go */ @@ -1112,69 +1024,93 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) } /* Case 3: n is NULL, and will just insert a new leaf */ - if (tp) { - NODE_INIT_PARENT(l, tp); - put_child(tp, get_index(key, tp), l); - trie_rebalance(t, tp); + NODE_INIT_PARENT(l, tp); + put_child_root(tp, key, l); + trie_rebalance(t, tp); + + return 0; +notnode: + node_free(l); +noleaf: + return -ENOMEM; +} + +static int fib_insert_alias(struct trie *t, struct key_vector *tp, + struct key_vector *l, struct fib_alias *new, + struct fib_alias *fa, t_key key) +{ + if (!l) + return fib_insert_node(t, tp, new, key); + + if (fa) { + hlist_add_before_rcu(&new->fa_list, &fa->fa_list); } else { - rcu_assign_pointer(t->trie, l); + struct fib_alias *last; + + hlist_for_each_entry(last, &l->leaf, fa_list) { + if (new->fa_slen < last->fa_slen) + break; + if ((new->fa_slen == last->fa_slen) && + (new->tb_id > last->tb_id)) + break; + fa = last; + } + + if (fa) + hlist_add_behind_rcu(&new->fa_list, &fa->fa_list); + else + hlist_add_head_rcu(&new->fa_list, &l->leaf); } - return fa_head; + /* if we added to the tail node then we need to update slen */ + if (l->slen < new->fa_slen) { + l->slen = new->fa_slen; + leaf_push_suffix(tp, l); + } + + return 0; } -/* - * Caller must hold RTNL. - */ +/* Caller must hold RTNL. */ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) { - struct trie *t = (struct trie *) tb->tb_data; + struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; - struct list_head *fa_head = NULL; + struct key_vector *l, *tp; struct fib_info *fi; - int plen = cfg->fc_dst_len; + u8 plen = cfg->fc_dst_len; + u8 slen = KEYLENGTH - plen; u8 tos = cfg->fc_tos; - u32 key, mask; + u32 key; int err; - struct tnode *l; - if (plen > 32) + if (plen > KEYLENGTH) return -EINVAL; key = ntohl(cfg->fc_dst); pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); - mask = ntohl(inet_make_mask(plen)); - - if (key & ~mask) + if ((plen < KEYLENGTH) && (key << plen)) return -EINVAL; - key = key & mask; - fi = fib_create_info(cfg); if (IS_ERR(fi)) { err = PTR_ERR(fi); goto err; } - l = fib_find_node(t, key); - fa = NULL; - - if (l) { - fa_head = get_fa_head(l, plen); - fa = fib_find_alias(fa_head, tos, fi->fib_priority); - } + l = fib_find_node(t, &tp, key); + fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, + tb->tb_id) : NULL; /* Now fa, if non-NULL, points to the first fib alias * with the same keys [prefix,tos,priority], if such key already * exists or to the node before which we will insert new one. * * If fa is NULL, we will need to allocate a new one and - * insert to the head of f. - * - * If f is NULL, no fib node matched the destination key - * and we need to allocate a new one of those as well. + * insert to the tail of the section matching the suffix length + * of the new alias. */ if (fa && fa->fa_tos == tos && @@ -1192,9 +1128,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) */ fa_match = NULL; fa_first = fa; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, fa_head, fa_list) { - if (fa->fa_tos != tos) + hlist_for_each_entry_from(fa, fa_list) { + if ((fa->fa_slen != slen) || + (fa->tb_id != tb->tb_id) || + (fa->fa_tos != tos)) break; if (fa->fa_info->fib_priority != fi->fib_priority) break; @@ -1226,8 +1163,21 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_type = cfg->fc_type; state = fa->fa_state; new_fa->fa_state = state & ~FA_S_ACCESSED; + new_fa->fa_slen = fa->fa_slen; + + err = netdev_switch_fib_ipv4_add(key, plen, fi, + new_fa->fa_tos, + cfg->fc_type, + cfg->fc_nlflags, + tb->tb_id); + if (err) { + netdev_switch_fib_ipv4_abort(fi); + kmem_cache_free(fn_alias_kmem, new_fa); + goto out; + } + + hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); - list_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); fib_release_info(fi_drop); @@ -1261,30 +1211,35 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_tos = tos; new_fa->fa_type = cfg->fc_type; new_fa->fa_state = 0; - /* - * Insert new entry to the list. - */ - - if (!fa_head) { - fa_head = fib_insert_node(t, key, plen); - if (unlikely(!fa_head)) { - err = -ENOMEM; - goto out_free_new_fa; - } + new_fa->fa_slen = slen; + new_fa->tb_id = tb->tb_id; + + /* (Optionally) offload fib entry to switch hardware. */ + err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, + cfg->fc_type, + cfg->fc_nlflags, + tb->tb_id); + if (err) { + netdev_switch_fib_ipv4_abort(fi); + goto out_free_new_fa; } + /* Insert new entry to the list. */ + err = fib_insert_alias(t, tp, l, new_fa, fa, key); + if (err) + goto out_sw_fib_del; + if (!plen) tb->tb_num_default++; - list_add_tail_rcu(&new_fa->fa_list, - (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(cfg->fc_nlinfo.nl_net); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, 0); succeeded: return 0; +out_sw_fib_del: + netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1293,7 +1248,7 @@ err: return err; } -static inline t_key prefix_mismatch(t_key key, struct tnode *n) +static inline t_key prefix_mismatch(t_key key, struct key_vector *n) { t_key prefix = n->key; @@ -1304,16 +1259,20 @@ static inline t_key prefix_mismatch(t_key key, struct tnode *n) int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { - struct trie *t = (struct trie *)tb->tb_data; + struct trie *t = (struct trie *) tb->tb_data; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats = t->stats; #endif const t_key key = ntohl(flp->daddr); - struct tnode *n, *pn; - struct leaf_info *li; + struct key_vector *n, *pn; + struct fib_alias *fa; + unsigned long index; t_key cindex; - n = rcu_dereference(t->trie); + pn = t->kv; + cindex = 0; + + n = get_child_rcu(pn, cindex); if (!n) return -EAGAIN; @@ -1321,24 +1280,25 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, this_cpu_inc(stats->gets); #endif - pn = n; - cindex = 0; - /* Step 1: Travel to the longest prefix match in the trie */ for (;;) { - unsigned long index = get_index(key, n); + index = get_cindex(key, n); /* This bit of code is a bit tricky but it combines multiple * checks into a single check. The prefix consists of the * prefix plus zeros for the "bits" in the prefix. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if (index & (~0ul << bits)) + * if (index >= (1ul << bits)) * we have a mismatch in skip bits and failed * else * we know the value is cindex + * + * This check is safe even if bits == KEYLENGTH due to the + * fact that we can only allocate a node with 32 bits if a + * long is greater than 32 bits. */ - if (index & (~0ul << n->bits)) + if (index >= (1ul << n->bits)) break; /* we have found a leaf. Prefixes have already been compared */ @@ -1353,7 +1313,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, cindex = index; } - n = tnode_get_child_rcu(n, index); + n = get_child_rcu(n, index); if (unlikely(!n)) goto backtrace; } @@ -1361,7 +1321,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, /* Step 2: Sort out leaves and begin backtracing for longest prefix */ for (;;) { /* record the pointer where our next node pointer is stored */ - struct tnode __rcu **cptr = n->child; + struct key_vector __rcu **cptr = n->tnode; /* This test verifies that none of the bits that differ * between the key and the prefix exist in the region of @@ -1393,13 +1353,17 @@ backtrace: while (!cindex) { t_key pkey = pn->key; - pn = node_parent_rcu(pn); - if (unlikely(!pn)) + /* If we don't have a parent then there is + * nothing for us to do as we do not have any + * further nodes to parse. + */ + if (IS_TRIE(pn)) return -EAGAIN; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->backtrack); #endif /* Get Child's index */ + pn = node_parent_rcu(pn); cindex = get_index(pkey, pn); } @@ -1407,138 +1371,134 @@ backtrace: cindex &= cindex - 1; /* grab pointer for next child node */ - cptr = &pn->child[cindex]; + cptr = &pn->tnode[cindex]; } } found: + /* this line carries forward the xor from earlier in the function */ + index = key ^ n->key; + /* Step 3: Process the leaf, if that fails fall back to backtracing */ - hlist_for_each_entry_rcu(li, &n->list, hlist) { - struct fib_alias *fa; + hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + int nhsel, err; - if ((key ^ n->key) & li->mask_plen) + if ((index >= (1ul << fa->fa_slen)) && + ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH))) continue; - - list_for_each_entry_rcu(fa, &li->falh, fa_list) { - struct fib_info *fi = fa->fa_info; - int nhsel, err; - - if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) - continue; - if (fi->fib_dead) - continue; - if (fa->fa_info->fib_scope < flp->flowi4_scope) - continue; - fib_alias_accessed(fa); - err = fib_props[fa->fa_type].error; - if (unlikely(err < 0)) { + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fi->fib_dead) + continue; + if (fa->fa_info->fib_scope < flp->flowi4_scope) + continue; + fib_alias_accessed(fa); + err = fib_props[fa->fa_type].error; + if (unlikely(err < 0)) { #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(stats->semantic_match_passed); + this_cpu_inc(stats->semantic_match_passed); #endif - return err; - } - if (fi->fib_flags & RTNH_F_DEAD) + return err; + } + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (nh->nh_flags & RTNH_F_DEAD) continue; - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) - continue; - - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&fi->fib_clntref); - - res->prefixlen = li->plen; - res->nh_sel = nhsel; - res->type = fa->fa_type; - res->scope = fi->fib_scope; - res->fi = fi; - res->table = tb; - res->fa_head = &li->falh; + if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) + continue; + + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&fi->fib_clntref); + + res->prefixlen = KEYLENGTH - fa->fa_slen; + res->nh_sel = nhsel; + res->type = fa->fa_type; + res->scope = fi->fib_scope; + res->fi = fi; + res->table = tb; + res->fa_head = &n->leaf; #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(stats->semantic_match_passed); + this_cpu_inc(stats->semantic_match_passed); #endif - return err; - } + return err; } - + } #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(stats->semantic_match_miss); + this_cpu_inc(stats->semantic_match_miss); #endif - } goto backtrace; } EXPORT_SYMBOL_GPL(fib_table_lookup); -/* - * Remove the leaf and return parent. - */ -static void trie_leaf_remove(struct trie *t, struct tnode *l) +static void fib_remove_alias(struct trie *t, struct key_vector *tp, + struct key_vector *l, struct fib_alias *old) { - struct tnode *tp = node_parent(l); + /* record the location of the previous list_info entry */ + struct hlist_node **pprev = old->fa_list.pprev; + struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next); - pr_debug("entering trie_leaf_remove(%p)\n", l); + /* remove the fib_alias from the list */ + hlist_del_rcu(&old->fa_list); - if (tp) { - put_child(tp, get_index(l->key, tp), NULL); + /* if we emptied the list this leaf will be freed and we can sort + * out parent suffix lengths as a part of trie_rebalance + */ + if (hlist_empty(&l->leaf)) { + put_child_root(tp, l->key, NULL); + node_free(l); trie_rebalance(t, tp); - } else { - RCU_INIT_POINTER(t->trie, NULL); + return; } - node_free(l); + /* only access fa if it is pointing at the last valid hlist_node */ + if (*pprev) + return; + + /* update the trie with the latest suffix length */ + l->slen = fa->fa_slen; + leaf_pull_suffix(tp, l); } -/* - * Caller must hold RTNL. - */ +/* Caller must hold RTNL. */ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; - u32 key, mask; - int plen = cfg->fc_dst_len; - u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; - struct list_head *fa_head; - struct tnode *l; - struct leaf_info *li; + struct key_vector *l, *tp; + u8 plen = cfg->fc_dst_len; + u8 slen = KEYLENGTH - plen; + u8 tos = cfg->fc_tos; + u32 key; - if (plen > 32) + if (plen > KEYLENGTH) return -EINVAL; key = ntohl(cfg->fc_dst); - mask = ntohl(inet_make_mask(plen)); - if (key & ~mask) + if ((plen < KEYLENGTH) && (key << plen)) return -EINVAL; - key = key & mask; - l = fib_find_node(t, key); - + l = fib_find_node(t, &tp, key); if (!l) return -ESRCH; - li = find_leaf_info(l, plen); - - if (!li) - return -ESRCH; - - fa_head = &li->falh; - fa = fib_find_alias(fa_head, tos, 0); - + fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id); if (!fa) return -ESRCH; pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); fa_to_delete = NULL; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, fa_head, fa_list) { + hlist_for_each_entry_from(fa, fa_list) { struct fib_info *fi = fa->fa_info; - if (fa->fa_tos != tos) + if ((fa->fa_slen != slen) || + (fa->tb_id != tb->tb_id) || + (fa->fa_tos != tos)) break; if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && @@ -1557,240 +1517,397 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!fa_to_delete) return -ESRCH; - fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, - &cfg->fc_nlinfo, 0); + netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, + cfg->fc_type, tb->tb_id); - list_del_rcu(&fa->fa_list); + rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, + &cfg->fc_nlinfo, 0); if (!plen) tb->tb_num_default--; - if (list_empty(fa_head)) { - remove_leaf_info(l, li); - free_leaf_info(li); - } + fib_remove_alias(t, tp, l, fa_to_delete); - if (hlist_empty(&l->list)) - trie_leaf_remove(t, l); - - if (fa->fa_state & FA_S_ACCESSED) + if (fa_to_delete->fa_state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); - fib_release_info(fa->fa_info); - alias_free_mem_rcu(fa); + fib_release_info(fa_to_delete->fa_info); + alias_free_mem_rcu(fa_to_delete); return 0; } -static int trie_flush_list(struct list_head *head) +/* Scan for the next leaf starting at the provided key value */ +static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) { - struct fib_alias *fa, *fa_node; - int found = 0; + struct key_vector *pn, *n = *tn; + unsigned long cindex; - list_for_each_entry_safe(fa, fa_node, head, fa_list) { - struct fib_info *fi = fa->fa_info; + /* this loop is meant to try and find the key in the trie */ + do { + /* record parent and next child index */ + pn = n; + cindex = key ? get_index(key, pn) : 0; - if (fi && (fi->fib_flags & RTNH_F_DEAD)) { - list_del_rcu(&fa->fa_list); - fib_release_info(fa->fa_info); - alias_free_mem_rcu(fa); - found++; + if (cindex >> pn->bits) + break; + + /* descend into the next child */ + n = get_child_rcu(pn, cindex++); + if (!n) + break; + + /* guarantee forward progress on the keys */ + if (IS_LEAF(n) && (n->key >= key)) + goto found; + } while (IS_TNODE(n)); + + /* this loop will search for the next leaf with a greater key */ + while (!IS_TRIE(pn)) { + /* if we exhausted the parent node we will need to climb */ + if (cindex >= (1ul << pn->bits)) { + t_key pkey = pn->key; + + pn = node_parent_rcu(pn); + cindex = get_index(pkey, pn) + 1; + continue; } + + /* grab the next available node */ + n = get_child_rcu(pn, cindex++); + if (!n) + continue; + + /* no need to compare keys since we bumped the index */ + if (IS_LEAF(n)) + goto found; + + /* Rescan start scanning in new node */ + pn = n; + cindex = 0; } - return found; + + *tn = pn; + return NULL; /* Root of trie */ +found: + /* if we are at the limit for keys just return NULL for the tnode */ + *tn = pn; + return n; } -static int trie_flush_leaf(struct tnode *l) +static void fib_trie_free(struct fib_table *tb) { - int found = 0; - struct hlist_head *lih = &l->list; + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; struct hlist_node *tmp; - struct leaf_info *li = NULL; - unsigned char plen = KEYLENGTH; + struct fib_alias *fa; + + /* walk trie in reverse order and free everything */ + for (;;) { + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; + + if (IS_TRIE(pn)) + break; + + n = pn; + pn = node_parent(pn); - hlist_for_each_entry_safe(li, tmp, lih, hlist) { - found += trie_flush_list(&li->falh); + /* drop emptied tnode */ + put_child_root(pn, n->key, NULL); + node_free(n); + + cindex = get_index(pkey, pn); - if (list_empty(&li->falh)) { - hlist_del_rcu(&li->hlist); - free_leaf_info(li); continue; } - plen = li->plen; - } + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; - l->slen = KEYLENGTH - plen; + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; - return found; + continue; + } + + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + hlist_del_rcu(&fa->fa_list); + alias_free_mem_rcu(fa); + } + + put_child_root(pn, n->key, NULL); + node_free(n); + } + +#ifdef CONFIG_IP_FIB_TRIE_STATS + free_percpu(t->stats); +#endif + kfree(tb); } -/* - * Scan for the next right leaf starting at node p->child[idx] - * Since we have back pointer, no recursion necessary. - */ -static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) +struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) { - do { - unsigned long idx = c ? idx = get_index(c->key, p) + 1 : 0; + struct trie *ot = (struct trie *)oldtb->tb_data; + struct key_vector *l, *tp = ot->kv; + struct fib_table *local_tb; + struct fib_alias *fa; + struct trie *lt; + t_key key = 0; - while (idx < tnode_child_length(p)) { - c = tnode_get_child_rcu(p, idx++); - if (!c) + if (oldtb->tb_data == oldtb->__data) + return oldtb; + + local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL); + if (!local_tb) + return NULL; + + lt = (struct trie *)local_tb->tb_data; + + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { + struct key_vector *local_l = NULL, *local_tp; + + hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + struct fib_alias *new_fa; + + if (local_tb->tb_id != fa->tb_id) continue; - if (IS_LEAF(c)) - return c; + /* clone fa for new local table */ + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); + if (!new_fa) + goto out; + + memcpy(new_fa, fa, sizeof(*fa)); - /* Rescan start scanning in new node */ - p = c; - idx = 0; + /* insert clone into table */ + if (!local_l) + local_l = fib_find_node(lt, &local_tp, l->key); + + if (fib_insert_alias(lt, local_tp, local_l, new_fa, + NULL, l->key)) + goto out; } - /* Node empty, walk back up to parent */ - c = p; - } while ((p = node_parent_rcu(c)) != NULL); + /* stop loop if key wrapped back to 0 */ + key = l->key + 1; + if (key < l->key) + break; + } - return NULL; /* Root of trie */ + return local_tb; +out: + fib_trie_free(local_tb); + + return NULL; } -static struct tnode *trie_firstleaf(struct trie *t) +/* Caller must hold RTNL */ +void fib_table_flush_external(struct fib_table *tb) { - struct tnode *n = rcu_dereference_rtnl(t->trie); + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; + struct fib_alias *fa; - if (!n) - return NULL; + /* walk trie in reverse order */ + for (;;) { + unsigned char slen = 0; + struct key_vector *n; - if (IS_LEAF(n)) /* trie is just a leaf */ - return n; + if (!(cindex--)) { + t_key pkey = pn->key; - return leaf_walk_rcu(n, NULL); -} + /* cannot resize the trie vector */ + if (IS_TRIE(pn)) + break; -static struct tnode *trie_nextleaf(struct tnode *l) -{ - struct tnode *p = node_parent_rcu(l); + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); - if (!p) - return NULL; /* trie with just one leaf */ + continue; + } - return leaf_walk_rcu(p, l); -} + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; -static struct tnode *trie_leafindex(struct trie *t, int index) -{ - struct tnode *l = trie_firstleaf(t); + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; - while (l && index-- > 0) - l = trie_nextleaf(l); + continue; + } - return l; -} + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + + /* if alias was cloned to local then we just + * need to remove the local copy from main + */ + if (tb->tb_id != fa->tb_id) { + hlist_del_rcu(&fa->fa_list); + alias_free_mem_rcu(fa); + continue; + } + /* record local slen */ + slen = fa->fa_slen; -/* - * Caller must hold RTNL. - */ + if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL)) + continue; + + netdev_switch_fib_ipv4_del(n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, + fa->fa_type, tb->tb_id); + } + + /* update leaf slen */ + n->slen = slen; + + if (hlist_empty(&n->leaf)) { + put_child_root(pn, n->key, NULL); + node_free(n); + } else { + leaf_pull_suffix(pn, n); + } + } +} + +/* Caller must hold RTNL. */ int fib_table_flush(struct fib_table *tb) { - struct trie *t = (struct trie *) tb->tb_data; - struct tnode *l, *ll = NULL; + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; + struct fib_alias *fa; int found = 0; - for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { - found += trie_flush_leaf(l); + /* walk trie in reverse order */ + for (;;) { + unsigned char slen = 0; + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; - if (ll) { - if (hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); - else - leaf_pull_suffix(ll); + /* cannot resize the trie vector */ + if (IS_TRIE(pn)) + break; + + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); + + continue; } - ll = l; - } + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; - if (ll) { - if (hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); - else - leaf_pull_suffix(ll); + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; + + continue; + } + + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + + if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { + slen = fa->fa_slen; + continue; + } + + netdev_switch_fib_ipv4_del(n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, + fa->fa_type, tb->tb_id); + hlist_del_rcu(&fa->fa_list); + fib_release_info(fa->fa_info); + alias_free_mem_rcu(fa); + found++; + } + + /* update leaf slen */ + n->slen = slen; + + if (hlist_empty(&n->leaf)) { + put_child_root(pn, n->key, NULL); + node_free(n); + } else { + leaf_pull_suffix(pn, n); + } } pr_debug("trie_flush found=%d\n", found); return found; } -void fib_free_table(struct fib_table *tb) +static void __trie_free_rcu(struct rcu_head *head) { + struct fib_table *tb = container_of(head, struct fib_table, rcu); #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie *t = (struct trie *)tb->tb_data; - free_percpu(t->stats); + if (tb->tb_data == tb->__data) + free_percpu(t->stats); #endif /* CONFIG_IP_FIB_TRIE_STATS */ kfree(tb); } -static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, - struct fib_table *tb, - struct sk_buff *skb, struct netlink_callback *cb) +void fib_free_table(struct fib_table *tb) { - int i, s_i; + call_rcu(&tb->rcu, __trie_free_rcu); +} + +static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, + struct sk_buff *skb, struct netlink_callback *cb) +{ + __be32 xkey = htonl(l->key); struct fib_alias *fa; - __be32 xkey = htonl(key); + int i, s_i; - s_i = cb->args[5]; + s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ - - list_for_each_entry_rcu(fa, fah, fa_list) { + hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { if (i < s_i) { i++; continue; } + if (tb->tb_id != fa->tb_id) { + i++; + continue; + } + if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, fa->fa_type, xkey, - plen, + KEYLENGTH - fa->fa_slen, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[5] = i; - return -1; - } - i++; - } - cb->args[5] = i; - return skb->len; -} - -static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, - struct sk_buff *skb, struct netlink_callback *cb) -{ - struct leaf_info *li; - int i, s_i; - - s_i = cb->args[4]; - i = 0; - - /* rcu_read_lock is hold by caller */ - hlist_for_each_entry_rcu(li, &l->list, hlist) { - if (i < s_i) { - i++; - continue; - } - - if (i > s_i) - cb->args[5] = 0; - - if (list_empty(&li->falh)) - continue; - - if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { cb->args[4] = i; return -1; } @@ -1801,44 +1918,38 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, return skb->len; } +/* rcu_read_lock needs to be hold by caller from readside */ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { - struct tnode *l; - struct trie *t = (struct trie *) tb->tb_data; - t_key key = cb->args[2]; - int count = cb->args[3]; - - rcu_read_lock(); + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *l, *tp = t->kv; /* Dump starting at last key. * Note: 0.0.0.0/0 (ie default) is first key. */ - if (count == 0) - l = trie_firstleaf(t); - else { - /* Normally, continue from last key, but if that is missing - * fallback to using slow rescan - */ - l = fib_find_node(t, key); - if (!l) - l = trie_leafindex(t, count); - } + int count = cb->args[2]; + t_key key = cb->args[3]; - while (l) { - cb->args[2] = l->key; + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { - cb->args[3] = count; - rcu_read_unlock(); + cb->args[3] = key; + cb->args[2] = count; return -1; } ++count; - l = trie_nextleaf(l); + key = l->key + 1; + memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); + + /* stop loop if key wrapped back to 0 */ + if (key < l->key) + break; } - cb->args[3] = count; - rcu_read_unlock(); + + cb->args[3] = key; + cb->args[2] = count; return skb->len; } @@ -1850,28 +1961,34 @@ void __init fib_trie_init(void) 0, SLAB_PANIC, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", - max(sizeof(struct tnode), - sizeof(struct leaf_info)), + LEAF_SIZE, 0, SLAB_PANIC, NULL); } - -struct fib_table *fib_trie_table(u32 id) +struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) { struct fib_table *tb; struct trie *t; + size_t sz = sizeof(*tb); + + if (!alias) + sz += sizeof(struct trie); - tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), - GFP_KERNEL); + tb = kzalloc(sz, GFP_KERNEL); if (tb == NULL) return NULL; tb->tb_id = id; tb->tb_default = -1; tb->tb_num_default = 0; + tb->tb_data = (alias ? alias->__data : tb->__data); + + if (alias) + return tb; t = (struct trie *) tb->tb_data; - RCU_INIT_POINTER(t->trie, NULL); + t->kv[0].pos = KEYLENGTH; + t->kv[0].slen = KEYLENGTH; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats = alloc_percpu(struct trie_use_stats); if (!t->stats) { @@ -1888,65 +2005,63 @@ struct fib_table *fib_trie_table(u32 id) struct fib_trie_iter { struct seq_net_private p; struct fib_table *tb; - struct tnode *tnode; + struct key_vector *tnode; unsigned int index; unsigned int depth; }; -static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) +static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) { unsigned long cindex = iter->index; - struct tnode *tn = iter->tnode; - struct tnode *p; - - /* A single entry routing table */ - if (!tn) - return NULL; + struct key_vector *pn = iter->tnode; + t_key pkey; pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); -rescan: - while (cindex < tnode_child_length(tn)) { - struct tnode *n = tnode_get_child_rcu(tn, cindex); - if (n) { + while (!IS_TRIE(pn)) { + while (cindex < child_length(pn)) { + struct key_vector *n = get_child_rcu(pn, cindex++); + + if (!n) + continue; + if (IS_LEAF(n)) { - iter->tnode = tn; - iter->index = cindex + 1; + iter->tnode = pn; + iter->index = cindex; } else { /* push down one level */ iter->tnode = n; iter->index = 0; ++iter->depth; } + return n; } - ++cindex; - } - - /* Current node exhausted, pop back up */ - p = node_parent_rcu(tn); - if (p) { - cindex = get_index(tn->key, p) + 1; - tn = p; + /* Current node exhausted, pop back up */ + pkey = pn->key; + pn = node_parent_rcu(pn); + cindex = get_index(pkey, pn) + 1; --iter->depth; - goto rescan; } - /* got root? */ + /* record root node so further searches know we are done */ + iter->tnode = pn; + iter->index = 0; + return NULL; } -static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, - struct trie *t) +static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, + struct trie *t) { - struct tnode *n; + struct key_vector *n, *pn = t->kv; if (!t) return NULL; - n = rcu_dereference(t->trie); + n = rcu_dereference(pn->tnode[0]); if (!n) return NULL; @@ -1955,7 +2070,7 @@ static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, iter->index = 0; iter->depth = 1; } else { - iter->tnode = NULL; + iter->tnode = pn; iter->index = 0; iter->depth = 0; } @@ -1965,7 +2080,7 @@ static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct tnode *n; + struct key_vector *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -1973,20 +2088,20 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) rcu_read_lock(); for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { if (IS_LEAF(n)) { - struct leaf_info *li; + struct fib_alias *fa; s->leaves++; s->totdepth += iter.depth; if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, &n->list, hlist) + hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) ++s->prefixes; } else { s->tnodes++; if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; - s->nullpointers += n->empty_children; + s->nullpointers += tn_info(n)->empty_children; } } rcu_read_unlock(); @@ -2009,13 +2124,13 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - bytes = sizeof(struct tnode) * stat->leaves; + bytes = LEAF_SIZE * stat->leaves; seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); - bytes += sizeof(struct leaf_info) * stat->prefixes; + bytes += sizeof(struct fib_alias) * stat->prefixes; seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); - bytes += sizeof(struct tnode) * stat->tnodes; + bytes += TNODE_SIZE(0) * stat->tnodes; max = MAX_STAT_DEPTH; while (max > 0 && stat->nodesizes[max-1] == 0) @@ -2030,7 +2145,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct tnode *) * pointers; + bytes += sizeof(struct key_vector *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } @@ -2084,7 +2199,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "Basic info: size of leaf:" " %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct tnode), sizeof(struct tnode)); + LEAF_SIZE, TNODE_SIZE(0)); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; @@ -2123,7 +2238,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2135,7 +2250,7 @@ static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - struct tnode *n; + struct key_vector *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2164,7 +2279,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct tnode *n; + struct key_vector *n; ++*pos; /* next node in same table */ @@ -2250,9 +2365,9 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct tnode *n = v; + struct key_vector *n = v; - if (!node_parent_rcu(n)) + if (IS_TRIE(node_parent_rcu(n))) fib_table_print(seq, iter->tb); if (IS_TNODE(n)) { @@ -2261,30 +2376,28 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_indent(seq, iter->depth-1); seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", &prf, KEYLENGTH - n->pos - n->bits, n->bits, - n->full_children, n->empty_children); + tn_info(n)->full_children, + tn_info(n)->empty_children); } else { - struct leaf_info *li; __be32 val = htonl(n->key); + struct fib_alias *fa; seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, &n->list, hlist) { - struct fib_alias *fa; - - list_for_each_entry_rcu(fa, &li->falh, fa_list) { - char buf1[32], buf2[32]; - - seq_indent(seq, iter->depth+1); - seq_printf(seq, " /%d %s %s", li->plen, - rtn_scope(buf1, sizeof(buf1), - fa->fa_info->fib_scope), - rtn_type(buf2, sizeof(buf2), - fa->fa_type)); - if (fa->fa_tos) - seq_printf(seq, " tos=%d", fa->fa_tos); - seq_putc(seq, '\n'); - } + hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { + char buf1[32], buf2[32]; + + seq_indent(seq, iter->depth + 1); + seq_printf(seq, " /%zu %s %s", + KEYLENGTH - fa->fa_slen, + rtn_scope(buf1, sizeof(buf1), + fa->fa_info->fib_scope), + rtn_type(buf2, sizeof(buf2), + fa->fa_type)); + if (fa->fa_tos) + seq_printf(seq, " tos=%d", fa->fa_tos); + seq_putc(seq, '\n'); } } @@ -2314,31 +2427,47 @@ static const struct file_operations fib_trie_fops = { struct fib_route_iter { struct seq_net_private p; - struct trie *main_trie; + struct fib_table *main_tb; + struct key_vector *tnode; loff_t pos; t_key key; }; -static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) +static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, + loff_t pos) { - struct tnode *l = NULL; - struct trie *t = iter->main_trie; + struct fib_table *tb = iter->main_tb; + struct key_vector *l, **tp = &iter->tnode; + struct trie *t; + t_key key; - /* use cache location of last found key */ - if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key))) + /* use cache location of next-to-find key */ + if (iter->pos > 0 && pos >= iter->pos) { pos -= iter->pos; - else { + key = iter->key; + } else { + t = (struct trie *)tb->tb_data; + iter->tnode = t->kv; iter->pos = 0; - l = trie_firstleaf(t); + key = 0; } - while (l && pos-- > 0) { + while ((l = leaf_walk_rcu(tp, key)) != NULL) { + key = l->key + 1; iter->pos++; - l = trie_nextleaf(l); + + if (pos-- <= 0) + break; + + l = NULL; + + /* handle unlikely case of a key wrap */ + if (!key) + break; } if (l) - iter->key = pos; /* remember it */ + iter->key = key; /* remember it */ else iter->pos = 0; /* forget it */ @@ -2350,37 +2479,46 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) { struct fib_route_iter *iter = seq->private; struct fib_table *tb; + struct trie *t; rcu_read_lock(); + tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); if (!tb) return NULL; - iter->main_trie = (struct trie *) tb->tb_data; - if (*pos == 0) - return SEQ_START_TOKEN; - else - return fib_route_get_idx(iter, *pos - 1); + iter->main_tb = tb; + + if (*pos != 0) + return fib_route_get_idx(iter, *pos); + + t = (struct trie *)tb->tb_data; + iter->tnode = t->kv; + iter->pos = 0; + iter->key = 0; + + return SEQ_START_TOKEN; } static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; - struct tnode *l = v; + struct key_vector *l = NULL; + t_key key = iter->key; ++*pos; - if (v == SEQ_START_TOKEN) { - iter->pos = 0; - l = trie_firstleaf(iter->main_trie); - } else { + + /* only allow key of 0 for start of sequence */ + if ((v == SEQ_START_TOKEN) || key) + l = leaf_walk_rcu(&iter->tnode, key); + + if (l) { + iter->key = l->key + 1; iter->pos++; - l = trie_nextleaf(l); + } else { + iter->pos = 0; } - if (l) - iter->key = l->key; - else - iter->pos = 0; return l; } @@ -2412,8 +2550,11 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info */ static int fib_route_seq_show(struct seq_file *seq, void *v) { - struct tnode *l = v; - struct leaf_info *li; + struct fib_route_iter *iter = seq->private; + struct fib_table *tb = iter->main_tb; + struct fib_alias *fa; + struct key_vector *l = v; + __be32 prefix; if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " @@ -2422,45 +2563,43 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; } - hlist_for_each_entry_rcu(li, &l->list, hlist) { - struct fib_alias *fa; - __be32 mask, prefix; + prefix = htonl(l->key); - mask = inet_make_mask(li->plen); - prefix = htonl(l->key); + hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + const struct fib_info *fi = fa->fa_info; + __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); + unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); - list_for_each_entry_rcu(fa, &li->falh, fa_list) { - const struct fib_info *fi = fa->fa_info; - unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); + if ((fa->fa_type == RTN_BROADCAST) || + (fa->fa_type == RTN_MULTICAST)) + continue; - if (fa->fa_type == RTN_BROADCAST - || fa->fa_type == RTN_MULTICAST) - continue; + if (fa->tb_id != tb->tb_id) + continue; - seq_setwidth(seq, 127); - - if (fi) - seq_printf(seq, - "%s\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u", - fi->fib_dev ? fi->fib_dev->name : "*", - prefix, - fi->fib_nh->nh_gw, flags, 0, 0, - fi->fib_priority, - mask, - (fi->fib_advmss ? - fi->fib_advmss + 40 : 0), - fi->fib_window, - fi->fib_rtt >> 3); - else - seq_printf(seq, - "*\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u", - prefix, 0, flags, 0, 0, 0, - mask, 0, 0, 0); - - seq_pad(seq, '\n'); - } + seq_setwidth(seq, 127); + + if (fi) + seq_printf(seq, + "%s\t%08X\t%08X\t%04X\t%d\t%u\t" + "%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", + prefix, + fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_priority, + mask, + (fi->fib_advmss ? + fi->fib_advmss + 40 : 0), + fi->fib_window, + fi->fib_rtt >> 3); + else + seq_printf(seq, + "*\t%08X\t%08X\t%04X\t%d\t%u\t" + "%d\t%08X\t%d\t%u\t%u", + prefix, 0, flags, 0, 0, 0, + mask, 0, 0, 0); + + seq_pad(seq, '\n'); } return 0; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4b1172d73e03..ad09213ac5b2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -97,6 +97,7 @@ #include <net/route.h> #include <net/sock.h> #include <net/checksum.h> +#include <net/inet_common.h> #include <linux/netfilter_ipv4.h> #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> @@ -369,7 +370,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -713,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; @@ -1849,7 +1850,10 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) pmc->sfcount[MCAST_EXCLUDE] = 1; } -int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) +/* Join a multicast group + */ + +int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) { __be32 addr = imr->imr_multiaddr.s_addr; struct ip_mc_socklist *iml, *i; @@ -1897,20 +1901,6 @@ int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) done: return err; } -EXPORT_SYMBOL(__ip_mc_join_group); - -/* Join a multicast group - */ -int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) -{ - int ret; - - rtnl_lock(); - ret = __ip_mc_join_group(sk, imr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ip_mc_join_group); static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, @@ -1933,7 +1923,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, return err; } -int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) +int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; @@ -1978,18 +1968,6 @@ int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) out: return ret; } -EXPORT_SYMBOL(__ip_mc_leave_group); - -int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) -{ - int ret; - - rtnl_lock(); - ret = __ip_mc_leave_group(sk, imr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ip_mc_leave_group); int ip_mc_source(int add, int omode, struct sock *sk, struct @@ -2009,7 +1987,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); + ASSERT_RTNL(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; @@ -2123,9 +2101,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); done: - rtnl_unlock(); if (leavegroup) - return ip_mc_leave_group(sk, &imr); + err = ip_mc_leave_group(sk, &imr); return err; } @@ -2147,7 +2124,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - rtnl_lock(); + ASSERT_RTNL(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; @@ -2209,7 +2186,6 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) pmc->sfmode = msf->imsf_fmode; err = 0; done: - rtnl_unlock(); if (leavegroup) err = ip_mc_leave_group(sk, &imr); return err; @@ -2740,6 +2716,7 @@ static const struct file_operations igmp_mcf_seq_fops = { static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; + int err; pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); if (!pde) @@ -2748,8 +2725,18 @@ static int __net_init igmp_net_init(struct net *net) &igmp_mcf_seq_fops); if (!pde) goto out_mcfilter; + err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET, + SOCK_DGRAM, 0, net); + if (err < 0) { + pr_err("Failed to initialize the IGMP autojoin socket (err %d)\n", + err); + goto out_sock; + } + return 0; +out_sock: + remove_proc_entry("mcfilter", net->proc_net); out_mcfilter: remove_proc_entry("igmp", net->proc_net); out_igmp: @@ -2760,6 +2747,7 @@ static void __net_exit igmp_net_exit(struct net *net) { remove_proc_entry("mcfilter", net->proc_net); remove_proc_entry("igmp", net->proc_net); + inet_ctl_sock_destroy(net->ipv4.mc_autojoin_sk); } static struct pernet_operations igmp_net_ops = { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14d02ea905b6..79c0c9439fdc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -23,6 +23,7 @@ #include <net/route.h> #include <net/tcp_states.h> #include <net/xfrm.h> +#include <net/tcp.h> #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -268,6 +269,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) timeo = schedule_timeout(timeo); + sched_annotate_sleep(); lock_sock(sk); err = 0; if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) @@ -293,8 +295,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock_queue *queue = &icsk->icsk_accept_queue; - struct sock *newsk; struct request_sock *req; + struct sock *newsk; int error; lock_sock(sk); @@ -323,9 +325,11 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) newsk = req->sk; sk_acceptq_removed(sk); - if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) { + if (sk->sk_protocol == IPPROTO_TCP && + tcp_rsk(req)->tfo_listener && + queue->fastopenq) { spin_lock_bh(&queue->fastopenq->lock); - if (tcp_rsk(req)->listener) { + if (tcp_rsk(req)->tfo_listener) { /* We are still waiting for the final ACK from 3WHS * so can't free req now. Instead, we set req->sk to * NULL to signify that the child socket is taken @@ -340,7 +344,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) out: release_sock(sk); if (req) - __reqsk_free(req); + reqsk_put(req); return newsk; out_err: newsk = NULL; @@ -399,18 +403,17 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, const struct request_sock *req) { - struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); - struct ip_options_rcu *opt = inet_rsk(req)->opt; - struct net *net = sock_net(sk); - int flags = inet_sk_flowi_flags(sk); + struct net *net = read_pnet(&ireq->ireq_net); + struct ip_options_rcu *opt = ireq->opt; + struct rtable *rt; - flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, - sk->sk_protocol, - flags, + sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); + ireq->ir_loc_addr, ireq->ir_rmt_port, + htons(ireq->ir_num)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -432,9 +435,9 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, const struct request_sock *req) { const struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = read_pnet(&ireq->ireq_net); struct inet_sock *newinet = inet_sk(newsk); struct ip_options_rcu *opt; - struct net *net = sock_net(sk); struct flowi4 *fl4; struct rtable *rt; @@ -442,11 +445,12 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rcu_read_lock(); opt = rcu_dereference(newinet->inet_opt); - flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark, + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); + ireq->ir_loc_addr, ireq->ir_rmt_port, + htons(ireq->ir_num)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -474,33 +478,37 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, #if IS_ENABLED(CONFIG_IPV6) #define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else -#define AF_INET_FAMILY(fam) 1 +#define AF_INET_FAMILY(fam) true #endif -struct request_sock *inet_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, - const __be16 rport, const __be32 raddr, +/* Note: this is temporary : + * req sock will no longer be in listener hash table +*/ +struct request_sock *inet_csk_search_req(struct sock *sk, + const __be16 rport, + const __be32 raddr, const __be32 laddr) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; + struct request_sock *req; + u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries); - for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries)]; - (req = *prev) != NULL; - prev = &req->dl_next) { + spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && ireq->ir_rmt_addr == raddr && ireq->ir_loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { + atomic_inc(&req->rsk_refcnt); WARN_ON(req->sk); - *prevp = prev; break; } } + spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); return req; } @@ -556,23 +564,24 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); -void inet_csk_reqsk_queue_prune(struct sock *parent, - const unsigned long interval, - const unsigned long timeout, - const unsigned long max_rto) +static void reqsk_timer_handler(unsigned long data) { - struct inet_connection_sock *icsk = inet_csk(parent); + struct request_sock *req = (struct request_sock *)data; + struct sock *sk_listener = req->rsk_listener; + struct inet_connection_sock *icsk = inet_csk(sk_listener); struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct listen_sock *lopt = queue->listen_opt; - int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; - int thresh = max_retries; - unsigned long now = jiffies; - struct request_sock **reqp, *req; - int i, budget; + int qlen, expire = 0, resend = 0; + int max_retries, thresh; + u8 defer_accept; - if (lopt == NULL || lopt->qlen == 0) + if (sk_listener->sk_state != TCP_LISTEN || !lopt) { + reqsk_put(req); return; + } + max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + thresh = max_retries; /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means @@ -590,67 +599,65 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, * embrions; and abort old ones without pity, if old * ones are about to clog our table. */ - if (lopt->qlen>>(lopt->max_qlen_log-1)) { - int young = (lopt->qlen_young<<1); + qlen = listen_sock_qlen(lopt); + if (qlen >> (lopt->max_qlen_log - 1)) { + int young = listen_sock_young(lopt) << 1; while (thresh > 2) { - if (lopt->qlen < young) + if (qlen < young) break; thresh--; young <<= 1; } } + defer_accept = READ_ONCE(queue->rskq_defer_accept); + if (defer_accept) + max_retries = defer_accept; + syn_ack_recalc(req, thresh, max_retries, defer_accept, + &expire, &resend); + req->rsk_ops->syn_ack_timeout(req); + if (!expire && + (!resend || + !inet_rtx_syn_ack(sk_listener, req) || + inet_rsk(req)->acked)) { + unsigned long timeo; + + if (req->num_timeout++ == 0) + atomic_inc(&lopt->young_dec); + timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); + mod_timer_pinned(&req->rsk_timer, jiffies + timeo); + return; + } + inet_csk_reqsk_queue_drop(sk_listener, req); + reqsk_put(req); +} - if (queue->rskq_defer_accept) - max_retries = queue->rskq_defer_accept; - - budget = 2 * (lopt->nr_table_entries / (timeout / interval)); - i = lopt->clock_hand; - - do { - reqp=&lopt->syn_table[i]; - while ((req = *reqp) != NULL) { - if (time_after_eq(now, req->expires)) { - int expire = 0, resend = 0; - - syn_ack_recalc(req, thresh, max_retries, - queue->rskq_defer_accept, - &expire, &resend); - req->rsk_ops->syn_ack_timeout(parent, req); - if (!expire && - (!resend || - !inet_rtx_syn_ack(parent, req) || - inet_rsk(req)->acked)) { - unsigned long timeo; - - if (req->num_timeout++ == 0) - lopt->qlen_young--; - timeo = min(timeout << req->num_timeout, - max_rto); - req->expires = now + timeo; - reqp = &req->dl_next; - continue; - } - - /* Drop this request */ - inet_csk_reqsk_queue_unlink(parent, req, reqp); - reqsk_queue_removed(queue, req); - reqsk_free(req); - continue; - } - reqp = &req->dl_next; - } +void reqsk_queue_hash_req(struct request_sock_queue *queue, + u32 hash, struct request_sock *req, + unsigned long timeout) +{ + struct listen_sock *lopt = queue->listen_opt; - i = (i + 1) & (lopt->nr_table_entries - 1); + req->num_retrans = 0; + req->num_timeout = 0; + req->sk = NULL; - } while (--budget > 0); + /* before letting lookups find us, make sure all req fields + * are committed to memory and refcnt initialized. + */ + smp_wmb(); + atomic_set(&req->rsk_refcnt, 2); + setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); + req->rsk_hash = hash; - lopt->clock_hand = i; + spin_lock(&queue->syn_wait_lock); + req->dl_next = lopt->syn_table[hash]; + lopt->syn_table[hash] = req; + spin_unlock(&queue->syn_wait_lock); - if (lopt->qlen) - inet_csk_reset_keepalive_timer(parent, interval); + mod_timer_pinned(&req->rsk_timer, jiffies + timeout); } -EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); +EXPORT_SYMBOL(reqsk_queue_hash_req); /** * inet_csk_clone_lock - clone an inet socket, and lock its clone @@ -678,6 +685,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, newsk->sk_write_space = sk_stream_write_space; newsk->sk_mark = inet_rsk(req)->ir_mark; + atomic64_set(&newsk->sk_cookie, + atomic64_read(&inet_rsk(req)->ir_cookie)); newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; @@ -784,8 +793,6 @@ void inet_csk_listen_stop(struct sock *sk) struct request_sock *acc_req; struct request_sock *req; - inet_csk_delete_keepalive_timer(sk); - /* make all the listen_opt local to us */ acc_req = reqsk_queue_yank_acceptq(queue); @@ -815,9 +822,9 @@ void inet_csk_listen_stop(struct sock *sk) percpu_counter_inc(sk->sk_prot->orphan_count); - if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) { + if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(tcp_sk(child)->fastopen_rsk != req); - BUG_ON(sk != tcp_rsk(req)->listener); + BUG_ON(sk != req->rsk_listener); /* Paranoid, to prevent race condition if * an inbound pkt destined for child is @@ -826,7 +833,6 @@ void inet_csk_listen_stop(struct sock *sk) * tcp_v4_destroy_sock(). */ tcp_sk(child)->fastopen_rsk = NULL; - sock_put(sk); } inet_csk_destroy_sock(child); @@ -835,7 +841,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_put(child); sk_acceptq_removed(sk); - __reqsk_free(req); + reqsk_put(req); } if (queue->fastopenq != NULL) { /* Free all the reqs queued in rskq_rst_head. */ @@ -845,7 +851,7 @@ void inet_csk_listen_stop(struct sock *sk) spin_unlock_bh(&queue->fastopenq->lock); while ((req = acc_req) != NULL) { acc_req = req->dl_next; - __reqsk_free(req); + reqsk_put(req); } } WARN_ON(sk->sk_ack_backlog); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 81751f12645f..76322c9867d5 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -38,16 +38,12 @@ static const struct inet_diag_handler **inet_diag_table; struct inet_diag_entry { - __be32 *saddr; - __be32 *daddr; + const __be32 *saddr; + const __be32 *daddr; u16 sport; u16 dport; u16 family; u16 userlocks; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr saddr_storage; /* for IPv4-mapped-IPv6 addresses */ - struct in6_addr daddr_storage; /* for IPv4-mapped-IPv6 addresses */ -#endif }; static DEFINE_MUTEX(inet_diag_table_mutex); @@ -65,28 +61,65 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto) return inet_diag_table[proto]; } -static inline void inet_diag_unlock_handler( - const struct inet_diag_handler *handler) +static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) { mutex_unlock(&inet_diag_table_mutex); } +static void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) +{ + r->idiag_family = sk->sk_family; + + r->id.idiag_sport = htons(sk->sk_num); + r->id.idiag_dport = sk->sk_dport; + r->id.idiag_if = sk->sk_bound_dev_if; + sock_diag_save_cookie(sk, r->id.idiag_cookie); + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; + } else +#endif + { + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + + r->id.idiag_src[0] = sk->sk_rcv_saddr; + r->id.idiag_dst[0] = sk->sk_daddr; + } +} + +static size_t inet_sk_attr_size(void) +{ + return nla_total_size(sizeof(struct tcp_info)) + + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + + nla_total_size(TCP_CA_NAME_MAX) + + nla_total_size(sizeof(struct tcpvegas_info)) + + 64; +} + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + struct sk_buff *skb, const struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, + u32 portid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); + const struct inet_diag_handler *handler; + int ext = req->idiag_ext; struct inet_diag_msg *r; struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; - const struct inet_diag_handler *handler; - int ext = req->idiag_ext; handler = inet_diag_table[req->sdiag_protocol]; - BUG_ON(handler == NULL); + BUG_ON(!handler); nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); @@ -94,25 +127,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, return -EMSGSIZE; r = nlmsg_data(nlh); - BUG_ON(sk->sk_state == TCP_TIME_WAIT); + BUG_ON(!sk_fullsock(sk)); - r->idiag_family = sk->sk_family; + inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; - r->id.idiag_if = sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); - - r->id.idiag_sport = inet->inet_sport; - r->id.idiag_dport = inet->inet_dport; - - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - - r->id.idiag_src[0] = inet->inet_rcv_saddr; - r->id.idiag_dst[0] = inet->inet_daddr; - if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) goto errout; @@ -125,10 +146,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { - - *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; - if (ext & (1 << (INET_DIAG_TCLASS - 1))) if (nla_put_u8(skb, INET_DIAG_TCLASS, inet6_sk(sk)->tclass) < 0) @@ -155,7 +172,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) goto errout; - if (icsk == NULL) { + if (!icsk) { handler->idiag_get_info(sk, r, NULL); goto out; } @@ -213,23 +230,25 @@ errout: EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, + struct sk_buff *skb, + const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, user_ns, portid, seq, nlmsg_flags, unlh); + return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, + user_ns, portid, seq, nlmsg_flags, unlh); } -static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, - struct sk_buff *skb, struct inet_diag_req_v2 *req, +static int inet_twsk_diag_fill(struct sock *sk, + struct sk_buff *skb, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - s32 tmo; + struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; + s32 tmo; nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); @@ -243,21 +262,9 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, if (tmo < 0) tmo = 0; - r->idiag_family = tw->tw_family; + inet_diag_msg_common_fill(r, sk); r->idiag_retrans = 0; - r->id.idiag_if = tw->tw_bound_dev_if; - sock_diag_save_cookie(tw, r->id.idiag_cookie); - - r->id.idiag_sport = tw->tw_sport; - r->id.idiag_dport = tw->tw_dport; - - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - - r->id.idiag_src[0] = tw->tw_rcv_saddr; - r->id.idiag_dst[0] = tw->tw_daddr; - r->idiag_state = tw->tw_substate; r->idiag_timer = 3; r->idiag_expires = jiffies_to_msecs(tmo); @@ -265,70 +272,98 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_wqueue = 0; r->idiag_uid = 0; r->idiag_inode = 0; -#if IS_ENABLED(CONFIG_IPV6) - if (tw->tw_family == AF_INET6) { - *(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr; - } -#endif + + nlmsg_end(skb, nlh); + return 0; +} + +static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, + u32 portid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + long tmo; + + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), + nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + inet_diag_msg_common_fill(r, sk); + r->idiag_state = TCP_SYN_RECV; + r->idiag_timer = 1; + r->idiag_retrans = inet_reqsk(sk)->num_retrans; + + BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != + offsetof(struct sock, sk_cookie)); + + tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; + r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0; + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; nlmsg_end(skb, nlh); return 0; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, + return inet_twsk_diag_fill(sk, skb, portid, seq, nlmsg_flags, unlh); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return inet_req_diag_fill(sk, skb, portid, seq, + nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) { - int err; - struct sock *sk; - struct sk_buff *rep; struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; err = -EINVAL; - if (req->sdiag_family == AF_INET) { + if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); - } #if IS_ENABLED(CONFIG_IPV6) - else if (req->sdiag_family == AF_INET6) { + else if (req->sdiag_family == AF_INET6) sk = inet6_lookup(net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, req->id.idiag_if); - } #endif - else { + else goto out_nosk; - } err = -ENOENT; - if (sk == NULL) + if (!sk) goto out_nosk; err = sock_diag_check_cookie(sk, req->id.idiag_cookie); if (err) goto out; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - sizeof(struct tcp_info) + 64, GFP_KERNEL); + rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { err = -ENOMEM; goto out; @@ -359,7 +394,7 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { const struct inet_diag_handler *handler; int err; @@ -400,9 +435,8 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) return 1; } - static int inet_diag_bc_run(const struct nlattr *_bc, - const struct inet_diag_entry *entry) + const struct inet_diag_entry *entry) { const void *bc = nla_data(_bc); int len = nla_len(_bc); @@ -434,10 +468,10 @@ static int inet_diag_bc_run(const struct nlattr *_bc, break; case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: { - struct inet_diag_hostcond *cond; - __be32 *addr; + const struct inet_diag_hostcond *cond; + const __be32 *addr; - cond = (struct inet_diag_hostcond *)(op + 1); + cond = (const struct inet_diag_hostcond *)(op + 1); if (cond->port != -1 && cond->port != (op->code == INET_DIAG_BC_S_COND ? entry->sport : entry->dport)) { @@ -486,29 +520,36 @@ static int inet_diag_bc_run(const struct nlattr *_bc, return len == 0; } +/* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV) + */ +static void entry_fill_addrs(struct inet_diag_entry *entry, + const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32; + entry->daddr = sk->sk_v6_daddr.s6_addr32; + } else +#endif + { + entry->saddr = &sk->sk_rcv_saddr; + entry->daddr = &sk->sk_daddr; + } +} + int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) { - struct inet_diag_entry entry; struct inet_sock *inet = inet_sk(sk); + struct inet_diag_entry entry; - if (bc == NULL) + if (!bc) return 1; entry.family = sk->sk_family; -#if IS_ENABLED(CONFIG_IPV6) - if (entry.family == AF_INET6) { - - entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32; - entry.daddr = sk->sk_v6_daddr.s6_addr32; - } else -#endif - { - entry.saddr = &inet->inet_rcv_saddr; - entry.daddr = &inet->inet_daddr; - } + entry_fill_addrs(&entry, sk); entry.sport = inet->inet_num; entry.dport = ntohs(inet->inet_dport); - entry.userlocks = sk->sk_userlocks; + entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; return inet_diag_bc_run(bc, &entry); } @@ -535,8 +576,8 @@ static int valid_cc(const void *bc, int len, int cc) static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, int *min_len) { - int addr_len; struct inet_diag_hostcond *cond; + int addr_len; /* Check hostcond space. */ *min_len += sizeof(struct inet_diag_hostcond); @@ -570,8 +611,8 @@ static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, } /* Validate a port comparison operator. */ -static inline bool valid_port_comparison(const struct inet_diag_bc_op *op, - int len, int *min_len) +static bool valid_port_comparison(const struct inet_diag_bc_op *op, + int len, int *min_len) { /* Port comparisons put the port in a follow-on inet_diag_bc_op. */ *min_len += sizeof(struct inet_diag_bc_op); @@ -586,10 +627,9 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) int len = bytecode_len; while (len > 0) { - const struct inet_diag_bc_op *op = bc; int min_len = sizeof(struct inet_diag_bc_op); + const struct inet_diag_bc_op *op = bc; -//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: @@ -630,7 +670,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, const struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) @@ -642,139 +682,42 @@ static int inet_csk_diag_dump(struct sock *sk, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } -static int inet_twsk_diag_dump(struct sock *sk, - struct sk_buff *skb, - struct netlink_callback *cb, - struct inet_diag_req_v2 *r, - const struct nlattr *bc) +static void twsk_build_assert(void) { - struct inet_timewait_sock *tw = inet_twsk(sk); + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != + offsetof(struct sock, sk_family)); - if (bc != NULL) { - struct inet_diag_entry entry; + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) != + offsetof(struct inet_sock, inet_num)); - entry.family = tw->tw_family; -#if IS_ENABLED(CONFIG_IPV6) - if (tw->tw_family == AF_INET6) { - entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32; - entry.daddr = tw->tw_v6_daddr.s6_addr32; - } else -#endif - { - entry.saddr = &tw->tw_rcv_saddr; - entry.daddr = &tw->tw_daddr; - } - entry.sport = tw->tw_num; - entry.dport = ntohs(tw->tw_dport); - entry.userlocks = 0; - - if (!inet_diag_bc_run(bc, &entry)) - return 0; - } + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) != + offsetof(struct inet_sock, inet_dport)); - return inet_twsk_diag_fill(tw, skb, r, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); -} + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) != + offsetof(struct inet_sock, inet_rcv_saddr)); -/* Get the IPv4, IPv6, or IPv4-mapped-IPv6 local and remote addresses - * from a request_sock. For IPv4-mapped-IPv6 we must map IPv4 to IPv6. - */ -static inline void inet_diag_req_addrs(const struct sock *sk, - const struct request_sock *req, - struct inet_diag_entry *entry) -{ - struct inet_request_sock *ireq = inet_rsk(req); + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) != + offsetof(struct inet_sock, inet_daddr)); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (req->rsk_ops->family == AF_INET6) { - entry->saddr = ireq->ir_v6_loc_addr.s6_addr32; - entry->daddr = ireq->ir_v6_rmt_addr.s6_addr32; - } else if (req->rsk_ops->family == AF_INET) { - ipv6_addr_set_v4mapped(ireq->ir_loc_addr, - &entry->saddr_storage); - ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, - &entry->daddr_storage); - entry->saddr = entry->saddr_storage.s6_addr32; - entry->daddr = entry->daddr_storage.s6_addr32; - } - } else -#endif - { - entry->saddr = &ireq->ir_loc_addr; - entry->daddr = &ireq->ir_rmt_addr; - } -} + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) != + offsetof(struct sock, sk_v6_rcv_saddr)); -static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, - const struct nlmsghdr *unlh) -{ - const struct inet_request_sock *ireq = inet_rsk(req); - struct inet_sock *inet = inet_sk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; - - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - NLM_F_MULTI); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - r->idiag_family = sk->sk_family; - r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; - r->idiag_retrans = req->num_retrans; - - r->id.idiag_if = sk->sk_bound_dev_if; - sock_diag_save_cookie(req, r->id.idiag_cookie); - - tmo = req->expires - jiffies; - if (tmo < 0) - tmo = 0; - - r->id.idiag_sport = inet->inet_sport; - r->id.idiag_dport = ireq->ir_rmt_port; - - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - - r->id.idiag_src[0] = ireq->ir_loc_addr; - r->id.idiag_dst[0] = ireq->ir_rmt_addr; - - r->idiag_expires = jiffies_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); - r->idiag_inode = 0; -#if IS_ENABLED(CONFIG_IPV6) - if (r->idiag_family == AF_INET6) { - struct inet_diag_entry entry; - inet_diag_req_addrs(sk, req, &entry); - memcpy(r->id.idiag_src, entry.saddr, sizeof(struct in6_addr)); - memcpy(r->id.idiag_dst, entry.daddr, sizeof(struct in6_addr)); - } + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) != + offsetof(struct sock, sk_v6_daddr)); #endif - - nlmsg_end(skb, nlh); - return 0; } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, const struct nlattr *bc) { - struct inet_diag_entry entry; struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt; struct inet_sock *inet = inet_sk(sk); - int j, s_j; - int reqnum, s_reqnum; + struct inet_diag_entry entry; + int j, s_j, reqnum, s_reqnum; + struct listen_sock *lopt; int err = 0; s_j = cb->args[3]; @@ -785,13 +728,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.family = sk->sk_family; - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); lopt = icsk->icsk_accept_queue.listen_opt; - if (!lopt || !lopt->qlen) + if (!lopt || !listen_sock_qlen(lopt)) goto out; - if (bc != NULL) { + if (bc) { entry.sport = inet->inet_num; entry.userlocks = sk->sk_userlocks; } @@ -810,17 +753,18 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, continue; if (bc) { - inet_diag_req_addrs(sk, req, &entry); + /* Note: entry.sport and entry.userlocks are already set */ + entry_fill_addrs(&entry, req_to_sk(req)); entry.dport = ntohs(ireq->ir_rmt_port); if (!inet_diag_bc_run(bc, &entry)) continue; } - err = inet_diag_fill_req(skb, sk, req, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, cb->nlh); + err = inet_req_diag_fill(req_to_sk(req), skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb->nlh); if (err < 0) { cb->args[3] = j + 1; cb->args[4] = reqnum; @@ -832,17 +776,17 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } out: - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); return err; } void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc) + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) { - int i, num; - int s_i, s_num; struct net *net = sock_net(skb->sk); + int i, num, s_i, s_num; s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -852,9 +796,9 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, goto skip_listen_ht; for (i = s_i; i < INET_LHTABLE_SIZE; i++) { - struct sock *sk; - struct hlist_nulls_node *node; struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; + struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; @@ -871,7 +815,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, } if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) + sk->sk_family != r->sdiag_family) goto next_listen; if (r->id.idiag_sport != inet->inet_sport && @@ -919,8 +863,8 @@ skip_listen_ht: for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); - struct sock *sk; struct hlist_nulls_node *node; + struct sock *sk; num = 0; @@ -932,8 +876,7 @@ skip_listen_ht: spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { - int res; - int state; + int state, res; if (!net_eq(sock_net(sk), net)) continue; @@ -952,10 +895,16 @@ skip_listen_ht: if (r->id.idiag_dport != sk->sk_dport && r->id.idiag_dport) goto next_normal; - if (sk->sk_state == TCP_TIME_WAIT) - res = inet_twsk_diag_dump(sk, skb, cb, r, bc); - else - res = inet_csk_diag_dump(sk, skb, cb, r, bc); + twsk_build_assert(); + + if (!inet_diag_bc_sk(bc, sk)) + goto next_normal; + + res = sk_diag_fill(sk, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh); if (res < 0) { spin_unlock_bh(lock); goto done; @@ -976,7 +925,8 @@ out: EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, + struct nlattr *bc) { const struct inet_diag_handler *handler; int err = 0; @@ -993,8 +943,8 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct nlattr *bc = NULL; int hdrlen = sizeof(struct inet_diag_req_v2); + struct nlattr *bc = NULL; if (nlmsg_attrlen(cb->nlh, hdrlen)) bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); @@ -1002,7 +952,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc); } -static inline int inet_diag_type2proto(int type) +static int inet_diag_type2proto(int type) { switch (type) { case TCPDIAG_GETSOCK: @@ -1014,12 +964,13 @@ static inline int inet_diag_type2proto(int type) } } -static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_diag_dump_compat(struct sk_buff *skb, + struct netlink_callback *cb) { struct inet_diag_req *rc = nlmsg_data(cb->nlh); + int hdrlen = sizeof(struct inet_diag_req); struct inet_diag_req_v2 req; struct nlattr *bc = NULL; - int hdrlen = sizeof(struct inet_diag_req); req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); @@ -1034,7 +985,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *c } static int inet_diag_get_exact_compat(struct sk_buff *in_skb, - const struct nlmsghdr *nlh) + const struct nlmsghdr *nlh) { struct inet_diag_req *rc = nlmsg_data(nlh); struct inet_diag_req_v2 req; @@ -1063,7 +1014,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) attr = nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - if (attr == NULL || + if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op) || inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; @@ -1090,9 +1041,10 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; + attr = nlmsg_find_attr(h, hdrlen, INET_DIAG_REQ_BYTECODE); - if (attr == NULL || + if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op) || inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; @@ -1128,7 +1080,7 @@ int inet_diag_register(const struct inet_diag_handler *h) mutex_lock(&inet_diag_table_mutex); err = -EEXIST; - if (inet_diag_table[type] == NULL) { + if (!inet_diag_table[type]) { inet_diag_table[type] = h; err = 0; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9111a4e22155..0fb841b9d834 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -24,9 +24,9 @@ #include <net/secure_seq.h> #include <net/ip.h> -static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, - const __u16 lport, const __be32 faddr, - const __be16 fport) +static u32 inet_ehashfn(const struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) { static u32 inet_ehash_secret __read_mostly; @@ -36,17 +36,21 @@ static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, inet_ehash_secret + net_hash_mix(net)); } - -static unsigned int inet_sk_ehashfn(const struct sock *sk) +/* This function handles inet_sock, but also timewait and request sockets + * for IPv4/IPv6. + */ +u32 sk_ehashfn(const struct sock *sk) { - const struct inet_sock *inet = inet_sk(sk); - const __be32 laddr = inet->inet_rcv_saddr; - const __u16 lport = inet->inet_num; - const __be32 faddr = inet->inet_daddr; - const __be16 fport = inet->inet_dport; - struct net *net = sock_net(sk); - - return inet_ehashfn(net, laddr, lport, faddr, fport); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return inet6_ehashfn(sock_net(sk), + &sk->sk_v6_rcv_saddr, sk->sk_num, + &sk->sk_v6_daddr, sk->sk_dport); +#endif + return inet_ehashfn(sock_net(sk), + sk->sk_rcv_saddr, sk->sk_num, + sk->sk_daddr, sk->sk_dport); } /* @@ -61,7 +65,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { - write_pnet(&tb->ib_net, hold_net(net)); + write_pnet(&tb->ib_net, net); tb->port = snum; tb->fastreuse = 0; tb->fastreuseport = 0; @@ -79,7 +83,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - release_net(ib_net(tb)); kmem_cache_free(cachep, tb); } } @@ -263,11 +266,19 @@ void sock_gen_put(struct sock *sk) if (sk->sk_state == TCP_TIME_WAIT) inet_twsk_free(inet_twsk(sk)); + else if (sk->sk_state == TCP_NEW_SYN_RECV) + reqsk_free(inet_reqsk(sk)); else sk_free(sk); } EXPORT_SYMBOL_GPL(sock_gen_put); +void sock_edemux(struct sk_buff *skb) +{ + sock_gen_put(skb->sk); +} +EXPORT_SYMBOL(sock_edemux); + struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, @@ -400,13 +411,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; - spinlock_t *lock; struct inet_ehash_bucket *head; + spinlock_t *lock; int twrefcnt = 0; WARN_ON(!sk_unhashed(sk)); - sk->sk_hash = inet_sk_ehashfn(sk); + sk->sk_hash = sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); @@ -423,15 +434,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); -static void __inet_hash(struct sock *sk) +int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb; - if (sk->sk_state != TCP_LISTEN) { - __inet_hash_nolisten(sk, NULL); - return; - } + if (sk->sk_state != TCP_LISTEN) + return __inet_hash_nolisten(sk, tw); WARN_ON(!sk_unhashed(sk)); ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; @@ -440,13 +449,15 @@ static void __inet_hash(struct sock *sk) __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); spin_unlock(&ilb->lock); + return 0; } +EXPORT_SYMBOL(__inet_hash); void inet_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - __inet_hash(sk); + __inet_hash(sk, NULL); local_bh_enable(); } } @@ -477,8 +488,7 @@ EXPORT_SYMBOL_GPL(inet_unhash); int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, - struct sock *, __u16, struct inet_timewait_sock **), - int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)) + struct sock *, __u16, struct inet_timewait_sock **)) { struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->inet_num; @@ -548,7 +558,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - twrefcnt += hash(sk, tw); + twrefcnt += __inet_hash_nolisten(sk, tw); } if (tw) twrefcnt += inet_twsk_bind_unhash(tw, hinfo); @@ -570,7 +580,7 @@ ok: tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - hash(sk, NULL); + __inet_hash_nolisten(sk, NULL); spin_unlock_bh(&head->lock); return 0; } else { @@ -590,7 +600,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), - __inet_check_established, __inet_hash_nolisten); + __inet_check_established); } EXPORT_SYMBOL_GPL(inet_hash_connect); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 6d592f8555fb..f38e387448fb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -98,7 +98,6 @@ void inet_twsk_free(struct inet_timewait_sock *tw) #ifdef SOCK_REFCNT_DEBUG pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif - release_net(twsk_net(tw)); kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } @@ -195,7 +194,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_ipv6only = 0; tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; - twsk_net_set(tw, hold_net(sock_net(sk))); + atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); + twsk_net_set(tw, sock_net(sk)); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this @@ -487,6 +487,7 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo, for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; restart_rcu: + cond_resched(); rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 787b3c294ce6..d9bc28ac5d1b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); + skb_sender_cpu_clear(skb); return dst_output(skb); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e5b6d0ddcb58..145a50c4d566 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -659,27 +659,30 @@ EXPORT_SYMBOL(ip_defrag); struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) { struct iphdr iph; + int netoff; u32 len; if (skb->protocol != htons(ETH_P_IP)) return skb; - if (!skb_copy_bits(skb, 0, &iph, sizeof(iph))) + netoff = skb_network_offset(skb); + + if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0) return skb; if (iph.ihl < 5 || iph.version != 4) return skb; len = ntohs(iph.tot_len); - if (skb->len < len || len < (iph.ihl * 4)) + if (skb->len < netoff + len || len < (iph.ihl * 4)) return skb; if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, iph.ihl*4)) + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) return skb; - if (pskb_trim_rcsum(skb, len)) + if (pskb_trim_rcsum(skb, netoff + len)) return skb; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(skb, user)) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6207275fc749..0eb2a040a830 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -621,10 +621,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); if (data[IFLA_GRE_LOCAL]) - parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); + parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]); if (data[IFLA_GRE_REMOTE]) - parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); + parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]); if (data[IFLA_GRE_TTL]) parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); @@ -776,8 +776,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || - nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || - nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) || + nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || + nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || nla_put_u8(skb, IFLA_GRE_PMTUDISC, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d68199d9b2b0..8259e777b249 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(skb, sk); + ip_select_ident(sock_net(sk), skb, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -430,7 +430,8 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); + ip_select_ident_segs(sock_net(sk), skb, sk, + skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; @@ -636,10 +637,7 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ - /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header - */ - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); + ll_rs = LL_RESERVED_SPACE(rt->dst.dev); /* * Fragment the datagram. @@ -888,7 +886,8 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (sk->sk_type == SOCK_DGRAM)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1381,7 +1380,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(skb, sk); + ip_select_ident(net, skb, sk); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 31d8c71986b4..f6a0d54b308a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -432,17 +432,32 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } -static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk, - const struct sk_buff *skb, - int ee_origin) +/* IPv4 supports cmsg on all imcp errors and some timestamps + * + * Timestamp code paths do not initialize the fields expected by cmsg: + * the PKTINFO fields in skb->cb[]. Fill those in here. + */ +static bool ipv4_datagram_support_cmsg(const struct sock *sk, + struct sk_buff *skb, + int ee_origin) { - struct in_pktinfo *info = PKTINFO_SKB_CB(skb); + struct in_pktinfo *info; + + if (ee_origin == SO_EE_ORIGIN_ICMP) + return true; - if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) || - (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || + if (ee_origin == SO_EE_ORIGIN_LOCAL) + return false; + + /* Support IP_PKTINFO on tstamp packets if requested, to correlate + * timestamp with egress dev. Not possible for packets without dev + * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). + */ + if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || (!skb->dev)) return false; + info = PKTINFO_SKB_CB(skb); info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; info->ipi_ifindex = skb->dev->ifindex; return true; @@ -483,7 +498,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && skb->len) { + if (sin && serr->port) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); @@ -496,9 +511,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (skb->len && - (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { + if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; if (inet_sk(sk)->cmsg_flags) @@ -523,12 +536,34 @@ out: * Socket option code for IP. This is the end of the line after any * TCP,UDP etc options on an IP socket. */ +static bool setsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IP_ADD_MEMBERSHIP: + case IP_ADD_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: + case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_MSFILTER: + case IP_UNBLOCK_SOURCE: + case MCAST_BLOCK_SOURCE: + case MCAST_MSFILTER: + case MCAST_JOIN_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_UNBLOCK_SOURCE: + return true; + } + return false; +} static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); int val = 0, err; + bool needs_rtnl = setsockopt_needs_rtnl(optname); switch (optname) { case IP_PKTINFO: @@ -571,6 +606,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, return ip_mroute_setsockopt(sk, optname, optval, optlen); err = 0; + if (needs_rtnl) + rtnl_lock(); lock_sock(sk); switch (optname) { @@ -1105,10 +1142,14 @@ mc_msf_out: break; } release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return err; e_inval: release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return -EINVAL; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 88c386cf7d85..8c4dcc46acd2 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); + __ip_select_ident(sock_net(sk), iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 94efe148181c..5a6e27054f0a 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -456,10 +456,10 @@ static void vti_netlink_parms(struct nlattr *data[], parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); if (data[IFLA_VTI_LOCAL]) - parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]); + parms->iph.saddr = nla_get_in_addr(data[IFLA_VTI_LOCAL]); if (data[IFLA_VTI_REMOTE]) - parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]); + parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]); } @@ -505,8 +505,8 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u32(skb, IFLA_VTI_LINK, p->link); nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key); nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key); - nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr); - nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr); + nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr); + nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr); return 0; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 915d215a7d14..bfbcc85c02ee 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -325,10 +325,10 @@ static void ipip_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); + parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); + parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) { parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); @@ -450,8 +450,8 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || - nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d78427652d2..b4a545d24adb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -73,9 +73,7 @@ struct mr_table { struct list_head list; -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; u32 id; struct sock __rcu *mroute_sk; struct timer_list ipmr_expire_timer; @@ -1644,7 +1642,8 @@ static struct notifier_block ip_mr_notifier = { * important for multicast video. */ -static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) +static void ip_encap(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr) { struct iphdr *iph; const struct iphdr *old_iph = ip_hdr(skb); @@ -1663,7 +1662,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1760,7 +1759,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * What do we do with netfilter? -- RR */ if (vif->flags & VIFF_TUNNEL) { - ip_encap(skb, vif->local, vif->remote); + ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ vif->dev->stats.tx_packets++; vif->dev->stats.tx_bytes += skb->len; @@ -2282,8 +2281,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; - if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || - nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) + if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || + nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) goto nla_put_failure; err = __ipmr_fill_mroute(mrt, skb, c, rtm); /* do not break the dump if cache is unresolved */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 59f883d9cadf..fb20f363151f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,24 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config NF_LOG_ARP - tristate "ARP packet logging" - default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON - -config NF_LOG_IPV4 - tristate "IPv4 packet logging" - default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON +if NF_TABLES config NF_TABLES_IPV4 - depends on NF_TABLES tristate "IPv4 nf_tables support" help This option enables the IPv4 support for nf_tables. +if NF_TABLES_IPV4 + config NFT_CHAIN_ROUTE_IPV4 - depends on NF_TABLES_IPV4 tristate "IPv4 nf_tables route chain support" help This option enables the "route" chain for IPv4 in nf_tables. This @@ -61,22 +53,34 @@ config NFT_CHAIN_ROUTE_IPV4 fields such as the source, destination, type of service and the packet mark. -config NF_REJECT_IPV4 - tristate "IPv4 packet rejection" - default m if NETFILTER_ADVANCED=n - config NFT_REJECT_IPV4 - depends on NF_TABLES_IPV4 select NF_REJECT_IPV4 default NFT_REJECT tristate +endif # NF_TABLES_IPV4 + config NF_TABLES_ARP - depends on NF_TABLES tristate "ARP nf_tables support" help This option enables the ARP support for nf_tables. +endif # NF_TABLES + +config NF_LOG_ARP + tristate "ARP packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_LOG_IPV4 + tristate "IPv4 packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_REJECT_IPV4 + tristate "IPv4 packet rejection" + default m if NETFILTER_ADVANCED=n + config NF_NAT_IPV4 tristate "IPv4 NAT" depends on NF_CONNTRACK_IPV4 diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 99e810f84671..cf5e82f39d3b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index e90f83a3415b..f75e9df5e017 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -418,6 +418,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); + + if (!par->net->xt.clusterip_deprecated_warning) { + pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " + "use xt_cluster instead\n"); + par->net->xt.clusterip_deprecated_warning = true; + } + return ret; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 8f48f5517e33..87907d4bd259 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -34,31 +34,32 @@ static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; + int hook = par->hooknum; switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - nf_send_unreach(skb, ICMP_NET_UNREACH); + nf_send_unreach(skb, ICMP_NET_UNREACH, hook); break; case IPT_ICMP_HOST_UNREACHABLE: - nf_send_unreach(skb, ICMP_HOST_UNREACH); + nf_send_unreach(skb, ICMP_HOST_UNREACH, hook); break; case IPT_ICMP_PROT_UNREACHABLE: - nf_send_unreach(skb, ICMP_PROT_UNREACH); + nf_send_unreach(skb, ICMP_PROT_UNREACH, hook); break; case IPT_ICMP_PORT_UNREACHABLE: - nf_send_unreach(skb, ICMP_PORT_UNREACH); + nf_send_unreach(skb, ICMP_PORT_UNREACH, hook); break; case IPT_ICMP_NET_PROHIBITED: - nf_send_unreach(skb, ICMP_NET_ANO); + nf_send_unreach(skb, ICMP_NET_ANO, hook); break; case IPT_ICMP_HOST_PROHIBITED: - nf_send_unreach(skb, ICMP_HOST_ANO); + nf_send_unreach(skb, ICMP_HOST_ANO, hook); break; case IPT_ICMP_ADMIN_PROHIBITED: - nf_send_unreach(skb, ICMP_PKT_FILTERED); + nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: - nf_send_reset(skb, par->hooknum); + nf_send_reset(skb, hook); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5c61328b7704..8c8d6642cbb0 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -322,8 +322,8 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) static int ipv4_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || - nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) + if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || + nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) goto nla_put_failure; return 0; @@ -342,8 +342,8 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[], if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) return -EINVAL; - t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]); - t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]); + t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); + t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); return 0; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index a460a87e14f8..f0dfe92a00d6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -300,7 +300,9 @@ static int exp_seq_show(struct seq_file *s, void *v) __nf_ct_l3proto_find(exp->tuple.src.l3num), __nf_ct_l4proto_find(exp->tuple.src.l3num, exp->tuple.dst.protonum)); - return seq_putc(s, '\n'); + seq_putc(s, '\n'); + + return 0; } static const struct seq_operations exp_seq_ops = { diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index d059182c1466..e7ad950cf9ef 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -10,8 +10,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 75101980eeee..076aadda0473 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -26,7 +28,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 536da7bc598a..c5b794da51a9 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, - __be16 protocol, int ttl) + __u8 protocol, int ttl) { struct iphdr *niph, *oiph = ip_hdr(oldskb); @@ -164,4 +164,27 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) } EXPORT_SYMBOL_GPL(nf_send_reset); +void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) +{ + struct iphdr *iph = ip_hdr(skb_in); + u8 proto; + + if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET)) + return; + + if (skb_csum_unnecessary(skb_in)) { + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); + return; + } + + if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) + proto = iph->protocol; + else + proto = 0; + + if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0) + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach); + MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index d729542bd1b7..16a5d4d73d75 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -27,7 +27,8 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nf_send_unreach(pkt->skb, priv->icmp_code, + pkt->ops->hooknum); break; case NFT_REJECT_TCP_RST: nf_send_reset(pkt->skb, pkt->ops->hooknum); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e9f66e1cda50..344e7cdfb8d4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -64,11 +64,11 @@ EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; -static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask) +static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) { - int res = (num + net_hash_mix(net)) & mask; + u32 res = (num + net_hash_mix(net)) & mask; - pr_debug("hash(%d) = %d\n", num, res); + pr_debug("hash(%u) = %u\n", num, res); return res; } EXPORT_SYMBOL_GPL(ping_hash); @@ -259,6 +259,9 @@ int ping_init_sock(struct sock *sk) kgid_t low, high; int ret = 0; + if (sk->sk_family == AF_INET6) + sk->sk_ipv6only = 1; + inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; @@ -305,6 +308,11 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, if (addr_len < sizeof(*addr)) return -EINVAL; + if (addr->sin_family != AF_INET && + !(addr->sin_family == AF_UNSPEC && + addr->sin_addr.s_addr == htonl(INADDR_ANY))) + return -EAFNOSUPPORT; + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); @@ -330,7 +338,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return -EINVAL; if (addr->sin6_family != AF_INET6) - return -EINVAL; + return -EAFNOSUPPORT; pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); @@ -684,8 +692,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, } EXPORT_SYMBOL_GPL(ping_common_sendmsg); -static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct net *net = sock_net(sk); struct flowi4 fl4; @@ -716,7 +723,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) - return -EINVAL; + return -EAFNOSUPPORT; daddr = usin->sin_addr.s_addr; /* no remote port */ } else { @@ -841,8 +848,8 @@ do_confirm: goto out; } -int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f027a708b7e0..56946f47d446 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -404,7 +404,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } @@ -481,8 +481,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; @@ -709,8 +708,8 @@ out: return ret; * we return it, otherwise we block. */ -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ad5064362c5c..652b92ebd7ba 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -152,7 +152,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, static struct dst_ops ipv4_dst_ops = { .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, @@ -483,7 +482,7 @@ u32 ip_idents_reserve(u32 hash, int segs) } EXPORT_SYMBOL(ip_idents_reserve); -void __ip_select_ident(struct iphdr *iph, int segs) +void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { static u32 ip_idents_hashrnd __read_mostly; u32 hash, id; @@ -492,7 +491,7 @@ void __ip_select_ident(struct iphdr *iph, int segs) hash = jhash_3words((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol, + iph->protocol ^ net_hash_mix(net), ip_idents_hashrnd); id = ip_idents_reserve(hash, segs); iph->id = htons(id); @@ -2225,7 +2224,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), .check = ipv4_blackhole_dst_check, .mtu = ipv4_blackhole_mtu, .default_advmss = ipv4_default_advmss, @@ -2321,11 +2319,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (IPCB(skb)->flags & IPSKB_DOREDIRECT) r->rtm_flags |= RTCF_DOREDIRECT; - if (nla_put_be32(skb, RTA_DST, dst)) + if (nla_put_in_addr(skb, RTA_DST, dst)) goto nla_put_failure; if (src) { r->rtm_src_len = 32; - if (nla_put_be32(skb, RTA_SRC, src)) + if (nla_put_in_addr(skb, RTA_SRC, src)) goto nla_put_failure; } if (rt->dst.dev && @@ -2338,11 +2336,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, #endif if (!rt_is_input_route(rt) && fl4->saddr != src) { - if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) + if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } if (rt->rt_uses_gateway && - nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) + nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; expires = rt->dst.expires; @@ -2438,8 +2436,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) ip_hdr(skb)->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; - dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; + src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; + dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 45fe60c5238e..df849e5a10f1 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -219,19 +219,20 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_check); -static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) + if (child) { + atomic_set(&req->rsk_refcnt, 1); inet_csk_reqsk_queue_add(sk, req, child); - else + } else { reqsk_free(req); - + } return child; } @@ -325,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ + req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */ if (!req) goto out; @@ -336,8 +337,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); ireq->ir_mark = inet_request_mark(sk, skb); ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -345,7 +346,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; - treq->listener = NULL; + treq->tfo_listener = false; + + ireq->ir_iif = sk->sk_bound_dev_if; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -357,7 +360,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; } - req->expires = 0UL; req->num_retrans = 0; /* diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d151539da8e6..fdf899163d44 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -883,6 +883,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_probe_threshold", + .data = &init_net.ipv4.sysctl_tcp_probe_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_probe_interval", + .data = &init_net.ipv4.sysctl_tcp_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9d72a0fcd928..dbd51cefaf02 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -835,17 +835,13 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) { struct tcp_sock *tp = tcp_sk(sk); - u32 new_size_goal, size_goal, hlen; + u32 new_size_goal, size_goal; if (!large_allowed || !sk_can_gso(sk)) return mss_now; - /* Maybe we should/could use sk->sk_prot->max_header here ? */ - hlen = inet_csk(sk)->icsk_af_ops->net_header_len + - inet_csk(sk)->icsk_ext_hdr_len + - tp->tcp_header_len; - - new_size_goal = sk->sk_gso_max_size - 1 - hlen; + /* Note : tcp_tso_autosize() will eventually split this later */ + new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER; new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal); /* We try hard to avoid divides here */ @@ -1064,8 +1060,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return err; } -int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size) +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1543,8 +1538,8 @@ EXPORT_SYMBOL(tcp_read_sock); * Probably, code can be easily improved even more. */ -int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len) +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) { struct tcp_sock *tp = tcp_sk(sk); int copied = 0; @@ -1918,18 +1913,19 @@ EXPORT_SYMBOL_GPL(tcp_set_state); static const unsigned char new_state[16] = { /* current state: new state: action: */ - /* (Invalid) */ TCP_CLOSE, - /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, - /* TCP_SYN_SENT */ TCP_CLOSE, - /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, - /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, - /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, - /* TCP_TIME_WAIT */ TCP_CLOSE, - /* TCP_CLOSE */ TCP_CLOSE, - /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, - /* TCP_LAST_ACK */ TCP_LAST_ACK, - /* TCP_LISTEN */ TCP_CLOSE, - /* TCP_CLOSING */ TCP_CLOSING, + [0 /* (Invalid) */] = TCP_CLOSE, + [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_SYN_SENT] = TCP_CLOSE, + [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, + [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, + [TCP_TIME_WAIT] = TCP_CLOSE, + [TCP_CLOSE] = TCP_CLOSE, + [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, + [TCP_LAST_ACK] = TCP_LAST_ACK, + [TCP_LISTEN] = TCP_CLOSE, + [TCP_CLOSING] = TCP_CLOSING, + [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ }; static int tcp_close_state(struct sock *sk) @@ -3005,12 +3001,11 @@ static void __init tcp_init_mem(void) void __init tcp_init(void) { - struct sk_buff *skb = NULL; unsigned long limit; int max_rshare, max_wshare, cnt; unsigned int i; - BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + sock_skb_cb_check_size(sizeof(struct tcp_skb_cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index d4c3a5e66380..7a5ae50c80c8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -378,6 +378,12 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); */ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { + /* If credits accumulated at a higher w, apply them gently now. */ + if (tp->snd_cwnd_cnt >= w) { + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd++; + } + tp->snd_cwnd_cnt += acked; if (tp->snd_cwnd_cnt >= w) { u32 delta = tp->snd_cwnd_cnt / w; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4b276d1ed980..06d3d665a9fd 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -306,8 +306,10 @@ tcp_friendliness: } } - if (ca->cnt == 0) /* cannot be zero */ - ca->cnt = 1; + /* The maximum rate of cwnd increase CUBIC allows is 1 packet per + * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. + */ + ca->cnt = max(ca->cnt, 2U); } static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0d73f9ddb55b..86dc119a3815 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,13 +34,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); } static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ea82fd492c1b..2eb887ec0ce3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, tp = tcp_sk(child); tp->fastopen_rsk = req; - /* Do a hold on the listner sk so that if the listener is being - * closed, the child that has been accepted can live on and still - * access listen_lock. - */ - sock_hold(sk); - tcp_rsk(req)->listener = sk; + tcp_rsk(req)->tfo_listener = true; /* RFC1323: The window in SYN & SYN/ACK segments is never * scaled. So correct it appropriately. @@ -174,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); + atomic_set(&req->rsk_refcnt, 1); /* Add the child socket directly into the accept queue */ inet_csk_reqsk_queue_add(sk, req, child); @@ -221,7 +217,6 @@ static bool tcp_fastopen_create_child(struct sock *sk, WARN_ON(req->sk == NULL); return true; } -EXPORT_SYMBOL(tcp_fastopen_create_child); static bool tcp_fastopen_queue_check(struct sock *sk) { @@ -245,7 +240,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk) struct request_sock *req1; spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; - if ((req1 == NULL) || time_after(req1->expires, jiffies)) { + if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { spin_unlock(&fastopenq->lock); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); @@ -254,7 +249,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk) fastopenq->rskq_rst_head = req1->dl_next; fastopenq->qlen--; spin_unlock(&fastopenq->lock); - reqsk_free(req1); + reqsk_put(req1); } return true; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8fdd27b17306..18b80e8bc533 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3321,6 +3321,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } +/* Return true if we're currently rate-limiting out-of-window ACKs and + * thus shouldn't send a dupack right now. We rate-limit dupacks in + * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS + * attacks that send repeated SYNs or ACKs for the same connection. To + * do this, we do not send a duplicate SYNACK or ACK if the remote + * endpoint is sending out-of-window SYNs or pure ACKs at a high rate. + */ +bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, + int mib_idx, u32 *last_oow_ack_time) +{ + /* Data packets without SYNs are not likely part of an ACK loop. */ + if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) && + !tcp_hdr(skb)->syn) + goto not_rate_limited; + + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); + + if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { + NET_INC_STATS_BH(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } + } + + *last_oow_ack_time = tcp_time_stamp; + +not_rate_limited: + return false; /* not rate-limited: go ahead, send dupack now! */ +} + /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) { @@ -4770,7 +4800,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) return false; /* If we filled the congestion window, do not expand. */ - if (tp->packets_out >= tp->snd_cwnd) + if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return false; return true; @@ -5664,7 +5694,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (tcp_check_req(sk, skb, req, NULL, true) == NULL) + if (tcp_check_req(sk, skb, req, true) == NULL) goto discard; } @@ -5912,6 +5942,80 @@ static void tcp_ecn_create_request(struct request_sock *req, inet_rsk(req)->ecn_ok = 1; } +static void tcp_openreq_init(struct request_sock *req, + const struct tcp_options_received *rx_opt, + struct sk_buff *skb, const struct sock *sk) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ + req->cookie_ts = 0; + tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tcp_rsk(req)->snt_synack = tcp_time_stamp; + tcp_rsk(req)->last_oow_ack_time = 0; + req->mss = rx_opt->mss_clamp; + req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; + ireq->tstamp_ok = rx_opt->tstamp_ok; + ireq->sack_ok = rx_opt->sack_ok; + ireq->snd_wscale = rx_opt->snd_wscale; + ireq->wscale_ok = rx_opt->wscale_ok; + ireq->acked = 0; + ireq->ecn_ok = 0; + ireq->ir_rmt_port = tcp_hdr(skb)->source; + ireq->ir_num = ntohs(tcp_hdr(skb)->dest); + ireq->ir_mark = inet_request_mark(sk, skb); +} + +struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener) +{ + struct request_sock *req = reqsk_alloc(ops, sk_listener); + + if (req) { + struct inet_request_sock *ireq = inet_rsk(req); + + kmemcheck_annotate_bitfield(ireq, flags); + ireq->opt = NULL; + atomic64_set(&ireq->ir_cookie, 0); + ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); + ireq->ireq_family = sk_listener->sk_family; + } + + return req; +} +EXPORT_SYMBOL(inet_reqsk_alloc); + +/* + * Return true if a syncookie should be sent + */ +static bool tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) +{ + const char *msg = "Dropping request"; + bool want_cookie = false; + struct listen_sock *lopt; + +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) { + msg = "Sending cookies"; + want_cookie = true; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else +#endif + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); + + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -5949,7 +6053,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop; } - req = inet_reqsk_alloc(rsk_ops); + req = inet_reqsk_alloc(rsk_ops, sk); if (!req) goto drop; @@ -5966,6 +6070,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); + /* Note: tcp_v6_init_req() might override ir_iif for link locals */ + inet_rsk(req)->ir_iif = sk->sk_bound_dev_if; + af_ops->init_req(req, sk, skb); if (security_inet_conn_request(sk, skb, req)) @@ -6038,7 +6145,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (err || want_cookie) goto drop_and_free; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a2dfed4783b..5aababa20a21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -189,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!inet->inet_saddr) inet->inet_saddr = fl4->saddr; - inet->inet_rcv_saddr = inet->inet_saddr; + sk_rcv_saddr_set(sk, inet->inet_saddr); if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { /* Reset inherited state */ @@ -204,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tcp_fetch_timewait_stamp(sk, &rt->dst); inet->inet_dport = usin->sin_port; - inet->inet_daddr = daddr; + sk_daddr_set(sk, daddr); inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) @@ -310,6 +310,34 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) dst->ops->redirect(dst, sk, skb); } + +/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ +void tcp_req_err(struct sock *sk, u32 seq) +{ + struct request_sock *req = inet_reqsk(sk); + struct net *net = sock_net(sk); + + /* ICMPs are not backlogged, hence we cannot get + * an established socket here. + */ + WARN_ON(req->sk); + + if (seq != tcp_rsk(req)->snt_isn) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); + } else { + /* + * Still in SYN_RECV, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); + inet_csk_reqsk_queue_drop(req->rsk_listener, req); + } +} +EXPORT_SYMBOL(tcp_req_err); + /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should @@ -343,8 +371,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) int err; struct net *net = dev_net(icmp_skb->dev); - sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, - iph->saddr, th->source, inet_iif(icmp_skb)); + sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, + th->dest, iph->saddr, ntohs(th->source), + inet_iif(icmp_skb)); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; @@ -353,6 +382,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) inet_twsk_put(inet_twsk(sk)); return; } + seq = ntohl(th->seq); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return tcp_req_err(sk, seq); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -374,7 +406,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); - seq = ntohl(th->seq); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = tp->fastopen_rsk; snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; @@ -458,36 +489,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req, **prev; - case TCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - req = inet_csk_search_req(sk, &prev, th->dest, - iph->daddr, iph->saddr); - if (!req) - goto out; - - /* ICMPs are not backlogged, hence we cannot get - an established socket here. - */ - WARN_ON(req->sk); - - if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - /* - * Still in SYN_RECV, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - inet_csk_reqsk_queue_drop(sk, req, prev); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - goto out; - case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is @@ -647,7 +648,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) if (!key) goto release_sk1; - genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; } else { @@ -855,35 +856,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -/* - * Return true if a syncookie should be sent - */ -bool tcp_syn_flood_action(struct sock *sk, - const struct sk_buff *skb, - const char *proto) -{ - const char *msg = "Dropping request"; - bool want_cookie = false; - struct listen_sock *lopt; - -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - msg = "Sending cookies"; - want_cookie = true; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); - } else -#endif - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - - lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; - if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { - lopt->synflood_warned = 1; - pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); - } - return want_cookie; -} -EXPORT_SYMBOL(tcp_syn_flood_action); #ifdef CONFIG_TCP_MD5SIG /* @@ -897,10 +869,10 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, const union tcp_md5_addr *addr, int family) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; unsigned int size = sizeof(struct in_addr); - struct tcp_md5sig_info *md5sig; + const struct tcp_md5sig_info *md5sig; /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, @@ -923,24 +895,15 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk) + const struct sock *addr_sk) { union tcp_md5_addr *addr; - addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; + addr = (union tcp_md5_addr *)&sk->sk_daddr; return tcp_md5_do_lookup(sk, addr, AF_INET); } EXPORT_SYMBOL(tcp_v4_md5_lookup); -static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, - struct request_sock *req) -{ - union tcp_md5_addr *addr; - - addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr; - return tcp_md5_do_lookup(sk, addr, AF_INET); -} - /* This can be called on a newly created socket, from other files */ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) @@ -1101,8 +1064,8 @@ clear_hash_noput: return 1; } -int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - const struct sock *sk, const struct request_sock *req, +int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb) { struct tcp_md5sig_pool *hp; @@ -1110,12 +1073,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, const struct tcphdr *th = tcp_hdr(skb); __be32 saddr, daddr; - if (sk) { - saddr = inet_sk(sk)->inet_saddr; - daddr = inet_sk(sk)->inet_daddr; - } else if (req) { - saddr = inet_rsk(req)->ir_loc_addr; - daddr = inet_rsk(req)->ir_rmt_addr; + if (sk) { /* valid for establish/request sockets */ + saddr = sk->sk_rcv_saddr; + daddr = sk->sk_daddr; } else { const struct iphdr *iph = ip_hdr(skb); saddr = iph->saddr; @@ -1152,8 +1112,9 @@ clear_hash_noput: } EXPORT_SYMBOL(tcp_v4_md5_hash_skb); -static bool __tcp_v4_inbound_md5_hash(struct sock *sk, - const struct sk_buff *skb) +/* Called with rcu_read_lock() */ +static bool tcp_v4_inbound_md5_hash(struct sock *sk, + const struct sk_buff *skb) { /* * This gets called for each TCP segment that arrives @@ -1193,7 +1154,7 @@ static bool __tcp_v4_inbound_md5_hash(struct sock *sk, */ genhash = tcp_v4_md5_hash_skb(newhash, hash_expected, - NULL, NULL, skb); + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", @@ -1205,28 +1166,16 @@ static bool __tcp_v4_inbound_md5_hash(struct sock *sk, } return false; } - -static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) -{ - bool ret; - - rcu_read_lock(); - ret = __tcp_v4_inbound_md5_hash(sk, skb); - rcu_read_unlock(); - - return ret; -} - #endif -static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, +static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); + ireq->no_srccheck = inet_sk(sk_listener)->transparent; ireq->opt = tcp_v4_save_options(skb); } @@ -1259,7 +1208,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .mss_clamp = TCP_MSS_DEFAULT, #ifdef CONFIG_TCP_MD5SIG - .md5_lookup = tcp_v4_reqsk_md5_lookup, + .req_md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, #endif .init_req = tcp_v4_init_req, @@ -1318,8 +1267,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->ir_rmt_addr; - newinet->inet_rcv_saddr = ireq->ir_loc_addr; + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); @@ -1391,15 +1340,17 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); + struct request_sock *req; struct sock *nsk; - struct request_sock **prev; - /* Find possible connection requests. */ - struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, - iph->saddr, iph->daddr); - if (req) - return tcp_check_req(sk, skb, req, prev, false); + + req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + reqsk_put(req); + return nsk; + } nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); @@ -1517,7 +1468,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = sk->sk_rx_dst; if (dst) @@ -1904,13 +1855,13 @@ get_req: } sk = sk_nulls_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { icsk = inet_csk(sk); - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); sk = sk_nulls_next(sk); } get_sk: @@ -1922,7 +1873,7 @@ get_sk: goto out; } icsk = inet_csk(sk); - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); if (reqsk_queue_len(&icsk->icsk_accept_queue)) { start_req: st->uid = sock_i_uid(sk); @@ -1931,7 +1882,7 @@ start_req: st->sbucket = 0; goto get_req; } - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } spin_unlock_bh(&ilb->lock); st->offset = 0; @@ -2150,7 +2101,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_OPENREQ: if (v) { struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) @@ -2204,17 +2155,17 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) } EXPORT_SYMBOL(tcp_proc_unregister); -static void get_openreq4(const struct sock *sk, const struct request_sock *req, +static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i, kuid_t uid) { const struct inet_request_sock *ireq = inet_rsk(req); - long delta = req->expires - jiffies; + long delta = req->rsk_timer.expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", i, ireq->ir_loc_addr, - ntohs(inet_sk(sk)->inet_sport), + ireq->ir_num, ireq->ir_rmt_addr, ntohs(ireq->ir_rmt_port), TCP_SYN_RECV, @@ -2225,7 +2176,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ - atomic_read(&sk->sk_refcnt), + 0, req); } @@ -2332,7 +2283,7 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) get_tcp4_sock(v, seq, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid); + get_openreq4(v, seq, st->num, st->uid); break; } out: @@ -2460,6 +2411,8 @@ static int __net_init tcp_sk_init(struct net *net) } net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; + net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; + net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; return 0; fail: diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index e5f41bd5ec1b..71ec14c87579 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -40,6 +40,7 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; + possible_net_t tcpm_net; struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; @@ -52,6 +53,11 @@ struct tcp_metrics_block { struct rcu_head rcu_head; }; +static inline struct net *tm_net(struct tcp_metrics_block *tm) +{ + return read_pnet(&tm->tcpm_net); +} + static bool tcp_metric_locked(struct tcp_metrics_block *tm, enum tcp_metric_index idx) { @@ -74,23 +80,20 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { - const struct in6_addr *a6, *b6; - if (a->family != b->family) return false; if (a->family == AF_INET) return a->addr.a4 == b->addr.a4; - - a6 = (const struct in6_addr *) &a->addr.a6[0]; - b6 = (const struct in6_addr *) &b->addr.a6[0]; - - return ipv6_addr_equal(a6, b6); + return ipv6_addr_equal(&a->addr.in6, &b->addr.in6); } struct tcpm_hash_bucket { struct tcp_metrics_block __rcu *chain; }; +static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; +static unsigned int tcp_metrics_hash_log __read_mostly; + static DEFINE_SPINLOCK(tcp_metrics_lock); static void tcpm_suck_dst(struct tcp_metrics_block *tm, @@ -143,6 +146,9 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst #define TCP_METRICS_RECLAIM_DEPTH 5 #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL +#define deref_locked(p) \ + rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock)) + static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, struct inetpeer_addr *saddr, struct inetpeer_addr *daddr, @@ -171,9 +177,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (unlikely(reclaim)) { struct tcp_metrics_block *oldest; - oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); - for (tm = rcu_dereference(oldest->tcpm_next); tm; - tm = rcu_dereference(tm->tcpm_next)) { + oldest = deref_locked(tcp_metrics_hash[hash].chain); + for (tm = deref_locked(oldest->tcpm_next); tm; + tm = deref_locked(tm->tcpm_next)) { if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) oldest = tm; } @@ -183,14 +189,15 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (!tm) goto out_unlock; } + write_pnet(&tm->tcpm_net, net); tm->tcpm_saddr = *saddr; tm->tcpm_daddr = *daddr; tcpm_suck_dst(tm, dst, true); if (likely(!reclaim)) { - tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; - rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm); + tm->tcpm_next = tcp_metrics_hash[hash].chain; + rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm); } out_unlock: @@ -214,10 +221,11 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s struct tcp_metrics_block *tm; int depth = 0; - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, saddr) && - addr_same(&tm->tcpm_daddr, daddr)) + addr_same(&tm->tcpm_daddr, daddr) && + net_eq(tm_net(tm), net)) break; depth++; } @@ -242,8 +250,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr; - *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; + saddr.addr.in6 = inet_rsk(req)->ir_v6_loc_addr; + daddr.addr.in6 = inet_rsk(req)->ir_v6_rmt_addr; hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; #endif @@ -252,12 +260,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, } net = dev_net(dst->dev); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr)) + addr_same(&tm->tcpm_daddr, &daddr) && + net_eq(tm_net(tm), net)) break; } tcpm_check_stamp(tm, dst); @@ -288,9 +298,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock hash = (__force unsigned int) daddr.addr.a4; } else { saddr.family = AF_INET6; - *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; + saddr.addr.in6 = tw->tw_v6_rcv_saddr; daddr.family = AF_INET6; - *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; + daddr.addr.in6 = tw->tw_v6_daddr; hash = ipv6_addr_hash(&tw->tw_v6_daddr); } } @@ -299,12 +309,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock return NULL; net = twsk_net(tw); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr)) + addr_same(&tm->tcpm_daddr, &daddr) && + net_eq(tm_net(tm), net)) break; } return tm; @@ -336,9 +348,9 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, hash = (__force unsigned int) daddr.addr.a4; } else { saddr.family = AF_INET6; - *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; + saddr.addr.in6 = sk->sk_v6_rcv_saddr; daddr.family = AF_INET6; - *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; + daddr.addr.in6 = sk->sk_v6_daddr; hash = ipv6_addr_hash(&sk->sk_v6_daddr); } } @@ -347,7 +359,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, return NULL; net = dev_net(dst->dev); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); tm = __tcp_get_metrics(&saddr, &daddr, net, hash); if (tm == TCP_METRICS_RECLAIM_PTR) @@ -773,19 +786,19 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, switch (tm->tcpm_daddr.family) { case AF_INET: - if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, - tm->tcpm_daddr.addr.a4) < 0) + if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4, + tm->tcpm_daddr.addr.a4) < 0) goto nla_put_failure; - if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, - tm->tcpm_saddr.addr.a4) < 0) + if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4, + tm->tcpm_saddr.addr.a4) < 0) goto nla_put_failure; break; case AF_INET6: - if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, - tm->tcpm_daddr.addr.a6) < 0) + if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6, + &tm->tcpm_daddr.addr.in6) < 0) goto nla_put_failure; - if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, - tm->tcpm_saddr.addr.a6) < 0) + if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6, + &tm->tcpm_saddr.addr.in6) < 0) goto nla_put_failure; break; default: @@ -898,17 +911,19 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + unsigned int max_rows = 1U << tcp_metrics_hash_log; unsigned int row, s_row = cb->args[0]; int s_col = cb->args[1], col = s_col; for (row = s_row; row < max_rows; row++, s_col = 0) { struct tcp_metrics_block *tm; - struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; + struct tcpm_hash_bucket *hb = tcp_metrics_hash + row; rcu_read_lock(); for (col = 0, tm = rcu_dereference(hb->chain); tm; tm = rcu_dereference(tm->tcpm_next), col++) { + if (!net_eq(tm_net(tm), net)) + continue; if (col < s_col) continue; if (tcp_metrics_dump_info(skb, cb, tm) < 0) { @@ -933,7 +948,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, a = info->attrs[v4]; if (a) { addr->family = AF_INET; - addr->addr.a4 = nla_get_be32(a); + addr->addr.a4 = nla_get_in_addr(a); if (hash) *hash = (__force unsigned int) addr->addr.a4; return 0; @@ -943,9 +958,9 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, if (nla_len(a) != sizeof(struct in6_addr)) return -EINVAL; addr->family = AF_INET6; - memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); + addr->addr.in6 = nla_get_in6_addr(a); if (hash) - *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + *hash = ipv6_addr_hash(&addr->addr.in6); return 0; } return optional ? 1 : -EAFNOSUPPORT; @@ -994,13 +1009,15 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!reply) goto nla_put_failure; - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); ret = -ESRCH; rcu_read_lock(); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_daddr, &daddr) && - (!src || addr_same(&tm->tcpm_saddr, &saddr))) { + (!src || addr_same(&tm->tcpm_saddr, &saddr)) && + net_eq(tm_net(tm), net)) { ret = tcp_metrics_fill_info(msg, tm); break; } @@ -1020,34 +1037,27 @@ out_free: return ret; } -#define deref_locked_genl(p) \ - rcu_dereference_protected(p, lockdep_genl_is_held() && \ - lockdep_is_held(&tcp_metrics_lock)) - -#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) - -static int tcp_metrics_flush_all(struct net *net) +static void tcp_metrics_flush_all(struct net *net) { - unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; - struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; + unsigned int max_rows = 1U << tcp_metrics_hash_log; + struct tcpm_hash_bucket *hb = tcp_metrics_hash; struct tcp_metrics_block *tm; unsigned int row; for (row = 0; row < max_rows; row++, hb++) { + struct tcp_metrics_block __rcu **pp; spin_lock_bh(&tcp_metrics_lock); - tm = deref_locked_genl(hb->chain); - if (tm) - hb->chain = NULL; - spin_unlock_bh(&tcp_metrics_lock); - while (tm) { - struct tcp_metrics_block *next; - - next = deref_genl(tm->tcpm_next); - kfree_rcu(tm, rcu_head); - tm = next; + pp = &hb->chain; + for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { + if (net_eq(tm_net(tm), net)) { + *pp = tm->tcpm_next; + kfree_rcu(tm, rcu_head); + } else { + pp = &tm->tcpm_next; + } } + spin_unlock_bh(&tcp_metrics_lock); } - return 0; } static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) @@ -1064,19 +1074,23 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) ret = parse_nl_addr(info, &daddr, &hash, 1); if (ret < 0) return ret; - if (ret > 0) - return tcp_metrics_flush_all(net); + if (ret > 0) { + tcp_metrics_flush_all(net); + return 0; + } ret = parse_nl_saddr(info, &saddr); if (ret < 0) src = false; - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); - hb = net->ipv4.tcp_metrics_hash + hash; + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); + hb = tcp_metrics_hash + hash; pp = &hb->chain; spin_lock_bh(&tcp_metrics_lock); - for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { + for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { if (addr_same(&tm->tcpm_daddr, &daddr) && - (!src || addr_same(&tm->tcpm_saddr, &saddr))) { + (!src || addr_same(&tm->tcpm_saddr, &saddr)) && + net_eq(tm_net(tm), net)) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); found = true; @@ -1126,6 +1140,9 @@ static int __net_init tcp_net_metrics_init(struct net *net) size_t size; unsigned int slots; + if (!net_eq(net, &init_net)) + return 0; + slots = tcpmhash_entries; if (!slots) { if (totalram_pages >= 128 * 1024) @@ -1134,14 +1151,14 @@ static int __net_init tcp_net_metrics_init(struct net *net) slots = 8 * 1024; } - net->ipv4.tcp_metrics_hash_log = order_base_2(slots); - size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; + tcp_metrics_hash_log = order_base_2(slots); + size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log; - net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!net->ipv4.tcp_metrics_hash) - net->ipv4.tcp_metrics_hash = vzalloc(size); + tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!tcp_metrics_hash) + tcp_metrics_hash = vzalloc(size); - if (!net->ipv4.tcp_metrics_hash) + if (!tcp_metrics_hash) return -ENOMEM; return 0; @@ -1149,19 +1166,7 @@ static int __net_init tcp_net_metrics_init(struct net *net) static void __net_exit tcp_net_metrics_exit(struct net *net) { - unsigned int i; - - for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) { - struct tcp_metrics_block *tm, *next; - - tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1); - while (tm) { - next = rcu_dereference_protected(tm->tcpm_next, 1); - kfree(tm); - tm = next; - } - } - kvfree(net->ipv4.tcp_metrics_hash); + tcp_metrics_flush_all(net); } static __net_initdata struct pernet_operations tcp_net_metrics_ops = { @@ -1175,16 +1180,10 @@ void __init tcp_metrics_init(void) ret = register_pernet_subsys(&tcp_net_metrics_ops); if (ret < 0) - goto cleanup; + panic("Could not allocate the tcp_metrics hash table\n"); + ret = genl_register_family_with_ops(&tcp_metrics_nl_family, tcp_metrics_nl_ops); if (ret < 0) - goto cleanup_subsys; - return; - -cleanup_subsys: - unregister_pernet_subsys(&tcp_net_metrics_ops); - -cleanup: - return; + panic("Could not register tcp_metrics generic netlink\n"); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dd11ac7798c6..274e96fb369b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -572,7 +572,6 @@ EXPORT_SYMBOL(tcp_create_openreq_child); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct request_sock **prev, bool fastopen) { struct tcp_options_received tmp_opt; @@ -630,8 +629,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, &tcp_rsk(req)->last_oow_ack_time) && !inet_rtx_syn_ack(sk, req)) - req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, - TCP_RTO_MAX) + jiffies; + mod_timer_pending(&req->rsk_timer, jiffies + + min(TCP_TIMEOUT_INIT << req->num_timeout, + TCP_RTO_MAX)); return NULL; } @@ -766,7 +766,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (child == NULL) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_unlink(sk, req); inet_csk_reqsk_queue_removed(sk, req); inet_csk_reqsk_queue_add(sk, req, child); @@ -791,7 +791,7 @@ embryonic_reset: tcp_reset(sk); } if (!fastopen) { - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); } return NULL; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 9d7930ba8e0f..3f7c2fca5431 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -29,8 +29,8 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, } } -struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, + netdev_features_t features) { if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a2a796c5536b..2e69b8d16e68 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -601,15 +601,14 @@ static unsigned int tcp_synack_options(struct sock *sk, struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5, + const struct tcp_md5sig_key *md5, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; #ifdef CONFIG_TCP_MD5SIG - *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); - if (*md5) { + if (md5) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; @@ -620,8 +619,6 @@ static unsigned int tcp_synack_options(struct sock *sk, */ ireq->tstamp_ok &= !ireq->sack_ok; } -#else - *md5 = NULL; #endif /* We always send an MSS option. */ @@ -989,7 +986,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (md5) { sk_nocaps_add(sk, NETIF_F_GSO_MASK); tp->af_specific->calc_md5_hash(opts.hash_location, - md5, sk, NULL, skb); + md5, sk, skb); } #endif @@ -1354,6 +1351,8 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_af_ops->net_header_len; icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; + if (icsk->icsk_mtup.enabled) + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; } EXPORT_SYMBOL(tcp_mtup_init); @@ -1752,20 +1751,23 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, bool *is_cwnd_limited, u32 max_segs) { - struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - u32 send_win, cong_win, limit, in_flight; + u32 age, send_win, cong_win, limit, in_flight; + struct tcp_sock *tp = tcp_sk(sk); + struct skb_mstamp now; + struct sk_buff *head; int win_divisor; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto send_now; - if (icsk->icsk_ca_state != TCP_CA_Open) + if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR))) goto send_now; - /* Defer for less than two clock ticks. */ - if (tp->tso_deferred && - (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) + /* Avoid bursty behavior by allowing defer + * only if the last write was recent. + */ + if ((s32)(tcp_time_stamp - tp->lsndtime) > 0) goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1807,11 +1809,14 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, goto send_now; } - /* Ok, it looks like it is advisable to defer. - * Do not rearm the timer if already set to not break TCP ACK clocking. - */ - if (!tp->tso_deferred) - tp->tso_deferred = 1 | (jiffies << 1); + head = tcp_write_queue_head(sk); + skb_mstamp_get(&now); + age = skb_mstamp_us_delta(&now, &head->skb_mstamp); + /* If next ACK is likely to come too late (half srtt), do not defer */ + if (age < (tp->srtt_us >> 4)) + goto send_now; + + /* Ok, it looks like it is advisable to defer. */ if (cong_win < send_win && cong_win < skb->len) *is_cwnd_limited = true; @@ -1819,10 +1824,34 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, return true; send_now: - tp->tso_deferred = 0; return false; } +static inline void tcp_mtu_check_reprobe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + u32 interval; + s32 delta; + + interval = net->ipv4.sysctl_tcp_probe_interval; + delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp; + if (unlikely(delta >= interval * HZ)) { + int mss = tcp_current_mss(sk); + + /* Update current search range */ + icsk->icsk_mtup.probe_size = 0; + icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + + sizeof(struct tcphdr) + + icsk->icsk_af_ops->net_header_len; + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); + + /* Update probe time stamp */ + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + } +} + /* Create a new MTU probe if we are ready. * MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing @@ -1837,11 +1866,13 @@ static int tcp_mtu_probe(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb, *nskb, *next; + struct net *net = sock_net(sk); int len; int probe_size; int size_needed; int copy; int mss_now; + int interval; /* Not currently probing/verifying, * not in recovery, @@ -1854,12 +1885,25 @@ static int tcp_mtu_probe(struct sock *sk) tp->rx_opt.num_sacks || tp->rx_opt.dsack) return -1; - /* Very simple search strategy: just double the MSS. */ + /* Use binary search for probe_size between tcp_mss_base, + * and current mss_clamp. if (search_high - search_low) + * smaller than a threshold, backoff from probing. + */ mss_now = tcp_current_mss(sk); - probe_size = 2 * tp->mss_cache; + probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high + + icsk->icsk_mtup.search_low) >> 1); size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; - if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { - /* TODO: set timer for probe_converge_event */ + interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low; + /* When misfortune happens, we are reprobing actively, + * and then reprobe timer has expired. We stick with current + * probing process by not resetting search range to its orignal. + */ + if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || + interval < net->ipv4.sysctl_tcp_probe_threshold) { + /* Check whether enough time has elaplased for + * another round of probing. + */ + tcp_mtu_check_reprobe(sk); return -1; } @@ -2773,15 +2817,11 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); if (skb) break; yield(); } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); @@ -2870,7 +2910,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; struct sk_buff *skb; - struct tcp_md5sig_key *md5; + struct tcp_md5sig_key *md5 = NULL; int tcp_header_size; int mss; @@ -2883,7 +2923,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); - security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) @@ -2896,7 +2935,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif skb_mstamp_get(&skb->skb_mstamp); - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + +#ifdef CONFIG_TCP_MD5SIG + rcu_read_lock(); + md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); +#endif + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, foc) + sizeof(*th); skb_push(skb, tcp_header_size); @@ -2927,10 +2971,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ - if (md5) { + if (md5) tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, - md5, NULL, req, skb); - } + md5, req_to_sk(req), skb); + rcu_read_unlock(); #endif return skb; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0732b787904e..2568fd282873 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -107,6 +107,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct net *net = sock_net(sk); @@ -326,7 +327,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) struct request_sock *req; req = tcp_sk(sk)->fastopen_rsk; - req->rsk_ops->syn_ack_timeout(sk, req); + req->rsk_ops->syn_ack_timeout(req); if (req->num_timeout >= max_retries) { tcp_write_err(sk); @@ -538,19 +539,11 @@ static void tcp_write_timer(unsigned long data) sock_put(sk); } -/* - * Timer for listening sockets - */ - -static void tcp_synack_timer(struct sock *sk) +void tcp_syn_ack_timeout(const struct request_sock *req) { - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, - TCP_TIMEOUT_INIT, TCP_RTO_MAX); -} + struct net *net = read_pnet(&inet_rsk(req)->ireq_net); -void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) -{ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); + NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS); } EXPORT_SYMBOL(tcp_syn_ack_timeout); @@ -582,7 +575,7 @@ static void tcp_keepalive_timer (unsigned long data) } if (sk->sk_state == TCP_LISTEN) { - tcp_synack_timer(sk); + pr_err("Hmm... keepalive on a LISTEN ???\n"); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 97ef1f8b7be8..294af16633af 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -318,8 +318,8 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); } -static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, - unsigned int port) +static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, + unsigned int port) { return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } @@ -421,9 +421,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, return score; } -static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, - const __u16 lport, const __be32 faddr, - const __be16 fport) +static u32 udp_ehashfn(const struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) { static u32 udp_ehash_secret __read_mostly; @@ -873,8 +873,7 @@ out: } EXPORT_SYMBOL(udp_push_pending_frames); -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); @@ -1136,7 +1135,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, * sendpage interface can't pass. * This will succeed only when the socket is connected. */ - ret = udp_sendmsg(NULL, sk, &msg, 0); + ret = udp_sendmsg(sk, &msg, 0); if (ret < 0) return ret; } @@ -1254,8 +1253,8 @@ EXPORT_SYMBOL(udp_ioctl); * return it, otherwise we block. */ -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); @@ -2525,6 +2524,16 @@ void __init udp_table_init(struct udp_table *table, const char *name) } } +u32 udp_flow_hashrnd(void) +{ + static u32 hashrnd __read_mostly; + + net_get_random_once(&hashrnd, sizeof(hashrnd)); + + return hashrnd; +} +EXPORT_SYMBOL(udp_flow_hashrnd); + void __init udp_init(void) { unsigned long limit; diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 4a000f1dd757..2dbfc1f1f7b3 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -18,8 +18,9 @@ #include <linux/sock_diag.h> static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *req, - struct nlattr *bc) + struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -31,7 +32,8 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, } static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) { int err = -EINVAL; struct sock *sk; @@ -90,8 +92,9 @@ out_nosk: return err; } -static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) +static void udp_dump(struct udp_table *table, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) { int num, s_num, slot, s_slot; struct net *net = sock_net(skb->sk); @@ -144,13 +147,13 @@ done: } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { udp_dump(&udp_table, skb, cb, r, bc); } static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return udp_dump_one(&udp_table, in_skb, nlh, req); } @@ -170,13 +173,14 @@ static const struct inet_diag_handler udp_diag_handler = { }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, + struct nlattr *bc) { udp_dump(&udplite_table, skb, cb, r, bc); } static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return udp_dump_one(&udplite_table, in_skb, nlh, req); } diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index f3c27899f62b..7e0fe4bdd967 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -21,8 +21,8 @@ int compat_udp_setsockopt(struct sock *sk, int level, int optname, int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 91771a7c802f..35feda676464 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -63,7 +63,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; - ip_select_ident(skb, NULL); + ip_select_ident(dev_net(dst->dev), skb, NULL); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d5f6bd9a210a..dab73813cb92 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -63,6 +63,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) return err; IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; + skb->protocol = htons(ETH_P_IP); return x->outer_mode->output2(x, skb); } @@ -71,7 +72,6 @@ EXPORT_SYMBOL(xfrm4_prepare_output); int xfrm4_output_finish(struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - skb->protocol = htons(ETH_P_IP); #ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6156f68a1e90..c224c856247b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -232,7 +232,6 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm4_dst_ops = { .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 98e4a63d72bb..5c9e94cb1b2c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -46,6 +46,7 @@ #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> +#include <linux/inet.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_addr.h> @@ -102,6 +103,9 @@ #define INFINITY_LIFE_TIME 0xFFFFFFFF +#define IPV6_MAX_STRLEN \ + sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") + static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; @@ -127,6 +131,9 @@ static void ipv6_regen_rndid(unsigned long data); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); +static int ipv6_generate_stable_address(struct in6_addr *addr, + u8 dad_count, + const struct inet6_dev *idev); /* * Configured unicast address hash table @@ -202,6 +209,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + } }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -240,6 +250,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + }, }; /* Check if a valid qdisc is available */ @@ -321,7 +334,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) return ERR_PTR(-EINVAL); ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); - if (ndev == NULL) + if (!ndev) return ERR_PTR(err); rwlock_init(&ndev->lock); @@ -333,7 +346,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); - if (ndev->nd_parms == NULL) { + if (!ndev->nd_parms) { kfree(ndev); return ERR_PTR(err); } @@ -468,7 +481,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; ncm = nlmsg_data(nlh); @@ -506,7 +519,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, int err = -ENOBUFS; skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, @@ -561,10 +574,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, break; default: dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) + if (!dev) goto errout; in6_dev = __in6_dev_get(dev); - if (in6_dev == NULL) + if (!in6_dev) goto errout; devconf = &in6_dev->cnf; break; @@ -572,7 +585,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, err = -ENOBUFS; skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, @@ -841,7 +854,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); - if (ifa == NULL) { + if (!ifa) { ADBG("ipv6_add_addr: malloc failed\n"); err = -ENOBUFS; goto out; @@ -860,7 +873,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->peer_addr = *peer_addr; spin_lock_init(&ifa->lock); - spin_lock_init(&ifa->state_lock); INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; @@ -1003,10 +1015,10 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ASSERT_RTNL(); - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (state == INET6_IFADDR_STATE_DEAD) goto out; @@ -1546,7 +1558,7 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp_flags&banned_flags) && - (dev == NULL || ifp->idev->dev == dev || + (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); return 1; @@ -1568,7 +1580,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { - if (dev == NULL || ifp->idev->dev == dev) + if (!dev || ifp->idev->dev == dev) return true; } } @@ -1637,7 +1649,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { - if (dev == NULL || ifp->idev->dev == dev || + if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { result = ifp; in6_ifa_hold(ifp); @@ -1686,19 +1698,21 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); return err; } void addrconf_dad_failure(struct inet6_ifaddr *ifp) { + struct in6_addr addr; struct inet6_dev *idev = ifp->idev; + struct net *net = dev_net(ifp->idev->dev); if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -1708,9 +1722,57 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", ifp->idev->dev->name, &ifp->addr); - if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { - struct in6_addr addr; + spin_lock_bh(&ifp->lock); + + if (ifp->flags & IFA_F_STABLE_PRIVACY) { + int scope = ifp->scope; + u32 flags = ifp->flags; + struct in6_addr new_addr; + struct inet6_ifaddr *ifp2; + u32 valid_lft, preferred_lft; + int pfxlen = ifp->prefix_len; + int retries = ifp->stable_privacy_retry + 1; + + if (retries > net->ipv6.sysctl.idgen_retries) { + net_info_ratelimited("%s: privacy stable address generation failed because of DAD conflicts!\n", + ifp->idev->dev->name); + goto errdad; + } + + new_addr = ifp->addr; + if (ipv6_generate_stable_address(&new_addr, retries, + idev)) + goto errdad; + + valid_lft = ifp->valid_lft; + preferred_lft = ifp->prefered_lft; + + spin_unlock_bh(&ifp->lock); + if (idev->cnf.max_addresses && + ipv6_count_addresses(idev) >= + idev->cnf.max_addresses) + goto lock_errdad; + + net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n", + ifp->idev->dev->name); + + ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen, + scope, flags, valid_lft, + preferred_lft); + if (IS_ERR(ifp2)) + goto lock_errdad; + + spin_lock_bh(&ifp2->lock); + ifp2->stable_privacy_retry = retries; + ifp2->state = INET6_IFADDR_STATE_PREDAD; + spin_unlock_bh(&ifp2->lock); + + addrconf_mod_dad_work(ifp2, net->ipv6.sysctl.idgen_delay); + in6_ifa_put(ifp2); +lock_errdad: + spin_lock_bh(&ifp->lock); + } else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { addr.s6_addr32[0] = htonl(0xfe800000); addr.s6_addr32[1] = 0; @@ -1724,10 +1786,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - spin_lock_bh(&ifp->state_lock); +errdad: /* transition from _POSTDAD to _ERRDAD */ ifp->state = INET6_IFADDR_STATE_ERRDAD; - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); addrconf_mod_dad_work(ifp, 0); } @@ -2052,7 +2114,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_table *table; table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); - if (table == NULL) + if (!table) return NULL; read_lock_bh(&table->tb6_lock); @@ -2186,6 +2248,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) __u32 valid_lft; __u32 prefered_lft; int addr_type; + u32 addr_flags = 0; struct inet6_dev *in6_dev; struct net *net = dev_net(dev); @@ -2215,7 +2278,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) in6_dev = in6_dev_get(dev); - if (in6_dev == NULL) { + if (!in6_dev) { net_dbg_ratelimited("addrconf: device %s not configured\n", dev->name); return; @@ -2292,6 +2355,12 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); tokenized = true; + } else if (in6_dev->addr_gen_mode == + IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !ipv6_generate_stable_address(&addr, 0, + in6_dev)) { + addr_flags |= IFA_F_STABLE_PRIVACY; + goto ok; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); @@ -2308,9 +2377,8 @@ ok: ifp = ipv6_get_ifaddr(net, &addr, dev, 1); - if (ifp == NULL && valid_lft) { + if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; - u32 addr_flags = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && @@ -2350,7 +2418,7 @@ ok: u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ - spin_lock(&ifp->lock); + spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; @@ -2380,12 +2448,12 @@ ok: ifp->tstamp = now; flags = ifp->flags; ifp->flags &= ~IFA_F_DEPRECATED; - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); if (!(flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); } else - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, create, now); @@ -2418,7 +2486,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) dev = __dev_get_by_index(net, ireq.ifr6_ifindex); err = -ENODEV; - if (dev == NULL) + if (!dev) goto err_exit; #if IS_ENABLED(CONFIG_IPV6_SIT) @@ -2464,6 +2532,23 @@ err_exit: return err; } +static int ipv6_mc_config(struct sock *sk, bool join, + const struct in6_addr *addr, int ifindex) +{ + int ret; + + ASSERT_RTNL(); + + lock_sock(sk); + if (join) + ret = ipv6_sock_mc_join(sk, ifindex, addr); + else + ret = ipv6_sock_mc_drop(sk, ifindex, addr); + release_sock(sk); + + return ret; +} + /* * Manual configuration of address on an interface */ @@ -2476,10 +2561,10 @@ static int inet6_addr_add(struct net *net, int ifindex, struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + unsigned long timeout; + clock_t expires; int scope; u32 flags; - clock_t expires; - unsigned long timeout; ASSERT_RTNL(); @@ -2501,6 +2586,14 @@ static int inet6_addr_add(struct net *net, int ifindex, if (IS_ERR(idev)) return PTR_ERR(idev); + if (ifa_flags & IFA_F_MCAUTOJOIN) { + int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk, + true, pfx, ifindex); + + if (ret < 0) + return ret; + } + scope = ipv6_addr_scope(pfx); timeout = addrconf_timeout_fixup(valid_lft, HZ); @@ -2542,6 +2635,9 @@ static int inet6_addr_add(struct net *net, int ifindex, in6_ifa_put(ifp); addrconf_verify_rtnl(); return 0; + } else if (ifa_flags & IFA_F_MCAUTOJOIN) { + ipv6_mc_config(net->ipv6.mc_autojoin_sk, + false, pfx, ifindex); } return PTR_ERR(ifp); @@ -2562,7 +2658,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, return -ENODEV; idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENXIO; read_lock_bh(&idev->lock); @@ -2578,6 +2674,10 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, jiffies); ipv6_del_addr(ifp); addrconf_verify_rtnl(); + if (ipv6_addr_is_multicast(pfx)) { + ipv6_mc_config(net->ipv6.mc_autojoin_sk, + false, pfx, dev->ifindex); + } return 0; } } @@ -2710,7 +2810,7 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2757,10 +2857,11 @@ static void init_loopback(struct net_device *dev) } } -static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) +static void addrconf_add_linklocal(struct inet6_dev *idev, + const struct in6_addr *addr, u32 flags) { struct inet6_ifaddr *ifp; - u32 addr_flags = IFA_F_PERMANENT; + u32 addr_flags = flags | IFA_F_PERMANENT; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (idev->cnf.optimistic_dad && @@ -2768,7 +2869,6 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr addr_flags |= IFA_F_OPTIMISTIC; #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); if (!IS_ERR(ifp)) { @@ -2778,18 +2878,103 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr } } +static bool ipv6_reserved_interfaceid(struct in6_addr address) +{ + if ((address.s6_addr32[2] | address.s6_addr32[3]) == 0) + return true; + + if (address.s6_addr32[2] == htonl(0x02005eff) && + ((address.s6_addr32[3] & htonl(0xfe000000)) == htonl(0xfe000000))) + return true; + + if (address.s6_addr32[2] == htonl(0xfdffffff) && + ((address.s6_addr32[3] & htonl(0xffffff80)) == htonl(0xffffff80))) + return true; + + return false; +} + +static int ipv6_generate_stable_address(struct in6_addr *address, + u8 dad_count, + const struct inet6_dev *idev) +{ + static DEFINE_SPINLOCK(lock); + static __u32 digest[SHA_DIGEST_WORDS]; + static __u32 workspace[SHA_WORKSPACE_WORDS]; + + static union { + char __data[SHA_MESSAGE_BYTES]; + struct { + struct in6_addr secret; + __be32 prefix[2]; + unsigned char hwaddr[MAX_ADDR_LEN]; + u8 dad_count; + } __packed; + } data; + + struct in6_addr secret; + struct in6_addr temp; + struct net *net = dev_net(idev->dev); + + BUILD_BUG_ON(sizeof(data.__data) != sizeof(data)); + + if (idev->cnf.stable_secret.initialized) + secret = idev->cnf.stable_secret.secret; + else if (net->ipv6.devconf_dflt->stable_secret.initialized) + secret = net->ipv6.devconf_dflt->stable_secret.secret; + else + return -1; + +retry: + spin_lock_bh(&lock); + + sha_init(digest); + memset(&data, 0, sizeof(data)); + memset(workspace, 0, sizeof(workspace)); + memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len); + data.prefix[0] = address->s6_addr32[0]; + data.prefix[1] = address->s6_addr32[1]; + data.secret = secret; + data.dad_count = dad_count; + + sha_transform(digest, data.__data, workspace); + + temp = *address; + temp.s6_addr32[2] = (__force __be32)digest[0]; + temp.s6_addr32[3] = (__force __be32)digest[1]; + + spin_unlock_bh(&lock); + + if (ipv6_reserved_interfaceid(temp)) { + dad_count++; + if (dad_count > dev_net(idev->dev)->ipv6.sysctl.idgen_retries) + return -1; + goto retry; + } + + *address = temp; + return 0; +} + static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { - if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { - struct in6_addr addr; + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { + if (!ipv6_generate_stable_address(&addr, 0, idev)) + addrconf_add_linklocal(idev, &addr, + IFA_F_STABLE_PRIVACY); + else if (prefix_route) + addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { /* addrconf_add_linklocal also adds a prefix_route and we * only need to care about prefix routes if ipv6_generate_eui64 * couldn't generate one. */ if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) - addrconf_add_linklocal(idev, &addr); + addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); } @@ -2834,7 +3019,7 @@ static void addrconf_sit_config(struct net_device *dev) */ idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2859,7 +3044,7 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3056,7 +3241,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENODEV; /* @@ -3127,10 +3312,10 @@ restart: write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->state_lock); + spin_lock_bh(&ifa->lock); state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifa->state_lock); + spin_unlock_bh(&ifa->lock); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); @@ -3288,12 +3473,12 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp) { bool begin_dad = false; - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state != INET6_IFADDR_STATE_DEAD) { ifp->state = INET6_IFADDR_STATE_PREDAD; begin_dad = true; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (begin_dad) addrconf_mod_dad_work(ifp, 0); @@ -3315,7 +3500,7 @@ static void addrconf_dad_work(struct work_struct *w) rtnl_lock(); - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_PREDAD) { action = DAD_BEGIN; ifp->state = INET6_IFADDR_STATE_DAD; @@ -3323,7 +3508,7 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (action == DAD_BEGIN) { addrconf_dad_begin(ifp); @@ -3811,7 +3996,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); - if (pfx == NULL) + if (!pfx) return -EINVAL; ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; @@ -3923,7 +4108,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); - if (pfx == NULL) + if (!pfx) return -EINVAL; if (tb[IFA_CACHEINFO]) { @@ -3938,17 +4123,17 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) } dev = __dev_get_by_index(net, ifm->ifa_index); - if (dev == NULL) + if (!dev) return -ENODEV; ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; /* We ignore other flags so far. */ ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | - IFA_F_NOPREFIXROUTE; + IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN; ifa = ipv6_get_ifaddr(net, pfx, dev, 1); - if (ifa == NULL) { + if (!ifa) { /* * It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. @@ -4023,7 +4208,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), @@ -4052,11 +4237,11 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } if (!ipv6_addr_any(&ifa->peer_addr)) { - if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || - nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) + if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || + nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0) goto error; } else - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0) + if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0) goto error; if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) @@ -4084,11 +4269,11 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || + if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); @@ -4110,11 +4295,11 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || + if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); @@ -4283,7 +4468,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) goto errout; addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (addr == NULL) { + if (!addr) { err = -EINVAL; goto errout; } @@ -4326,7 +4511,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) int err = -ENOBUFS; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); @@ -4398,6 +4583,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; + /* we omit DEVCONF_STABLE_SECRET for now */ } static inline size_t inet6_ifla6_size(void) @@ -4478,24 +4664,24 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); - if (nla == NULL) + if (!nla) goto nla_put_failure; ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); /* XXX - MC not implemented */ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (!nla) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (!nla) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); - if (nla == NULL) + if (!nla) goto nla_put_failure; if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode)) @@ -4541,7 +4727,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) ASSERT_RTNL(); - if (token == NULL) + if (!token) return -EINVAL; if (ipv6_addr_any(token)) return -EINVAL; @@ -4632,8 +4818,15 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); if (mode != IN6_ADDR_GEN_MODE_EUI64 && - mode != IN6_ADDR_GEN_MODE_NONE) + mode != IN6_ADDR_GEN_MODE_NONE && + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY) return -EINVAL; + + if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !idev->cnf.stable_secret.initialized && + !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized) + return -EINVAL; + idev->addr_gen_mode = mode; err = 0; } @@ -4650,7 +4843,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, void *protoinfo; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; hdr = nlmsg_data(nlh); @@ -4669,7 +4862,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; protoinfo = nla_nest_start(skb, IFLA_PROTINFO); - if (protoinfo == NULL) + if (!protoinfo) goto nla_put_failure; if (inet6_fill_ifla6_attrs(skb, idev) < 0) @@ -4730,7 +4923,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) int err = -ENOBUFS; skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); @@ -4763,7 +4956,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, struct prefix_cacheinfo ci; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; pmsg = nlmsg_data(nlh); @@ -4802,7 +4995,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, int err = -ENOBUFS; skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); @@ -4903,6 +5096,21 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } +static +int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct inet6_dev *idev = ctl->extra1; + int min_mtu = IPV6_MIN_MTU; + struct ctl_table lctl; + + lctl = *ctl; + lctl.extra1 = &min_mtu; + lctl.extra2 = idev ? &idev->dev->mtu : NULL; + + return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); +} + static void dev_disable_change(struct inet6_dev *idev) { struct netdev_notifier_info info; @@ -5027,6 +5235,74 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return ret; } +static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int err; + struct in6_addr addr; + char str[IPV6_MAX_STRLEN]; + struct ctl_table lctl = *ctl; + struct net *net = ctl->extra2; + struct ipv6_stable_secret *secret = ctl->data; + + if (&net->ipv6.devconf_all->stable_secret == ctl->data) + return -EIO; + + lctl.maxlen = IPV6_MAX_STRLEN; + lctl.data = str; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (!write && !secret->initialized) { + err = -EIO; + goto out; + } + + if (!write) { + err = snprintf(str, sizeof(str), "%pI6", + &secret->secret); + if (err >= sizeof(str)) { + err = -EIO; + goto out; + } + } + + err = proc_dostring(&lctl, write, buffer, lenp, ppos); + if (err || !write) + goto out; + + if (in6_pton(str, -1, addr.in6_u.u6_addr8, -1, NULL) != 1) { + err = -EIO; + goto out; + } + + secret->initialized = true; + secret->secret = addr; + + if (&net->ipv6.devconf_dflt->stable_secret == ctl->data) { + struct net_device *dev; + + for_each_netdev(net, dev) { + struct inet6_dev *idev = __in6_dev_get(dev); + + if (idev) { + idev->addr_gen_mode = + IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + } + } + } else { + struct inet6_dev *idev = ctl->extra1; + + idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + } + +out: + rtnl_unlock(); + + return err; +} static struct addrconf_sysctl_table { @@ -5054,7 +5330,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_mtu, }, { .procname = "accept_ra", @@ -5300,6 +5576,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { + .procname = "stable_secret", + .data = &ipv6_devconf.stable_secret, + .maxlen = IPV6_MAX_STRLEN, + .mode = 0600, + .proc_handler = addrconf_sysctl_stable_secret, + }, + { /* sentinel */ } }, @@ -5313,7 +5596,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); - if (t == NULL) + if (!t) goto out; for (i = 0; t->addrconf_vars[i].data; i++) { @@ -5325,7 +5608,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); - if (t->sysctl_header == NULL) + if (!t->sysctl_header) goto free; p->sysctl = t; @@ -5341,7 +5624,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) { struct addrconf_sysctl_table *t; - if (p->sysctl == NULL) + if (!p->sysctl) return; t = p->sysctl; @@ -5384,17 +5667,20 @@ static int __net_init addrconf_init_net(struct net *net) struct ipv6_devconf *all, *dflt; all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) + if (!all) goto err_alloc_all; dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) + if (!dflt) goto err_alloc_dflt; /* these will be inherited by all namespaces */ dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; + dflt->stable_secret.initialized = false; + all->stable_secret.initialized = false; + net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 98cc4cd570e2..d873ceea86e6 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -140,7 +140,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) struct net_device *dev = idev->dev; WARN_ON(!list_empty(&idev->addr_list)); - WARN_ON(idev->mc_list != NULL); + WARN_ON(idev->mc_list); WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index e43e79d0a612..882124ebb438 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -29,9 +29,7 @@ * Policy Table */ struct ip6addrlbl_entry { -#ifdef CONFIG_NET_NS - struct net *lbl_net; -#endif + possible_net_t lbl_net; struct in6_addr prefix; int prefixlen; int ifindex; @@ -129,9 +127,6 @@ static const __net_initconst struct ip6addrlbl_init_table /* Object management */ static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) { -#ifdef CONFIG_NET_NS - release_net(p->lbl_net); -#endif kfree(p); } @@ -240,9 +235,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); -#ifdef CONFIG_NET_NS - newp->lbl_net = hold_net(net); -#endif + write_pnet(&newp->lbl_net, net); atomic_set(&newp->refcnt, 1); return newp; } @@ -484,7 +477,7 @@ static int ip6addrlbl_fill(struct sk_buff *skb, ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); - if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || + if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e8c4400f23e9..eef63b394c5a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -164,11 +164,11 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - WARN_ON(answer_prot->slab == NULL); + WARN_ON(!answer_prot->slab); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); - if (sk == NULL) + if (!sk) goto out; sock_init_data(sock, sk); @@ -391,7 +391,7 @@ int inet6_release(struct socket *sock) { struct sock *sk = sock->sk; - if (sk == NULL) + if (!sk) return -EINVAL; /* Free mc lists */ @@ -413,11 +413,11 @@ void inet6_destroy_sock(struct sock *sk) /* Release rx options */ skb = xchg(&np->pktoptions, NULL); - if (skb != NULL) + if (skb) kfree_skb(skb); skb = xchg(&np->rxpmtu, NULL); - if (skb != NULL) + if (skb) kfree_skb(skb); /* Free flowlabels */ @@ -426,7 +426,7 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ opt = xchg(&np->opt, NULL); - if (opt != NULL) + if (opt) sock_kfree_s(sk, opt, opt->tot_len); } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -640,7 +640,7 @@ int inet6_sk_rebuild_header(struct sock *sk) dst = __sk_dst_check(sk, np->dst_cookie); - if (dst == NULL) { + if (!dst) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; struct flowi6 fl6; @@ -766,6 +766,8 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = 0; + net->ipv6.sysctl.idgen_retries = 3; + net->ipv6.sysctl.idgen_delay = 1 * HZ; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); @@ -824,7 +826,7 @@ static int __init inet6_init(void) struct list_head *r; int err = 0; - BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); + sock_skb_cb_check_size(sizeof(struct inet6_skb_parm)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index a6727add2624..ed7d4e3f9c10 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -681,7 +681,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); - if (ahp == NULL) + if (!ahp) return -ENOMEM; ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index baf2742d1ec4..514ac259f543 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -60,6 +60,8 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; + ASSERT_RTNL(); + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) @@ -68,12 +70,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); - if (pac == NULL) + if (!pac) return -ENOMEM; pac->acl_next = NULL; pac->acl_addr = *addr; - rtnl_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -92,7 +93,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } else dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) { + if (!dev) { err = -ENODEV; goto error; } @@ -130,7 +131,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } error: - rtnl_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; @@ -146,7 +146,8 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - rtnl_lock(); + ASSERT_RTNL(); + prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -154,10 +155,8 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) break; prev_pac = pac; } - if (!pac) { - rtnl_unlock(); + if (!pac) return -ENOENT; - } if (prev_pac) prev_pac->acl_next = pac->acl_next; else @@ -166,7 +165,6 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); return 0; @@ -224,7 +222,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, struct ifacaddr6 *aca; aca = kzalloc(sizeof(*aca), GFP_ATOMIC); - if (aca == NULL) + if (!aca) return NULL; aca->aca_addr = *addr; @@ -270,7 +268,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) goto out; } aca = aca_alloc(rt, addr); - if (aca == NULL) { + if (!aca) { ip6_rt_put(rt); err = -ENOMEM; goto out; @@ -339,7 +337,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENODEV; return __ipv6_dev_ac_dec(idev, addr); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c215be70cac0..762a58c772b8 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -71,7 +71,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -325,14 +325,34 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } -static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +/* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL. + * + * At one point, excluding local errors was a quick test to identify icmp/icmp6 + * errors. This is no longer true, but the test remained, so the v6 stack, + * unlike v4, also honors cmsg requests on all wifi and timestamp errors. + * + * Timestamp code paths do not initialize the fields expected by cmsg: + * the PKTINFO fields in skb->cb[]. Fill those in here. + */ +static bool ip6_datagram_support_cmsg(struct sk_buff *skb, + struct sock_exterr_skb *serr) { - int ifindex = skb->dev ? skb->dev->ifindex : -1; + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) + return true; + + if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) + return false; + + if (!skb->dev) + return false; if (skb->protocol == htons(ETH_P_IPV6)) - IP6CB(skb)->iif = ifindex; + IP6CB(skb)->iif = skb->dev->ifindex; else - PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; + PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex; + + return true; } /* @@ -353,7 +373,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) err = -EAGAIN; skb = sock_dequeue_err_skb(sk); - if (skb == NULL) + if (!skb) goto out; copied = skb->len; @@ -369,7 +389,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && skb->len) { + if (sin && serr->port) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -394,14 +414,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) { + + if (ip6_datagram_support_cmsg(skb, serr)) { sin->sin6_family = AF_INET6; - if (np->rxopt.all) { - if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && - serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) - ip6_datagram_prepare_pktinfo_errqueue(skb); + if (np->rxopt.all) ip6_datagram_recv_common_ctl(sk, msg, skb); - } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -446,7 +463,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, err = -EAGAIN; skb = xchg(&np->rxpmtu, NULL); - if (skb == NULL) + if (!skb) goto out; copied = skb->len; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e48f2c7c5c59..31f1b5d5e2ef 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -495,7 +495,7 @@ static int esp_init_authenc(struct xfrm_state *x) int err; err = -EINVAL; - if (x->ealg == NULL) + if (!x->ealg) goto error; err = -ENAMETOOLONG; diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 8af3eb57f438..5c5d23e59da5 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -82,7 +82,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, if (nexthdr == NEXTHDR_NONE) return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); - if (hp == NULL) + if (!hp) return -1; if (nexthdr == NEXTHDR_FRAGMENT) { __be16 _frag_off, *fp; @@ -91,7 +91,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, frag_off), sizeof(_frag_off), &_frag_off); - if (fp == NULL) + if (!fp) return -1; *frag_offp = *fp; @@ -218,7 +218,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, } hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); - if (hp == NULL) + if (!hp) return -EBADMSG; if (nexthdr == NEXTHDR_ROUTING) { @@ -226,7 +226,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, rh = skb_header_pointer(skb, start, sizeof(_rh), &_rh); - if (rh == NULL) + if (!rh) return -EBADMSG; if (flags && (*flags & IP6_FH_F_SKIP_RH) && @@ -245,7 +245,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, frag_off), sizeof(_frag_off), &_frag_off); - if (fp == NULL) + if (!fp) return -EBADMSG; _frag_off = ntohs(*fp) & ~0x7; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b4d5e1d97c1b..61fb184b818d 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto again; flp6->saddr = saddr; } + err = rt->dst.error; goto out; } again: @@ -198,12 +199,10 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } if (frh->src_len) - nla_memcpy(&rule6->src.addr, tb[FRA_SRC], - sizeof(struct in6_addr)); + rule6->src.addr = nla_get_in6_addr(tb[FRA_SRC]); if (frh->dst_len) - nla_memcpy(&rule6->dst.addr, tb[FRA_DST], - sizeof(struct in6_addr)); + rule6->dst.addr = nla_get_in6_addr(tb[FRA_DST]); rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; @@ -249,11 +248,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->tos = rule6->tclass; if ((rule6->dst.plen && - nla_put(skb, FRA_DST, sizeof(struct in6_addr), - &rule6->dst.addr)) || + nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || (rule6->src.plen && - nla_put(skb, FRA_SRC, sizeof(struct in6_addr), - &rule6->src.addr))) + nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr))) goto nla_put_failure; return 0; @@ -298,19 +295,16 @@ static int __net_init fib6_rules_net_init(struct net *net) ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); - net->ipv6.fib6_rules_ops = ops; - - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, - RT6_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0); if (err) goto out_fib6_rules_ops; - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, - 0x7FFE, RT6_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0); if (err) goto out_fib6_rules_ops; + net->ipv6.fib6_rules_ops = ops; out: return err; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a5e95199585e..2c2b5d51f15c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -160,8 +160,7 @@ static bool is_ineligible(const struct sk_buff *skb) tp = skb_header_pointer(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), sizeof(_type), &_type); - if (tp == NULL || - !(*tp & ICMPV6_INFOMSG_MASK)) + if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) return true; } return false; @@ -231,7 +230,7 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) offset += skb_network_offset(skb); op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); - if (op == NULL) + if (!op) return true; return (*op & 0xC0) == 0x80; } @@ -244,7 +243,7 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, int err = 0; skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) + if (!skb) goto out; icmp6h = icmp6_hdr(skb); @@ -479,7 +478,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); - if (sk == NULL) + if (!sk) return; sk->sk_mark = mark; np = inet6_sk(sk); @@ -582,7 +581,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); - if (sk == NULL) + if (!sk) return; sk->sk_mark = mark; np = inet6_sk(sk); @@ -839,7 +838,7 @@ static int __net_init icmpv6_sk_init(struct net *net) net->ipv6.icmp_sk = kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv6.icmp_sk == NULL) + if (!net->ipv6.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 29b32206e494..6927f3fb5597 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -112,22 +112,20 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, return c & (synq_hsize - 1); } -struct request_sock *inet6_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, +struct request_sock *inet6_csk_search_req(struct sock *sk, const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, const int iif) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; + struct request_sock *req; + u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries); - for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport, - lopt->hash_rnd, - lopt->nr_table_entries)]; - (req = *prev) != NULL; - prev = &req->dl_next) { + spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && @@ -135,13 +133,14 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && (!ireq->ir_iif || ireq->ir_iif == iif)) { + atomic_inc(&req->rsk_refcnt); WARN_ON(req->sk != NULL); - *prevp = prev; - return req; + break; } } + spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - return NULL; + return req; } EXPORT_SYMBOL_GPL(inet6_csk_search_req); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 051dffb49c90..033f17816ef4 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,11 +23,9 @@ #include <net/secure_seq.h> #include <net/ip.h> -static unsigned int inet6_ehashfn(struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +u32 inet6_ehashfn(const struct net *net, + const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const __be16 fport) { static u32 inet6_ehash_secret __read_mostly; static u32 ipv6_hash_secret __read_mostly; @@ -44,54 +42,6 @@ static unsigned int inet6_ehashfn(struct net *net, inet6_ehash_secret + net_hash_mix(net)); } -static int inet6_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; - const struct in6_addr *faddr = &sk->sk_v6_daddr; - const __u16 lport = inet->inet_num; - const __be16 fport = inet->inet_dport; - struct net *net = sock_net(sk); - - return inet6_ehashfn(net, laddr, lport, faddr, fport); -} - -int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) -{ - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - int twrefcnt = 0; - - WARN_ON(!sk_unhashed(sk)); - - if (sk->sk_state == TCP_LISTEN) { - struct inet_listen_hashbucket *ilb; - - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - spin_lock(&ilb->lock); - __sk_nulls_add_node_rcu(sk, &ilb->head); - spin_unlock(&ilb->lock); - } else { - unsigned int hash; - struct hlist_nulls_head *list; - spinlock_t *lock; - - sk->sk_hash = hash = inet6_sk_ehashfn(sk); - list = &inet_ehash_bucket(hashinfo, hash)->chain; - lock = inet_ehash_lockp(hashinfo, hash); - spin_lock(lock); - __sk_nulls_add_node_rcu(sk, list); - if (tw) { - WARN_ON(sk->sk_hash != tw->tw_hash); - twrefcnt = inet_twsk_unhash(tw); - } - spin_unlock(lock); - } - - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - return twrefcnt; -} -EXPORT_SYMBOL(__inet6_hash); - /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM @@ -320,6 +270,6 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk), - __inet6_check_established, __inet6_hash); + __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 263ef4143bff..96dbffff5a24 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1206,7 +1206,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, WARN_ON(fn->fn_flags & RTN_RTINFO); WARN_ON(fn->fn_flags & RTN_TL_ROOT); - WARN_ON(fn->leaf != NULL); + WARN_ON(fn->leaf); children = 0; child = NULL; @@ -1361,7 +1361,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) #if RT6_DEBUG >= 2 if (rt->dst.obsolete > 0) { - WARN_ON(fn != NULL); + WARN_ON(fn); return -ENOENT; } #endif diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f45d6db50a45..d491125011c4 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -100,7 +100,6 @@ static void fl_free(struct ip6_flowlabel *fl) if (fl) { if (fl->share == IPV6_FL_S_PROCESS) put_pid(fl->owner.pid); - release_net(fl->fl_net); kfree(fl->opt); kfree_rcu(fl, rcu); } @@ -206,7 +205,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK; if (fl->label) { lfl = __fl_lookup(net, fl->label); - if (lfl == NULL) + if (!lfl) break; } } @@ -220,7 +219,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, * with the same label can only appear on another sock */ lfl = __fl_lookup(net, fl->label); - if (lfl != NULL) { + if (lfl) { atomic_inc(&lfl->users); spin_unlock_bh(&ip6_fl_lock); return lfl; @@ -298,10 +297,10 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, { struct ipv6_txoptions *fl_opt = fl->opt; - if (fopt == NULL || fopt->opt_flen == 0) + if (!fopt || fopt->opt_flen == 0) return fl_opt; - if (fl_opt != NULL) { + if (fl_opt) { opt_space->hopopt = fl_opt->hopopt; opt_space->dst0opt = fl_opt->dst0opt; opt_space->srcrt = fl_opt->srcrt; @@ -367,7 +366,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -ENOMEM; fl = kzalloc(sizeof(*fl), GFP_KERNEL); - if (fl == NULL) + if (!fl) goto done; if (olen > 0) { @@ -377,7 +376,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -ENOMEM; fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); - if (fl->opt == NULL) + if (!fl->opt) goto done; memset(fl->opt, 0, sizeof(*fl->opt)); @@ -403,7 +402,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, } } - fl->fl_net = hold_net(net); + fl->fl_net = net; fl->expires = jiffies; err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); if (err) @@ -597,7 +596,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) return -EINVAL; fl = fl_create(net, sk, &freq, optval, optlen, &err); - if (fl == NULL) + if (!fl) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); @@ -617,7 +616,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } rcu_read_unlock_bh(); - if (fl1 == NULL) + if (!fl1) fl1 = fl_lookup(net, freq.flr_label); if (fl1) { recheck: @@ -634,7 +633,7 @@ recheck: goto release; err = -ENOMEM; - if (sfl1 == NULL) + if (!sfl1) goto release; if (fl->linger > fl1->linger) fl1->linger = fl->linger; @@ -654,7 +653,7 @@ release: goto done; err = -ENOMEM; - if (sfl1 == NULL) + if (!sfl1) goto done; err = mem_check(sk); @@ -662,7 +661,7 @@ release: goto done; fl1 = fl_intern(net, fl, freq.flr_label); - if (fl1 != NULL) + if (fl1) goto recheck; if (!freq.flr_label) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bc28b7d42a6d..0f4e73da14e4 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -223,7 +223,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, } } - if (cand != NULL) + if (cand) return cand; dev = ign->fb_tunnel_dev; @@ -395,7 +395,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); - if (t == NULL) + if (!t) return; switch (type) { @@ -980,7 +980,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) &p->raddr, &p->laddr, p->link, strict); - if (rt == NULL) + if (!rt) return; if (rt->dst.dev) { @@ -1073,7 +1073,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); - if (t == NULL) + if (!t) t = netdev_priv(dev); } memset(&p, 0, sizeof(p)); @@ -1105,7 +1105,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -1144,7 +1144,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, err = -ENOENT; ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(ign->fb_tunnel_dev)) @@ -1313,7 +1313,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) t = rtnl_dereference(ign->tunnels[prio][h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ @@ -1412,7 +1412,7 @@ static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) goto out; if (data[IFLA_GRE_REMOTE]) { - nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + daddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); if (ipv6_addr_any(&daddr)) return -EINVAL; } @@ -1446,10 +1446,10 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); if (data[IFLA_GRE_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_GRE_LOCAL]); if (data[IFLA_GRE_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); if (data[IFLA_GRE_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); @@ -1622,8 +1622,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || - nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) || - nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) || + nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || + nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacdcb4dc762..fb97f7f8d4ed 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -221,7 +221,7 @@ resubmit: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); - if (ipprot != NULL) { + if (ipprot) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 46d452a56d3e..e893cd18612f 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -124,7 +124,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); - if (skb->next != NULL) + if (skb->next) fptr->frag_off |= htons(IP6_MF); offset += (ntohs(ipv6h->payload_len) - sizeof(struct frag_hdr)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7deebf102cba..84c58da10f5c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -177,7 +177,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (skb2 == NULL) { + if (!skb2) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -318,6 +318,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct sk_buff *skb) { + skb_sender_cpu_clear(skb); return dst_output(skb); } @@ -627,7 +628,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(fh, rt); + ipv6_select_ident(net, fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); @@ -656,7 +657,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(offset); - if (frag->next != NULL) + if (frag->next) fh->frag_off |= htons(IP6_MF); fh->identification = frag_id; ipv6_hdr(frag)->payload_len = @@ -774,7 +775,7 @@ slow_path: fh->nexthdr = nexthdr; fh->reserved = 0; if (!frag_id) { - ipv6_select_ident(fh, rt); + ipv6_select_ident(net, fh, rt); frag_id = fh->identification; } else fh->identification = frag_id; @@ -822,7 +823,7 @@ static inline int ip6_rt_check(const struct rt6key *rt_key, const struct in6_addr *addr_cache) { return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && - (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); + (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); } static struct dst_entry *ip6_sk_dst_check(struct sock *sk, @@ -881,7 +882,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, #endif int err; - if (*dst == NULL) + if (!*dst) *dst = ip6_route_output(net, sk, fl6); err = (*dst)->error; @@ -1044,11 +1045,11 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ skb = skb_peek_tail(queue); - if (skb == NULL) { + if (!skb) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); - if (skb == NULL) + if (!skb) return err; /* reserve space for Hardware header */ @@ -1078,7 +1079,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); + ipv6_select_ident(sock_net(sk), &fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; append: @@ -1106,7 +1107,7 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { - if (skb == NULL) { + if (!skb) { /* first fragment, reserve header_len */ *mtu = orig_mtu - rt->dst.header_len; @@ -1138,7 +1139,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, return -EINVAL; v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(v6_cork->opt == NULL)) + if (unlikely(!v6_cork->opt)) return -ENOBUFS; v6_cork->opt->tot_len = opt->tot_len; @@ -1298,7 +1299,8 @@ emsgsize: if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO) && + (sk->sk_type == SOCK_DGRAM)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); @@ -1329,7 +1331,7 @@ alloc_new_skb: else fraggap = 0; /* update mtu and maxfraglen if necessary */ - if (skb == NULL || skb_prev == NULL) + if (!skb || !skb_prev) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, orig_mtu); @@ -1381,10 +1383,10 @@ alloc_new_skb: skb = sock_wmalloc(sk, alloclen + hh_len, 1, sk->sk_allocation); - if (unlikely(skb == NULL)) + if (unlikely(!skb)) err = -ENOBUFS; } - if (skb == NULL) + if (!skb) goto error; /* * Fill in the control structures @@ -1576,7 +1578,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, unsigned char proto = fl6->flowi6_proto; skb = __skb_dequeue(queue); - if (skb == NULL) + if (!skb) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 266a264ec212..9bd85f0dff69 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -64,12 +64,6 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); -#ifdef IP6_TNL_DEBUG -#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__) -#else -#define IP6_TNL_TRACE(x...) do {;} while(0) -#endif - #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -137,7 +131,7 @@ struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) struct dst_entry *dst = t->dst_cache; if (dst && dst->obsolete && - dst->ops->check(dst, t->dst_cookie) == NULL) { + !dst->ops->check(dst, t->dst_cookie)) { t->dst_cache = NULL; dst_release(dst); return NULL; @@ -314,7 +308,7 @@ out: * Create tunnel matching given parameters. * * Return: - * created tunnel or NULL + * created tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) @@ -322,7 +316,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) struct net_device *dev; struct ip6_tnl *t; char name[IFNAMSIZ]; - int err; + int err = -ENOMEM; if (p->name[0]) strlcpy(name, p->name, IFNAMSIZ); @@ -331,7 +325,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6_tnl_dev_setup); - if (dev == NULL) + if (!dev) goto failed; dev_net_set(dev, net); @@ -348,7 +342,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) failed_free: ip6_dev_free(dev); failed: - return NULL; + return ERR_PTR(err); } /** @@ -362,7 +356,7 @@ failed: * tunnel device is created and registered for use. * * Return: - * matching tunnel or NULL + * matching tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, @@ -380,13 +374,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net, if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr)) { if (create) - return NULL; + return ERR_PTR(-EEXIST); return t; } } if (!create) - return NULL; + return ERR_PTR(-ENODEV); return ip6_tnl_create(net, p); } @@ -502,7 +496,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); - if (t == NULL) + if (!t) goto out; tproto = ACCESS_ONCE(t->parms.proto); @@ -813,7 +807,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); - if (t != NULL) { + if (t) { struct pcpu_sw_netstats *tstats; tproto = ACCESS_ONCE(t->parms.proto); @@ -1280,7 +1274,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) &p->raddr, &p->laddr, p->link, strict); - if (rt == NULL) + if (!rt) return; if (rt->dst.dev) { @@ -1420,7 +1414,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); - if (t == NULL) + if (IS_ERR(t)) t = netdev_priv(dev); } else { memset(&p, 0, sizeof(p)); @@ -1445,7 +1439,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); if (cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (!IS_ERR(t)) { if (t->dev != dev) { err = -EEXIST; break; @@ -1457,14 +1451,15 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) else err = ip6_tnl_update(t, &p1); } - if (t) { + if (!IS_ERR(t)) { err = 0; ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + } else { + err = PTR_ERR(t); + } break; case SIOCDELTUNNEL: err = -EPERM; @@ -1478,7 +1473,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); - if (t == NULL) + if (IS_ERR(t)) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) @@ -1645,12 +1640,10 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL], - sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE], - sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]); @@ -1672,12 +1665,13 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); - struct ip6_tnl *nt; + struct ip6_tnl *nt, *t; nt = netdev_priv(dev); ip6_tnl_netlink_parms(data, &nt->parms); - if (ip6_tnl_locate(net, &nt->parms, 0)) + t = ip6_tnl_locate(net, &nt->parms, 0); + if (!IS_ERR(t)) return -EEXIST; return ip6_tnl_create2(dev); @@ -1697,8 +1691,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], ip6_tnl_netlink_parms(data, &p); t = ip6_tnl_locate(net, &p, 0); - - if (t) { + if (!IS_ERR(t)) { if (t->dev != dev) return -EEXIST; } else @@ -1744,10 +1737,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || @@ -1820,7 +1811,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) for (h = 0; h < HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5fb9e212eca8..53d90ed68905 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -218,7 +218,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p sprintf(name, "ip6_vti%%d"); dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); - if (dev == NULL) + if (!dev) goto failed; dev_net_set(dev, net); @@ -305,7 +305,7 @@ static int vti6_rcv(struct sk_buff *skb) rcu_read_lock(); t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); - if (t != NULL) { + if (t) { if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -716,7 +716,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } else { memset(&p, 0, sizeof(p)); } - if (t == NULL) + if (!t) t = netdev_priv(dev); vti6_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) @@ -736,7 +736,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -767,7 +767,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -ENOENT; vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, 0); - if (t == NULL) + if (!t) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) @@ -897,12 +897,10 @@ static void vti6_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_VTI_LINK]); if (data[IFLA_VTI_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL], - sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_VTI_LOCAL]); if (data[IFLA_VTI_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE], - sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_VTI_REMOTE]); if (data[IFLA_VTI_IKEY]) parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); @@ -983,10 +981,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) || - nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) || nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) goto nla_put_failure; @@ -1027,7 +1023,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) for (h = 0; h < HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); - while (t != NULL) { + while (t) { unregister_netdevice_queue(t->dev, &list); t = rtnl_dereference(t->next); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 34b682617f50..caf6b99374e6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -56,9 +56,7 @@ struct mr6_table { struct list_head list; -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; u32 id; struct sock *mroute6_sk; struct timer_list ipmr_expire_timer; @@ -175,7 +173,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, } mrt = ip6mr_get_table(rule->fr_net, rule->table); - if (mrt == NULL) + if (!mrt) return -EAGAIN; res->mrt = mrt; return 0; @@ -239,7 +237,7 @@ static int __net_init ip6mr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv6.mr6_tables); mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) { + if (!mrt) { err = -ENOMEM; goto err1; } @@ -307,11 +305,11 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) unsigned int i; mrt = ip6mr_get_table(net, id); - if (mrt != NULL) + if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (mrt == NULL) + if (!mrt) return NULL; mrt->id = id; write_pnet(&mrt->net, net); @@ -410,7 +408,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) struct mr6_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); iter->mrt = mrt; @@ -494,7 +492,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) struct mr6_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); it->mrt = mrt; @@ -667,7 +665,7 @@ static int pim6_rcv(struct sk_buff *skb) dev_hold(reg_dev); read_unlock(&mrt_lock); - if (reg_dev == NULL) + if (!reg_dev) goto drop; skb->mac_header = skb->network_header; @@ -745,7 +743,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) sprintf(name, "pim6reg%u", mrt->id); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); - if (dev == NULL) + if (!dev) return NULL; dev_net_set(dev, net); @@ -1074,7 +1072,7 @@ skip: static struct mfc6_cache *ip6mr_cache_alloc(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c == NULL) + if (!c) return NULL; c->mfc_un.res.minvif = MAXMIFS; return c; @@ -1083,7 +1081,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) static struct mfc6_cache *ip6mr_cache_alloc_unres(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); - if (c == NULL) + if (!c) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10 * HZ; @@ -1200,7 +1198,7 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (mrt->mroute6_sk == NULL) { + if (!mrt->mroute6_sk) { kfree_skb(skb); return -EINVAL; } @@ -1495,7 +1493,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return -EINVAL; c = ip6mr_cache_alloc(); - if (c == NULL) + if (!c) return -ENOMEM; c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; @@ -1665,7 +1663,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; if (optname != MRT6_INIT) { @@ -1814,7 +1812,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (optname) { @@ -1861,7 +1859,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1935,7 +1933,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -2005,7 +2003,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, struct dst_entry *dst; struct flowi6 fl6; - if (vif->dev == NULL) + if (!vif->dev) goto out_free; #ifdef CONFIG_IPV6_PIMSM_V2 @@ -2194,7 +2192,7 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); - if (cache == NULL) { + if (!cache) { int vif = ip6mr_find_vif(mrt, skb->dev); if (vif >= 0) @@ -2206,7 +2204,7 @@ int ip6_mr_input(struct sk_buff *skb) /* * No usable cache entry */ - if (cache == NULL) { + if (!cache) { int vif; vif = ip6mr_find_vif(mrt, skb->dev); @@ -2245,13 +2243,13 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) return -EMSGSIZE; mp_attr = nla_nest_start(skb, RTA_MULTIPATH); - if (mp_attr == NULL) + if (!mp_attr) return -EMSGSIZE; for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); - if (nhp == NULL) { + if (!nhp) { nla_nest_cancel(skb, mp_attr); return -EMSGSIZE; } @@ -2284,7 +2282,7 @@ int ip6mr_get_route(struct net *net, struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; read_lock(&mrt_lock); @@ -2309,7 +2307,7 @@ int ip6mr_get_route(struct net *net, } dev = skb->dev; - if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) { + if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { read_unlock(&mrt_lock); return -ENODEV; } @@ -2361,7 +2359,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, int err; nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); @@ -2380,8 +2378,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; - if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || - nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp)) + if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || + nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) goto nla_put_failure; err = __ip6mr_fill_mroute(mrt, skb, c, rtm); /* do not break the dump if cache is unresolved */ @@ -2426,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8d766d9100cb..63e6956917c9 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -85,7 +85,7 @@ int ip6_ra_control(struct sock *sk, int sel) return 0; } } - if (new_ra == NULL) { + if (!new_ra) { write_unlock_bh(&ip6_ra_lock); return -ENOBUFS; } @@ -117,6 +117,25 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, return opt; } +static bool setsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + case IPV6_JOIN_ANYCAST: + case IPV6_LEAVE_ANYCAST: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; +} + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -124,8 +143,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct net *net = sock_net(sk); int val, valbool; int retv = -ENOPROTOOPT; + bool needs_rtnl = setsockopt_needs_rtnl(optname); - if (optval == NULL) + if (!optval) val = 0; else { if (optlen >= sizeof(int)) { @@ -140,6 +160,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); + if (needs_rtnl) + rtnl_lock(); lock_sock(sk); switch (optname) { @@ -370,7 +392,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, */ if (optlen == 0) optval = NULL; - else if (optval == NULL) + else if (!optval) goto e_inval; else if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) @@ -421,7 +443,7 @@ sticky_done: if (optlen == 0) goto e_inval; - else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) + else if (optlen < sizeof(struct in6_pktinfo) || !optval) goto e_inval; if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { @@ -460,7 +482,7 @@ sticky_done: opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL); retv = -ENOBUFS; - if (opt == NULL) + if (!opt) break; memset(opt, 0, sizeof(*opt)); @@ -624,10 +646,10 @@ done: psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) retv = ipv6_sock_mc_join(sk, greq.gr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); else retv = ipv6_sock_mc_drop(sk, greq.gr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); break; } case MCAST_JOIN_SOURCE_GROUP: @@ -660,7 +682,7 @@ done: psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; retv = ipv6_sock_mc_join(sk, greqs.gsr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) break; @@ -837,11 +859,15 @@ pref_skip_coa: } release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return retv; e_inval: release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return -EINVAL; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ce107c8aab3..fac1f27e428e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -140,6 +140,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) struct net *net = sock_net(sk); int err; + ASSERT_RTNL(); + if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -155,13 +157,12 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); - if (mc_lst == NULL) + if (!mc_lst) return -ENOMEM; mc_lst->next = NULL; mc_lst->addr = *addr; - rtnl_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); @@ -172,8 +173,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } else dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) { - rtnl_unlock(); + if (!dev) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } @@ -190,7 +190,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_mc_inc(dev, addr); if (err) { - rtnl_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; } @@ -198,10 +197,9 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->next = np->ipv6_mc_list; rcu_assign_pointer(np->ipv6_mc_list, mc_lst); - rtnl_unlock(); - return 0; } +EXPORT_SYMBOL(ipv6_sock_mc_join); /* * socket leave on multicast group @@ -213,10 +211,11 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); + ASSERT_RTNL(); + if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - rtnl_lock(); for (lnk = &np->ipv6_mc_list; (mc_lst = rtnl_dereference(*lnk)) != NULL; lnk = &mc_lst->next) { @@ -227,7 +226,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) *lnk = mc_lst->next; dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev != NULL) { + if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -235,17 +234,16 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) __ipv6_dev_mc_dec(idev, &mc_lst->addr); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - rtnl_unlock(); atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); return 0; } } - rtnl_unlock(); return -EADDRNOTAVAIL; } +EXPORT_SYMBOL(ipv6_sock_mc_drop); /* called with rcu_read_lock() */ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, @@ -438,7 +436,7 @@ done: read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) - return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); + err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; } @@ -825,7 +823,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, struct ifmcaddr6 *mc; mc = kzalloc(sizeof(*mc), GFP_ATOMIC); - if (mc == NULL) + if (!mc) return NULL; setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); @@ -862,7 +860,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) /* we need to take a reference on idev */ idev = in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -EINVAL; write_lock_bh(&idev->lock); @@ -1330,7 +1328,7 @@ int igmp6_event_query(struct sk_buff *skb) return -EINVAL; idev = __in6_dev_get(skb->dev); - if (idev == NULL) + if (!idev) return 0; mld = (struct mld_msg *)icmp6_hdr(skb); @@ -1445,7 +1443,7 @@ int igmp6_event_report(struct sk_buff *skb) return -EINVAL; idev = __in6_dev_get(skb->dev); - if (idev == NULL) + if (!idev) return -ENODEV; /* @@ -1964,7 +1962,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); - if (skb == NULL) { + if (!skb) { rcu_read_lock(); IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTDISCARDS); @@ -2613,7 +2611,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr im = im->next; while (!im) { - if (likely(state->idev != NULL)) + if (likely(state->idev)) read_unlock_bh(&state->idev->lock); state->dev = next_net_device_rcu(state->dev); @@ -2659,7 +2657,7 @@ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - if (likely(state->idev != NULL)) { + if (likely(state->idev)) { read_unlock_bh(&state->idev->lock); state->idev = NULL; } @@ -2728,10 +2726,10 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) continue; read_lock_bh(&idev->lock); im = idev->mc_list; - if (likely(im != NULL)) { + if (likely(im)) { spin_lock_bh(&im->mca_lock); psf = im->mca_sources; - if (likely(psf != NULL)) { + if (likely(psf)) { state->im = im; state->idev = idev; break; @@ -2752,7 +2750,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s spin_unlock_bh(&state->im->mca_lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) + if (likely(state->idev)) read_unlock_bh(&state->idev->lock); state->dev = next_net_device_rcu(state->dev); @@ -2806,11 +2804,11 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - if (likely(state->im != NULL)) { + if (likely(state->im)) { spin_unlock_bh(&state->im->mca_lock); state->im = NULL; } - if (likely(state->idev != NULL)) { + if (likely(state->idev)) { read_unlock_bh(&state->idev->lock); state->idev = NULL; } @@ -2907,20 +2905,32 @@ static int __net_init igmp6_net_init(struct net *net) inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; + err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6, + SOCK_RAW, IPPROTO_ICMPV6, net); + if (err < 0) { + pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n", + err); + goto out_sock_create; + } + err = igmp6_proc_init(net); if (err) - goto out_sock_create; -out: - return err; + goto out_sock_create_autojoin; + + return 0; +out_sock_create_autojoin: + inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); out_sock_create: inet_ctl_sock_destroy(net->ipv6.igmp_sk); - goto out; +out: + return err; } static void __net_exit igmp6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.igmp_sk); + inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); igmp6_proc_exit(net); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 471ed24aabae..c283827d60e2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -84,6 +84,7 @@ do { \ static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); +static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -117,7 +118,9 @@ static const struct neigh_ops ndisc_direct_ops = { struct neigh_table nd_tbl = { .family = AF_INET6, .key_len = sizeof(struct in6_addr), + .protocol = cpu_to_be16(ETH_P_IPV6), .hash = ndisc_hash, + .key_eq = ndisc_key_eq, .constructor = ndisc_constructor, .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, @@ -294,6 +297,11 @@ static u32 ndisc_hash(const void *pkey, return ndisc_hashfn(pkey, dev, hash_rnd); } +static bool ndisc_key_eq(const struct neighbour *n, const void *pkey) +{ + return neigh_key_eq128(n, pkey); +} + static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; @@ -303,7 +311,7 @@ static int ndisc_constructor(struct neighbour *neigh) bool is_multicast = ipv6_addr_is_multicast(addr); in6_dev = in6_dev_get(dev); - if (in6_dev == NULL) { + if (!in6_dev) { return -EINVAL; } @@ -348,7 +356,7 @@ static int pndisc_constructor(struct pneigh_entry *n) struct in6_addr maddr; struct net_device *dev = n->dev; - if (dev == NULL || __in6_dev_get(dev) == NULL) + if (!dev || !__in6_dev_get(dev)) return -EINVAL; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); @@ -361,7 +369,7 @@ static void pndisc_destructor(struct pneigh_entry *n) struct in6_addr maddr; struct net_device *dev = n->dev; - if (dev == NULL || __in6_dev_get(dev) == NULL) + if (!dev || !__in6_dev_get(dev)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); @@ -552,7 +560,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, int optlen = 0; struct nd_msg *msg; - if (saddr == NULL) { + if (!saddr) { if (ipv6_get_lladdr(dev, &addr_buf, (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))) return; @@ -1022,13 +1030,13 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); skb = nlmsg_new(msg_size, GFP_ATOMIC); - if (skb == NULL) { + if (!skb) { err = -ENOBUFS; goto errout; } nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); - if (nlh == NULL) { + if (!nlh) { goto nla_put_failure; } @@ -1041,8 +1049,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); - if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), - &ipv6_hdr(ra)->saddr)) + if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -1096,7 +1103,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) */ in6_dev = __in6_dev_get(skb->dev); - if (in6_dev == NULL) { + if (!in6_dev) { ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", skb->dev->name); return; @@ -1191,11 +1198,11 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", rt, lifetime, skb->dev->name); - if (rt == NULL && lifetime) { + if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); - if (rt == NULL) { + if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", __func__); @@ -1203,7 +1210,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); - if (neigh == NULL) { + if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a069822936e6..ca6998345b42 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,14 +25,16 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +if NF_TABLES + config NF_TABLES_IPV6 - depends on NF_TABLES tristate "IPv6 nf_tables support" help This option enables the IPv6 support for nf_tables. +if NF_TABLES_IPV6 + config NFT_CHAIN_ROUTE_IPV6 - depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" help This option enables the "route" chain for IPv6 in nf_tables. This @@ -40,16 +42,18 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. -config NF_REJECT_IPV6 - tristate "IPv6 packet rejection" - default m if NETFILTER_ADVANCED=n - config NFT_REJECT_IPV6 - depends on NF_TABLES_IPV6 select NF_REJECT_IPV6 default NFT_REJECT tristate +endif # NF_TABLES_IPV6 +endif # NF_TABLES + +config NF_REJECT_IPV6 + tristate "IPv6 packet rejection" + default m if NETFILTER_ADVANCED=n + config NF_LOG_IPV6 tristate "IPv6 packet logging" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e080fbbbc0e5..83f59dc3cccc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -9,7 +9,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> #include <linux/capability.h> #include <linux/in.h> #include <linux/skbuff.h> @@ -234,7 +237,7 @@ static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 4, + .level = LOGLEVEL_WARNING, .logflags = NF_LOG_MASK, }, }, @@ -298,9 +301,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 544b0a9da1b5..12331efd49cf 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -83,7 +83,8 @@ static int reject_tg6_check(const struct xt_tgchk_param *par) return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ - if (e->ipv6.proto != IPPROTO_TCP || + if (!(e->ipv6.flags & IP6T_F_PROTO) || + e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { pr_info("TCP_RESET illegal for non-tcp\n"); return -EINVAL; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b68d0e59c1f8..fba91c6fc7ca 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -290,10 +290,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) static int ipv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, - &tuple->src.u3.ip6) || - nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, - &tuple->dst.u3.ip6)) + if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || + nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) goto nla_put_failure; return 0; @@ -312,10 +310,8 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[], if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) return -EINVAL; - memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]), - sizeof(u_int32_t) * 4); - memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]), - sizeof(u_int32_t) * 4); + t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); + t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); return 0; } diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index ddf07e6f59d7..8dd869642f45 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index d05b36440e8b..3afdce03d94e 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, - __be16 protocol, int hoplimit) + __u8 protocol, int hoplimit) { struct ipv6hdr *ip6h; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); @@ -208,4 +208,39 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) } EXPORT_SYMBOL_GPL(nf_send_reset6); +static bool reject6_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto; + + if (skb->csum_bad) + return false; + + if (skb_csum_unnecessary(skb)) + return true; + + proto = ip6h->nexthdr; + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; +} + +void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, + unsigned char code, unsigned int hooknum) +{ + if (!reject6_csum_ok(skb_in, hooknum)) + return; + + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = net->loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach6); + MODULE_LICENSE("GPL"); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 74581f706c4d..4016a6ef9d61 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,13 +9,14 @@ #include <net/addrconf.h> #include <net/secure_seq.h> -static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, - struct in6_addr *src) +static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, + struct in6_addr *dst, struct in6_addr *src) { u32 hash, id; hash = __ipv6_addr_jhash(dst, hashrnd); hash = __ipv6_addr_jhash(src, hash); + hash ^= net_hash_mix(net); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -36,7 +37,7 @@ static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, * * The network header must be set before calling this. */ -void ipv6_proxy_select_ident(struct sk_buff *skb) +void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; @@ -53,20 +54,21 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) net_get_random_once(&ip6_proxy_idents_hashrnd, sizeof(ip6_proxy_idents_hashrnd)); - id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, + id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, &addrs[1], &addrs[0]); skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, + struct rt6_info *rt) { static u32 ip6_idents_hashrnd __read_mostly; u32 id; net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr, + id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr, &rt->rt6i_src.addr); fhdr->identification = htonl(id); } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bd46f736f61d..263a5164a6f5 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -77,8 +77,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -102,9 +101,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name); - if (msg->msg_namelen < sizeof(struct sockaddr_in6) || - u->sin6_family != AF_INET6) { + if (msg->msg_namelen < sizeof(*u)) return -EINVAL; + if (u->sin6_family != AF_INET6) { + return -EAFNOSUPPORT; } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != u->sin6_scope_id) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0d84b2c7f24e..79ccdb4c1b33 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -172,7 +172,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk == NULL) + if (!sk) goto out; net = dev_net(skb->dev); @@ -367,7 +367,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk != NULL) { + if (sk) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; saddr = &ip6h->saddr; @@ -456,9 +456,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) * we return it, otherwise we block. */ -static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) +static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -631,7 +630,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); - if (skb == NULL) + if (!skb) goto error; skb_reserve(skb, hlen); @@ -730,8 +729,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -791,7 +789,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -833,13 +831,13 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (opt == NULL) + if (!opt) opt = np->opt; if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); @@ -1132,7 +1130,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) + if (skb) amount = skb_tail_pointer(skb) - skb_transport_header(skb); spin_unlock_bh(&sk->sk_receive_queue.lock); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index d7d70e69973b..8ffa2c8cce77 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -430,7 +430,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int i, plen = 0; clone = alloc_skb(0, GFP_ATOMIC); - if (clone == NULL) + if (!clone) goto out_oom; clone->next = head->next; head->next = clone; @@ -552,7 +552,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); - if (fq != NULL) { + if (fq) { int ret; spin_lock(&fq->q.lock); @@ -632,7 +632,7 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) table = ip6_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); - if (table == NULL) + if (!table) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; @@ -648,7 +648,7 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) } hdr = register_net_sysctl(net, "net/ipv6", table); - if (hdr == NULL) + if (!hdr) goto err_reg; net->ipv6.sysctl.frags_hdr = hdr; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4688bd4d7f59..5c48293ff062 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -194,7 +194,6 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, @@ -236,7 +235,6 @@ static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, .mtu = ip6_blackhole_mtu, @@ -1478,7 +1476,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, int remaining; u32 *mp; - if (cfg->fc_mx == NULL) + if (!cfg->fc_mx) return 0; mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); @@ -2400,6 +2398,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_PREF] = { .type = NLA_U8 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2407,6 +2406,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, { struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; + unsigned int pref; int err; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); @@ -2438,7 +2438,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_nlinfo.nl_net = sock_net(skb->sk); if (tb[RTA_GATEWAY]) { - nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); + cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; } @@ -2461,7 +2461,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, } if (tb[RTA_PREFSRC]) - nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); + cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); if (tb[RTA_OIF]) cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); @@ -2482,6 +2482,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); } + if (tb[RTA_PREF]) { + pref = nla_get_u8(tb[RTA_PREF]); + if (pref != ICMPV6_ROUTER_PREF_LOW && + pref != ICMPV6_ROUTER_PREF_HIGH) + pref = ICMPV6_ROUTER_PREF_MEDIUM; + cfg->fc_flags |= RTF_PREF(pref); + } + err = 0; errout: return err; @@ -2511,7 +2519,7 @@ beginning: nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_gateway = nla_get_in6_addr(nla); r_cfg.fc_flags |= RTF_GATEWAY; } } @@ -2585,7 +2593,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ + nla_total_size(sizeof(struct rta_cacheinfo)) - + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ + + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + + nla_total_size(1); /* RTA_PREF */ } static int rt6_fill_node(struct net *net, @@ -2660,19 +2669,19 @@ static int rt6_fill_node(struct net *net, rtm->rtm_flags |= RTM_F_CLONED; if (dst) { - if (nla_put(skb, RTA_DST, 16, dst)) + if (nla_put_in6_addr(skb, RTA_DST, dst)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) + if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { - if (nla_put(skb, RTA_SRC, 16, src)) + if (nla_put_in6_addr(skb, RTA_SRC, src)) goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && - nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) + nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) goto nla_put_failure; #endif if (iif) { @@ -2696,14 +2705,14 @@ static int rt6_fill_node(struct net *net, } else if (dst) { struct in6_addr saddr_buf; if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && - nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } if (rt->rt6i_prefsrc.plen) { struct in6_addr saddr_buf; saddr_buf = rt->rt6i_prefsrc.addr; - if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } @@ -2711,7 +2720,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) goto nla_put_failure; } @@ -2726,6 +2735,9 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e4cbd5798eba..e6b9f51b15e8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -118,7 +118,7 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, return t; } t = rcu_dereference(sitn->tunnels_wc[0]); - if ((t != NULL) && (t->dev->flags & IFF_UP)) + if (t && (t->dev->flags & IFF_UP)) return t; return NULL; } @@ -251,7 +251,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); - if (dev == NULL) + if (!dev) return NULL; dev_net_set(dev, net); @@ -555,7 +555,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) skb->dev, iph->daddr, iph->saddr); - if (t == NULL) + if (!t) goto out; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { @@ -671,7 +671,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); - if (tunnel != NULL) { + if (tunnel) { struct pcpu_sw_netstats *tstats; if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && @@ -733,7 +733,7 @@ static int ipip_rcv(struct sk_buff *skb) iph = ip_hdr(skb); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); - if (tunnel != NULL) { + if (tunnel) { if (tunnel->parms.iph.protocol != IPPROTO_IPIP && tunnel->parms.iph.protocol != 0) goto drop; @@ -838,7 +838,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); - if (neigh == NULL) { + if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -867,7 +867,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); - if (neigh == NULL) { + if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -1158,7 +1158,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } t = ipip6_tunnel_locate(net, &p, 0); - if (t == NULL) + if (!t) t = netdev_priv(dev); } @@ -1206,7 +1206,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -1242,7 +1242,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -ENOENT; t = ipip6_tunnel_locate(net, &p, 0); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(sitn->fb_tunnel_dev)) @@ -1530,8 +1530,7 @@ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], if (data[IFLA_IPTUN_6RD_PREFIX]) { ret = true; - nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX], - sizeof(struct in6_addr)); + ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]); } if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { @@ -1683,8 +1682,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || - nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, @@ -1694,10 +1693,10 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; #ifdef CONFIG_IPV6_SIT_6RD - if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr), - &tunnel->ip6rd.prefix) || - nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, - tunnel->ip6rd.relay_prefix) || + if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX, + &tunnel->ip6rd.prefix) || + nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, + tunnel->ip6rd.relay_prefix) || nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, tunnel->ip6rd.prefixlen) || nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, @@ -1795,7 +1794,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net, struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7337fc7947e2..21bc2eb53c57 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct sock *child; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) + if (child) { + atomic_set(&req->rsk_refcnt, 1); inet_csk_reqsk_queue_add(sk, req, child); - else + } else { reqsk_free(req); - + } return child; } @@ -189,13 +190,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk); if (!req) goto out; ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->listener = NULL; + treq->tfo_listener = false; if (security_inet_conn_request(sk, skb, req)) goto out_free; @@ -220,7 +221,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ir_mark = inet_request_mark(sk, skb); - req->expires = 0UL; req->num_retrans = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index c5c10fafcfe2..abcc79f649b3 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -54,6 +54,20 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "idgen_retries", + .data = &init_net.ipv6.sysctl.idgen_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "idgen_delay", + .data = &init_net.ipv6.sysctl.idgen_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; @@ -93,6 +107,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; + ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; + ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) @@ -163,7 +179,7 @@ int ipv6_sysctl_register(void) int err = -ENOMEM; ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable); - if (ip6_header == NULL) + if (!ip6_header) goto out; err = register_pernet_subsys(&ipv6_sysctl_net_ops); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5d46832c6f72..7cdad8401434 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -104,19 +104,6 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static void tcp_v6_hash(struct sock *sk) -{ - if (sk->sk_state != TCP_CLOSE) { - if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) { - tcp_prot.hash(sk); - return; - } - local_bh_disable(); - __inet6_hash(sk, NULL); - local_bh_enable(); - } -} - static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, @@ -154,7 +141,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; fl6_sock_release(flowlabel); } @@ -233,11 +220,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; - } else { - ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &sk->sk_v6_rcv_saddr); } + np->saddr = sk->sk_v6_rcv_saddr; return err; } @@ -263,7 +247,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } - if (saddr == NULL) { + if (!saddr) { saddr = &fl6.saddr; sk->sk_v6_rcv_saddr = *saddr; } @@ -340,18 +324,20 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); + struct net *net = dev_net(skb->dev); + struct request_sock *fastopen; struct ipv6_pinfo *np; - struct sock *sk; - int err; struct tcp_sock *tp; - struct request_sock *fastopen; __u32 seq, snd_una; - struct net *net = dev_net(skb->dev); + struct sock *sk; + int err; - sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, - th->dest, &hdr->saddr, th->source, skb->dev->ifindex); + sk = __inet6_lookup_established(net, &tcp_hashinfo, + &hdr->daddr, th->dest, + &hdr->saddr, ntohs(th->source), + skb->dev->ifindex); - if (sk == NULL) { + if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; @@ -361,6 +347,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet_twsk_put(inet_twsk(sk)); return; } + seq = ntohl(th->seq); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return tcp_req_err(sk, seq); bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -375,7 +364,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } tp = tcp_sk(sk); - seq = ntohl(th->seq); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = tp->fastopen_rsk; snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; @@ -419,37 +407,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Might be for an request_sock */ switch (sk->sk_state) { - struct request_sock *req, **prev; - case TCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - /* Note : We use inet6_iif() here, not tcp_v6_iif() */ - req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, inet6_iif(skb)); - if (!req) - goto out; - - /* ICMPs are not backlogged, hence we cannot get - * an established socket here. - */ - WARN_ON(req->sk != NULL); - - if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - inet_csk_reqsk_queue_drop(sk, req, prev); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - goto out; - case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is * is already accepted it is treated as a connected one below. */ - if (fastopen && fastopen->sk == NULL) + if (fastopen && !fastopen->sk) break; if (!sock_owned_by_user(sk)) { @@ -497,7 +460,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; - if (np->repflow && (ireq->pktopts != NULL)) + if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); skb_set_queue_mapping(skb, queue_mapping); @@ -523,17 +486,11 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, - struct sock *addr_sk) + const struct sock *addr_sk) { return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } -static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, - struct request_sock *req) -{ - return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); -} - static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, int optlen) { @@ -619,9 +576,9 @@ clear_hash_noput: return 1; } -static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v6_md5_hash_skb(char *md5_hash, + const struct tcp_md5sig_key *key, const struct sock *sk, - const struct request_sock *req, const struct sk_buff *skb) { const struct in6_addr *saddr, *daddr; @@ -629,12 +586,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, struct hash_desc *desc; const struct tcphdr *th = tcp_hdr(skb); - if (sk) { - saddr = &inet6_sk(sk)->saddr; + if (sk) { /* valid for establish/request sockets */ + saddr = &sk->sk_v6_rcv_saddr; daddr = &sk->sk_v6_daddr; - } else if (req) { - saddr = &inet_rsk(req)->ir_v6_loc_addr; - daddr = &inet_rsk(req)->ir_v6_rmt_addr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; @@ -670,8 +624,7 @@ clear_hash_noput: return 1; } -static int __tcp_v6_inbound_md5_hash(struct sock *sk, - const struct sk_buff *skb) +static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; @@ -685,44 +638,32 @@ static int __tcp_v6_inbound_md5_hash(struct sock *sk, /* We've parsed the options - do we have a hash? */ if (!hash_expected && !hash_location) - return 0; + return false; if (hash_expected && !hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return 1; + return true; } if (!hash_expected && hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - return 1; + return true; } /* check the signature */ genhash = tcp_v6_md5_hash_skb(newhash, hash_expected, - NULL, NULL, skb); + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); - return 1; + return true; } - return 0; -} - -static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) -{ - int ret; - - rcu_read_lock(); - ret = __tcp_v6_inbound_md5_hash(sk, skb); - rcu_read_unlock(); - - return ret; + return false; } - #endif static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, @@ -734,8 +675,6 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - ireq->ir_iif = sk->sk_bound_dev_if; - /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) @@ -774,7 +713,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG - .md5_lookup = tcp_v6_reqsk_md5_lookup, + .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif .init_req = tcp_v6_init_req, @@ -811,7 +750,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); - if (buff == NULL) + if (!buff) return; skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); @@ -931,7 +870,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) if (!key) goto release_sk1; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb); + genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; } else { @@ -997,17 +936,19 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct request_sock *req, **prev; const struct tcphdr *th = tcp_hdr(skb); + struct request_sock *req; struct sock *nsk; /* Find possible connection requests. */ - req = inet6_csk_search_req(sk, &prev, th->source, + req = inet6_csk_search_req(sk, th->source, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); - if (req) - return tcp_check_req(sk, skb, req, prev, false); - + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + reqsk_put(req); + return nsk; + } nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), @@ -1067,7 +1008,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst); - if (newsk == NULL) + if (!newsk) return NULL; newtcp6sk = (struct tcp6_sock *)newsk; @@ -1079,11 +1020,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); - - ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - - newsk->sk_v6_rcv_saddr = newnp->saddr; + newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1128,7 +1065,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } newsk = tcp_create_openreq_child(sk, req, skb); - if (newsk == NULL) + if (!newsk) goto out_nonewsk; /* @@ -1170,7 +1107,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq->pktopts != NULL) { + if (ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); @@ -1215,7 +1152,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); - if (key != NULL) { + if (key) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key @@ -1232,7 +1169,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_done(newsk); goto out; } - __inet6_hash(newsk, NULL); + __inet_hash(newsk, NULL); return newsk; @@ -1538,7 +1475,7 @@ do_time_wait: &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), tcp_v6_iif(skb)); - if (sk2 != NULL) { + if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); @@ -1584,7 +1521,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = sk->sk_rx_dst; if (dst) @@ -1689,9 +1626,9 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - const struct sock *sk, struct request_sock *req, int i, kuid_t uid) + struct request_sock *req, int i, kuid_t uid) { - int ttd = req->expires - jiffies; + long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; @@ -1827,7 +1764,7 @@ static int tcp6_seq_show(struct seq_file *seq, void *v) get_tcp6_sock(seq, v, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid); + get_openreq6(seq, v, st->num, st->uid); break; } out: @@ -1891,7 +1828,7 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .hash = tcp_v6_hash, + .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index c1ab77105b4c..d883c9204c01 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -41,8 +41,8 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff) return tcp_gro_complete(skb); } -struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d048d46779fc..120aff9aa010 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,11 +53,11 @@ #include <trace/events/skb.h> #include "udp_impl.h" -static unsigned int udp6_ehashfn(struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +static u32 udp6_ehashfn(const struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) { static u32 udp6_ehash_secret __read_mostly; static u32 udp_ipv6_hash_secret __read_mostly; @@ -104,9 +104,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 0; } -static unsigned int udp6_portaddr_hash(struct net *net, - const struct in6_addr *addr6, - unsigned int port) +static u32 udp6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) { unsigned int hash, mix = net_hash_mix(net); @@ -391,8 +391,7 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup); * return it, otherwise we block. */ -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -551,7 +550,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable); - if (sk == NULL) { + if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; @@ -649,7 +648,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { + if (skb->len > sizeof(struct udphdr) && encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -750,7 +749,7 @@ static void flush_stack(struct sock **stack, unsigned int count, for (i = 0; i < count; i++) { sk = stack[i]; - if (likely(skb1 == NULL)) + if (likely(!skb1)) skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); if (!skb1) { atomic_inc(&sk->sk_drops); @@ -900,7 +899,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * for sock caches... i'll skip this for now. */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); - if (sk != NULL) { + if (sk) { int ret; if (!uh->check && !udp_sk(sk)->no_check6_rx) { @@ -1101,8 +1100,7 @@ out: return err; } -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; struct udp_sock *up = udp_sk(sk); @@ -1164,12 +1162,12 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, do_udp_sendmsg: if (__ipv6_only_sock(sk)) return -ENETUNREACH; - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); } } if (up->pending == AF_INET) - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). @@ -1209,7 +1207,7 @@ do_udp_sendmsg: fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -1257,14 +1255,14 @@ do_udp_sendmsg: } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; connected = 0; } - if (opt == NULL) + if (!opt) opt = np->opt; if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index c779c3c90b9d..0682c031ccdc 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -23,10 +23,9 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index ab889bb16b3c..7441e1e63893 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -54,7 +54,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Set the IPv6 fragment id if not set yet */ if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(skb); + ipv6_proxy_select_ident(dev_net(skb->dev), skb); segs = NULL; goto out; @@ -112,11 +112,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - if (skb_shinfo(skb)->ip6_frag_id) - fptr->identification = skb_shinfo(skb)->ip6_frag_id; - else - ipv6_select_ident(fptr, - (struct rt6_info *)skb_dst(skb)); + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(dev_net(skb->dev), skb); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 9949a356d62c..1e205c3253ac 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -95,8 +95,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(skb->len - size); - ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6; - ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6; + ip6h->daddr = x->sel.daddr.in6; + ip6h->saddr = x->sel.saddr.in6; err = 0; out: return err; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index ca3f29b98ae5..010f8bd2d577 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -114,6 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) return err; skb->ignore_df = 1; + skb->protocol = htons(ETH_P_IPV6); return x->outer_mode->output2(x, skb); } @@ -122,7 +123,6 @@ EXPORT_SYMBOL(xfrm6_prepare_output); int xfrm6_output_finish(struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - skb->protocol = htons(ETH_P_IPV6); #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 48bf5a06847b..f337a908a76a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -61,9 +61,7 @@ static int xfrm6_get_saddr(struct net *net, return -EHOSTUNREACH; dev = ip6_dst_idev(dst)->dev; - ipv6_dev_get_saddr(dev_net(dev), dev, - (struct in6_addr *)&daddr->a6, 0, - (struct in6_addr *)&saddr->a6); + ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; } @@ -200,6 +198,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: + offset += ipv6_optlen(exthdr); if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; @@ -292,7 +291,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, @@ -370,7 +368,7 @@ static void __net_exit xfrm6_net_exit(struct net *net) { struct ctl_table *table; - if (net->ipv6.sysctl.xfrm6_hdr == NULL) + if (!net->ipv6.sysctl.xfrm6_hdr) return; table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f11ad1d95e0e..4ea5d7497b5f 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1688,8 +1688,7 @@ out: return rc; } -static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); @@ -1754,8 +1753,8 @@ out: } -static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 568edc72d737..ee0ea25c8e7a 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1256,14 +1256,13 @@ static int irda_release(struct socket *sock) } /* - * Function irda_sendmsg (iocb, sock, msg, len) + * Function irda_sendmsg (sock, msg, len) * * Send message down to TinyTP. This function is used for both STREAM and * SEQPACK services. This is possible since it forces the client to * fragment the message if necessary */ -static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; @@ -1348,13 +1347,13 @@ out: } /* - * Function irda_recvmsg_dgram (iocb, sock, msg, size, flags) + * Function irda_recvmsg_dgram (sock, msg, size, flags) * * Try to receive message and copy it to user. The frame is discarded * after being read, regardless of how much the user actually read */ -static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); @@ -1398,10 +1397,10 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, } /* - * Function irda_recvmsg_stream (iocb, sock, msg, size, flags) + * Function irda_recvmsg_stream (sock, msg, size, flags) */ -static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); @@ -1515,14 +1514,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, } /* - * Function irda_sendmsg_dgram (iocb, sock, msg, len) + * Function irda_sendmsg_dgram (sock, msg, len) * * Send message down to TinyTP for the unreliable sequenced * packet service... * */ -static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; @@ -1594,14 +1593,14 @@ out: } /* - * Function irda_sendmsg_ultra (iocb, sock, msg, len) + * Function irda_sendmsg_ultra (sock, msg, len) * * Send message down to IrLMP for the unreliable Ultra * packet service... */ #ifdef CONFIG_IRDA_ULTRA -static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 40695b9751c1..683346d2d633 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -798,7 +798,9 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) orig_jiffies = jiffies; /* Set poll time to 200 ms */ - poll_time = IRDA_MIN(timeout, msecs_to_jiffies(200)); + poll_time = msecs_to_jiffies(200); + if (timeout) + poll_time = min_t(unsigned long, timeout, poll_time); spin_lock_irqsave(&self->spinlock, flags); while (self->tx_skb && self->tx_skb->len) { @@ -811,7 +813,7 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) break; } spin_unlock_irqrestore(&self->spinlock, flags); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); } /* diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 3c83a1e5ab03..1215693fdd22 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -305,7 +305,7 @@ irnet_ctrl_read(irnet_socket * ap, /* Put ourselves on the wait queue to be woken up */ add_wait_queue(&irnet_events.rwait, &wait); - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); for(;;) { /* If there is unread events */ @@ -321,7 +321,7 @@ irnet_ctrl_read(irnet_socket * ap, /* Yield and wait to be woken up */ schedule(); } - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&irnet_events.rwait, &wait); /* Did we got it ? */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 2e9953b2db84..94b4c898a116 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1026,8 +1026,8 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, (void *) prmdata, 8); } -static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); @@ -1317,8 +1317,8 @@ static void iucv_process_message_q(struct sock *sk) } } -static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/key/af_key.c b/net/key/af_key.c index f8ac939d52b4..f0d52d721b3a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -709,7 +709,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port sin6->sin6_family = AF_INET6; sin6->sin6_port = port; sin6->sin6_flowinfo = 0; - sin6->sin6_addr = *(struct in6_addr *)xaddr->a6; + sin6->sin6_addr = xaddr->in6; sin6->sin6_scope_id = 0; return 128; } @@ -3588,8 +3588,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } #endif -static int pfkey_sendmsg(struct kiocb *kiocb, - struct socket *sock, struct msghdr *msg, size_t len) +static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb = NULL; @@ -3630,8 +3629,7 @@ out: return err ? : len; } -static int pfkey_recvmsg(struct kiocb *kiocb, - struct socket *sock, struct msghdr *msg, size_t len, +static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 781b3a226ba7..4b552873b556 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -74,7 +74,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) priv->dev = dev; eth_hw_addr_random(dev); - memset(&dev->broadcast[0], 0xff, 6); + eth_broadcast_addr(dev->broadcast); dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock; return 0; } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 05dfc8aa36af..79649937ec71 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -385,7 +385,7 @@ drop: /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ -static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) +static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct sk_buff *skb; int rc; @@ -506,7 +506,7 @@ no_route: goto out; } -static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8611f1b63141..d1ded3777815 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -480,8 +480,7 @@ out: /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ -static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); @@ -643,9 +642,8 @@ do_confirm: goto done; } -static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index b4e923f77954..9e13c2ff8789 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -205,9 +205,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info #endif if (info->attrs[L2TP_ATTR_IP_SADDR] && info->attrs[L2TP_ATTR_IP_DADDR]) { - cfg.local_ip.s_addr = nla_get_be32( + cfg.local_ip.s_addr = nla_get_in_addr( info->attrs[L2TP_ATTR_IP_SADDR]); - cfg.peer_ip.s_addr = nla_get_be32( + cfg.peer_ip.s_addr = nla_get_in_addr( info->attrs[L2TP_ATTR_IP_DADDR]); } else { ret = -EINVAL; @@ -376,15 +376,17 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (np) { - if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr), - &np->saddr) || - nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr), - &sk->sk_v6_daddr)) + if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR, + &np->saddr) || + nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR, + &sk->sk_v6_daddr)) goto nla_put_failure; } else #endif - if (nla_put_be32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr) || - nla_put_be32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr)) + if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR, + inet->inet_saddr) || + nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR, + inet->inet_daddr)) goto nla_put_failure; break; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index cc7a828fc914..e9b0dec56b8e 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -185,9 +185,8 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) /* Receive message. This is the recvmsg for the PPPoL2TP socket. */ -static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, - int flags) +static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int err; struct sk_buff *skb; @@ -295,7 +294,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) * when a user application does a sendmsg() on the session socket. L2TP and * PPP headers must be inserted into the user's data. */ -static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, +static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { static const unsigned char ppph[2] = { 0xff, 0x03 }; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2c0b83ce43bd..17a8dff06090 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -704,8 +704,8 @@ out: * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. */ -static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name); const int nonblock = flags & MSG_DONTWAIT; @@ -878,8 +878,7 @@ copy_uaddr: * Transmit data provided by the socket user. * Returns non-negative upon success, negative otherwise. */ -static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e4dd2fc34de3..265e42721a66 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1494,7 +1494,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, if (next_hop_sta) memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN); else - memset(next_hop, 0, ETH_ALEN); + eth_zero_addr(next_hop); memset(pinfo, 0, sizeof(*pinfo)); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index ff0d2db09df9..5bcd4e5589d3 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1508,6 +1508,8 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) if (ieee80211_chanctx_refcount(local, ctx) == 0) ieee80211_free_chanctx(local, ctx); + sdata->radar_required = false; + /* Unreserving may ready an in-place reservation. */ if (use_reserved_switch) ieee80211_vif_use_reserved_switch(local); @@ -1566,6 +1568,9 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); out: + if (ret) + sdata->radar_required = false; + mutex_unlock(&local->chanctx_mtx); return ret; } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 52d629d5e797..bfef1b215050 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1841,7 +1841,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) ieee80211_ibss_disconnect(sdata); ifibss->ssid_len = 0; - memset(ifibss->bssid, 0, ETH_ALEN); + eth_zero_addr(ifibss->bssid); /* remove beacon */ kfree(sdata->u.ibss.ie); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 81340abb3876..487f5e2a9283 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -58,13 +58,24 @@ struct ieee80211_local; #define IEEE80211_UNSET_POWER_LEVEL INT_MIN /* - * Some APs experience problems when working with U-APSD. Decrease the - * probability of that happening by using legacy mode for all ACs but VO. - * The AP that caused us trouble was a Cisco 4410N. It ignores our - * setting, and always treats non-VO ACs as legacy. + * Some APs experience problems when working with U-APSD. Decreasing the + * probability of that happening by using legacy mode for all ACs but VO isn't + * enough. + * + * Cisco 4410N originally forced us to enable VO by default only because it + * treated non-VO ACs as legacy. + * + * However some APs (notably Netgear R7000) silently reclassify packets to + * different ACs. Since u-APSD ACs require trigger frames for frame retrieval + * clients would never see some frames (e.g. ARP responses) or would fetch them + * accidentally after a long time. + * + * It makes little sense to enable u-APSD queues by default because it needs + * userspace applications to be aware of it to actually take advantage of the + * possible additional powersavings. Implicitly depending on driver autotrigger + * frame support doesn't make much sense. */ -#define IEEE80211_DEFAULT_UAPSD_QUEUES \ - IEEE80211_WMM_IE_STA_QOSINFO_AC_VO +#define IEEE80211_DEFAULT_UAPSD_QUEUES 0 #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL @@ -453,6 +464,7 @@ struct ieee80211_if_managed { unsigned int flags; bool csa_waiting_bcn; + bool csa_ignored_same_chan; bool beacon_crc_valid; u32 beacon_crc; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index acf441ff9f4a..d4684242e78b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -520,7 +520,7 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, } else { *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ - memset(hdr->addr1, 0, ETH_ALEN); /* RA is resolved later */ + eth_zero_addr(hdr->addr1); /* RA is resolved later */ memcpy(hdr->addr2, meshsa, ETH_ALEN); memcpy(hdr->addr3, meshda, ETH_ALEN); memcpy(hdr->addr4, meshsa, ETH_ALEN); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0cbcde11fae3..00103f36dcbf 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1150,6 +1150,17 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } + if (cfg80211_chandef_identical(&csa_ie.chandef, + &sdata->vif.bss_conf.chandef)) { + if (ifmgd->csa_ignored_same_chan) + return; + sdata_info(sdata, + "AP %pM tries to chanswitch to same channel, ignore\n", + ifmgd->associated->bssid); + ifmgd->csa_ignored_same_chan = true; + return; + } + mutex_lock(&local->mtx); mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(sdata->vif.chanctx_conf, @@ -1194,6 +1205,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->vif.csa_active = true; sdata->csa_chandef = csa_ie.chandef; sdata->csa_block_tx = csa_ie.mode; + ifmgd->csa_ignored_same_chan = false; if (sdata->csa_block_tx) ieee80211_stop_vif_queues(local, sdata, @@ -2019,7 +2031,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_flush_queues(local, sdata, false); /* clear bssid only after building the needed mgmt frames */ - memset(ifmgd->bssid, 0, ETH_ALEN); + eth_zero_addr(ifmgd->bssid); /* remove AP and TDLS peers */ sta_info_flush(sdata); @@ -2076,6 +2088,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; + ifmgd->csa_ignored_same_chan = false; if (sdata->csa_block_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -2469,7 +2482,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, auth_data->bss->bssid); - memset(sdata->u.mgd.bssid, 0, ETH_ALEN); + eth_zero_addr(sdata->u.mgd.bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; mutex_lock(&sdata->local->mtx); @@ -2791,7 +2804,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, assoc_data->bss->bssid); - memset(sdata->u.mgd.bssid, 0, ETH_ALEN); + eth_zero_addr(sdata->u.mgd.bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; mutex_lock(&sdata->local->mtx); @@ -3231,7 +3244,8 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_CHANNEL_SWITCH) | (1ULL << WLAN_EID_PWR_CONSTRAINT) | (1ULL << WLAN_EID_HT_CAPABILITY) | - (1ULL << WLAN_EID_HT_OPERATION); + (1ULL << WLAN_EID_HT_OPERATION) | + (1ULL << WLAN_EID_EXT_CHANSWITCH_ANN); static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, @@ -4553,7 +4567,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, return 0; err_clear: - memset(ifmgd->bssid, 0, ETH_ALEN); + eth_zero_addr(ifmgd->bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->auth_data = NULL; err_free: @@ -4897,7 +4911,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, return 0; err_clear: - memset(ifmgd->bssid, 0, ETH_ALEN); + eth_zero_addr(ifmgd->bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->assoc_data = NULL; err_free: diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 7c86a002df95..ef6e8a6c4253 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -373,7 +373,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, rate++; mi->sample_deferred++; } else { - if (!msr->sample_limit != 0) + if (!msr->sample_limit) return; mi->sample_packets++; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9eab44317c87..4f7b922cfda4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2214,6 +2214,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) hdr = (struct ieee80211_hdr *) skb->data; mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + if (ieee80211_drop_unencrypted(rx, hdr->frame_control)) + return RX_DROP_MONITOR; + /* frame is in RMC, don't forward */ if (ieee80211_is_data(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e5d679f38cc1..9f7fb4eec37b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -565,6 +565,7 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx) if (tx->sdata->control_port_no_encrypt) info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO; + info->flags |= IEEE80211_TX_CTL_USE_MINRATE; } return TX_CONTINUE; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 256647cb1d24..d1742a7d9ea4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3278,7 +3278,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, wdev_iter = &sdata_iter->wdev; if (sdata_iter == sdata || - rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL || + !ieee80211_sdata_running(sdata_iter) || local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) continue; diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index 98180a9fff4a..a0533357b9ea 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -1,4 +1,4 @@ -#ifndef __MAC802154_DRVIER_OPS +#ifndef __MAC802154_DRIVER_OPS #define __MAC802154_DRIVER_OPS #include <linux/types.h> @@ -220,4 +220,4 @@ drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) return local->ops->set_promiscuous_mode(&local->hw, on); } -#endif /* __MAC802154_DRVIER_OPS */ +#endif /* __MAC802154_DRIVER_OPS */ diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 6fb6bdf9868c..38b56f9d9386 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -174,24 +174,16 @@ ieee802154_check_mac_settings(struct ieee802154_local *local, } if (local->hw.flags & IEEE802154_HW_AFILT) { - if (wpan_dev->pan_id != nwpan_dev->pan_id) - return -EBUSY; - - if (wpan_dev->short_addr != nwpan_dev->short_addr) - return -EBUSY; - - if (wpan_dev->extended_addr != nwpan_dev->extended_addr) + if (wpan_dev->pan_id != nwpan_dev->pan_id || + wpan_dev->short_addr != nwpan_dev->short_addr || + wpan_dev->extended_addr != nwpan_dev->extended_addr) return -EBUSY; } if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { - if (wpan_dev->min_be != nwpan_dev->min_be) - return -EBUSY; - - if (wpan_dev->max_be != nwpan_dev->max_be) - return -EBUSY; - - if (wpan_dev->csma_retries != nwpan_dev->csma_retries) + if (wpan_dev->min_be != nwpan_dev->min_be || + wpan_dev->max_be != nwpan_dev->max_be || + wpan_dev->csma_retries != nwpan_dev->csma_retries) return -EBUSY; } diff --git a/net/mac802154/util.c b/net/mac802154/util.c index 5fc979027919..150bf807e572 100644 --- a/net/mac802154/util.c +++ b/net/mac802154/util.c @@ -65,8 +65,19 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, { if (ifs_handling) { struct ieee802154_local *local = hw_to_local(hw); + u8 max_sifs_size; - if (skb->len > 18) + /* If transceiver sets CRC on his own we need to use lifs + * threshold len above 16 otherwise 18, because it's not + * part of skb->len. + */ + if (hw->flags & IEEE802154_HW_TX_OMIT_CKSUM) + max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE - + IEEE802154_FCS_LEN; + else + max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE; + + if (skb->len > max_sifs_size) hrtimer_start(&local->ifs_timer, ktime_set(0, hw->phy->lifs_period * NSEC_PER_USEC), HRTIMER_MODE_REL); diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index 37421db88965..17bde799c854 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -1,9 +1,30 @@ # # MPLS configuration # + +menuconfig MPLS + bool "MultiProtocol Label Switching" + default n + ---help--- + MultiProtocol Label Switching routes packets through logical + circuits. Originally conceived as a way of routing packets at + hardware speeds (before hardware was capable of routing ipv4 packets), + MPLS remains a simple way of making tunnels. + + If you have not heard of MPLS you probably want to say N here. + +if MPLS + config NET_MPLS_GSO tristate "MPLS: GSO support" help This is helper module to allow segmentation of non-MPLS GSO packets that have had MPLS stack entries pushed onto them and thus become MPLS GSO packets. + +config MPLS_ROUTING + tristate "MPLS: routing support" + help + Add support for forwarding of mpls packets. + +endif # MPLS diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 6dec088c2d0f..65bbe68c72e6 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -2,3 +2,6 @@ # Makefile for MPLS. # obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o +obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o + +mpls_router-y := af_mpls.o diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c new file mode 100644 index 000000000000..db8a2ea6d4de --- /dev/null +++ b/net/mpls/af_mpls.c @@ -0,0 +1,1023 @@ +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/sysctl.h> +#include <linux/net.h> +#include <linux/module.h> +#include <linux/if_arp.h> +#include <linux/ipv6.h> +#include <linux/mpls.h> +#include <linux/vmalloc.h> +#include <net/ip.h> +#include <net/dst.h> +#include <net/sock.h> +#include <net/arp.h> +#include <net/ip_fib.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include "internal.h" + +#define LABEL_NOT_SPECIFIED (1<<20) +#define MAX_NEW_LABELS 2 + +/* This maximum ha length copied from the definition of struct neighbour */ +#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) + +struct mpls_route { /* next hop label forwarding entry */ + struct net_device __rcu *rt_dev; + struct rcu_head rt_rcu; + u32 rt_label[MAX_NEW_LABELS]; + u8 rt_protocol; /* routing protocol that set this entry */ + u8 rt_labels; + u8 rt_via_alen; + u8 rt_via_table; + u8 rt_via[0]; +}; + +static int zero = 0; +static int label_limit = (1 << 20) - 1; + +static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, + struct nlmsghdr *nlh, struct net *net, u32 portid, + unsigned int nlm_flags); + +static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) +{ + struct mpls_route *rt = NULL; + + if (index < net->mpls.platform_labels) { + struct mpls_route __rcu **platform_label = + rcu_dereference(net->mpls.platform_label); + rt = rcu_dereference(platform_label[index]); + } + return rt; +} + +static bool mpls_output_possible(const struct net_device *dev) +{ + return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); +} + +static unsigned int mpls_rt_header_size(const struct mpls_route *rt) +{ + /* The size of the layer 2.5 labels to be added for this route */ + return rt->rt_labels * sizeof(struct mpls_shim_hdr); +} + +static unsigned int mpls_dev_mtu(const struct net_device *dev) +{ + /* The amount of data the layer 2 frame can hold */ + return dev->mtu; +} + +static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + +static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, + struct mpls_entry_decoded dec) +{ + /* RFC4385 and RFC5586 encode other packets in mpls such that + * they don't conflict with the ip version number, making + * decoding by examining the ip version correct in everything + * except for the strangest cases. + * + * The strange cases if we choose to support them will require + * manual configuration. + */ + struct iphdr *hdr4; + bool success = true; + + /* The IPv4 code below accesses through the IPv4 header + * checksum, which is 12 bytes into the packet. + * The IPv6 code below accesses through the IPv6 hop limit + * which is 8 bytes into the packet. + * + * For all supported cases there should always be at least 12 + * bytes of packet data present. The IPv4 header is 20 bytes + * without options and the IPv6 header is always 40 bytes + * long. + */ + if (!pskb_may_pull(skb, 12)) + return false; + + /* Use ip_hdr to find the ip protocol version */ + hdr4 = ip_hdr(skb); + if (hdr4->version == 4) { + skb->protocol = htons(ETH_P_IP); + csum_replace2(&hdr4->check, + htons(hdr4->ttl << 8), + htons(dec.ttl << 8)); + hdr4->ttl = dec.ttl; + } + else if (hdr4->version == 6) { + struct ipv6hdr *hdr6 = ipv6_hdr(skb); + skb->protocol = htons(ETH_P_IPV6); + hdr6->hop_limit = dec.ttl; + } + else + /* version 0 and version 1 are used by pseudo wires */ + success = false; + return success; +} + +static int mpls_forward(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct net *net = dev_net(dev); + struct mpls_shim_hdr *hdr; + struct mpls_route *rt; + struct mpls_entry_decoded dec; + struct net_device *out_dev; + unsigned int hh_len; + unsigned int new_header_size; + unsigned int mtu; + int err; + + /* Careful this entire function runs inside of an rcu critical section */ + + if (skb->pkt_type != PACKET_HOST) + goto drop; + + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + goto drop; + + if (!pskb_may_pull(skb, sizeof(*hdr))) + goto drop; + + /* Read and decode the label */ + hdr = mpls_hdr(skb); + dec = mpls_entry_decode(hdr); + + /* Pop the label */ + skb_pull(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + + skb_orphan(skb); + + rt = mpls_route_input_rcu(net, dec.label); + if (!rt) + goto drop; + + /* Find the output device */ + out_dev = rcu_dereference(rt->rt_dev); + if (!mpls_output_possible(out_dev)) + goto drop; + + if (skb_warn_if_lro(skb)) + goto drop; + + skb_forward_csum(skb); + + /* Verify ttl is valid */ + if (dec.ttl <= 1) + goto drop; + dec.ttl -= 1; + + /* Verify the destination can hold the packet */ + new_header_size = mpls_rt_header_size(rt); + mtu = mpls_dev_mtu(out_dev); + if (mpls_pkt_too_big(skb, mtu - new_header_size)) + goto drop; + + hh_len = LL_RESERVED_SPACE(out_dev); + if (!out_dev->header_ops) + hh_len = 0; + + /* Ensure there is enough space for the headers in the skb */ + if (skb_cow(skb, hh_len + new_header_size)) + goto drop; + + skb->dev = out_dev; + skb->protocol = htons(ETH_P_MPLS_UC); + + if (unlikely(!new_header_size && dec.bos)) { + /* Penultimate hop popping */ + if (!mpls_egress(rt, skb, dec)) + goto drop; + } else { + bool bos; + int i; + skb_push(skb, new_header_size); + skb_reset_network_header(skb); + /* Push the new labels */ + hdr = mpls_hdr(skb); + bos = dec.bos; + for (i = rt->rt_labels - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); + bos = false; + } + } + + err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb); + if (err) + net_dbg_ratelimited("%s: packet transmission failed: %d\n", + __func__, err); + return 0; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type mpls_packet_type __read_mostly = { + .type = cpu_to_be16(ETH_P_MPLS_UC), + .func = mpls_forward, +}; + +static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { + [RTA_DST] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, +}; + +struct mpls_route_config { + u32 rc_protocol; + u32 rc_ifindex; + u16 rc_via_table; + u16 rc_via_alen; + u8 rc_via[MAX_VIA_ALEN]; + u32 rc_label; + u32 rc_output_labels; + u32 rc_output_label[MAX_NEW_LABELS]; + u32 rc_nlflags; + struct nl_info rc_nlinfo; +}; + +static struct mpls_route *mpls_rt_alloc(size_t alen) +{ + struct mpls_route *rt; + + rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL); + if (rt) + rt->rt_via_alen = alen; + return rt; +} + +static void mpls_rt_free(struct mpls_route *rt) +{ + if (rt) + kfree_rcu(rt, rt_rcu); +} + +static void mpls_notify_route(struct net *net, unsigned index, + struct mpls_route *old, struct mpls_route *new, + const struct nl_info *info) +{ + struct nlmsghdr *nlh = info ? info->nlh : NULL; + unsigned portid = info ? info->portid : 0; + int event = new ? RTM_NEWROUTE : RTM_DELROUTE; + struct mpls_route *rt = new ? new : old; + unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0; + /* Ignore reserved labels for now */ + if (rt && (index >= 16)) + rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags); +} + +static void mpls_route_update(struct net *net, unsigned index, + struct net_device *dev, struct mpls_route *new, + const struct nl_info *info) +{ + struct mpls_route __rcu **platform_label; + struct mpls_route *rt, *old = NULL; + + ASSERT_RTNL(); + + platform_label = rtnl_dereference(net->mpls.platform_label); + rt = rtnl_dereference(platform_label[index]); + if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) { + rcu_assign_pointer(platform_label[index], new); + old = rt; + } + + mpls_notify_route(net, index, old, new, info); + + /* If we removed a route free it now */ + mpls_rt_free(old); +} + +static unsigned find_free_label(struct net *net) +{ + struct mpls_route __rcu **platform_label; + size_t platform_labels; + unsigned index; + + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (index = 16; index < platform_labels; index++) { + if (!rtnl_dereference(platform_label[index])) + return index; + } + return LABEL_NOT_SPECIFIED; +} + +static int mpls_route_add(struct mpls_route_config *cfg) +{ + struct mpls_route __rcu **platform_label; + struct net *net = cfg->rc_nlinfo.nl_net; + struct net_device *dev = NULL; + struct mpls_route *rt, *old; + unsigned index; + int i; + int err = -EINVAL; + + index = cfg->rc_label; + + /* If a label was not specified during insert pick one */ + if ((index == LABEL_NOT_SPECIFIED) && + (cfg->rc_nlflags & NLM_F_CREATE)) { + index = find_free_label(net); + } + + /* The first 16 labels are reserved, and may not be set */ + if (index < 16) + goto errout; + + /* The full 20 bit range may not be supported. */ + if (index >= net->mpls.platform_labels) + goto errout; + + /* Ensure only a supported number of labels are present */ + if (cfg->rc_output_labels > MAX_NEW_LABELS) + goto errout; + + err = -ENODEV; + dev = dev_get_by_index(net, cfg->rc_ifindex); + if (!dev) + goto errout; + + /* For now just support ethernet devices */ + err = -EINVAL; + if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) + goto errout; + + err = -EINVAL; + if ((cfg->rc_via_table == NEIGH_LINK_TABLE) && + (dev->addr_len != cfg->rc_via_alen)) + goto errout; + + /* Append makes no sense with mpls */ + err = -EOPNOTSUPP; + if (cfg->rc_nlflags & NLM_F_APPEND) + goto errout; + + err = -EEXIST; + platform_label = rtnl_dereference(net->mpls.platform_label); + old = rtnl_dereference(platform_label[index]); + if ((cfg->rc_nlflags & NLM_F_EXCL) && old) + goto errout; + + err = -EEXIST; + if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old) + goto errout; + + err = -ENOENT; + if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) + goto errout; + + err = -ENOMEM; + rt = mpls_rt_alloc(cfg->rc_via_alen); + if (!rt) + goto errout; + + rt->rt_labels = cfg->rc_output_labels; + for (i = 0; i < rt->rt_labels; i++) + rt->rt_label[i] = cfg->rc_output_label[i]; + rt->rt_protocol = cfg->rc_protocol; + RCU_INIT_POINTER(rt->rt_dev, dev); + rt->rt_via_table = cfg->rc_via_table; + memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); + + mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo); + + dev_put(dev); + return 0; + +errout: + if (dev) + dev_put(dev); + return err; +} + +static int mpls_route_del(struct mpls_route_config *cfg) +{ + struct net *net = cfg->rc_nlinfo.nl_net; + unsigned index; + int err = -EINVAL; + + index = cfg->rc_label; + + /* The first 16 labels are reserved, and may not be removed */ + if (index < 16) + goto errout; + + /* The full 20 bit range may not be supported */ + if (index >= net->mpls.platform_labels) + goto errout; + + mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo); + + err = 0; +errout: + return err; +} + +static void mpls_ifdown(struct net_device *dev) +{ + struct mpls_route __rcu **platform_label; + struct net *net = dev_net(dev); + unsigned index; + + platform_label = rtnl_dereference(net->mpls.platform_label); + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + if (!rt) + continue; + if (rtnl_dereference(rt->rt_dev) != dev) + continue; + rt->rt_dev = NULL; + } +} + +static int mpls_dev_notify(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch(event) { + case NETDEV_UNREGISTER: + mpls_ifdown(dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block mpls_dev_notifier = { + .notifier_call = mpls_dev_notify, +}; + +static int nla_put_via(struct sk_buff *skb, + u8 table, const void *addr, int alen) +{ + static const int table_to_family[NEIGH_NR_TABLES + 1] = { + AF_INET, AF_INET6, AF_DECnet, AF_PACKET, + }; + struct nlattr *nla; + struct rtvia *via; + int family = AF_UNSPEC; + + nla = nla_reserve(skb, RTA_VIA, alen + 2); + if (!nla) + return -EMSGSIZE; + + if (table <= NEIGH_NR_TABLES) + family = table_to_family[table]; + + via = nla_data(nla); + via->rtvia_family = family; + memcpy(via->rtvia_addr, addr, alen); + return 0; +} + +int nla_put_labels(struct sk_buff *skb, int attrtype, + u8 labels, const u32 label[]) +{ + struct nlattr *nla; + struct mpls_shim_hdr *nla_label; + bool bos; + int i; + nla = nla_reserve(skb, attrtype, labels*4); + if (!nla) + return -EMSGSIZE; + + nla_label = nla_data(nla); + bos = true; + for (i = labels - 1; i >= 0; i--) { + nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos); + bos = false; + } + + return 0; +} + +int nla_get_labels(const struct nlattr *nla, + u32 max_labels, u32 *labels, u32 label[]) +{ + unsigned len = nla_len(nla); + unsigned nla_labels; + struct mpls_shim_hdr *nla_label; + bool bos; + int i; + + /* len needs to be an even multiple of 4 (the label size) */ + if (len & 3) + return -EINVAL; + + /* Limit the number of new labels allowed */ + nla_labels = len/4; + if (nla_labels > max_labels) + return -EINVAL; + + nla_label = nla_data(nla); + bos = true; + for (i = nla_labels - 1; i >= 0; i--, bos = false) { + struct mpls_entry_decoded dec; + dec = mpls_entry_decode(nla_label + i); + + /* Ensure the bottom of stack flag is properly set + * and ttl and tc are both clear. + */ + if ((dec.bos != bos) || dec.ttl || dec.tc) + return -EINVAL; + + label[i] = dec.label; + } + *labels = nla_labels; + return 0; +} + +static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct mpls_route_config *cfg) +{ + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; + int index; + int err; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy); + if (err < 0) + goto errout; + + err = -EINVAL; + rtm = nlmsg_data(nlh); + memset(cfg, 0, sizeof(*cfg)); + + if (rtm->rtm_family != AF_MPLS) + goto errout; + if (rtm->rtm_dst_len != 20) + goto errout; + if (rtm->rtm_src_len != 0) + goto errout; + if (rtm->rtm_tos != 0) + goto errout; + if (rtm->rtm_table != RT_TABLE_MAIN) + goto errout; + /* Any value is acceptable for rtm_protocol */ + + /* As mpls uses destination specific addresses + * (or source specific address in the case of multicast) + * all addresses have universal scope. + */ + if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) + goto errout; + if (rtm->rtm_type != RTN_UNICAST) + goto errout; + if (rtm->rtm_flags != 0) + goto errout; + + cfg->rc_label = LABEL_NOT_SPECIFIED; + cfg->rc_protocol = rtm->rtm_protocol; + cfg->rc_nlflags = nlh->nlmsg_flags; + cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; + cfg->rc_nlinfo.nlh = nlh; + cfg->rc_nlinfo.nl_net = sock_net(skb->sk); + + for (index = 0; index <= RTA_MAX; index++) { + struct nlattr *nla = tb[index]; + if (!nla) + continue; + + switch(index) { + case RTA_OIF: + cfg->rc_ifindex = nla_get_u32(nla); + break; + case RTA_NEWDST: + if (nla_get_labels(nla, MAX_NEW_LABELS, + &cfg->rc_output_labels, + cfg->rc_output_label)) + goto errout; + break; + case RTA_DST: + { + u32 label_count; + if (nla_get_labels(nla, 1, &label_count, + &cfg->rc_label)) + goto errout; + + /* The first 16 labels are reserved, and may not be set */ + if (cfg->rc_label < 16) + goto errout; + + break; + } + case RTA_VIA: + { + struct rtvia *via = nla_data(nla); + if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) + goto errout; + cfg->rc_via_alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + if (cfg->rc_via_alen > MAX_VIA_ALEN) + goto errout; + + /* Validate the address family */ + switch(via->rtvia_family) { + case AF_PACKET: + cfg->rc_via_table = NEIGH_LINK_TABLE; + break; + case AF_INET: + cfg->rc_via_table = NEIGH_ARP_TABLE; + if (cfg->rc_via_alen != 4) + goto errout; + break; + case AF_INET6: + cfg->rc_via_table = NEIGH_ND_TABLE; + if (cfg->rc_via_alen != 16) + goto errout; + break; + default: + /* Unsupported address family */ + goto errout; + } + + memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen); + break; + } + default: + /* Unsupported attribute */ + goto errout; + } + } + + err = 0; +errout: + return err; +} + +static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct mpls_route_config cfg; + int err; + + err = rtm_to_route_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return mpls_route_del(&cfg); +} + + +static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct mpls_route_config cfg; + int err; + + err = rtm_to_route_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return mpls_route_add(&cfg); +} + +static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, + u32 label, struct mpls_route *rt, int flags) +{ + struct net_device *dev; + struct nlmsghdr *nlh; + struct rtmsg *rtm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = AF_MPLS; + rtm->rtm_dst_len = 20; + rtm->rtm_src_len = 0; + rtm->rtm_tos = 0; + rtm->rtm_table = RT_TABLE_MAIN; + rtm->rtm_protocol = rt->rt_protocol; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_type = RTN_UNICAST; + rtm->rtm_flags = 0; + + if (rt->rt_labels && + nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label)) + goto nla_put_failure; + if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen)) + goto nla_put_failure; + dev = rtnl_dereference(rt->rt_dev); + if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) + goto nla_put_failure; + if (nla_put_labels(skb, RTA_DST, 1, &label)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct mpls_route __rcu **platform_label; + size_t platform_labels; + unsigned int index; + + ASSERT_RTNL(); + + index = cb->args[0]; + if (index < 16) + index = 16; + + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (; index < platform_labels; index++) { + struct mpls_route *rt; + rt = rtnl_dereference(platform_label[index]); + if (!rt) + continue; + + if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, + index, rt, NLM_F_MULTI) < 0) + break; + } + cb->args[0] = index; + + return skb->len; +} + +static inline size_t lfib_nlmsg_size(struct mpls_route *rt) +{ + size_t payload = + NLMSG_ALIGN(sizeof(struct rtmsg)) + + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */ + + nla_total_size(4); /* RTA_DST */ + if (rt->rt_labels) /* RTA_NEWDST */ + payload += nla_total_size(rt->rt_labels * 4); + if (rt->rt_dev) /* RTA_OIF */ + payload += nla_total_size(4); + return payload; +} + +static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, + struct nlmsghdr *nlh, struct net *net, u32 portid, + unsigned int nlm_flags) +{ + struct sk_buff *skb; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + int err = -ENOBUFS; + + skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags); + if (err < 0) { + /* -EMSGSIZE implies BUG in lfib_nlmsg_size */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL); + + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); +} + +static int resize_platform_label_table(struct net *net, size_t limit) +{ + size_t size = sizeof(struct mpls_route *) * limit; + size_t old_limit; + size_t cp_size; + struct mpls_route __rcu **labels = NULL, **old; + struct mpls_route *rt0 = NULL, *rt2 = NULL; + unsigned index; + + if (size) { + labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!labels) + labels = vzalloc(size); + + if (!labels) + goto nolabels; + } + + /* In case the predefined labels need to be populated */ + if (limit > LABEL_IPV4_EXPLICIT_NULL) { + struct net_device *lo = net->loopback_dev; + rt0 = mpls_rt_alloc(lo->addr_len); + if (!rt0) + goto nort0; + RCU_INIT_POINTER(rt0->rt_dev, lo); + rt0->rt_protocol = RTPROT_KERNEL; + rt0->rt_via_table = NEIGH_LINK_TABLE; + memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); + } + if (limit > LABEL_IPV6_EXPLICIT_NULL) { + struct net_device *lo = net->loopback_dev; + rt2 = mpls_rt_alloc(lo->addr_len); + if (!rt2) + goto nort2; + RCU_INIT_POINTER(rt2->rt_dev, lo); + rt2->rt_protocol = RTPROT_KERNEL; + rt2->rt_via_table = NEIGH_LINK_TABLE; + memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); + } + + rtnl_lock(); + /* Remember the original table */ + old = rtnl_dereference(net->mpls.platform_label); + old_limit = net->mpls.platform_labels; + + /* Free any labels beyond the new table */ + for (index = limit; index < old_limit; index++) + mpls_route_update(net, index, NULL, NULL, NULL); + + /* Copy over the old labels */ + cp_size = size; + if (old_limit < limit) + cp_size = old_limit * sizeof(struct mpls_route *); + + memcpy(labels, old, cp_size); + + /* If needed set the predefined labels */ + if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) && + (limit > LABEL_IPV6_EXPLICIT_NULL)) { + RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2); + rt2 = NULL; + } + + if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) && + (limit > LABEL_IPV4_EXPLICIT_NULL)) { + RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0); + rt0 = NULL; + } + + /* Update the global pointers */ + net->mpls.platform_labels = limit; + rcu_assign_pointer(net->mpls.platform_label, labels); + + rtnl_unlock(); + + mpls_rt_free(rt2); + mpls_rt_free(rt0); + + if (old) { + synchronize_rcu(); + kvfree(old); + } + return 0; + +nort2: + mpls_rt_free(rt0); +nort0: + kvfree(labels); +nolabels: + return -ENOMEM; +} + +static int mpls_platform_labels(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = table->data; + int platform_labels = net->mpls.platform_labels; + int ret; + struct ctl_table tmp = { + .procname = table->procname, + .data = &platform_labels, + .maxlen = sizeof(int), + .mode = table->mode, + .extra1 = &zero, + .extra2 = &label_limit, + }; + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) + ret = resize_platform_label_table(net, platform_labels); + + return ret; +} + +static struct ctl_table mpls_table[] = { + { + .procname = "platform_labels", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = mpls_platform_labels, + }, + { } +}; + +static int mpls_net_init(struct net *net) +{ + struct ctl_table *table; + + net->mpls.platform_labels = 0; + net->mpls.platform_label = NULL; + + table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); + if (table == NULL) + return -ENOMEM; + + table[0].data = net; + net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); + if (net->mpls.ctl == NULL) + return -ENOMEM; + + return 0; +} + +static void mpls_net_exit(struct net *net) +{ + struct mpls_route __rcu **platform_label; + size_t platform_labels; + struct ctl_table *table; + unsigned int index; + + table = net->mpls.ctl->ctl_table_arg; + unregister_net_sysctl_table(net->mpls.ctl); + kfree(table); + + /* An rcu grace period has passed since there was a device in + * the network namespace (and thus the last in flight packet) + * left this network namespace. This is because + * unregister_netdevice_many and netdev_run_todo has completed + * for each network device that was in this network namespace. + * + * As such no additional rcu synchronization is necessary when + * freeing the platform_label table. + */ + rtnl_lock(); + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (index = 0; index < platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + RCU_INIT_POINTER(platform_label[index], NULL); + mpls_rt_free(rt); + } + rtnl_unlock(); + + kvfree(platform_label); +} + +static struct pernet_operations mpls_net_ops = { + .init = mpls_net_init, + .exit = mpls_net_exit, +}; + +static int __init mpls_init(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4); + + err = register_pernet_subsys(&mpls_net_ops); + if (err) + goto out; + + err = register_netdevice_notifier(&mpls_dev_notifier); + if (err) + goto out_unregister_pernet; + + dev_add_pack(&mpls_packet_type); + + rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); + rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); + rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL); + err = 0; +out: + return err; + +out_unregister_pernet: + unregister_pernet_subsys(&mpls_net_ops); + goto out; +} +module_init(mpls_init); + +static void __exit mpls_exit(void) +{ + rtnl_unregister_all(PF_MPLS); + dev_remove_pack(&mpls_packet_type); + unregister_netdevice_notifier(&mpls_dev_notifier); + unregister_pernet_subsys(&mpls_net_ops); +} +module_exit(mpls_exit); + +MODULE_DESCRIPTION("MultiProtocol Label Switching"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_NETPROTO(PF_MPLS); diff --git a/net/mpls/internal.h b/net/mpls/internal.h new file mode 100644 index 000000000000..fb6de92052c4 --- /dev/null +++ b/net/mpls/internal.h @@ -0,0 +1,59 @@ +#ifndef MPLS_INTERNAL_H +#define MPLS_INTERNAL_H + +#define LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */ +#define LABEL_ROUTER_ALERT_LABEL 1 /* RFC3032 */ +#define LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */ +#define LABEL_IMPLICIT_NULL 3 /* RFC3032 */ +#define LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */ +#define LABEL_GAL 13 /* RFC5586 */ +#define LABEL_OAM_ALERT 14 /* RFC3429 */ +#define LABEL_EXTENSION 15 /* RFC7274 */ + + +struct mpls_shim_hdr { + __be32 label_stack_entry; +}; + +struct mpls_entry_decoded { + u32 label; + u8 ttl; + u8 tc; + u8 bos; +}; + +struct sk_buff; + +static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) +{ + return (struct mpls_shim_hdr *)skb_network_header(skb); +} + +static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) +{ + struct mpls_shim_hdr result; + result.label_stack_entry = + cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) | + (tc << MPLS_LS_TC_SHIFT) | + (bos ? (1 << MPLS_LS_S_SHIFT) : 0) | + (ttl << MPLS_LS_TTL_SHIFT)); + return result; +} + +static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr) +{ + struct mpls_entry_decoded result; + unsigned entry = be32_to_cpu(hdr->label_stack_entry); + + result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; + result.tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT; + + return result; +} + +int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); +int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, u32 label[]); + +#endif /* MPLS_INTERNAL_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index b02660fa9eb0..f70e34a68f70 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -438,8 +438,10 @@ config NF_TABLES To compile it as a module, choose M here. +if NF_TABLES + config NF_TABLES_INET - depends on NF_TABLES && IPV6 + depends on IPV6 select NF_TABLES_IPV4 select NF_TABLES_IPV6 tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support" @@ -447,21 +449,18 @@ config NF_TABLES_INET This option enables support for a mixed IPv4/IPv6 "inet" table. config NFT_EXTHDR - depends on NF_TABLES tristate "Netfilter nf_tables IPv6 exthdr module" help This option adds the "exthdr" expression that you can use to match IPv6 extension headers. config NFT_META - depends on NF_TABLES tristate "Netfilter nf_tables meta module" help This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. config NFT_CT - depends on NF_TABLES depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" help @@ -469,42 +468,36 @@ config NFT_CT connection tracking information such as the flow state. config NFT_RBTREE - depends on NF_TABLES tristate "Netfilter nf_tables rbtree set module" help This option adds the "rbtree" set type (Red Black tree) that is used to build interval-based sets. config NFT_HASH - depends on NF_TABLES tristate "Netfilter nf_tables hash set module" help This option adds the "hash" set type that is used to build one-way mappings between matchings and actions. config NFT_COUNTER - depends on NF_TABLES tristate "Netfilter nf_tables counter module" help This option adds the "counter" expression that you can use to include packet and byte counters in a rule. config NFT_LOG - depends on NF_TABLES tristate "Netfilter nf_tables log module" help This option adds the "log" expression that you can use to log packets matching some criteria. config NFT_LIMIT - depends on NF_TABLES tristate "Netfilter nf_tables limit module" help This option adds the "limit" expression that you can use to ratelimit rule matchings. config NFT_MASQ - depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables masquerade support" @@ -513,7 +506,6 @@ config NFT_MASQ to perform NAT in the masquerade flavour. config NFT_REDIR - depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables redirect support" @@ -522,7 +514,6 @@ config NFT_REDIR to perform NAT in the redirect flavour. config NFT_NAT - depends on NF_TABLES depends on NF_CONNTRACK select NF_NAT tristate "Netfilter nf_tables nat module" @@ -531,8 +522,6 @@ config NFT_NAT typical Network Address Translation (NAT) packet transformations. config NFT_QUEUE - depends on NF_TABLES - depends on NETFILTER_XTABLES depends on NETFILTER_NETLINK_QUEUE tristate "Netfilter nf_tables queue module" help @@ -540,7 +529,6 @@ config NFT_QUEUE infrastructure (also known as NFQUEUE) from nftables. config NFT_REJECT - depends on NF_TABLES default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" help @@ -554,7 +542,6 @@ config NFT_REJECT_INET tristate config NFT_COMPAT - depends on NF_TABLES depends on NETFILTER_XTABLES tristate "Netfilter x_tables over nf_tables module" help @@ -562,6 +549,8 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +endif # NF_TABLES + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n @@ -951,7 +940,7 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' - depends on NETFILTER_ADVANCED + default m if NETFILTER_ADVANCED=n ---help--- This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b87ca32efa0b..04dbd9c7213f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); } } @@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); } } @@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) struct ip_vs_cpu_stats *s; s = this_cpu_ptr(cp->dest->stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); } @@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb, } } +static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, + int conn_reuse_mode) +{ + /* Controlled (FTP DATA or persistence)? */ + if (cp->control) + return false; + + switch (cp->protocol) { + case IPPROTO_TCP: + return (cp->state == IP_VS_TCP_S_TIME_WAIT) || + ((conn_reuse_mode & 2) && + (cp->state == IP_VS_TCP_S_FIN_WAIT) && + (cp->flags & IP_VS_CONN_F_NOOUTPUT)); + case IPPROTO_SCTP: + return cp->state == IP_VS_SCTP_S_CLOSED; + default: + return false; + } +} + /* Handle response packets: rewrite addresses and send away... */ static unsigned int @@ -1585,6 +1611,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_conn *cp; int ret, pkts; struct netns_ipvs *ipvs; + int conn_reuse_mode; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -1653,10 +1680,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ cp = pp->conn_in_get(af, skb, &iph, 0); - if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest && - unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs && - is_new_conn(skb, &iph)) { - ip_vs_conn_expire_now(cp); + conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); + if (conn_reuse_mode && !iph.fragoffs && + is_new_conn(skb, &iph) && cp && + ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && + unlikely(!atomic_read(&cp->dest->weight))) || + unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) { + if (!atomic_read(&cp->n_control)) + ip_vs_conn_expire_now(cp); __ip_vs_conn_put(cp); cp = NULL; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e55759056361..49532672f66d 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net) } static void -ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) +ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) { -#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c +#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c spin_lock_bh(&src->lock); @@ -747,13 +747,28 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) } static void +ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) +{ + dst->conns = (u32)src->conns; + dst->inpkts = (u32)src->inpkts; + dst->outpkts = (u32)src->outpkts; + dst->inbytes = src->inbytes; + dst->outbytes = src->outbytes; + dst->cps = (u32)src->cps; + dst->inpps = (u32)src->inpps; + dst->outpps = (u32)src->outpps; + dst->inbps = (u32)src->inbps; + dst->outbps = (u32)src->outbps; +} + +static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); /* get current counters as zero point, rates are zeroed */ -#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c +#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c IP_VS_ZERO_STATS_COUNTER(conns); IP_VS_ZERO_STATS_COUNTER(inpkts); @@ -1808,6 +1823,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "conn_reuse_mode", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -2044,7 +2065,7 @@ static const struct file_operations ip_vs_info_fops = { static int ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats_user show; + struct ip_vs_kstats show; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -2053,17 +2074,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) " Conns Packets Packets Bytes Bytes\n"); ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns, - show.inpkts, show.outpkts, - (unsigned long long) show.inbytes, - (unsigned long long) show.outbytes); - -/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", + (unsigned long long)show.conns, + (unsigned long long)show.inpkts, + (unsigned long long)show.outpkts, + (unsigned long long)show.inbytes, + (unsigned long long)show.outbytes); + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/ seq_puts(seq, - " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq, "%8X %8X %8X %16X %16X\n", - show.cps, show.inpps, show.outpps, - show.inbps, show.outbps); + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n", + (unsigned long long)show.cps, + (unsigned long long)show.inpps, + (unsigned long long)show.outpps, + (unsigned long long)show.inbps, + (unsigned long long)show.outbps); return 0; } @@ -2086,7 +2112,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) struct net *net = seq_file_single_net(seq); struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; - struct ip_vs_stats_user rates; + struct ip_vs_kstats kstats; int i; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ @@ -2098,41 +2124,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) for_each_possible_cpu(i) { struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); unsigned int start; - __u64 inbytes, outbytes; + u64 conns, inpkts, outpkts, inbytes, outbytes; do { start = u64_stats_fetch_begin_irq(&u->syncp); - inbytes = u->ustats.inbytes; - outbytes = u->ustats.outbytes; + conns = u->cnt.conns; + inpkts = u->cnt.inpkts; + outpkts = u->cnt.outpkts; + inbytes = u->cnt.inbytes; + outbytes = u->cnt.outbytes; } while (u64_stats_fetch_retry_irq(&u->syncp, start)); - seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", - i, u->ustats.conns, u->ustats.inpkts, - u->ustats.outpkts, (__u64)inbytes, - (__u64)outbytes); + seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", + i, (u64)conns, (u64)inpkts, + (u64)outpkts, (u64)inbytes, + (u64)outbytes); } - spin_lock_bh(&tot_stats->lock); - - seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", - tot_stats->ustats.conns, tot_stats->ustats.inpkts, - tot_stats->ustats.outpkts, - (unsigned long long) tot_stats->ustats.inbytes, - (unsigned long long) tot_stats->ustats.outbytes); - - ip_vs_read_estimator(&rates, tot_stats); + ip_vs_copy_stats(&kstats, tot_stats); - spin_unlock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n", + (unsigned long long)kstats.conns, + (unsigned long long)kstats.inpkts, + (unsigned long long)kstats.outpkts, + (unsigned long long)kstats.inbytes, + (unsigned long long)kstats.outbytes); -/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ +/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, - " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq, " %8X %8X %8X %16X %16X\n", - rates.cps, - rates.inpps, - rates.outpps, - rates.inbps, - rates.outbps); + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n", + kstats.cps, + kstats.inpps, + kstats.outpps, + kstats.inbps, + kstats.outbps); return 0; } @@ -2400,6 +2426,7 @@ static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; + struct ip_vs_kstats kstats; sched = rcu_dereference_protected(src->scheduler, 1); dst->protocol = src->protocol; @@ -2411,7 +2438,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; dst->num_dests = src->num_dests; - ip_vs_copy_stats(&dst->stats, &src->stats); + ip_vs_copy_stats(&kstats, &src->stats); + ip_vs_export_stats_user(&dst->stats, &kstats); } static inline int @@ -2485,6 +2513,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, int count = 0; struct ip_vs_dest *dest; struct ip_vs_dest_entry entry; + struct ip_vs_kstats kstats; memset(&entry, 0, sizeof(entry)); list_for_each_entry(dest, &svc->destinations, n_list) { @@ -2506,7 +2535,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, entry.activeconns = atomic_read(&dest->activeconns); entry.inactconns = atomic_read(&dest->inactconns); entry.persistconns = atomic_read(&dest->persistconns); - ip_vs_copy_stats(&entry.stats, &dest->stats); + ip_vs_copy_stats(&kstats, &dest->stats); + ip_vs_export_stats_user(&entry.stats, &kstats); if (copy_to_user(&uptr->entrytable[count], &entry, sizeof(entry))) { ret = -EFAULT; @@ -2798,25 +2828,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { }; static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, - struct ip_vs_stats *stats) + struct ip_vs_kstats *kstats) { - struct ip_vs_stats_user ustats; struct nlattr *nl_stats = nla_nest_start(skb, container_type); + if (!nl_stats) return -EMSGSIZE; - ip_vs_copy_stats(&ustats, stats); - - if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) || - nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) || - nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) || - nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) || - nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps)) + if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || + nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || + nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || + nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || + nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps)) + goto nla_put_failure; + nla_nest_end(skb, nl_stats); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_stats); + return -EMSGSIZE; +} + +static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type, + struct ip_vs_kstats *kstats) +{ + struct nlattr *nl_stats = nla_nest_start(skb, container_type); + + if (!nl_stats) + return -EMSGSIZE; + + if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) || + nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) || + nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps)) goto nla_put_failure; nla_nest_end(skb, nl_stats); @@ -2835,6 +2891,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct nlattr *nl_service; struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; + struct ip_vs_kstats kstats; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2860,7 +2917,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) goto nla_put_failure; - if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) + ip_vs_copy_stats(&kstats, &svc->stats); + if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats)) + goto nla_put_failure; + if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats)) goto nla_put_failure; nla_nest_end(skb, nl_service); @@ -3032,6 +3092,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) { struct nlattr *nl_dest; + struct ip_vs_kstats kstats; nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); if (!nl_dest) @@ -3054,7 +3115,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) atomic_read(&dest->persistconns)) || nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) goto nla_put_failure; - if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) + ip_vs_copy_stats(&kstats, &dest->stats); + if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats)) + goto nla_put_failure; + if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats)) goto nla_put_failure; nla_nest_end(skb, nl_dest); @@ -3402,7 +3466,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (udest.af == 0) udest.af = svc->af; - if (udest.af != svc->af) { + if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) { /* The synchronization protocol is incompatible * with mixed family services */ @@ -3732,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; tbl[idx++].data = &ipvs->sysctl_backup_only; + ipvs->sysctl_conn_reuse_mode = 1; + tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 1425e9a924c4..ef0eb0a8d552 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -45,17 +45,19 @@ NOTES. - * The stored value for average bps is scaled by 2^5, so that maximal - rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10. + * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10. - * A lot code is taken from net/sched/estimator.c + * Netlink users can see 64-bit values but sockopt users are restricted + to 32-bit values for conns, packets, bps, cps and pps. + + * A lot of code is taken from net/core/gen_estimator.c */ /* * Make a summary from each cpu */ -static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, +static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum, struct ip_vs_cpu_stats __percpu *stats) { int i; @@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, for_each_possible_cpu(i) { struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); unsigned int start; - __u64 inbytes, outbytes; + u64 conns, inpkts, outpkts, inbytes, outbytes; + if (add) { - sum->conns += s->ustats.conns; - sum->inpkts += s->ustats.inpkts; - sum->outpkts += s->ustats.outpkts; do { start = u64_stats_fetch_begin(&s->syncp); - inbytes = s->ustats.inbytes; - outbytes = s->ustats.outbytes; + conns = s->cnt.conns; + inpkts = s->cnt.inpkts; + outpkts = s->cnt.outpkts; + inbytes = s->cnt.inbytes; + outbytes = s->cnt.outbytes; } while (u64_stats_fetch_retry(&s->syncp, start)); + sum->conns += conns; + sum->inpkts += inpkts; + sum->outpkts += outpkts; sum->inbytes += inbytes; sum->outbytes += outbytes; } else { add = true; - sum->conns = s->ustats.conns; - sum->inpkts = s->ustats.inpkts; - sum->outpkts = s->ustats.outpkts; do { start = u64_stats_fetch_begin(&s->syncp); - sum->inbytes = s->ustats.inbytes; - sum->outbytes = s->ustats.outbytes; + sum->conns = s->cnt.conns; + sum->inpkts = s->cnt.inpkts; + sum->outpkts = s->cnt.outpkts; + sum->inbytes = s->cnt.inbytes; + sum->outbytes = s->cnt.outbytes; } while (u64_stats_fetch_retry(&s->syncp, start)); } } @@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg) { struct ip_vs_estimator *e; struct ip_vs_stats *s; - u32 n_conns; - u32 n_inpkts, n_outpkts; - u64 n_inbytes, n_outbytes; - u32 rate; + u64 rate; struct net *net = (struct net *)arg; struct netns_ipvs *ipvs; @@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg) s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); - ip_vs_read_cpu_stats(&s->ustats, s->cpustats); - n_conns = s->ustats.conns; - n_inpkts = s->ustats.inpkts; - n_outpkts = s->ustats.outpkts; - n_inbytes = s->ustats.inbytes; - n_outbytes = s->ustats.outbytes; + ip_vs_read_cpu_stats(&s->kstats, s->cpustats); /* scaled by 2^10, but divided 2 seconds */ - rate = (n_conns - e->last_conns) << 9; - e->last_conns = n_conns; - e->cps += ((long)rate - (long)e->cps) >> 2; - - rate = (n_inpkts - e->last_inpkts) << 9; - e->last_inpkts = n_inpkts; - e->inpps += ((long)rate - (long)e->inpps) >> 2; - - rate = (n_outpkts - e->last_outpkts) << 9; - e->last_outpkts = n_outpkts; - e->outpps += ((long)rate - (long)e->outpps) >> 2; - - rate = (n_inbytes - e->last_inbytes) << 4; - e->last_inbytes = n_inbytes; - e->inbps += ((long)rate - (long)e->inbps) >> 2; - - rate = (n_outbytes - e->last_outbytes) << 4; - e->last_outbytes = n_outbytes; - e->outbps += ((long)rate - (long)e->outbps) >> 2; + rate = (s->kstats.conns - e->last_conns) << 9; + e->last_conns = s->kstats.conns; + e->cps += ((s64)rate - (s64)e->cps) >> 2; + + rate = (s->kstats.inpkts - e->last_inpkts) << 9; + e->last_inpkts = s->kstats.inpkts; + e->inpps += ((s64)rate - (s64)e->inpps) >> 2; + + rate = (s->kstats.outpkts - e->last_outpkts) << 9; + e->last_outpkts = s->kstats.outpkts; + e->outpps += ((s64)rate - (s64)e->outpps) >> 2; + + /* scaled by 2^5, but divided 2 seconds */ + rate = (s->kstats.inbytes - e->last_inbytes) << 4; + e->last_inbytes = s->kstats.inbytes; + e->inbps += ((s64)rate - (s64)e->inbps) >> 2; + + rate = (s->kstats.outbytes - e->last_outbytes) << 4; + e->last_outbytes = s->kstats.outbytes; + e->outbps += ((s64)rate - (s64)e->outbps) >> 2; spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); @@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) void ip_vs_zero_estimator(struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; - struct ip_vs_stats_user *u = &stats->ustats; + struct ip_vs_kstats *k = &stats->kstats; /* reset counters, caller must hold the stats->lock lock */ - est->last_inbytes = u->inbytes; - est->last_outbytes = u->outbytes; - est->last_conns = u->conns; - est->last_inpkts = u->inpkts; - est->last_outpkts = u->outpkts; + est->last_inbytes = k->inbytes; + est->last_outbytes = k->outbytes; + est->last_conns = k->conns; + est->last_inpkts = k->inpkts; + est->last_outpkts = k->outpkts; est->cps = 0; est->inpps = 0; est->outpps = 0; @@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) } /* Get decoded rates */ -void ip_vs_read_estimator(struct ip_vs_stats_user *dst, - struct ip_vs_stats *stats) +void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) { struct ip_vs_estimator *e = &stats->est; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c47ffd7a0a70..19b9cce6c210 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, struct ip_vs_conn *cp; struct netns_ipvs *ipvs = net_ipvs(net); - if (!(flags & IP_VS_CONN_F_TEMPLATE)) + if (!(flags & IP_VS_CONN_F_TEMPLATE)) { cp = ip_vs_conn_in_get(param); - else + if (cp && ((cp->dport != dport) || + !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) { + if (!(flags & IP_VS_CONN_F_INACTIVE)) { + ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + cp = NULL; + } else { + /* This is the expiration message for the + * connection that was already replaced, so we + * just ignore it. + */ + __ip_vs_conn_put(cp); + kfree(param->pe_data); + return; + } + } + } else { cp = ip_vs_ct_in_get(param); + } if (cp) { /* Free pe_data */ @@ -896,6 +913,8 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); return; } + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + kfree(param->pe_data); } if (opt) @@ -1169,6 +1188,7 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #endif + ip_vs_pe_put(param.pe); return 0; /* Error exit */ out: @@ -1385,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) mreq.imr_ifindex = dev->ifindex; + rtnl_lock(); lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); + rtnl_unlock(); return ret; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3aedbda7658a..bf02932b7188 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) struct sock *sk = skb->sk; struct rtable *ort = skb_rtable(skb); - if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + if (!skb->dev && sk && sk_fullsock(sk)) ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); } @@ -924,7 +924,8 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct net *net = skb_net(skb); + struct netns_ipvs *ipvs = net_ipvs(net); struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -991,7 +992,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = ttl; - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index a4b5e2a435ac..45da11afa785 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -47,9 +47,11 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) return 0; counter = acct->counter; - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&counter[dir].packets), - (unsigned long long)atomic64_read(&counter[dir].bytes)); + seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); + + return 0; }; EXPORT_SYMBOL_GPL(seq_print_acct); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 91a1837acd0e..7a17070c5dab 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -561,7 +561,9 @@ static int exp_seq_show(struct seq_file *s, void *v) helper->expect_policy[expect->class].name); } - return seq_putc(s, '\n'); + seq_putc(s, '\n'); + + return 0; } static const struct seq_operations exp_seq_ops = { diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 0d8448f19dfe..675d12c69e32 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -212,6 +212,30 @@ void nf_log_packet(struct net *net, } EXPORT_SYMBOL(nf_log_packet); +void nf_log_trace(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *fmt, ...) +{ + va_list args; + char prefix[NF_LOG_PREFIXLEN]; + const struct nf_logger *logger; + + rcu_read_lock(); + logger = rcu_dereference(net->nf.nf_loggers[pf]); + if (logger) { + va_start(args, fmt); + vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(nf_log_trace); + #define S_SIZE (1024 - (sizeof(unsigned int) + 1)) struct nf_log_buf { diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index a2233e77cf39..2631876ac55b 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header); void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk) { - if (!sk || sk->sk_state == TCP_TIME_WAIT) + if (!sk || !sk_fullsock(sk)) return; read_lock_bh(&sk->sk_callback_lock); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 199fd0f27b0e..5604c2df05d1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx) static inline bool nft_rule_is_active(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << net->nft.gencursor)) == 0; -} - -static inline int gencursor_next(struct net *net) -{ - return net->nft.gencursor+1 == 1 ? 1 : 0; + return (rule->genmask & nft_genmask_cur(net)) == 0; } static inline int nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << gencursor_next(net))) == 0; + return (rule->genmask & nft_genmask_next(net)) == 0; } static inline void nft_rule_activate_next(struct net *net, struct nft_rule *rule) { /* Now inactive, will be active in the future */ - rule->genmask = (1 << net->nft.gencursor); + rule->genmask = nft_genmask_cur(net); } static inline void nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) { - rule->genmask = (1 << gencursor_next(net)); + rule->genmask = nft_genmask_next(net); } static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) { - rule->genmask = 0; + rule->genmask &= ~nft_genmask_next(net); } static int @@ -401,7 +396,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi, } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { - [NFTA_TABLE_NAME] = { .type = NLA_STRING }, + [NFTA_TABLE_NAME] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; @@ -686,26 +682,28 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, if (!try_module_get(afi->owner)) return -EAFNOSUPPORT; - table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); - if (table == NULL) { - module_put(afi->owner); - return -ENOMEM; - } + err = -ENOMEM; + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (table == NULL) + goto err1; - nla_strlcpy(table->name, name, nla_len(name)); + nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); - if (err < 0) { - kfree(table); - module_put(afi->owner); - return err; - } + if (err < 0) + goto err2; + list_add_tail_rcu(&table->list, &afi->tables); return 0; +err2: + kfree(table); +err1: + module_put(afi->owner); + return err; } static int nft_flush_table(struct nft_ctx *ctx) @@ -1225,7 +1223,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_CHAIN_POLICY]) { if ((chain != NULL && - !(chain->flags & NFT_BASE_CHAIN)) || + !(chain->flags & NFT_BASE_CHAIN))) + return -EOPNOTSUPP; + + if (chain == NULL && nla[NFTA_CHAIN_HOOK] == NULL) return -EOPNOTSUPP; @@ -1348,6 +1349,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, rcu_assign_pointer(basechain->stats, stats); } + write_pnet(&basechain->pnet, net); basechain->type = type; chain = &basechain->chain; @@ -1375,7 +1377,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); - chain->net = net; chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); @@ -1711,9 +1712,12 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, } nla_nest_end(skb, list); - if (rule->ulen && - nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) - goto nla_put_failure; + if (rule->udata) { + struct nft_userdata *udata = nft_userdata(rule); + if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1, + udata->data) < 0) + goto nla_put_failure; + } nlmsg_end(skb, nlh); return 0; @@ -1896,11 +1900,12 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; + struct nft_userdata *udata; struct nft_trans *trans = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; - unsigned int size, i, n, ulen = 0; + unsigned int size, i, n, ulen = 0, usize = 0; int err, rem; bool create; u64 handle, pos_handle; @@ -1968,12 +1973,19 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, n++; } } + /* Check for overflow of dlen field */ + err = -EFBIG; + if (size >= 1 << 12) + goto err1; - if (nla[NFTA_RULE_USERDATA]) + if (nla[NFTA_RULE_USERDATA]) { ulen = nla_len(nla[NFTA_RULE_USERDATA]); + if (ulen > 0) + usize = sizeof(struct nft_userdata) + ulen; + } err = -ENOMEM; - rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL); + rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL); if (rule == NULL) goto err1; @@ -1981,10 +1993,13 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; - rule->ulen = ulen; + rule->udata = ulen ? 1 : 0; - if (ulen) - nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen); + if (ulen) { + udata = nft_userdata(rule); + udata->len = ulen - 1; + nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen); + } expr = nft_expr_first(rule); for (i = 0; i < n; i++) { @@ -2031,12 +2046,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, err3: list_del_rcu(&rule->list); - if (trans) { - list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_clear(net, nft_trans_rule(trans)); - nft_trans_destroy(trans); - chain->use++; - } err2: nf_tables_rule_destroy(&ctx, rule); err1: @@ -2681,6 +2690,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, goto err2; INIT_LIST_HEAD(&set->bindings); + write_pnet(&set->pnet, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -2757,10 +2767,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, + return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE); } @@ -2813,6 +2824,22 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, nf_tables_set_destroy(ctx, set); } +const struct nft_set_ext_type nft_set_ext_types[] = { + [NFT_SET_EXT_KEY] = { + .len = sizeof(struct nft_data), + .align = __alignof__(struct nft_data), + }, + [NFT_SET_EXT_DATA] = { + .len = sizeof(struct nft_data), + .align = __alignof__(struct nft_data), + }, + [NFT_SET_EXT_FLAGS] = { + .len = sizeof(u8), + .align = __alignof__(u8), + }, +}; +EXPORT_SYMBOL_GPL(nft_set_ext_types); + /* * Set elements */ @@ -2859,6 +2886,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -2866,20 +2894,20 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, - set->klen) < 0) + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), + NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && - nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, set->dlen) < 0) goto nla_put_failure; - if (elem->flags != 0) - if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) - goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, + htonl(*nft_set_ext_flags(ext)))) + goto nla_put_failure; nla_nest_end(skb, nest); return 0; @@ -3100,15 +3128,54 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } +static void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const struct nft_data *key, + const struct nft_data *data, + gfp_t gfp) +{ + struct nft_set_ext *ext; + void *elem; + + elem = kzalloc(set->ops->elemsize + tmpl->len, gfp); + if (elem == NULL) + return NULL; + + ext = nft_set_elem_ext(set, elem); + nft_set_ext_init(ext, tmpl); + + memcpy(nft_set_ext_key(ext), key, set->klen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + memcpy(nft_set_ext_data(ext), data, set->dlen); + + return elem; +} + +void nft_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), set->dtype); + + kfree(elem); +} +EXPORT_SYMBOL_GPL(nft_set_elem_destroy); + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; + struct nft_set_ext_tmpl tmpl; + struct nft_set_ext *ext; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u32 flags; int err; if (set->size && set->nelems == set->size) @@ -3122,19 +3189,26 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) return -EINVAL; - elem.flags = 0; + nft_set_ext_prepare(&tmpl); + + flags = 0; if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { - elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); - if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + if (!(set->flags & NFT_SET_INTERVAL) && + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; + if (flags != 0) + nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && - !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; if (nla[NFTA_SET_ELEM_DATA] != NULL && - elem.flags & NFT_SET_ELEM_INTERVAL_END) + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) @@ -3148,12 +3222,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) goto err2; - err = -EEXIST; - if (set->ops->get(set, &elem) == 0) - goto err2; + nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err2; @@ -3170,29 +3242,43 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, }; err = nft_validate_data_load(&bind_ctx, dreg, - &elem.data, d2.type); + &data, d2.type); if (err < 0) goto err3; } + + nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA); } + err = -ENOMEM; + elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL); + if (elem.priv == NULL) + goto err3; + + ext = nft_set_elem_ext(set, elem.priv); + if (flags) + *nft_set_ext_flags(ext) = flags; + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) - goto err3; + goto err4; + ext->genmask = nft_genmask_cur(ctx->net); err = set->ops->insert(set, &elem); if (err < 0) - goto err4; + goto err5; nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err4: +err5: kfree(trans); +err4: + kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&elem.data, d2.type); + nft_data_uninit(&data, d2.type); err2: nft_data_uninit(&elem.key, d1.type); err1: @@ -3265,19 +3351,24 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) goto err2; - err = set->ops->get(set, &elem); - if (err < 0) - goto err2; - trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) { err = -ENOMEM; goto err2; } + elem.priv = set->ops->deactivate(set, &elem); + if (elem.priv == NULL) { + err = -ENOENT; + goto err3; + } + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; + +err3: + kfree(trans); err2: nft_data_uninit(&elem.key, desc.type); err1: @@ -3515,6 +3606,10 @@ static void nf_tables_commit_release(struct nft_trans *trans) case NFT_MSG_DELSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_DELSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3529,7 +3624,7 @@ static int nf_tables_commit(struct sk_buff *skb) while (++net->nft.base_seq == 0); /* A new generation has just started */ - net->nft.gencursor = gencursor_next(net); + net->nft.gencursor = nft_gencursor_next(net); /* Make sure all packets have left the previous generation before * purging old rules. @@ -3600,25 +3695,21 @@ static int nf_tables_commit(struct sk_buff *skb) NFT_MSG_DELSET, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - nf_tables_setelem_notify(&trans->ctx, - nft_trans_elem_set(trans), - &nft_trans_elem(trans), + te = (struct nft_trans_elem *)trans->data; + + te->set->ops->activate(te->set, &te->elem); + nf_tables_setelem_notify(&trans->ctx, te->set, + &te->elem, NFT_MSG_NEWSETELEM, 0); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); - te->set->ops->get(te->set, &te->elem); te->set->ops->remove(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->elem.flags & NFT_SET_MAP) { - nft_data_uninit(&te->elem.data, - te->set->dtype); - } - nft_trans_destroy(trans); break; } } @@ -3650,6 +3741,10 @@ static void nf_tables_abort_release(struct nft_trans *trans) case NFT_MSG_NEWSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_NEWSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3658,7 +3753,7 @@ static int nf_tables_abort(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; - struct nft_set *set; + struct nft_trans_elem *te; list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -3719,13 +3814,16 @@ static int nf_tables_abort(struct sk_buff *skb) break; case NFT_MSG_NEWSETELEM: nft_trans_elem_set(trans)->nelems--; - set = nft_trans_elem_set(trans); - set->ops->get(set, &nft_trans_elem(trans)); - set->ops->remove(set, &nft_trans_elem(trans)); - nft_trans_destroy(trans); + te = (struct nft_trans_elem *)trans->data; + + te->set->ops->remove(te->set, &te->elem); break; case NFT_MSG_DELSETELEM: + te = (struct nft_trans_elem *)trans->data; + nft_trans_elem_set(trans)->nelems++; + te->set->ops->activate(te->set, &te->elem); + nft_trans_destroy(trans); break; } @@ -3800,13 +3898,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { - if (elem->flags & NFT_SET_ELEM_INTERVAL_END) + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_data *data; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) return 0; - switch (elem->data.verdict) { + data = nft_set_ext_data(ext); + switch (data->verdict) { case NFT_JUMP: case NFT_GOTO: - return nf_tables_check_loops(ctx, elem->data.chain); + return nf_tables_check_loops(ctx, data->chain); default: return 0; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3b90eb2b2c55..ef4dfcbaf149 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -8,6 +8,7 @@ * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/list.h> @@ -21,6 +22,48 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_log.h> +enum nft_trace { + NFT_TRACE_RULE, + NFT_TRACE_RETURN, + NFT_TRACE_POLICY, +}; + +static const char *const comments[] = { + [NFT_TRACE_RULE] = "rule", + [NFT_TRACE_RETURN] = "return", + [NFT_TRACE_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = LOGLEVEL_WARNING, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static void __nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); +} + +static inline void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + if (unlikely(pkt->skb->nf_trace)) + __nft_trace_packet(pkt, chain, rulenum, type); +} + static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) { @@ -66,44 +109,11 @@ struct nft_jumpstack { int rulenum; }; -enum nft_trace { - NFT_TRACE_RULE, - NFT_TRACE_RETURN, - NFT_TRACE_POLICY, -}; - -static const char *const comments[] = { - [NFT_TRACE_RULE] = "rule", - [NFT_TRACE_RETURN] = "return", - [NFT_TRACE_POLICY] = "policy", -}; - -static struct nf_loginfo trace_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 4, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static void nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) -{ - struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - - nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); -} - unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv, *basechain = chain; + const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet); const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; @@ -111,11 +121,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; struct nft_stats *stats; int rulenum; - /* - * Cache cursor to avoid problems in case that the cursor is updated - * while traversing the ruleset. - */ - unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); + unsigned int gencursor = nft_genmask_cur(net); do_chain: rulenum = 0; @@ -146,8 +152,7 @@ next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; continue; case NFT_CONTINUE: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); continue; } break; @@ -157,37 +162,28 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); return data[NFT_REG_VERDICT].verdict; } switch (data[NFT_REG_VERDICT].verdict) { case NFT_JUMP: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = rule; jumpstack[stackptr].rulenum = rulenum; stackptr++; - chain = data[NFT_REG_VERDICT].chain; - goto do_chain; + /* fall through */ case NFT_GOTO: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); chain = data[NFT_REG_VERDICT].chain; goto do_chain; - case NFT_RETURN: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); - break; case NFT_CONTINUE: - if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN))) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); + rulenum++; + /* fall through */ + case NFT_RETURN: + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); break; default: WARN_ON(1); @@ -201,8 +197,7 @@ next_rule: goto next_rule; } - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a5599fc51a6f..54330fb5efaf 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; + /* Not all fields are initialized so first zero the tuple */ + memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); + tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 11d85b3813f2..957b83a0223b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -539,7 +539,7 @@ __build_packet_message(struct nfnl_log_net *log, /* UID */ sk = skb->sk; - if (sk && sk->sk_state != TCP_TIME_WAIT) { + if (sk && sk_fullsock(sk)) { read_lock_bh(&sk->sk_callback_lock); if (sk->sk_socket && sk->sk_socket->file) { struct file *file = sk->sk_socket->file; @@ -998,11 +998,13 @@ static int seq_show(struct seq_file *s, void *v) { const struct nfulnl_instance *inst = v; - return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", - inst->group_num, - inst->peer_portid, inst->qlen, - inst->copy_mode, inst->copy_range, - inst->flushtimeout, atomic_read(&inst->use)); + seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", + inst->group_num, + inst->peer_portid, inst->qlen, + inst->copy_mode, inst->copy_range, + inst->flushtimeout, atomic_read(&inst->use)); + + return 0; } static const struct seq_operations nful_seq_ops = { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 0db8515e76da..86ee8b05adae 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -257,7 +257,7 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) { const struct cred *cred; - if (sk->sk_state == TCP_TIME_WAIT) + if (!sk_fullsock(sk)) return 0; read_lock_bh(&sk->sk_callback_lock); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index c598f74063a1..589b8487cd08 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -20,6 +20,7 @@ #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_arp/arp_tables.h> #include <net/netfilter/nf_tables.h> static int nft_compat_chain_validate_dependency(const char *tablename, @@ -42,6 +43,7 @@ union nft_entry { struct ipt_entry e4; struct ip6t_entry e6; struct ebt_entry ebt; + struct arpt_entry arp; }; static inline void @@ -123,7 +125,7 @@ static void nft_target_set_tgchk_param(struct xt_tgchk_param *par, const struct nft_ctx *ctx, struct xt_target *target, void *info, - union nft_entry *entry, u8 proto, bool inv) + union nft_entry *entry, u16 proto, bool inv) { par->net = ctx->net; par->table = ctx->table->name; @@ -133,13 +135,18 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; break; case AF_INET6: + if (proto) + entry->e6.ipv6.flags |= IP6T_F_PROTO; + entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; case NFPROTO_BRIDGE: - entry->ebt.ethproto = proto; + entry->ebt.ethproto = (__force __be16)proto; entry->ebt.invflags = inv ? EBT_IPROTO : 0; break; + case NFPROTO_ARP: + break; } par->entryinfo = entry; par->target = target; @@ -171,7 +178,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, }; -static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv) +static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; u32 flags; @@ -203,7 +210,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); - u8 proto = 0; + u16 proto = 0; bool inv = false; union nft_entry e = {}; int ret; @@ -334,7 +341,7 @@ static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { static void nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, struct xt_match *match, void *info, - union nft_entry *entry, u8 proto, bool inv) + union nft_entry *entry, u16 proto, bool inv) { par->net = ctx->net; par->table = ctx->table->name; @@ -344,13 +351,18 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; break; case AF_INET6: + if (proto) + entry->e6.ipv6.flags |= IP6T_F_PROTO; + entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; case NFPROTO_BRIDGE: - entry->ebt.ethproto = proto; + entry->ebt.ethproto = (__force __be16)proto; entry->ebt.invflags = inv ? EBT_IPROTO : 0; break; + case NFPROTO_ARP: + break; } par->entryinfo = entry; par->match = match; @@ -385,7 +397,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); - u8 proto = 0; + u16 proto = 0; bool inv = false; union nft_entry e = {}; int ret; @@ -537,6 +549,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, case NFPROTO_BRIDGE: fmt = "ebt_%s"; break; + case NFPROTO_ARP: + fmt = "arpt_%s"; + break; default: pr_err("nft_compat: unsupported protocol %d\n", nfmsg->nfgen_family); @@ -625,8 +640,12 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct xt_match *match = nft_match->ops.data; if (strcmp(match->name, mt_name) == 0 && - match->revision == rev && match->family == family) + match->revision == rev && match->family == family) { + if (!try_module_get(match->me)) + return ERR_PTR(-ENOENT); + return &nft_match->ops; + } } match = xt_request_find_match(family, mt_name, rev); @@ -695,8 +714,12 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && - target->revision == rev && target->family == family) + target->revision == rev && target->family == family) { + if (!try_module_get(target->me)) + return ERR_PTR(-ENOENT); + return &nft_target->ops; + } } target = xt_request_find_target(family, tg_name, rev); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 61e6c407476a..c7e1a9d7d46f 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -23,119 +23,130 @@ /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_HASH_ELEMENT_HINT 3 +struct nft_hash { + struct rhashtable ht; +}; + struct nft_hash_elem { struct rhash_head node; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; }; -static bool nft_hash_lookup(const struct nft_set *set, - const struct nft_data *key, - struct nft_data *data) -{ - struct rhashtable *priv = nft_set_priv(set); - const struct nft_hash_elem *he; +struct nft_hash_cmp_arg { + const struct nft_set *set; + const struct nft_data *key; + u8 genmask; +}; - he = rhashtable_lookup(priv, key); - if (he && set->flags & NFT_SET_MAP) - nft_data_copy(data, he->data); +static const struct rhashtable_params nft_hash_params; - return !!he; +static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) +{ + const struct nft_hash_cmp_arg *arg = data; + + return jhash(arg->key, len, seed); } -static int nft_hash_insert(const struct nft_set *set, - const struct nft_set_elem *elem) +static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) { - struct rhashtable *priv = nft_set_priv(set); - struct nft_hash_elem *he; - unsigned int size; + const struct nft_hash_elem *he = data; - if (elem->flags != 0) - return -EINVAL; + return jhash(nft_set_ext_key(&he->ext), len, seed); +} - size = sizeof(*he); - if (set->flags & NFT_SET_MAP) - size += sizeof(he->data[0]); +static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_hash_cmp_arg *x = arg->key; + const struct nft_hash_elem *he = ptr; - he = kzalloc(size, GFP_KERNEL); - if (he == NULL) - return -ENOMEM; + if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) + return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) + return 1; + return 0; +} - nft_data_copy(&he->key, &elem->key); - if (set->flags & NFT_SET_MAP) - nft_data_copy(he->data, &elem->data); +static bool nft_hash_lookup(const struct nft_set *set, + const struct nft_data *key, + const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_cur(read_pnet(&set->pnet)), + .set = set, + .key = key, + }; - rhashtable_insert(priv, &he->node); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + *ext = &he->ext; - return 0; + return !!he; } -static void nft_hash_elem_destroy(const struct nft_set *set, - struct nft_hash_elem *he) +static int nft_hash_insert(const struct nft_set *set, + const struct nft_set_elem *elem) { - nft_data_uninit(&he->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) - nft_data_uninit(he->data, set->dtype); - kfree(he); + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .set = set, + .key = &elem->key, + }; + + return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); } -static void nft_hash_remove(const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_hash_activate(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; - rhashtable_remove(priv, elem->cookie); - synchronize_rcu(); - kfree(elem->cookie); + nft_set_elem_change_active(set, &he->ext); } -struct nft_compare_arg { - const struct nft_set *set; - struct nft_set_elem *elem; -}; - -static bool nft_hash_compare(void *ptr, void *arg) +static void *nft_hash_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash_elem *he = ptr; - struct nft_compare_arg *x = arg; - - if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { - x->elem->cookie = he; - x->elem->flags = 0; - if (x->set->flags & NFT_SET_MAP) - nft_data_copy(&x->elem->data, he->data); + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .set = set, + .key = &elem->key, + }; - return true; - } + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + nft_set_elem_change_active(set, &he->ext); - return false; + return he; } -static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); - struct nft_compare_arg arg = { - .set = set, - .elem = elem, - }; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; - if (rhashtable_lookup_compare(priv, &elem->key, - &nft_hash_compare, &arg)) - return 0; - - return -ENOENT; + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); } static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - struct rhashtable *priv = nft_set_priv(set); - const struct nft_hash_elem *he; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; - err = rhashtable_walk_init(priv, &hti); + err = rhashtable_walk_init(&priv->ht, &hti); iter->err = err; if (err) return; @@ -153,15 +164,16 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, iter->err = err; goto out; } + + continue; } if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&he->ext, genmask)) + goto cont; - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; + elem.priv = he; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -178,47 +190,41 @@ out: static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) { - return sizeof(struct rhashtable); + return sizeof(struct nft_hash); } +static const struct rhashtable_params nft_hash_params = { + .head_offset = offsetof(struct nft_hash_elem, node), + .hashfn = nft_hash_key, + .obj_hashfn = nft_hash_obj, + .obj_cmpfn = nft_hash_cmp, + .automatic_shrinking = true, +}; + static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) { - struct rhashtable *priv = nft_set_priv(set); - struct rhashtable_params params = { - .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT, - .head_offset = offsetof(struct nft_hash_elem, node), - .key_offset = offsetof(struct nft_hash_elem, key), - .key_len = set->klen, - .hashfn = jhash, - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, - }; + struct nft_hash *priv = nft_set_priv(set); + struct rhashtable_params params = nft_hash_params; + + params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; + params.key_len = set->klen; - return rhashtable_init(priv, ¶ms); + return rhashtable_init(&priv->ht, ¶ms); } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_elem_destroy(void *ptr, void *arg) { - struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; - struct nft_hash_elem *he; - struct rhash_head *pos, *next; - unsigned int i; - - /* Stop an eventual async resizing */ - priv->being_destroyed = true; - mutex_lock(&priv->mutex); + nft_set_elem_destroy((const struct nft_set *)arg, ptr); +} - tbl = rht_dereference(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_safe(he, pos, next, tbl, i, node) - nft_hash_elem_destroy(set, he); - } - mutex_unlock(&priv->mutex); +static void nft_hash_destroy(const struct nft_set *set) +{ + struct nft_hash *priv = nft_set_priv(set); - rhashtable_destroy(priv); + rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, + (void *)set); } static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, @@ -227,11 +233,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, unsigned int esize; esize = sizeof(struct nft_hash_elem); - if (features & NFT_SET_MAP) - esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); - if (desc->size) { - est->size = sizeof(struct rhashtable) + + est->size = sizeof(struct nft_hash) + roundup_pow_of_two(desc->size * 4 / 3) * sizeof(struct nft_hash_elem *) + desc->size * esize; @@ -251,11 +254,13 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_hash_ops __read_mostly = { .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, - .get = nft_hash_get, .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .walk = nft_hash_walk, diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index bde05f28cf14..e18af9db2f04 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -78,7 +78,7 @@ static int nft_log_init(const struct nft_ctx *ctx, li->u.log.level = ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); } else { - li->u.log.level = 4; + li->u.log.level = LOGLEVEL_WARNING; } if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 9615b8b9fb37..a5f30b8760ea 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -31,9 +31,13 @@ static void nft_lookup_eval(const struct nft_expr *expr, { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; + const struct nft_set_ext *ext; - if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + if (set->ops->lookup(set, &data[priv->sreg], &ext)) { + if (set->flags & NFT_SET_MAP) + nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext)); return; + } data[NFT_REG_VERDICT].verdict = NFT_BREAK; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e99911eda915..5197874372ec 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -83,7 +83,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(u16 *)dest->data = out->type; break; case NFT_META_SKUID: - if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + if (skb->sk == NULL || !sk_fullsock(skb->sk)) goto err; read_lock_bh(&skb->sk->sk_callback_lock); @@ -99,7 +99,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, read_unlock_bh(&skb->sk->sk_callback_lock); break; case NFT_META_SKGID: - if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + if (skb->sk == NULL || !sk_fullsock(skb->sk)) goto err; read_lock_bh(&skb->sk->sk_callback_lock); @@ -153,7 +153,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, } break; case NFT_META_CPU: - dest->data[0] = smp_processor_id(); + dest->data[0] = raw_smp_processor_id(); break; case NFT_META_IIFGROUP: if (in == NULL) diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 46214f245665..42d0ca45fb9e 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -26,25 +26,26 @@ struct nft_rbtree { struct nft_rbtree_elem { struct rb_node node; - u16 flags; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; }; + static bool nft_rbtree_lookup(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data) + const struct nft_set_ext **ext) { const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; - const struct rb_node *parent = priv->root.rb_node; + const struct rb_node *parent; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; spin_lock_bh(&nft_rbtree_lock); + parent = priv->root.rb_node; while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = parent->rb_left; interval = rbe; @@ -52,12 +53,17 @@ static bool nft_rbtree_lookup(const struct nft_set *set, parent = parent->rb_right; else { found: - if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } + if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(&rbe->ext) & + NFT_SET_ELEM_INTERVAL_END) goto out; - if (set->flags & NFT_SET_MAP) - nft_data_copy(data, rbe->data); - spin_unlock_bh(&nft_rbtree_lock); + + *ext = &rbe->ext; return true; } } @@ -71,23 +77,13 @@ out: return false; } -static void nft_rbtree_elem_destroy(const struct nft_set *set, - struct nft_rbtree_elem *rbe) -{ - nft_data_uninit(&rbe->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(rbe->data, set->dtype); - - kfree(rbe); -} - static int __nft_rbtree_insert(const struct nft_set *set, struct nft_rbtree_elem *new) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; + u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; parent = NULL; @@ -95,13 +91,18 @@ static int __nft_rbtree_insert(const struct nft_set *set, while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &new->key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), + nft_set_ext_key(&new->ext), + set->klen); if (d < 0) p = &parent->rb_left; else if (d > 0) p = &parent->rb_right; - else - return -EEXIST; + else { + if (nft_set_elem_active(&rbe->ext, genmask)) + return -EEXIST; + p = &parent->rb_left; + } } rb_link_node(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); @@ -111,31 +112,13 @@ static int __nft_rbtree_insert(const struct nft_set *set, static int nft_rbtree_insert(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rbtree_elem *rbe; - unsigned int size; + struct nft_rbtree_elem *rbe = elem->priv; int err; - size = sizeof(*rbe); - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END)) - size += sizeof(rbe->data[0]); - - rbe = kzalloc(size, GFP_KERNEL); - if (rbe == NULL) - return -ENOMEM; - - rbe->flags = elem->flags; - nft_data_copy(&rbe->key, &elem->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(rbe->data, &elem->data); - spin_lock_bh(&nft_rbtree_lock); err = __nft_rbtree_insert(set, rbe); - if (err < 0) - kfree(rbe); - spin_unlock_bh(&nft_rbtree_lock); + return err; } @@ -143,42 +126,49 @@ static void nft_rbtree_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->cookie; + struct nft_rbtree_elem *rbe = elem->priv; spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); spin_unlock_bh(&nft_rbtree_lock); - kfree(rbe); } -static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +static void nft_rbtree_activate(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe = elem->priv; + + nft_set_elem_change_active(set, &rbe->ext); +} + +static void *nft_rbtree_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; - spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key, + set->klen); if (d < 0) parent = parent->rb_left; else if (d > 0) parent = parent->rb_right; else { - elem->cookie = rbe; - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem->data, rbe->data); - elem->flags = rbe->flags; - spin_unlock_bh(&nft_rbtree_lock); - return 0; + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } + nft_set_elem_change_active(set, &rbe->ext); + return rbe; } } - spin_unlock_bh(&nft_rbtree_lock); - return -ENOENT; + return NULL; } static void nft_rbtree_walk(const struct nft_ctx *ctx, @@ -186,21 +176,21 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set_iter *iter) { const struct nft_rbtree *priv = nft_set_priv(set); - const struct nft_rbtree_elem *rbe; + struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&rbe->ext, genmask)) + goto cont; - rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_data_copy(&elem.key, &rbe->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem.data, rbe->data); - elem.flags = rbe->flags; + elem.priv = rbe; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { @@ -237,7 +227,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_rbtree_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe); } } @@ -247,9 +237,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, unsigned int nsize; nsize = sizeof(struct nft_rbtree_elem); - if (features & NFT_SET_MAP) - nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]); - if (desc->size) est->size = sizeof(struct nft_rbtree) + desc->size * nsize; else @@ -262,12 +249,14 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_rbtree_ops __read_mostly = { .privsize = nft_rbtree_privsize, + .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, - .get = nft_rbtree_get, + .deactivate = nft_rbtree_deactivate, + .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .features = NFT_SET_INTERVAL | NFT_SET_MAP, diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 7b5f9d58680a..92877114aff4 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -28,14 +28,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nf_send_unreach(pkt->skb, priv->icmp_code, + pkt->ops->hooknum); break; case NFT_REJECT_TCP_RST: nf_send_reset(pkt->skb, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, - nft_reject_icmp_code(priv->icmp_code)); + nft_reject_icmp_code(priv->icmp_code), + pkt->ops->hooknum); break; } break; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index ef8a926752a9..c205b26a2bee 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -42,15 +42,21 @@ enum nf_tproxy_lookup_t { static bool tproxy_sk_is_transparent(struct sock *sk) { - if (sk->sk_state != TCP_TIME_WAIT) { - if (inet_sk(sk)->transparent) - return true; - sock_put(sk); - } else { + switch (sk->sk_state) { + case TCP_TIME_WAIT: if (inet_twsk(sk)->tw_transparent) return true; - inet_twsk_put(inet_twsk(sk)); + break; + case TCP_NEW_SYN_RECV: + if (inet_rsk(inet_reqsk(sk))->no_srccheck) + return true; + break; + default: + if (inet_sk(sk)->transparent) + return true; } + + sock_gen_put(sk); return false; } @@ -513,8 +519,8 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; - if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) - && !(i->flags & IP6T_INV_PROTO)) + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && + !(i->invflags & IP6T_INV_PROTO)) return 0; pr_info("Can be used only in combination with " diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index f440f57a452f..50a52043650f 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -56,8 +56,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) /* This only makes sense in the FORWARD and POSTROUTING chains */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && - (!!(nf_bridge->mask & BRNF_BRIDGED) ^ - !(info->invert & XT_PHYSDEV_OP_BRIDGED))) + (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) return false; if ((info->bitmask & XT_PHYSDEV_OP_ISIN && diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 30dbe34915ae..45e1b30e4fb2 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -378,12 +378,11 @@ static int recent_mt_check(const struct xt_mtchk_param *par, mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (t != NULL) { - if (info->hit_count > t->nstamps_max_mask) { - pr_info("hitcount (%u) is larger than packets to be remembered (%u) for table %s\n", - info->hit_count, t->nstamps_max_mask + 1, - info->name); - ret = -EINVAL; - goto out; + if (nstamp_mask > t->nstamps_max_mask) { + spin_lock_bh(&recent_lock); + recent_table_flush(t); + t->nstamps_max_mask = nstamp_mask; + spin_unlock_bh(&recent_lock); } t->refcnt++; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 0d47afea9682..89045982ec94 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -193,7 +193,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) return ret; if (!match_counter0(opt.ext.packets, &info->packets)) - return 0; + return false; return match_counter0(opt.ext.bytes, &info->bytes); } @@ -239,7 +239,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) return ret; if (!match_counter(opt.ext.packets, &info->packets)) - return 0; + return false; return match_counter(opt.ext.bytes, &info->bytes); } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1ba67931eb1b..895534e87a47 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -129,6 +129,20 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol, return NULL; } +static bool xt_socket_sk_is_transparent(struct sock *sk) +{ + switch (sk->sk_state) { + case TCP_TIME_WAIT: + return inet_twsk(sk)->tw_transparent; + + case TCP_NEW_SYN_RECV: + return inet_rsk(inet_reqsk(sk))->no_srccheck; + + default: + return inet_sk(sk)->transparent; + } +} + static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) @@ -195,16 +209,14 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * unless XT_SOCKET_NOWILDCARD is set */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && - sk->sk_state != TCP_TIME_WAIT && + sk_fullsock(sk) && inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, - if XT_SOCKET_TRANSPARENT is used */ + * if XT_SOCKET_TRANSPARENT is used + */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = ((sk->sk_state != TCP_TIME_WAIT && - inet_sk(sk)->transparent) || - (sk->sk_state == TCP_TIME_WAIT && - inet_twsk(sk)->tw_transparent)); + transparent = xt_socket_sk_is_transparent(sk); if (sk != skb->sk) sock_gen_put(sk); @@ -243,12 +255,13 @@ static int extract_icmp6_fields(const struct sk_buff *skb, unsigned int outside_hdrlen, int *protocol, - struct in6_addr **raddr, - struct in6_addr **laddr, + const struct in6_addr **raddr, + const struct in6_addr **laddr, __be16 *rport, - __be16 *lport) + __be16 *lport, + struct ipv6hdr *ipv6_var) { - struct ipv6hdr *inside_iph, _inside_iph; + const struct ipv6hdr *inside_iph; struct icmp6hdr *icmph, _icmph; __be16 *ports, _ports[2]; u8 inside_nexthdr; @@ -263,12 +276,14 @@ extract_icmp6_fields(const struct sk_buff *skb, if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) return 1; - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph); + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), + sizeof(*ipv6_var), ipv6_var); if (inside_iph == NULL) return 1; inside_nexthdr = inside_iph->nexthdr; - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + + sizeof(*ipv6_var), &inside_nexthdr, &inside_fragoff); if (inside_hdrlen < 0) return 1; /* hjm: Packet has no/incomplete transport layer headers. */ @@ -315,10 +330,10 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, static bool socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk = skb->sk; - struct in6_addr *daddr = NULL, *saddr = NULL; + const struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); int thoff = 0, uninitialized_var(tproto); const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; @@ -342,7 +357,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) } else if (tproto == IPPROTO_ICMPV6) { if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport)) + &sport, &dport, &ipv6_var)) return false; } else { return false; @@ -360,16 +375,14 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) * unless XT_SOCKET_NOWILDCARD is set */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && - sk->sk_state != TCP_TIME_WAIT && + sk_fullsock(sk) && ipv6_addr_any(&sk->sk_v6_rcv_saddr)); /* Ignore non-transparent sockets, - if XT_SOCKET_TRANSPARENT is used */ + * if XT_SOCKET_TRANSPARENT is used + */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = ((sk->sk_state != TCP_TIME_WAIT && - inet_sk(sk)->transparent) || - (sk->sk_state == TCP_TIME_WAIT && - inet_twsk(sk)->tw_transparent)); + transparent = xt_socket_sk_is_transparent(sk); if (sk != skb->sk) sock_gen_put(sk); diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 70440748fe5c..13f777f20995 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -293,15 +293,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return -ENOMEM; addr_struct.s_addr = iter4->addr; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4ADDR, + addr_struct.s_addr); if (ret_val != 0) return ret_val; addr_struct.s_addr = iter4->mask; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4MASK, + addr_struct.s_addr); if (ret_val != 0) return ret_val; map4 = netlbl_domhsh_addr4_entry(iter4); @@ -328,14 +326,12 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, if (nla_b == NULL) return -ENOMEM; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR, - sizeof(struct in6_addr), - &iter6->addr); + ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6ADDR, + &iter6->addr); if (ret_val != 0) return ret_val; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK, - sizeof(struct in6_addr), - &iter6->mask); + ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6MASK, + &iter6->mask); if (ret_val != 0) return ret_val; map6 = netlbl_domhsh_addr6_entry(iter6); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index aec7994f78cf..b0380927f05f 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1117,34 +1117,30 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, struct in_addr addr_struct; addr_struct.s_addr = addr4->list.addr; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV4ADDR, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV4ADDR, + addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; addr_struct.s_addr = addr4->list.mask; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV4MASK, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV4MASK, + addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; secid = addr4->secid; } else { - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV6ADDR, - sizeof(struct in6_addr), - &addr6->list.addr); + ret_val = nla_put_in6_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV6ADDR, + &addr6->list.addr); if (ret_val != 0) goto list_cb_failure; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV6MASK, - sizeof(struct in6_addr), - &addr6->list.mask); + ret_val = nla_put_in6_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV6MASK, + &addr6->list.mask); if (ret_val != 0) goto list_cb_failure; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2702673f0f23..19909d0786a2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -116,6 +116,8 @@ static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); static struct list_head netlink_tap_all __read_mostly; +static const struct rhashtable_params netlink_rhashtable_params; + static inline u32 netlink_group_mask(u32 group) { return group ? 1 << (group - 1) : 0; @@ -970,41 +972,50 @@ netlink_unlock_table(void) struct netlink_compare_arg { - struct net *net; + possible_net_t pnet; u32 portid; }; -static bool netlink_compare(void *ptr, void *arg) +/* Doing sizeof directly may yield 4 extra bytes on 64-bit. */ +#define netlink_compare_arg_len \ + (offsetof(struct netlink_compare_arg, portid) + sizeof(u32)) + +static inline int netlink_compare(struct rhashtable_compare_arg *arg, + const void *ptr) { - struct netlink_compare_arg *x = arg; - struct sock *sk = ptr; + const struct netlink_compare_arg *x = arg->key; + const struct netlink_sock *nlk = ptr; - return nlk_sk(sk)->portid == x->portid && - net_eq(sock_net(sk), x->net); + return nlk->portid != x->portid || + !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); +} + +static void netlink_compare_arg_init(struct netlink_compare_arg *arg, + struct net *net, u32 portid) +{ + memset(arg, 0, sizeof(*arg)); + write_pnet(&arg->pnet, net); + arg->portid = portid; } static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, struct net *net) { - struct netlink_compare_arg arg = { - .net = net, - .portid = portid, - }; + struct netlink_compare_arg arg; - return rhashtable_lookup_compare(&table->hash, &portid, - &netlink_compare, &arg); + netlink_compare_arg_init(&arg, net, portid); + return rhashtable_lookup_fast(&table->hash, &arg, + netlink_rhashtable_params); } -static bool __netlink_insert(struct netlink_table *table, struct sock *sk) +static int __netlink_insert(struct netlink_table *table, struct sock *sk) { - struct netlink_compare_arg arg = { - .net = sock_net(sk), - .portid = nlk_sk(sk)->portid, - }; + struct netlink_compare_arg arg; - return rhashtable_lookup_compare_insert(&table->hash, - &nlk_sk(sk)->node, - &netlink_compare, &arg); + netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); + return rhashtable_lookup_insert_key(&table->hash, &arg, + &nlk_sk(sk)->node, + netlink_rhashtable_params); } static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) @@ -1066,9 +1077,10 @@ static int netlink_insert(struct sock *sk, u32 portid) nlk_sk(sk)->portid = portid; sock_hold(sk); - err = 0; - if (!__netlink_insert(table, sk)) { - err = -EADDRINUSE; + err = __netlink_insert(table, sk); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; sock_put(sk); } @@ -1082,7 +1094,8 @@ static void netlink_remove(struct sock *sk) struct netlink_table *table; table = &nl_table[sk->sk_protocol]; - if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { + if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, + netlink_rhashtable_params)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } @@ -2256,8 +2269,7 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } -static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2346,8 +2358,7 @@ out: return err; } -static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, +static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct scm_cookie scm; @@ -3116,19 +3127,28 @@ static struct pernet_operations __net_initdata netlink_net_ops = { .exit = netlink_net_exit, }; +static inline u32 netlink_hash(const void *data, u32 len, u32 seed) +{ + const struct netlink_sock *nlk = data; + struct netlink_compare_arg arg; + + netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); + return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); +} + +static const struct rhashtable_params netlink_rhashtable_params = { + .head_offset = offsetof(struct netlink_sock, node), + .key_len = netlink_compare_arg_len, + .obj_hashfn = netlink_hash, + .obj_cmpfn = netlink_compare, + .max_size = 65536, + .automatic_shrinking = true, +}; + static int __init netlink_proto_init(void) { int i; int err = proto_register(&netlink_proto, 0); - struct rhashtable_params ht_params = { - .head_offset = offsetof(struct netlink_sock, node), - .key_offset = offsetof(struct netlink_sock, portid), - .key_len = sizeof(u32), /* portid */ - .hashfn = jhash, - .max_shift = 16, /* 64K */ - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, - }; if (err != 0) goto out; @@ -3140,7 +3160,8 @@ static int __init netlink_proto_init(void) goto panic; for (i = 0; i < MAX_LINKS; i++) { - if (rhashtable_init(&nl_table[i].hash, &ht_params) < 0) { + if (rhashtable_init(&nl_table[i].hash, + &netlink_rhashtable_params) < 0) { while (--i > 0) rhashtable_destroy(&nl_table[i].hash); kfree(nl_table); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 69f1d5e9959f..b987fd56c3c5 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1023,8 +1023,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) return 1; } -static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); @@ -1133,8 +1132,8 @@ out: return err; } -static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 6ae063cebf7d..988f542481a8 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -65,36 +65,6 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) return 1; } -#ifdef CONFIG_INET - -static int nr_rebuild_header(struct sk_buff *skb) -{ - unsigned char *bp = skb->data; - - if (arp_find(bp + 7, skb)) - return 1; - - bp[6] &= ~AX25_CBIT; - bp[6] &= ~AX25_EBIT; - bp[6] |= AX25_SSSID_SPARE; - bp += AX25_ADDR_LEN; - - bp[6] &= ~AX25_CBIT; - bp[6] |= AX25_EBIT; - bp[6] |= AX25_SSSID_SPARE; - - return 0; -} - -#else - -static int nr_rebuild_header(struct sk_buff *skb) -{ - return 1; -} - -#endif - static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) @@ -188,7 +158,6 @@ static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev) static const struct header_ops nr_header_ops = { .create = nr_header, - .rebuild= nr_rebuild_header, }; static const struct net_device_ops nr_netdev_ops = { diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index e181e290427c..9578bd6a4f3e 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -750,8 +750,8 @@ error: return ret; } -static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -793,8 +793,8 @@ static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return nfc_llcp_send_i_frame(llcp_sock, msg, len); } -static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 373e138c0ab6..82b4e8024778 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -211,8 +211,7 @@ static void rawsock_tx_work(struct work_struct *work) } } -static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nfc_dev *dev = nfc_rawsock(sk)->dev; @@ -248,8 +247,8 @@ static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, return len; } -static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index b7d818c59423..ed6b0f8dd1bb 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -6,6 +6,7 @@ config OPENVSWITCH tristate "Open vSwitch" depends on INET select LIBCRC32C + select MPLS select NET_MPLS_GSO ---help--- Open vSwitch is a multilayer Ethernet switch targeted at virtualized diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ae5e77cdc0ca..096c6276e6b9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -203,7 +203,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); - release_net(ovs_dp_get_net(dp)); kfree(dp->ports); kfree(dp); } @@ -1501,7 +1500,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (dp == NULL) goto err_free_reply; - ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); + ovs_dp_set_net(dp, sock_net(skb->sk)); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); @@ -1575,7 +1574,6 @@ err_destroy_percpu: err_destroy_table: ovs_flow_tbl_destroy(&dp->table); err_free_dp: - release_net(ovs_dp_get_net(dp)); kfree(dp); err_free_reply: kfree_skb(reply); @@ -2194,14 +2192,55 @@ static int __net_init ovs_init_net(struct net *net) return 0; } -static void __net_exit ovs_exit_net(struct net *net) +static void __net_exit list_vports_from_net(struct net *net, struct net *dnet, + struct list_head *head) { - struct datapath *dp, *dp_next; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + struct datapath *dp; + + list_for_each_entry(dp, &ovs_net->dps, list_node) { + int i; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + + hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) { + struct netdev_vport *netdev_vport; + + if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL) + continue; + + netdev_vport = netdev_vport_priv(vport); + if (dev_net(netdev_vport->dev) == dnet) + list_add(&vport->detach_list, head); + } + } + } +} + +static void __net_exit ovs_exit_net(struct net *dnet) +{ + struct datapath *dp, *dp_next; + struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id); + struct vport *vport, *vport_next; + struct net *net; + LIST_HEAD(head); ovs_lock(); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); + + rtnl_lock(); + for_each_net(net) + list_vports_from_net(net, dnet, &head); + rtnl_unlock(); + + /* Detach all vports from given namespace. */ + list_for_each_entry_safe(vport, vport_next, &head, detach_list) { + list_del(&vport->detach_list); + ovs_dp_detach_port(vport); + } + ovs_unlock(); cancel_work_sync(&ovs_net->dp_notify_work); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 3ece94563079..4ec4a480b147 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -84,10 +84,8 @@ struct datapath { /* Stats. */ struct dp_stats_percpu __percpu *stats_percpu; -#ifdef CONFIG_NET_NS /* Network namespace ref. */ - struct net *net; -#endif + possible_net_t net; u32 user_features; }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 216f20b90aa5..c691b1a1eee0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -535,11 +535,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, - nla_get_be32(a), is_mask); + nla_get_in_addr(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, - nla_get_be32(a), is_mask); + nla_get_in_addr(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_TOS: SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, @@ -648,10 +648,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; if (output->ipv4_src && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, + output->ipv4_src)) return -EMSGSIZE; if (output->ipv4_dst && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, + output->ipv4_dst)) return -EMSGSIZE; if (output->ipv4_tos && nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) @@ -2253,14 +2255,20 @@ static int masked_set_action_to_set_action_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); + struct nlattr *nla; size_t key_len = nla_len(ovs_key) / 2; /* Revert the conversion we did from a non-masked set action to * masked set action. */ - if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) + nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!nla) return -EMSGSIZE; + if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index f8ae295fb001..bc85331a6c60 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -103,6 +103,7 @@ struct vport_portids { * @ops: Class structure. * @percpu_stats: Points to per-CPU statistics used and maintained by vport * @err_stats: Points to error statistics used and maintained by vport + * @detach_list: list used for detaching vport in net-exit call. */ struct vport { struct rcu_head rcu; @@ -117,6 +118,7 @@ struct vport { struct pcpu_sw_netstats __percpu *percpu_stats; struct vport_err_stats err_stats; + struct list_head detach_list; }; /** diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9c28cec1a083..5102c3cc4eec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -216,10 +216,16 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *, static void packet_flush_mclist(struct sock *sk); struct packet_skb_cb { - unsigned int origlen; union { struct sockaddr_pkt pkt; - struct sockaddr_ll ll; + union { + /* Trick: alias skb original length with + * ll.sll_family and ll.protocol in order + * to save room. + */ + unsigned int origlen; + struct sockaddr_ll ll; + }; } sa; }; @@ -698,6 +704,10 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { if (!frozen) { + if (!BLOCK_NUM_PKTS(pbd)) { + /* An empty block. Just refresh the timer. */ + goto refresh_timer; + } prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); if (!prb_dispatch_next_block(pkc, po)) goto refresh_timer; @@ -798,7 +808,11 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; } else { - /* Ok, we tmo'd - so get the current time */ + /* Ok, we tmo'd - so get the current time. + * + * It shouldn't really happen as we don't close empty + * blocks. See prb_retire_rx_blk_timer_expired(). + */ struct timespec ts; getnstimeofday(&ts); h1->ts_last_pkt.ts_sec = ts.tv_sec; @@ -1349,14 +1363,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, return 0; } + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { + skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); + if (!skb) + return 0; + } switch (f->type) { case PACKET_FANOUT_HASH: default: - if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { - skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); - if (!skb) - return 0; - } idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: @@ -1600,8 +1614,8 @@ oom: * protocol layers and you must therefore supply it with a complete frame */ -static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); @@ -1810,13 +1824,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb = nskb; } - BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 > - sizeof(skb->cb)); + sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8); sll = &PACKET_SKB_CB(skb)->sa.ll; - sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; - sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; if (unlikely(po->origdev)) sll->sll_ifindex = orig_dev->ifindex; @@ -1825,7 +1836,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_halen = dev_parse_header(skb, sll->sll_addr); - PACKET_SKB_CB(skb)->origlen = skb->len; + /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg(). + * Use their space for storing the original skb length. + */ + PACKET_SKB_CB(skb)->sa.origlen = skb->len; if (pskb_trim(skb, snaplen)) goto drop_n_acct; @@ -1839,7 +1853,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; - skb->dropcount = atomic_read(&sk->sk_drops); + sock_skb_set_dropcount(sk, skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); @@ -1902,14 +1916,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } } - if (skb->ip_summed == CHECKSUM_PARTIAL) - status |= TP_STATUS_CSUMNOTREADY; - snaplen = skb->len; res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + status |= TP_STATUS_CSUMNOTREADY; + else if (skb->pkt_type != PACKET_OUTGOING && + (skb->ip_summed == CHECKSUM_COMPLETE || + skb_csum_unnecessary(skb))) + status |= TP_STATUS_CSUM_VALID; + if (snaplen > res) snaplen = res; @@ -2595,8 +2614,7 @@ out: return err; } -static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); @@ -2876,13 +2894,14 @@ out: * If necessary we block. */ -static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; int vnet_hdr_len = 0; + unsigned int origlen = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) @@ -2982,6 +3001,15 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if (err) goto out_free; + if (sock->type != SOCK_PACKET) { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + + /* Original length was stored in sockaddr_ll fields */ + origlen = PACKET_SKB_CB(skb)->sa.origlen; + sll->sll_family = AF_PACKET; + sll->sll_protocol = skb->protocol; + } + sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { @@ -2993,6 +3021,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = sizeof(struct sockaddr_pkt); } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); } @@ -3006,7 +3035,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_status = TP_STATUS_USER; if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; - aux.tp_len = PACKET_SKB_CB(skb)->origlen; + else if (skb->pkt_type != PACKET_OUTGOING && + (skb->ip_summed == CHECKSUM_COMPLETE || + skb_csum_unnecessary(skb))) + aux.tp_status |= TP_STATUS_CSUM_VALID; + + aux.tp_len = origlen; aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); @@ -3115,11 +3149,18 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, return 0; } -static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) +static void packet_dev_mclist_delete(struct net_device *dev, + struct packet_mclist **mlp) { - for ( ; i; i = i->next) { - if (i->ifindex == dev->ifindex) - packet_dev_mc(dev, i, what); + struct packet_mclist *ml; + + while ((ml = *mlp) != NULL) { + if (ml->ifindex == dev->ifindex) { + packet_dev_mc(dev, ml, -1); + *mlp = ml->next; + kfree(ml); + } else + mlp = &ml->next; } } @@ -3196,12 +3237,11 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) packet_dev_mc(dev, ml, -1); kfree(ml); } - rtnl_unlock(); - return 0; + break; } } rtnl_unlock(); - return -EADDRNOTAVAIL; + return 0; } static void packet_flush_mclist(struct sock *sk) @@ -3551,7 +3591,7 @@ static int packet_notifier(struct notifier_block *this, switch (msg) { case NETDEV_UNREGISTER: if (po->mclist) - packet_dev_mclist(dev, po->mclist, -1); + packet_dev_mclist_delete(dev, &po->mclist); /* fallthrough */ case NETDEV_DOWN: diff --git a/net/packet/internal.h b/net/packet/internal.h index cdddf6a30399..fe6e20caea1d 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -74,9 +74,7 @@ extern struct mutex fanout_mutex; #define PACKET_FANOUT_MAX 256 struct packet_fanout { -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; unsigned int num_members; u16 id; u8 type; diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 26054b4b467c..5e710435ffa9 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -83,8 +83,7 @@ static int pn_init(struct sock *sk) return 0; } -static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; @@ -125,9 +124,8 @@ static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, return (err >= 0) ? len : err; } -static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 5d3f2b7507d4..6de2aeb98a1f 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1118,8 +1118,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) } -static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int pep_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct pep_sock *pn = pep_sk(sk); struct sk_buff *skb; @@ -1246,9 +1245,8 @@ struct sk_buff *pep_read(struct sock *sk) return skb; } -static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb; int err; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 008214a3d5eb..d575ef4e9aa6 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -425,15 +425,15 @@ out: return err; } -static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int pn_socket_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { struct sock *sk = sock->sk; if (pn_socket_autobind(sock)) return -EAGAIN; - return sk->sk_prot->sendmsg(iocb, sk, m, total_len); + return sk->sk_prot->sendmsg(sk, m, total_len); } const struct proto_ops phonet_dgram_ops = { diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index a817705ce2d0..dba8d0864f18 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -88,7 +88,9 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, int *unpinned); static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); -static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) +static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst, + struct rds_iw_device **rds_iwdev, + struct rdma_cm_id **cm_id) { struct rds_iw_device *iwdev; struct rds_iw_cm_id *i_cm_id; @@ -112,15 +114,15 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd src_addr->sin_port, dst_addr->sin_addr.s_addr, dst_addr->sin_port, - rs->rs_bound_addr, - rs->rs_bound_port, - rs->rs_conn_addr, - rs->rs_conn_port); + src->sin_addr.s_addr, + src->sin_port, + dst->sin_addr.s_addr, + dst->sin_port); #ifdef WORKING_TUPLE_DETECTION - if (src_addr->sin_addr.s_addr == rs->rs_bound_addr && - src_addr->sin_port == rs->rs_bound_port && - dst_addr->sin_addr.s_addr == rs->rs_conn_addr && - dst_addr->sin_port == rs->rs_conn_port) { + if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr && + src_addr->sin_port == src->sin_port && + dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr && + dst_addr->sin_port == dst->sin_port) { #else /* FIXME - needs to compare the local and remote * ipaddr/port tuple, but the ipaddr is the only @@ -128,7 +130,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd * zero'ed. It doesn't appear to be properly populated * during connection setup... */ - if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) { + if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) { #endif spin_unlock_irq(&iwdev->spinlock); *rds_iwdev = iwdev; @@ -180,19 +182,13 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i { struct sockaddr_in *src_addr, *dst_addr; struct rds_iw_device *rds_iwdev_old; - struct rds_sock rs; struct rdma_cm_id *pcm_id; int rc; src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr; dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr; - rs.rs_bound_addr = src_addr->sin_addr.s_addr; - rs.rs_bound_port = src_addr->sin_port; - rs.rs_conn_addr = dst_addr->sin_addr.s_addr; - rs.rs_conn_port = dst_addr->sin_port; - - rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id); + rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id); if (rc) rds_iw_remove_cm_id(rds_iwdev, cm_id); @@ -598,9 +594,17 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, struct rds_iw_device *rds_iwdev; struct rds_iw_mr *ibmr = NULL; struct rdma_cm_id *cm_id; + struct sockaddr_in src = { + .sin_addr.s_addr = rs->rs_bound_addr, + .sin_port = rs->rs_bound_port, + }; + struct sockaddr_in dst = { + .sin_addr.s_addr = rs->rs_conn_addr, + .sin_port = rs->rs_conn_port, + }; int ret; - ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id); + ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id); if (ret || !cm_id) { ret = -ENODEV; goto out; diff --git a/net/rds/rds.h b/net/rds/rds.h index c2a5eef41343..c3f2855c3d84 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -702,8 +702,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, void rds_inc_put(struct rds_incoming *inc); void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, struct rds_incoming *inc, gfp_t gfp); -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int msg_flags); +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int msg_flags); void rds_clear_recv_queue(struct rds_sock *rs); int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); void rds_inc_info_copy(struct rds_incoming *inc, @@ -711,8 +711,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, __be32 saddr, __be32 daddr, int flip); /* send.c */ -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t payload_len); +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); void rds_send_reset(struct rds_connection *conn); int rds_send_xmit(struct rds_connection *conn); struct sockaddr_in; diff --git a/net/rds/recv.c b/net/rds/recv.c index f9ec1acd801c..a00462b0d01d 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -395,8 +395,8 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg) return 0; } -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int msg_flags) +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int msg_flags) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rds/send.c b/net/rds/send.c index 42f65d4305c8..44672befc0ee 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -920,8 +920,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t payload_len) +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 43bac7c4dd9e..8ae603069a1a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1046,8 +1046,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros return 1; } -static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); @@ -1211,8 +1210,8 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, } -static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 50005888be57..369ca81a8c5d 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -41,6 +41,9 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev, { unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2); + if (daddr) + memcpy(buff + 7, daddr, dev->addr_len); + *buff++ = ROSE_GFI | ROSE_Q_BIT; *buff++ = 0x00; *buff++ = ROSE_DATA; @@ -53,43 +56,6 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev, return -37; } -static int rose_rebuild_header(struct sk_buff *skb) -{ -#ifdef CONFIG_INET - struct net_device *dev = skb->dev; - struct net_device_stats *stats = &dev->stats; - unsigned char *bp = (unsigned char *)skb->data; - struct sk_buff *skbn; - unsigned int len; - - if (arp_find(bp + 7, skb)) { - return 1; - } - - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { - kfree_skb(skb); - return 1; - } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - kfree_skb(skb); - - len = skbn->len; - - if (!rose_route_frame(skbn, NULL)) { - kfree_skb(skbn); - stats->tx_errors++; - return 1; - } - - stats->tx_packets++; - stats->tx_bytes += len; -#endif - return 1; -} - static int rose_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; @@ -134,19 +100,26 @@ static int rose_close(struct net_device *dev) static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; + unsigned int len = skb->len; if (!netif_running(dev)) { printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n"); return NETDEV_TX_BUSY; } - dev_kfree_skb(skb); - stats->tx_errors++; + + if (!rose_route_frame(skb, NULL)) { + dev_kfree_skb(skb); + stats->tx_errors++; + return NETDEV_TX_OK; + } + + stats->tx_packets++; + stats->tx_bytes += len; return NETDEV_TX_OK; } static const struct header_ops rose_header_ops = { .create = rose_header, - .rebuild = rose_rebuild_header, }; static const struct net_device_ops rose_netdev_ops = { diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7b1670489638..0095b9a0b779 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -441,8 +441,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr, * - sends a call data packet * - may send an abort (abort code in control data) */ -static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len) +static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { struct rxrpc_transport *trans; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); @@ -482,7 +481,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, switch (rx->sk.sk_state) { case RXRPC_SERVER_LISTENING: if (!m->msg_name) { - ret = rxrpc_server_sendmsg(iocb, rx, m, len); + ret = rxrpc_server_sendmsg(rx, m, len); break; } case RXRPC_SERVER_BOUND: @@ -492,7 +491,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, break; } case RXRPC_CLIENT_CONNECTED: - ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len); + ret = rxrpc_client_sendmsg(rx, trans, m, len); break; default: ret = -ENOTCONN; diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index c6be17a959a6..e0547f521f20 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -218,7 +218,8 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_header *hdr; struct sk_buff *txb; unsigned long *p_txb, resend_at; - int loop, stop; + bool stop; + int loop; u8 resend; _enter("{%d,%d,%d,%d},", @@ -226,7 +227,7 @@ static void rxrpc_resend(struct rxrpc_call *call) atomic_read(&call->sequence), CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - stop = 0; + stop = false; resend = 0; resend_at = 0; @@ -255,11 +256,11 @@ static void rxrpc_resend(struct rxrpc_call *call) _proto("Tx DATA %%%u { #%d }", ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); if (rxrpc_send_packet(call->conn->trans, txb) < 0) { - stop = 0; + stop = true; sp->resend_at = jiffies + 3; } else { sp->resend_at = - jiffies + rxrpc_resend_timeout * HZ; + jiffies + rxrpc_resend_timeout; } } diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 5394b6be46ec..0610efa83d72 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -42,7 +42,8 @@ void rxrpc_UDP_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } - if (!skb->len) { + serr = SKB_EXT_ERR(skb); + if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); kfree_skb(skb); return; @@ -50,7 +51,6 @@ void rxrpc_UDP_error_report(struct sock *sk) rxrpc_new_skb(skb); - serr = SKB_EXT_ERR(skb); addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); port = serr->port; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ba9fd36d3f15..2fc1e659e5c9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -548,10 +548,9 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, extern unsigned rxrpc_resend_timeout; int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); -int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, - struct rxrpc_transport *, struct msghdr *, size_t); -int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *, - size_t); +int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *, + struct msghdr *, size_t); +int rxrpc_server_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* * ar-peer.c @@ -572,8 +571,7 @@ extern const struct file_operations rxrpc_connection_seq_fops; * ar-recvmsg.c */ void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); -int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, - int); +int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * ar-security.c diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 8331c95e1522..09f584566e23 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -23,8 +23,7 @@ */ unsigned rxrpc_resend_timeout = 4 * HZ; -static int rxrpc_send_data(struct kiocb *iocb, - struct rxrpc_sock *rx, +static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len); @@ -129,9 +128,8 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) * - caller holds the socket locked * - the socket may be either a client socket or a server socket */ -int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, - struct rxrpc_transport *trans, struct msghdr *msg, - size_t len) +int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans, + struct msghdr *msg, size_t len) { struct rxrpc_conn_bundle *bundle; enum rxrpc_command cmd; @@ -191,7 +189,7 @@ int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, /* request phase complete for this client call */ ret = -EPROTO; } else { - ret = rxrpc_send_data(iocb, rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len); } rxrpc_put_call(call); @@ -232,7 +230,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - ret = rxrpc_send_data(NULL, call->socket, call, msg, len); + ret = rxrpc_send_data(call->socket, call, msg, len); } release_sock(&call->socket->sk); @@ -271,8 +269,7 @@ EXPORT_SYMBOL(rxrpc_kernel_abort_call); * send a message through a server socket * - caller holds the socket locked */ -int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, - struct msghdr *msg, size_t len) +int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) { enum rxrpc_command cmd; struct rxrpc_call *call; @@ -313,7 +310,7 @@ int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, break; } - ret = rxrpc_send_data(iocb, rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len); break; case RXRPC_CMD_SEND_ABORT: @@ -520,8 +517,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, * - must be called in process context * - caller holds the socket locked */ -static int rxrpc_send_data(struct kiocb *iocb, - struct rxrpc_sock *rx, +static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len) { diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4575485ad1b4..b92beded7459 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -43,8 +43,8 @@ void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call) * - we need to be careful about two or more threads calling recvmsg * simultaneously */ -int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct rxrpc_skb_priv *sp; struct rxrpc_call *call = NULL, *continue_call = NULL; @@ -87,7 +87,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) { /* nothing remains on the queue */ if (copied && - (msg->msg_flags & MSG_PEEK || timeo == 0)) + (flags & MSG_PEEK || timeo == 0)) goto out; /* wait for a message to turn up */ @@ -150,7 +150,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, &call->conn->trans->peer->srx, len); msg->msg_namelen = len; } - sock_recv_ts_and_drops(msg, &rx->sk, skb); + sock_recv_timestamp(msg, &rx->sk, skb); } /* receive the message */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 899d0319f2b2..2274e723a3df 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -348,7 +348,7 @@ config NET_SCH_PLUG comment "Classification" config NET_CLS - boolean + bool config NET_CLS_BASIC tristate "Elementary classification (BASIC)" diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 82c5d7fc1988..4d2cede17468 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -13,71 +13,140 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/filter.h> +#include <linux/bpf.h> + #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> -#define BPF_TAB_MASK 15 +#define BPF_TAB_MASK 15 +#define ACT_BPF_NAME_LEN 256 + +struct tcf_bpf_cfg { + struct bpf_prog *filter; + struct sock_filter *bpf_ops; + char *bpf_name; + u32 bpf_fd; + u16 bpf_num_ops; +}; -static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { - struct tcf_bpf *b = a->priv; - int action; - int filter_res; - - spin_lock(&b->tcf_lock); - b->tcf_tm.lastuse = jiffies; - bstats_update(&b->tcf_bstats, skb); - action = b->tcf_action; - - filter_res = BPF_PROG_RUN(b->filter, skb); - if (filter_res == 0) { - /* Return code 0 from the BPF program - * is being interpreted as a drop here. - */ - action = TC_ACT_SHOT; - b->tcf_qstats.drops++; + struct tcf_bpf *prog = act->priv; + int action, filter_res; + + spin_lock(&prog->tcf_lock); + + prog->tcf_tm.lastuse = jiffies; + bstats_update(&prog->tcf_bstats, skb); + + /* Needed here for accessing maps. */ + rcu_read_lock(); + filter_res = BPF_PROG_RUN(prog->filter, skb); + rcu_read_unlock(); + + /* A BPF program may overwrite the default action opcode. + * Similarly as in cls_bpf, if filter_res == -1 we use the + * default action specified from tc. + * + * In case a different well-known TC_ACT opcode has been + * returned, it will overwrite the default one. + * + * For everything else that is unkown, TC_ACT_UNSPEC is + * returned. + */ + switch (filter_res) { + case TC_ACT_PIPE: + case TC_ACT_RECLASSIFY: + case TC_ACT_OK: + action = filter_res; + break; + case TC_ACT_SHOT: + action = filter_res; + prog->tcf_qstats.drops++; + break; + case TC_ACT_UNSPEC: + action = prog->tcf_action; + break; + default: + action = TC_ACT_UNSPEC; + break; } - spin_unlock(&b->tcf_lock); + spin_unlock(&prog->tcf_lock); return action; } -static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, +static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog) +{ + return !prog->bpf_ops; +} + +static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, + struct sk_buff *skb) +{ + struct nlattr *nla; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops)) + return -EMSGSIZE; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops * + sizeof(struct sock_filter)); + if (nla == NULL) + return -EMSGSIZE; + + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + + return 0; +} + +static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, + struct sk_buff *skb) +{ + if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd)) + return -EMSGSIZE; + + if (prog->bpf_name && + nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) + return -EMSGSIZE; + + return 0; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) { unsigned char *tp = skb_tail_pointer(skb); - struct tcf_bpf *b = a->priv; + struct tcf_bpf *prog = act->priv; struct tc_act_bpf opt = { - .index = b->tcf_index, - .refcnt = b->tcf_refcnt - ref, - .bindcnt = b->tcf_bindcnt - bind, - .action = b->tcf_action, + .index = prog->tcf_index, + .refcnt = prog->tcf_refcnt - ref, + .bindcnt = prog->tcf_bindcnt - bind, + .action = prog->tcf_action, }; - struct tcf_t t; - struct nlattr *nla; + struct tcf_t tm; + int ret; if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) - goto nla_put_failure; - - nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * - sizeof(struct sock_filter)); - if (!nla) + if (tcf_bpf_is_ebpf(prog)) + ret = tcf_bpf_dump_ebpf_info(prog, skb); + else + ret = tcf_bpf_dump_bpf_info(prog, skb); + if (ret) goto nla_put_failure; - memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install); + tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); + tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); - t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(b->tcf_tm.expires); - if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) goto nla_put_failure; + return skb->len; nla_put_failure: @@ -87,36 +156,21 @@ nla_put_failure: static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, + [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; -static int tcf_bpf_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) { - struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; - struct tc_act_bpf *parm; - struct tcf_bpf *b; - u16 bpf_size, bpf_num_ops; struct sock_filter *bpf_ops; - struct sock_fprog_kern tmp; + struct sock_fprog_kern fprog_tmp; struct bpf_prog *fp; + u16 bpf_size, bpf_num_ops; int ret; - if (!nla) - return -EINVAL; - - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); - if (ret < 0) - return ret; - - if (!tb[TCA_ACT_BPF_PARMS] || - !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) - return -EINVAL; - parm = nla_data(tb[TCA_ACT_BPF_PARMS]); - bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) return -EINVAL; @@ -126,68 +180,165 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return -EINVAL; bpf_ops = kzalloc(bpf_size, GFP_KERNEL); - if (!bpf_ops) + if (bpf_ops == NULL) return -ENOMEM; memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); - tmp.len = bpf_num_ops; - tmp.filter = bpf_ops; + fprog_tmp.len = bpf_num_ops; + fprog_tmp.filter = bpf_ops; - ret = bpf_prog_create(&fp, &tmp); - if (ret) - goto free_bpf_ops; + ret = bpf_prog_create(&fp, &fprog_tmp); + if (ret < 0) { + kfree(bpf_ops); + return ret; + } - if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); - if (ret) + cfg->bpf_ops = bpf_ops; + cfg->bpf_num_ops = bpf_num_ops; + cfg->filter = fp; + + return 0; +} + +static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) +{ + struct bpf_prog *fp; + char *name = NULL; + u32 bpf_fd; + + bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); + + fp = bpf_prog_get(bpf_fd); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + if (fp->type != BPF_PROG_TYPE_SCHED_ACT) { + bpf_prog_put(fp); + return -EINVAL; + } + + if (tb[TCA_ACT_BPF_NAME]) { + name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), + nla_len(tb[TCA_ACT_BPF_NAME]), + GFP_KERNEL); + if (!name) { + bpf_prog_put(fp); + return -ENOMEM; + } + } + + cfg->bpf_fd = bpf_fd; + cfg->bpf_name = name; + cfg->filter = fp; + + return 0; +} + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *act, + int replace, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *prog; + struct tcf_bpf_cfg cfg; + bool is_bpf, is_ebpf; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; + is_ebpf = tb[TCA_ACT_BPF_FD]; + + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || + !tb[TCA_ACT_BPF_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + memset(&cfg, 0, sizeof(cfg)); + + ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : + tcf_bpf_init_from_efd(tb, &cfg); + if (ret < 0) + return ret; + + if (!tcf_hash_check(parm->index, act, bind)) { + ret = tcf_hash_create(parm->index, est, act, + sizeof(*prog), bind); + if (ret < 0) goto destroy_fp; ret = ACT_P_CREATED; } else { + /* Don't override defaults. */ if (bind) goto destroy_fp; - tcf_hash_release(a, bind); - if (!ovr) { + + tcf_hash_release(act, bind); + if (!replace) { ret = -EEXIST; goto destroy_fp; } } - b = to_bpf(a); - spin_lock_bh(&b->tcf_lock); - b->tcf_action = parm->action; - b->bpf_num_ops = bpf_num_ops; - b->bpf_ops = bpf_ops; - b->filter = fp; - spin_unlock_bh(&b->tcf_lock); + prog = to_bpf(act); + spin_lock_bh(&prog->tcf_lock); + + prog->bpf_ops = cfg.bpf_ops; + prog->bpf_name = cfg.bpf_name; + + if (cfg.bpf_num_ops) + prog->bpf_num_ops = cfg.bpf_num_ops; + if (cfg.bpf_fd) + prog->bpf_fd = cfg.bpf_fd; + + prog->tcf_action = parm->action; + prog->filter = cfg.filter; + + spin_unlock_bh(&prog->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(a); + tcf_hash_insert(act); + return ret; destroy_fp: - bpf_prog_destroy(fp); -free_bpf_ops: - kfree(bpf_ops); + if (is_ebpf) + bpf_prog_put(cfg.filter); + else + bpf_prog_destroy(cfg.filter); + + kfree(cfg.bpf_ops); + kfree(cfg.bpf_name); + return ret; } -static void tcf_bpf_cleanup(struct tc_action *a, int bind) +static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - struct tcf_bpf *b = a->priv; + const struct tcf_bpf *prog = act->priv; - bpf_prog_destroy(b->filter); + if (tcf_bpf_is_ebpf(prog)) + bpf_prog_put(prog->filter); + else + bpf_prog_destroy(prog->filter); } -static struct tc_action_ops act_bpf_ops = { - .kind = "bpf", - .type = TCA_ACT_BPF, - .owner = THIS_MODULE, - .act = tcf_bpf, - .dump = tcf_bpf_dump, - .cleanup = tcf_bpf_cleanup, - .init = tcf_bpf_init, +static struct tc_action_ops act_bpf_ops __read_mostly = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, }; static int __init bpf_init_module(void) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index baef987fe2c0..8b0470e418dc 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -286,7 +286,7 @@ replay: RCU_INIT_POINTER(*back, next); tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); - tcf_destroy(tp); + tcf_destroy(tp, true); err = 0; goto errout; } @@ -301,14 +301,20 @@ replay: err = -EEXIST; if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_destroy(tp); + tcf_destroy(tp, true); goto errout; } break; case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); - if (err == 0) + if (err == 0) { tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + if (tcf_destroy(tp, false)) { + struct tcf_proto *next = rtnl_dereference(tp->next); + + RCU_INIT_POINTER(*back, next); + } + } goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); @@ -329,7 +335,7 @@ replay: tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); } else { if (tp_created) - tcf_destroy(tp); + tcf_destroy(tp, true); } errout: diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index fc399db86f11..0b8c3ace671f 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -96,11 +96,14 @@ static void basic_delete_filter(struct rcu_head *head) kfree(f); } -static void basic_destroy(struct tcf_proto *tp) +static bool basic_destroy(struct tcf_proto *tp, bool force) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; + if (!force && !list_empty(&head->flist)) + return false; + list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); @@ -108,6 +111,7 @@ static void basic_destroy(struct tcf_proto *tp) } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static int basic_delete(struct tcf_proto *tp, unsigned long arg) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5f3ee9e4b5bf..5c4171c5d2bd 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -16,6 +16,8 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <linux/filter.h> +#include <linux/bpf.h> + #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> @@ -24,6 +26,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); MODULE_DESCRIPTION("TC BPF based classifier"); +#define CLS_BPF_NAME_LEN 256 + struct cls_bpf_head { struct list_head plist; u32 hgen; @@ -32,18 +36,24 @@ struct cls_bpf_head { struct cls_bpf_prog { struct bpf_prog *filter; - struct sock_filter *bpf_ops; - struct tcf_exts exts; - struct tcf_result res; struct list_head link; + struct tcf_result res; + struct tcf_exts exts; u32 handle; - u16 bpf_num_ops; + union { + u32 bpf_fd; + u16 bpf_num_ops; + }; + struct sock_filter *bpf_ops; + const char *bpf_name; struct tcf_proto *tp; struct rcu_head rcu; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, + [TCA_BPF_FD] = { .type = NLA_U32 }, + [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -54,8 +64,10 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_bpf_head *head = rcu_dereference_bh(tp->root); struct cls_bpf_prog *prog; - int ret; + int ret = -1; + /* Needed here for accessing maps. */ + rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { int filter_res = BPF_PROG_RUN(prog->filter, skb); @@ -70,10 +82,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (ret < 0) continue; - return ret; + break; } + rcu_read_unlock(); - return -1; + return ret; +} + +static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) +{ + return !prog->bpf_ops; } static int cls_bpf_init(struct tcf_proto *tp) @@ -94,8 +112,12 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); - bpf_prog_destroy(prog->filter); + if (cls_bpf_is_ebpf(prog)) + bpf_prog_put(prog->filter); + else + bpf_prog_destroy(prog->filter); + kfree(prog->bpf_name); kfree(prog->bpf_ops); kfree(prog); } @@ -114,14 +136,18 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); + return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp) +static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog, *tmp; + if (!force && !list_empty(&head->plist)) + return false; + list_for_each_entry_safe(prog, tmp, &head->plist, link) { list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); @@ -130,6 +156,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) @@ -151,69 +178,121 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) return ret; } -static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, - struct cls_bpf_prog *prog, - unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) +static int cls_bpf_prog_from_ops(struct nlattr **tb, + struct cls_bpf_prog *prog, u32 classid) { struct sock_filter *bpf_ops; - struct tcf_exts exts; - struct sock_fprog_kern tmp; + struct sock_fprog_kern fprog_tmp; struct bpf_prog *fp; u16 bpf_size, bpf_num_ops; - u32 classid; int ret; - if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID]) - return -EINVAL; - - tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); - if (ret < 0) - return ret; - - classid = nla_get_u32(tb[TCA_BPF_CLASSID]); bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]); - if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) { - ret = -EINVAL; - goto errout; - } + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; bpf_size = bpf_num_ops * sizeof(*bpf_ops); - if (bpf_size != nla_len(tb[TCA_BPF_OPS])) { - ret = -EINVAL; - goto errout; - } + if (bpf_size != nla_len(tb[TCA_BPF_OPS])) + return -EINVAL; bpf_ops = kzalloc(bpf_size, GFP_KERNEL); - if (bpf_ops == NULL) { - ret = -ENOMEM; - goto errout; - } + if (bpf_ops == NULL) + return -ENOMEM; memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - tmp.len = bpf_num_ops; - tmp.filter = bpf_ops; + fprog_tmp.len = bpf_num_ops; + fprog_tmp.filter = bpf_ops; - ret = bpf_prog_create(&fp, &tmp); - if (ret) - goto errout_free; + ret = bpf_prog_create(&fp, &fprog_tmp); + if (ret < 0) { + kfree(bpf_ops); + return ret; + } - prog->bpf_num_ops = bpf_num_ops; prog->bpf_ops = bpf_ops; + prog->bpf_num_ops = bpf_num_ops; + prog->bpf_name = NULL; + + prog->filter = fp; + prog->res.classid = classid; + + return 0; +} + +static int cls_bpf_prog_from_efd(struct nlattr **tb, + struct cls_bpf_prog *prog, u32 classid) +{ + struct bpf_prog *fp; + char *name = NULL; + u32 bpf_fd; + + bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); + + fp = bpf_prog_get(bpf_fd); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + if (fp->type != BPF_PROG_TYPE_SCHED_CLS) { + bpf_prog_put(fp); + return -EINVAL; + } + + if (tb[TCA_BPF_NAME]) { + name = kmemdup(nla_data(tb[TCA_BPF_NAME]), + nla_len(tb[TCA_BPF_NAME]), + GFP_KERNEL); + if (!name) { + bpf_prog_put(fp); + return -ENOMEM; + } + } + + prog->bpf_ops = NULL; + prog->bpf_fd = bpf_fd; + prog->bpf_name = name; + prog->filter = fp; prog->res.classid = classid; + return 0; +} + +static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, + struct cls_bpf_prog *prog, + unsigned long base, struct nlattr **tb, + struct nlattr *est, bool ovr) +{ + struct tcf_exts exts; + bool is_bpf, is_ebpf; + u32 classid; + int ret; + + is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; + is_ebpf = tb[TCA_BPF_FD]; + + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || + !tb[TCA_BPF_CLASSID]) + return -EINVAL; + + tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + if (ret < 0) + return ret; + + classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + + ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) : + cls_bpf_prog_from_efd(tb, prog, classid); + if (ret < 0) { + tcf_exts_destroy(&exts); + return ret; + } + tcf_bind_filter(tp, &prog->res, base); tcf_exts_change(tp, &prog->exts, &exts); return 0; -errout_free: - kfree(bpf_ops); -errout: - tcf_exts_destroy(&exts); - return ret; } static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, @@ -297,11 +376,43 @@ errout: return ret; } +static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog, + struct sk_buff *skb) +{ + struct nlattr *nla; + + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) + return -EMSGSIZE; + + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * + sizeof(struct sock_filter)); + if (nla == NULL) + return -EMSGSIZE; + + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + + return 0; +} + +static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, + struct sk_buff *skb) +{ + if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd)) + return -EMSGSIZE; + + if (prog->bpf_name && + nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) + return -EMSGSIZE; + + return 0; +} + static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *tm) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; - struct nlattr *nest, *nla; + struct nlattr *nest; + int ret; if (prog == NULL) return skb->len; @@ -314,16 +425,14 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) - goto nla_put_failure; - nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * - sizeof(struct sock_filter)); - if (nla == NULL) + if (cls_bpf_is_ebpf(prog)) + ret = cls_bpf_dump_ebpf_info(prog, skb); + else + ret = cls_bpf_dump_bpf_info(prog, skb); + if (ret) goto nla_put_failure; - memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); - if (tcf_exts_dump(skb, &prog->exts) < 0) goto nla_put_failure; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 221697ab0247..ea611b216412 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -143,14 +143,18 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp) +static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); + if (!force) + return false; + if (head) { RCU_INIT_POINTER(tp->root, NULL); call_rcu(&head->rcu, cls_cgroup_destroy_rcu); } + return true; } static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 461410394d08..a620c4e288a5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -557,17 +557,21 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp) +static bool flow_destroy(struct tcf_proto *tp, bool force) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; + if (!force && !list_empty(&head->filters)) + return false; + list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static unsigned long flow_get(struct tcf_proto *tp, u32 handle) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index a5269f76004c..715e01e5910a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -33,6 +33,7 @@ struct fw_head { u32 mask; + bool mask_set; struct fw_filter __rcu *ht[HTSIZE]; struct rcu_head rcu; }; @@ -113,6 +114,14 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle) static int fw_init(struct tcf_proto *tp) { + struct fw_head *head; + + head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + head->mask_set = false; + rcu_assign_pointer(tp->root, head); return 0; } @@ -124,14 +133,20 @@ static void fw_delete_filter(struct rcu_head *head) kfree(f); } -static void fw_destroy(struct tcf_proto *tp) +static bool fw_destroy(struct tcf_proto *tp, bool force) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; int h; if (head == NULL) - return; + return true; + + if (!force) { + for (h = 0; h < HTSIZE; h++) + if (rcu_access_pointer(head->ht[h])) + return false; + } for (h = 0; h < HTSIZE; h++) { while ((f = rtnl_dereference(head->ht[h])) != NULL) { @@ -143,6 +158,7 @@ static void fw_destroy(struct tcf_proto *tp) } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static int fw_delete(struct tcf_proto *tp, unsigned long arg) @@ -286,17 +302,11 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!handle) return -EINVAL; - if (head == NULL) { - u32 mask = 0xFFFFFFFF; + if (!head->mask_set) { + head->mask = 0xFFFFFFFF; if (tb[TCA_FW_MASK]) - mask = nla_get_u32(tb[TCA_FW_MASK]); - - head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); - if (head == NULL) - return -ENOBUFS; - head->mask = mask; - - rcu_assign_pointer(tp->root, head); + head->mask = nla_get_u32(tb[TCA_FW_MASK]); + head->mask_set = true; } f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 2ecd24688554..08a3b0a6f5ab 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -258,6 +258,13 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) static int route4_init(struct tcf_proto *tp) { + struct route4_head *head; + + head = kzalloc(sizeof(struct route4_head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + rcu_assign_pointer(tp->root, head); return 0; } @@ -270,13 +277,20 @@ route4_delete_filter(struct rcu_head *head) kfree(f); } -static void route4_destroy(struct tcf_proto *tp) +static bool route4_destroy(struct tcf_proto *tp, bool force) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; if (head == NULL) - return; + return true; + + if (!force) { + for (h1 = 0; h1 <= 256; h1++) { + if (rcu_access_pointer(head->table[h1])) + return false; + } + } for (h1 = 0; h1 <= 256; h1++) { struct route4_bucket *b; @@ -301,6 +315,7 @@ static void route4_destroy(struct tcf_proto *tp) } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static int route4_delete(struct tcf_proto *tp, unsigned long arg) @@ -484,13 +499,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; err = -ENOBUFS; - if (head == NULL) { - head = kzalloc(sizeof(struct route4_head), GFP_KERNEL); - if (head == NULL) - goto errout; - rcu_assign_pointer(tp->root, head); - } - f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); if (!f) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index edd8ade3fbc1..02fa82792dab 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -291,13 +291,20 @@ rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) kfree_rcu(f, rcu); } -static void rsvp_destroy(struct tcf_proto *tp) +static bool rsvp_destroy(struct tcf_proto *tp, bool force) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; if (data == NULL) - return; + return true; + + if (!force) { + for (h1 = 0; h1 < 256; h1++) { + if (rcu_access_pointer(data->ht[h1])) + return false; + } + } RCU_INIT_POINTER(tp->root, NULL); @@ -319,6 +326,7 @@ static void rsvp_destroy(struct tcf_proto *tp) } } kfree_rcu(data, rcu); + return true; } static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index bd49bf547a47..a557dbaf5afe 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -468,11 +468,14 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp) +static bool tcindex_destroy(struct tcf_proto *tp, bool force) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; + if (!force) + return false; + pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); walker.count = 0; walker.skip = 0; @@ -481,6 +484,7 @@ static void tcindex_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(tp->root, NULL); call_rcu(&p->rcu, __tcindex_destroy); + return true; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 09487afbfd51..cab9e9b43967 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -78,8 +78,11 @@ struct tc_u_hnode { struct tc_u_common *tp_c; int refcnt; unsigned int divisor; - struct tc_u_knode __rcu *ht[1]; struct rcu_head rcu; + /* The 'ht' field MUST be the last field in structure to allow for + * more entries allocated at end of structure. + */ + struct tc_u_knode __rcu *ht[1]; }; struct tc_u_common { @@ -460,13 +463,35 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) return -ENOENT; } -static void u32_destroy(struct tcf_proto *tp) +static bool ht_empty(struct tc_u_hnode *ht) +{ + unsigned int h; + + for (h = 0; h <= ht->divisor; h++) + if (rcu_access_pointer(ht->ht[h])) + return false; + + return true; +} + +static bool u32_destroy(struct tcf_proto *tp, bool force) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); WARN_ON(root_ht == NULL); + if (!force) { + if (root_ht) { + if (root_ht->refcnt > 1) + return false; + if (root_ht->refcnt == 1) { + if (!ht_empty(root_ht)) + return false; + } + } + } + if (root_ht && --root_ht->refcnt == 0) u32_destroy_hnode(tp, root_ht); @@ -491,6 +516,7 @@ static void u32_destroy(struct tcf_proto *tp) } tp->data = NULL; + return true; } static int u32_delete(struct tcf_proto *tp, unsigned long arg) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 6742200b1307..fbb7ebfc58c6 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -228,6 +228,7 @@ static int tcf_em_validate(struct tcf_proto *tp, * to replay the request. */ module_put(em->ops->owner); + em->ops = NULL; err = -EAGAIN; } #endif diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 243b7d169d61..ad9eed70bc8f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1858,11 +1858,15 @@ reclassify: } EXPORT_SYMBOL(tc_classify); -void tcf_destroy(struct tcf_proto *tp) +bool tcf_destroy(struct tcf_proto *tp, bool force) { - tp->ops->destroy(tp); - module_put(tp->ops->owner); - kfree_rcu(tp, rcu); + if (tp->ops->destroy(tp, force)) { + module_put(tp->ops->owner); + kfree_rcu(tp, rcu); + return true; + } + + return false; } void tcf_destroy_chain(struct tcf_proto __rcu **fl) @@ -1871,7 +1875,7 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl) while ((tp = rtnl_dereference(*fl)) != NULL) { RCU_INIT_POINTER(*fl, tp->next); - tcf_destroy(tp); + tcf_destroy(tp, true); } } EXPORT_SYMBOL(tcf_destroy_chain); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8f34b27d5775..53b7acde9aa3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1322,8 +1322,7 @@ static __init int sctp_init(void) int max_share; int order; - BUILD_BUG_ON(sizeof(struct sctp_ulpevent) > - sizeof(((struct sk_buff *) 0)->cb)); + sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index aafe94bf292e..f09de7fac2e6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -102,11 +102,6 @@ static int sctp_autobind(struct sock *sk); static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); -extern struct kmem_cache *sctp_bucket_cachep; -extern long sysctl_sctp_mem[3]; -extern int sysctl_sctp_rmem[3]; -extern int sysctl_sctp_wmem[3]; - static int sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; struct percpu_counter sctp_sockets_allocated; @@ -1586,8 +1581,7 @@ static int sctp_error(struct sock *sk, int flags, int err) static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); -static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -2066,9 +2060,8 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * flags - flags sent or received with the user message, see Section * 5 for complete description of the flags. */ -static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 2e9ada10fd84..26d50c565f54 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -58,10 +58,6 @@ static unsigned long max_autoclose_max = (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; -extern long sysctl_sctp_mem[3]; -extern int sysctl_sctp_rmem[3]; -extern int sysctl_sctp_wmem[3]; - static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/net/socket.c b/net/socket.c index bbedbfcb42c2..073809f4125f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -610,45 +610,20 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); -static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size) { - return sock->ops->sendmsg(iocb, sock, msg, size); + return sock->ops->sendmsg(sock, msg, size); } -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { int err = security_socket_sendmsg(sock, msg, size); - return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); -} - -static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, - size_t size, bool nosec) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : - __sock_sendmsg(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) -{ - return do_sock_sendmsg(sock, msg, size, false); + return err ?: sock_sendmsg_nosec(sock, msg, size); } EXPORT_SYMBOL(sock_sendmsg); -static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) -{ - return do_sock_sendmsg(sock, msg, size, true); -} - int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -731,9 +706,9 @@ EXPORT_SYMBOL_GPL(__sock_recv_wifi_status); static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) + if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount) put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, - sizeof(__u32), &skb->dropcount); + sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); } void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, @@ -744,47 +719,21 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); -static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, size, flags); } -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { int err = security_socket_recvmsg(sock, msg, size, flags); - return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); -} - -int sock_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; + return err ?: sock_recvmsg_nosec(sock, msg, size, flags); } EXPORT_SYMBOL(sock_recvmsg); -static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - /** * kernel_recvmsg - Receive a message from a socket (kernel space) * @sock: The socket to receive the message from @@ -849,7 +798,8 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg = {.msg_iter = *to}; + struct msghdr msg = {.msg_iter = *to, + .msg_iocb = iocb}; ssize_t res; if (file->f_flags & O_NONBLOCK) @@ -861,8 +811,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - res = __sock_recvmsg(iocb, sock, &msg, - iocb->ki_nbytes, msg.msg_flags); + res = sock_recvmsg(sock, &msg, iocb->ki_nbytes, msg.msg_flags); *to = msg.msg_iter; return res; } @@ -871,7 +820,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg = {.msg_iter = *from}; + struct msghdr msg = {.msg_iter = *from, + .msg_iocb = iocb}; ssize_t res; if (iocb->ki_pos != 0) @@ -883,7 +833,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + res = sock_sendmsg(sock, &msg, iocb->ki_nbytes); *from = msg.msg_iter; return res; } @@ -1702,6 +1652,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, if (len > INT_MAX) len = INT_MAX; + if (unlikely(!access_ok(VERIFY_READ, buff, len))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1760,6 +1712,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, if (size > INT_MAX) size = INT_MAX; + if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1942,6 +1896,8 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg, if (nr_segs > UIO_MAXIOV) return -EMSGSIZE; + kmsg->msg_iocb = NULL; + err = rw_copy_check_uvector(save_addr ? READ : WRITE, uiov, nr_segs, UIO_FASTIOV, *iov, iov); diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index abbb7dcd1689..59eeed43eda2 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -217,6 +217,8 @@ static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg) for (i = 0; i < arg->npages && arg->pages[i]; i++) __free_page(arg->pages[i]); + + kfree(arg->pages); } static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 224a82f24d3c..1095be9c80ab 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -463,6 +463,8 @@ static int rsc_parse(struct cache_detail *cd, /* number of additional gid's */ if (get_int(&mesg, &N)) goto out; + if (N < 0 || N > NGROUPS_MAX) + goto out; status = -ENOMEM; rsci.cred.cr_group_info = groups_alloc(N); if (rsci.cred.cr_group_info == NULL) diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 651f49ab601f..9dd0ea8db463 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -309,12 +309,15 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) struct rpc_xprt *xprt = req->rq_xprt; struct svc_serv *bc_serv = xprt->bc_serv; + spin_lock(&xprt->bc_pa_lock); + list_del(&req->rq_bc_pa_list); + spin_unlock(&xprt->bc_pa_lock); + req->rq_private_buf.len = copied; set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); dprintk("RPC: add callback request to list\n"); spin_lock(&bc_serv->sv_cb_lock); - list_del(&req->rq_bc_pa_list); list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); wake_up(&bc_serv->sv_cb_waitq); spin_unlock(&bc_serv->sv_cb_lock); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 33fb105d4352..5199bb1a017e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -921,7 +921,7 @@ static unsigned int cache_poll(struct file *filp, poll_table *wait, poll_wait(filp, &queue_wait, wait); /* alway allow write */ - mask = POLL_OUT | POLLWRNORM; + mask = POLLOUT | POLLWRNORM; if (!rp) return mask; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 7e9acd9361c5..91ffde82fa0c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -738,8 +738,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpc_xprt *xprt = rep->rr_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); __be32 *iptr; - int credits, rdmalen, status; + int rdmalen, status; unsigned long cwnd; + u32 credits; /* Check status. If bad, signal disconnect and return rep to pool */ if (rep->rr_len == ~0U) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index d1b70397c60f..0a16fb6f0885 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -285,7 +285,7 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) */ struct rpcrdma_buffer { spinlock_t rb_lock; /* protects indexes */ - int rb_max_requests;/* client max requests */ + u32 rb_max_requests;/* client max requests */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ struct list_head rb_all; int rb_send_index; diff --git a/net/switchdev/Kconfig b/net/switchdev/Kconfig index 155754588fd6..86a47e17cfaf 100644 --- a/net/switchdev/Kconfig +++ b/net/switchdev/Kconfig @@ -3,7 +3,7 @@ # config NET_SWITCHDEV - boolean "Switch (and switch-ish) device support (EXPERIMENTAL)" + bool "Switch (and switch-ish) device support (EXPERIMENTAL)" depends on INET ---help--- This module provides glue between core networking code and device diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8c1e558db118..46568b85c333 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1,6 +1,7 @@ /* * net/switchdev/switchdev.c - Switch device API * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,6 +15,7 @@ #include <linux/mutex.h> #include <linux/notifier.h> #include <linux/netdevice.h> +#include <net/ip_fib.h> #include <net/switchdev.h> /** @@ -26,13 +28,13 @@ int netdev_switch_parent_id_get(struct net_device *dev, struct netdev_phys_item_id *psid) { - const struct net_device_ops *ops = dev->netdev_ops; + const struct swdev_ops *ops = dev->swdev_ops; - if (!ops->ndo_switch_parent_id_get) + if (!ops || !ops->swdev_parent_id_get) return -EOPNOTSUPP; - return ops->ndo_switch_parent_id_get(dev, psid); + return ops->swdev_parent_id_get(dev, psid); } -EXPORT_SYMBOL(netdev_switch_parent_id_get); +EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get); /** * netdev_switch_port_stp_update - Notify switch device port of STP @@ -44,20 +46,29 @@ EXPORT_SYMBOL(netdev_switch_parent_id_get); */ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) { - const struct net_device_ops *ops = dev->netdev_ops; + const struct swdev_ops *ops = dev->swdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; - if (!ops->ndo_switch_port_stp_update) - return -EOPNOTSUPP; - WARN_ON(!ops->ndo_switch_parent_id_get); - return ops->ndo_switch_port_stp_update(dev, state); + if (ops && ops->swdev_port_stp_update) + return ops->swdev_port_stp_update(dev, state); + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_stp_update(lower_dev, state); + if (err && err != -EOPNOTSUPP) + return err; + } + + return err; } -EXPORT_SYMBOL(netdev_switch_port_stp_update); +EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update); static DEFINE_MUTEX(netdev_switch_mutex); static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); /** - * register_netdev_switch_notifier - Register nofifier + * register_netdev_switch_notifier - Register notifier * @nb: notifier_block * * Register switch device notifier. This should be used by code @@ -73,10 +84,10 @@ int register_netdev_switch_notifier(struct notifier_block *nb) mutex_unlock(&netdev_switch_mutex); return err; } -EXPORT_SYMBOL(register_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(register_netdev_switch_notifier); /** - * unregister_netdev_switch_notifier - Unregister nofifier + * unregister_netdev_switch_notifier - Unregister notifier * @nb: notifier_block * * Unregister switch device notifier. @@ -91,10 +102,10 @@ int unregister_netdev_switch_notifier(struct notifier_block *nb) mutex_unlock(&netdev_switch_mutex); return err; } -EXPORT_SYMBOL(unregister_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); /** - * call_netdev_switch_notifiers - Call nofifiers + * call_netdev_switch_notifiers - Call notifiers * @val: value passed unmodified to notifier function * @dev: port device * @info: notifier information data @@ -114,7 +125,7 @@ int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, mutex_unlock(&netdev_switch_mutex); return err; } -EXPORT_SYMBOL(call_netdev_switch_notifiers); +EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); /** * netdev_switch_port_bridge_setlink - Notify switch device port of bridge @@ -139,7 +150,7 @@ int netdev_switch_port_bridge_setlink(struct net_device *dev, return ops->ndo_bridge_setlink(dev, nlh, flags); } -EXPORT_SYMBOL(netdev_switch_port_bridge_setlink); +EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink); /** * netdev_switch_port_bridge_dellink - Notify switch device port of bridge @@ -164,7 +175,7 @@ int netdev_switch_port_bridge_dellink(struct net_device *dev, return ops->ndo_bridge_dellink(dev, nlh, flags); } -EXPORT_SYMBOL(netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink); /** * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink @@ -194,7 +205,7 @@ int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, return ret; } -EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink); +EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink); /** * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink @@ -224,4 +235,170 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, return ret; } -EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink); + +static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) +{ + const struct swdev_ops *ops = dev->swdev_ops; + struct net_device *lower_dev; + struct net_device *port_dev; + struct list_head *iter; + + /* Recusively search down until we find a sw port dev. + * (A sw port dev supports swdev_parent_id_get). + */ + + if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && + ops && ops->swdev_parent_id_get) + return dev; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + port_dev = netdev_switch_get_lowest_dev(lower_dev); + if (port_dev) + return port_dev; + } + + return NULL; +} + +static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) +{ + struct netdev_phys_item_id psid; + struct netdev_phys_item_id prev_psid; + struct net_device *dev = NULL; + int nhsel; + + /* For this route, all nexthop devs must be on the same switch. */ + + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (!nh->nh_dev) + return NULL; + + dev = netdev_switch_get_lowest_dev(nh->nh_dev); + if (!dev) + return NULL; + + if (netdev_switch_parent_id_get(dev, &psid)) + return NULL; + + if (nhsel > 0) { + if (prev_psid.id_len != psid.id_len) + return NULL; + if (memcmp(prev_psid.id, psid.id, psid.id_len)) + return NULL; + } + + prev_psid = psid; + } + + return dev; +} + +/** + * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch + * + * @dst: route's IPv4 destination address + * @dst_len: destination address length (prefix length) + * @fi: route FIB info structure + * @tos: route TOS + * @type: route type + * @nlflags: netlink flags passed in (NLM_F_*) + * @tb_id: route table ID + * + * Add IPv4 route entry to switch device. + */ +int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id) +{ + struct net_device *dev; + const struct swdev_ops *ops; + int err = 0; + + /* Don't offload route if using custom ip rules or if + * IPv4 FIB offloading has been disabled completely. + */ + +#ifdef CONFIG_IP_MULTIPLE_TABLES + if (fi->fib_net->ipv4.fib_has_custom_rules) + return 0; +#endif + + if (fi->fib_net->ipv4.fib_offload_disabled) + return 0; + + dev = netdev_switch_get_dev_by_nhs(fi); + if (!dev) + return 0; + ops = dev->swdev_ops; + + if (ops->swdev_fib_ipv4_add) { + err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len, + fi, tos, type, nlflags, + tb_id); + if (!err) + fi->fib_flags |= RTNH_F_EXTERNAL; + } + + return err; +} +EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); + +/** + * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch + * + * @dst: route's IPv4 destination address + * @dst_len: destination address length (prefix length) + * @fi: route FIB info structure + * @tos: route TOS + * @type: route type + * @tb_id: route table ID + * + * Delete IPv4 route entry from switch device. + */ +int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) +{ + struct net_device *dev; + const struct swdev_ops *ops; + int err = 0; + + if (!(fi->fib_flags & RTNH_F_EXTERNAL)) + return 0; + + dev = netdev_switch_get_dev_by_nhs(fi); + if (!dev) + return 0; + ops = dev->swdev_ops; + + if (ops->swdev_fib_ipv4_del) { + err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, + fi, tos, type, tb_id); + if (!err) + fi->fib_flags &= ~RTNH_F_EXTERNAL; + } + + return err; +} +EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del); + +/** + * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation + * + * @fi: route FIB info structure + */ +void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +{ + /* There was a problem installing this route to the offload + * device. For now, until we come up with more refined + * policy handling, abruptly end IPv4 fib offloading for + * for entire net by flushing offload device(s) of all + * IPv4 routes, and mark IPv4 fib offloading broken from + * this point forward. + */ + + fib_flush_external(fi->fib_net); + fi->fib_net->ipv4.fib_offload_disabled = true; +} +EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort); diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 91c8a8e031db..c25a3a149dc4 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -26,3 +26,11 @@ config TIPC_MEDIA_IB help Saying Y here will enable support for running TIPC on IP-over-InfiniBand devices. +config TIPC_MEDIA_UDP + bool "IP/UDP media type support" + depends on TIPC + select NET_UDP_TUNNEL + help + Saying Y here will enable support for running TIPC over IP/UDP + bool + default y diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 599b1a540d2b..57e460be4692 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -10,5 +10,6 @@ tipc-y += addr.o bcast.o bearer.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ server.o socket.o +tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 48fd3b5a73fb..ba7daa864d44 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -38,6 +38,13 @@ #include "addr.h" #include "core.h" +u32 tipc_own_addr(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->own_addr; +} + /** * in_own_cluster - test for cluster inclusion; <0.0.0> always matches */ diff --git a/net/tipc/addr.h b/net/tipc/addr.h index c700c2d28e09..7ba6d5c8ae40 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -55,6 +55,7 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_CLUSTER_MASK; } +u32 tipc_own_addr(struct net *net); int in_own_cluster(struct net *net, u32 addr); int in_own_cluster_exact(struct net *net, u32 addr); int in_own_node(struct net *net, u32 addr); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3e41704832de..ae558dd7f8ee 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -62,21 +62,8 @@ static void tipc_bclink_lock(struct net *net) static void tipc_bclink_unlock(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node = NULL; - if (likely(!tn->bclink->flags)) { - spin_unlock_bh(&tn->bclink->lock); - return; - } - - if (tn->bclink->flags & TIPC_BCLINK_RESET) { - tn->bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(net); - } spin_unlock_bh(&tn->bclink->lock); - - if (node) - tipc_link_reset_all(node); } void tipc_bclink_input(struct net *net) @@ -91,13 +78,6 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(struct net *net, unsigned int flags) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tn->bclink->flags |= flags; -} - static u32 bcbuf_acks(struct sk_buff *buf) { return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; @@ -135,9 +115,10 @@ static void bclink_set_last_sent(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *bcl = tn->bcl; + struct sk_buff *skb = skb_peek(&bcl->backlogq); - if (bcl->next_out) - bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); + if (skb) + bcl->fsm_msg_cnt = mod(buf_seqno(skb) - 1); else bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); } @@ -155,7 +136,6 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) seqno : node->bclink.last_sent; } - /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * @@ -180,7 +160,7 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) struct sk_buff *skb; struct tipc_link *bcl = tn->bcl; - skb_queue_walk(&bcl->outqueue, skb) { + skb_queue_walk(&bcl->transmq, skb) { if (more(buf_seqno(skb), after)) { tipc_link_retransmit(bcl, skb, mod(to - after)); break; @@ -210,14 +190,17 @@ void tipc_bclink_wakeup_users(struct net *net) void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) { struct sk_buff *skb, *tmp; - struct sk_buff *next; unsigned int released = 0; struct net *net = n_ptr->net; struct tipc_net *tn = net_generic(net, tipc_net_id); + if (unlikely(!n_ptr->bclink.recv_permitted)) + return; + tipc_bclink_lock(net); + /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&tn->bcl->outqueue); + skb = skb_peek(&tn->bcl->transmq); if (!skb) goto exit; @@ -244,27 +227,19 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) } /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&tn->bcl->outqueue, skb) { + skb_queue_walk(&tn->bcl->transmq, skb) { if (more(buf_seqno(skb), n_ptr->bclink.acked)) break; } /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&tn->bcl->outqueue, skb, tmp) { + skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) { if (more(buf_seqno(skb), acked)) break; - - next = tipc_skb_queue_next(&tn->bcl->outqueue, skb); - if (skb != tn->bcl->next_out) { - bcbuf_decr_acks(skb); - } else { - bcbuf_set_acks(skb, 0); - tn->bcl->next_out = next; - bclink_set_last_sent(net); - } - + bcbuf_decr_acks(skb); + bclink_set_last_sent(net); if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &tn->bcl->outqueue); + __skb_unlink(skb, &tn->bcl->transmq); kfree_skb(skb); released = 1; } @@ -272,7 +247,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) n_ptr->bclink.acked = acked; /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(tn->bcl->next_out)) { + if (unlikely(skb_peek(&tn->bcl->backlogq))) { tipc_link_push_packets(tn->bcl); bclink_set_last_sent(net); } @@ -319,7 +294,7 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, buf = tipc_buf_acquire(INT_H_SIZE); if (buf) { struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); + struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, @@ -354,13 +329,12 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) return; tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && (n_ptr->bclink.last_in == msg_bcgap_after(msg))) n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); } /* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster @@ -387,14 +361,13 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) __skb_queue_purge(list); return -EHOSTUNREACH; } - /* Broadcast to all nodes */ if (likely(bclink)) { tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { - u32 len = skb_queue_len(&bcl->outqueue); + u32 len = skb_queue_len(&bcl->transmq); bclink_set_last_sent(net); bcl->stats.queue_sz_counts++; @@ -481,17 +454,18 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) goto unlock; if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_node_unlock(node); tipc_bclink_lock(net); bcl->stats.recv_nacks++; tn->bclink->retransmit_to = node; bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); tipc_bclink_unlock(net); + tipc_node_unlock(node); } else { tipc_node_unlock(node); bclink_peek_nack(net, msg); } + tipc_node_put(node); goto exit; } @@ -528,11 +502,13 @@ receive: tipc_bclink_unlock(net); tipc_node_unlock(node); } else if (msg_user(msg) == MSG_FRAGMENTER) { - tipc_buf_append(&node->bclink.reasm_buf, &buf); - if (unlikely(!buf && !node->bclink.reasm_buf)) - goto unlock; tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); + tipc_buf_append(&node->bclink.reasm_buf, &buf); + if (unlikely(!buf && !node->bclink.reasm_buf)) { + tipc_bclink_unlock(net); + goto unlock; + } bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; @@ -559,25 +535,25 @@ receive: if (node->bclink.last_in == node->bclink.last_sent) goto unlock; - if (skb_queue_empty(&node->bclink.deferred_queue)) { + if (skb_queue_empty(&node->bclink.deferdq)) { node->bclink.oos_state = 1; goto unlock; } - msg = buf_msg(skb_peek(&node->bclink.deferred_queue)); + msg = buf_msg(skb_peek(&node->bclink.deferdq)); seqno = msg_seqno(msg); next_in = mod(next_in + 1); if (seqno != next_in) goto unlock; /* Take in-sequence message from deferred queue & deliver it */ - buf = __skb_dequeue(&node->bclink.deferred_queue); + buf = __skb_dequeue(&node->bclink.deferdq); goto receive; } /* Handle out-of-sequence broadcast message */ if (less(next_in, seqno)) { - deferred = tipc_link_defer_pkt(&node->bclink.deferred_queue, + deferred = tipc_link_defer_pkt(&node->bclink.deferdq, buf); bclink_update_last_sent(node, seqno); buf = NULL; @@ -594,6 +570,7 @@ receive: unlock: tipc_node_unlock(node); + tipc_node_put(node); exit: kfree_skb(buf); } @@ -634,7 +611,6 @@ static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); tn->bcl->stats.sent_info++; - if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); return 0; @@ -835,7 +811,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->queue_limit[0])) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) goto prop_msg_full; nla_nest_end(msg->skb, prop); @@ -913,8 +889,9 @@ int tipc_bclink_init(struct net *net) sprintf(bcbearer->media.name, "tipc-broadcast"); spin_lock_init(&bclink->lock); - __skb_queue_head_init(&bcl->outqueue); - __skb_queue_head_init(&bcl->deferred_queue); + __skb_queue_head_init(&bcl->transmq); + __skb_queue_head_init(&bcl->backlogq); + __skb_queue_head_init(&bcl->deferdq); skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 43f397fbac55..4bdc12277d33 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -55,7 +55,6 @@ struct tipc_bcbearer_pair { struct tipc_bearer *secondary; }; -#define TIPC_BCLINK_RESET 1 #define BCBEARER MAX_BEARERS /** @@ -86,7 +85,6 @@ struct tipc_bcbearer { * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * @@ -96,7 +94,6 @@ struct tipc_bclink { spinlock_t lock; struct tipc_link link; struct tipc_node node; - unsigned int flags; struct sk_buff_head arrvq; struct sk_buff_head inputq; struct tipc_node_map bcast_nodes; @@ -117,7 +114,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, int tipc_bclink_init(struct net *net); void tipc_bclink_stop(struct net *net); -void tipc_bclink_set_flags(struct net *tn, unsigned int flags); void tipc_bclink_add_node(struct net *net, u32 addr); void tipc_bclink_remove_node(struct net *net, u32 addr); struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 48852c2dcc03..3613e72e858e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -48,6 +48,9 @@ static struct tipc_media * const media_info_array[] = { #ifdef CONFIG_TIPC_MEDIA_IB &ib_media_info, #endif +#ifdef CONFIG_TIPC_MEDIA_UDP + &udp_media_info, +#endif NULL }; @@ -216,7 +219,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) * tipc_enable_bearer - enable bearer with the given name */ static int tipc_enable_bearer(struct net *net, const char *name, - u32 disc_domain, u32 priority) + u32 disc_domain, u32 priority, + struct nlattr *attr[]) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; @@ -304,7 +308,7 @@ restart: strcpy(b_ptr->name, name); b_ptr->media = m_ptr; - res = m_ptr->enable_media(net, b_ptr); + res = m_ptr->enable_media(net, b_ptr, attr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -372,7 +376,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, kfree_rcu(b_ptr, rcu); } -int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]) { struct net_device *dev; char *driver_name = strchr((const char *)b->name, ':') + 1; @@ -791,7 +796,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = tipc_enable_bearer(net, bearer, domain, prio); + err = tipc_enable_bearer(net, bearer, domain, prio, attrs); if (err) { rtnl_unlock(); return err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6b17795ff8bc..5cad243ee8fc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -41,7 +41,7 @@ #include <net/genetlink.h> #define MAX_BEARERS 2 -#define MAX_MEDIA 2 +#define MAX_MEDIA 3 #define MAX_NODES 4096 #define WSIZE 32 @@ -50,14 +50,16 @@ * - the field's actual content and length is defined per media * - remaining unused bytes in the field are set to zero */ -#define TIPC_MEDIA_ADDR_SIZE 32 +#define TIPC_MEDIA_INFO_SIZE 32 #define TIPC_MEDIA_TYPE_OFFSET 3 +#define TIPC_MEDIA_ADDR_OFFSET 4 /* * Identifiers of supported TIPC media types */ #define TIPC_MEDIA_TYPE_ETH 1 #define TIPC_MEDIA_TYPE_IB 2 +#define TIPC_MEDIA_TYPE_UDP 3 /** * struct tipc_node_map - set of node identifiers @@ -76,7 +78,7 @@ struct tipc_node_map { * @broadcast: non-zero if address is a broadcast address */ struct tipc_media_addr { - u8 value[TIPC_MEDIA_ADDR_SIZE]; + u8 value[TIPC_MEDIA_INFO_SIZE]; u8 media_id; u8 broadcast; }; @@ -103,7 +105,8 @@ struct tipc_media { int (*send_msg)(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + struct nlattr *attr[]); void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, @@ -182,6 +185,9 @@ extern struct tipc_media eth_media_info; #ifdef CONFIG_TIPC_MEDIA_IB extern struct tipc_media ib_media_info; #endif +#ifdef CONFIG_TIPC_MEDIA_UDP +extern struct tipc_media udp_media_info; +#endif int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); @@ -196,7 +202,8 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); -int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index feef3753615d..967e292f53c8 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -86,9 +86,10 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, msg = buf_msg(buf); tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, - INT_H_SIZE, dest_domain); + MAX_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tn->random); + msg_set_node_capabilities(msg, 0); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); @@ -133,6 +134,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, u32 net_id = msg_bc_netid(msg); u32 mtyp = msg_type(msg); u32 signature = msg_node_sig(msg); + u16 caps = msg_node_capabilities(msg); bool addr_match = false; bool sign_match = false; bool link_up = false; @@ -167,6 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, if (!node) return; tipc_node_lock(node); + node->capabilities = caps; link = node->links[bearer->identity]; /* Prepare to validate requesting node's signature and media address */ @@ -249,7 +252,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { - rbuf = tipc_buf_acquire(INT_H_SIZE); + rbuf = tipc_buf_acquire(MAX_H_SIZE); if (rbuf) { tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); tipc_bearer_send(net, bearer->identity, rbuf, &maddr); @@ -257,6 +260,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, } } tipc_node_unlock(node); + tipc_node_put(node); } /** @@ -359,8 +363,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) return -ENOMEM; - - req->buf = tipc_buf_acquire(INT_H_SIZE); + req->buf = tipc_buf_acquire(MAX_H_SIZE); if (!req->buf) { kfree(req); return -ENOMEM; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 5e1426f1751f..f69a2fde9f4a 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -37,8 +37,6 @@ #include "core.h" #include "bearer.h" -#define ETH_ADDR_OFFSET 4 /* MAC addr position inside address field */ - /* Convert Ethernet address (media address format) to string */ static int tipc_eth_addr2str(struct tipc_media_addr *addr, char *strbuf, int bufsz) @@ -53,9 +51,9 @@ static int tipc_eth_addr2str(struct tipc_media_addr *addr, /* Convert from media address format to discovery message addr format */ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; - memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN); + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; } @@ -79,7 +77,7 @@ static int tipc_eth_msg2addr(struct tipc_bearer *b, char *msg) { /* Skip past preamble: */ - msg += ETH_ADDR_OFFSET; + msg += TIPC_MEDIA_ADDR_OFFSET; return tipc_eth_raw2addr(b, addr, msg); } diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 8522eef9c136..e8c16718e3fa 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -57,7 +57,7 @@ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, /* Convert from media address format to discovery message addr format */ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); memcpy(msg, addr->value, INFINIBAND_ALEN); return 0; } diff --git a/net/tipc/link.c b/net/tipc/link.c index a4cf364316de..514466efc25c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,7 +1,7 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2007, 2012-2014, Ericsson AB + * Copyright (c) 1996-2007, 2012-2015, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -35,6 +35,7 @@ */ #include "core.h" +#include "subscr.h" #include "link.h" #include "bcast.h" #include "socket.h" @@ -138,6 +139,13 @@ static void tipc_link_put(struct tipc_link *l_ptr) kref_put(&l_ptr->ref, tipc_link_release); } +static struct tipc_link *tipc_parallel_link(struct tipc_link *l) +{ + if (l->owner->active_links[0] != l) + return l->owner->active_links[0]; + return l->owner->active_links[1]; +} + static void link_init_max_pkt(struct tipc_link *l_ptr) { struct tipc_node *node = l_ptr->owner; @@ -194,10 +202,10 @@ static void link_timeout(unsigned long data) tipc_node_lock(l_ptr->owner); /* update counters used in statistical profiling of send traffic */ - l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->outqueue); + l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->transmq); l_ptr->stats.queue_sz_counts++; - skb = skb_peek(&l_ptr->outqueue); + skb = skb_peek(&l_ptr->transmq); if (skb) { struct tipc_msg *msg = buf_msg(skb); u32 length = msg_size(msg); @@ -229,7 +237,7 @@ static void link_timeout(unsigned long data) /* do all other link processing performed on a periodic basis */ link_state_event(l_ptr, TIMEOUT_EVT); - if (l_ptr->next_out) + if (skb_queue_len(&l_ptr->backlogq)) tipc_link_push_packets(l_ptr); tipc_node_unlock(l_ptr->owner); @@ -305,16 +313,14 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, msg_set_session(msg, (tn->random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); - - l_ptr->priority = b_ptr->priority; - tipc_link_set_queue_limits(l_ptr, b_ptr->window); - l_ptr->net_plane = b_ptr->net_plane; link_init_max_pkt(l_ptr); - + l_ptr->priority = b_ptr->priority; + tipc_link_set_queue_limits(l_ptr, b_ptr->window); l_ptr->next_out_no = 1; - __skb_queue_head_init(&l_ptr->outqueue); - __skb_queue_head_init(&l_ptr->deferred_queue); + __skb_queue_head_init(&l_ptr->transmq); + __skb_queue_head_init(&l_ptr->backlogq); + __skb_queue_head_init(&l_ptr->deferdq); skb_queue_head_init(&l_ptr->wakeupq); skb_queue_head_init(&l_ptr->inputq); skb_queue_head_init(&l_ptr->namedq); @@ -344,6 +350,7 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *link; struct tipc_node *node; + bool del_link; rcu_read_lock(); list_for_each_entry_rcu(node, &tn->node_list, list) { @@ -353,12 +360,13 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, tipc_node_unlock(node); continue; } + del_link = !tipc_link_is_up(link) && !link->exp_msg_count; tipc_link_reset(link); if (del_timer(&link->timer)) tipc_link_put(link); link->flags |= LINK_STOPPED; /* Delete link now, or when failover is finished: */ - if (shutting_down || !tipc_node_is_up(node)) + if (shutting_down || !tipc_node_is_up(node) || del_link) tipc_link_delete(link); tipc_node_unlock(node); } @@ -366,28 +374,43 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, } /** - * link_schedule_user - schedule user for wakeup after congestion + * link_schedule_user - schedule a message sender for wakeup after congestion * @link: congested link - * @oport: sending port - * @chain_sz: size of buffer chain that was attempted sent - * @imp: importance of message attempted sent + * @list: message that was attempted sent * Create pseudo msg to send back to user when congestion abates + * Only consumes message if there is an error */ -static bool link_schedule_user(struct tipc_link *link, u32 oport, - uint chain_sz, uint imp) +static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) { - struct sk_buff *buf; + struct tipc_msg *msg = buf_msg(skb_peek(list)); + int imp = msg_importance(msg); + u32 oport = msg_origport(msg); + u32 addr = link_own_addr(link); + struct sk_buff *skb; - buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, - link_own_addr(link), link_own_addr(link), - oport, 0, 0); - if (!buf) - return false; - TIPC_SKB_CB(buf)->chain_sz = chain_sz; - TIPC_SKB_CB(buf)->chain_imp = imp; - skb_queue_tail(&link->wakeupq, buf); + /* This really cannot happen... */ + if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); + goto err; + } + /* Non-blocking sender: */ + if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending) + return -ELINKCONG; + + /* Create and schedule wakeup pseudo message */ + skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + addr, addr, oport, 0, 0); + if (!skb) + goto err; + TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list); + TIPC_SKB_CB(skb)->chain_imp = imp; + skb_queue_tail(&link->wakeupq, skb); link->stats.link_congs++; - return true; + return -ELINKCONG; +err: + __skb_queue_purge(list); + return -ENOBUFS; } /** @@ -396,19 +419,22 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, * Move a number of waiting users, as permitted by available space in * the send queue, from link wait queue to node wait queue for wakeup */ -void link_prepare_wakeup(struct tipc_link *link) +void link_prepare_wakeup(struct tipc_link *l) { - uint pend_qsz = skb_queue_len(&link->outqueue); + int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; + int imp, lim; struct sk_buff *skb, *tmp; - skb_queue_walk_safe(&link->wakeupq, skb, tmp) { - if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(skb)->chain_imp]) + skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + imp = TIPC_SKB_CB(skb)->chain_imp; + lim = l->window + l->backlog[imp].limit; + pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; + if ((pnd[imp] + l->backlog[imp].len) >= lim) break; - pend_qsz += TIPC_SKB_CB(skb)->chain_sz; - skb_unlink(skb, &link->wakeupq); - skb_queue_tail(&link->inputq, skb); - link->owner->inputq = &link->inputq; - link->owner->action_flags |= TIPC_MSG_EVT; + skb_unlink(skb, &l->wakeupq); + skb_queue_tail(&l->inputq, skb); + l->owner->inputq = &l->inputq; + l->owner->action_flags |= TIPC_MSG_EVT; } } @@ -422,14 +448,25 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) l_ptr->reasm_buf = NULL; } +static void tipc_link_purge_backlog(struct tipc_link *l) +{ + __skb_queue_purge(&l->backlogq); + l->backlog[TIPC_LOW_IMPORTANCE].len = 0; + l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; + l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; + l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; + l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; +} + /** * tipc_link_purge_queues - purge all pkt queues associated with link * @l_ptr: pointer to link */ void tipc_link_purge_queues(struct tipc_link *l_ptr) { - __skb_queue_purge(&l_ptr->deferred_queue); - __skb_queue_purge(&l_ptr->outqueue); + __skb_queue_purge(&l_ptr->deferdq); + __skb_queue_purge(&l_ptr->transmq); + tipc_link_purge_backlog(l_ptr); tipc_link_reset_fragments(l_ptr); } @@ -462,14 +499,15 @@ void tipc_link_reset(struct tipc_link *l_ptr) } /* Clean up all queues, except inputq: */ - __skb_queue_purge(&l_ptr->outqueue); - __skb_queue_purge(&l_ptr->deferred_queue); - skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq); - if (!skb_queue_empty(&l_ptr->inputq)) + __skb_queue_purge(&l_ptr->transmq); + __skb_queue_purge(&l_ptr->deferdq); + if (!owner->inputq) + owner->inputq = &l_ptr->inputq; + skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); + if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; - owner->inputq = &l_ptr->inputq; - l_ptr->next_out = NULL; - l_ptr->unacked_window = 0; + tipc_link_purge_backlog(l_ptr); + l_ptr->rcv_unacked = 0; l_ptr->checkpoint = 1; l_ptr->next_out_no = 1; l_ptr->fsm_msg_cnt = 0; @@ -692,101 +730,65 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } } -/* tipc_link_cong: determine return value and how to treat the - * sent buffer during link congestion. - * - For plain, errorless user data messages we keep the buffer and - * return -ELINKONG. - * - For all other messages we discard the buffer and return -EHOSTUNREACH - * - For TIPC internal messages we also reset the link - */ -static int tipc_link_cong(struct tipc_link *link, struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek(list); - struct tipc_msg *msg = buf_msg(skb); - uint imp = tipc_msg_tot_importance(msg); - u32 oport = msg_tot_origport(msg); - - if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { - pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); - tipc_link_reset(link); - goto drop; - } - if (unlikely(msg_errcode(msg))) - goto drop; - if (unlikely(msg_reroute_cnt(msg))) - goto drop; - if (TIPC_SKB_CB(skb)->wakeup_pending) - return -ELINKCONG; - if (link_schedule_user(link, oport, skb_queue_len(list), imp)) - return -ELINKCONG; -drop: - __skb_queue_purge(list); - return -EHOSTUNREACH; -} - /** * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked * @link: link to use * @list: chain of buffers containing message * - * Consumes the buffer chain, except when returning -ELINKCONG - * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket - * user data messages) or -EHOSTUNREACH (all other messages/senders) - * Only the socket functions tipc_send_stream() and tipc_send_packet() need - * to act on the return value, since they may need to do more send attempts. + * Consumes the buffer chain, except when returning -ELINKCONG, + * since the caller then may want to make more send attempts. + * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS + * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted */ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list) { struct tipc_msg *msg = buf_msg(skb_peek(list)); - uint psz = msg_size(msg); - uint sndlim = link->queue_limit[0]; - uint imp = tipc_msg_tot_importance(msg); + unsigned int maxwin = link->window; + unsigned int imp = msg_importance(msg); uint mtu = link->max_pkt; uint ack = mod(link->next_in_no - 1); uint seqno = link->next_out_no; uint bc_last_in = link->owner->bclink.last_in; struct tipc_media_addr *addr = &link->media_addr; - struct sk_buff_head *outqueue = &link->outqueue; + struct sk_buff_head *transmq = &link->transmq; + struct sk_buff_head *backlogq = &link->backlogq; struct sk_buff *skb, *tmp; - /* Match queue limits against msg importance: */ - if (unlikely(skb_queue_len(outqueue) >= link->queue_limit[imp])) - return tipc_link_cong(link, list); + /* Match backlog limit against msg importance: */ + if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit)) + return link_schedule_user(link, list); - /* Has valid packet limit been used ? */ - if (unlikely(psz > mtu)) { + if (unlikely(msg_size(msg) > mtu)) { __skb_queue_purge(list); return -EMSGSIZE; } - - /* Prepare each packet for sending, and add to outqueue: */ + /* Prepare each packet for sending, and add to relevant queue: */ skb_queue_walk_safe(list, skb, tmp) { __skb_unlink(skb, list); msg = buf_msg(skb); - msg_set_word(msg, 2, ((ack << 16) | mod(seqno))); + msg_set_seqno(msg, seqno); + msg_set_ack(msg, ack); msg_set_bcast_ack(msg, bc_last_in); - if (skb_queue_len(outqueue) < sndlim) { - __skb_queue_tail(outqueue, skb); - tipc_bearer_send(net, link->bearer_id, - skb, addr); - link->next_out = NULL; - link->unacked_window = 0; - } else if (tipc_msg_bundle(outqueue, skb, mtu)) { + if (likely(skb_queue_len(transmq) < maxwin)) { + __skb_queue_tail(transmq, skb); + tipc_bearer_send(net, link->bearer_id, skb, addr); + link->rcv_unacked = 0; + seqno++; + continue; + } + if (tipc_msg_bundle(skb_peek_tail(backlogq), skb, mtu)) { link->stats.sent_bundled++; continue; - } else if (tipc_msg_make_bundle(outqueue, skb, mtu, - link->addr)) { + } + if (tipc_msg_make_bundle(&skb, mtu, link->addr)) { link->stats.sent_bundled++; link->stats.sent_bundles++; - if (!link->next_out) - link->next_out = skb_peek_tail(outqueue); - } else { - __skb_queue_tail(outqueue, skb); - if (!link->next_out) - link->next_out = skb; + imp = msg_importance(buf_msg(skb)); } + __skb_queue_tail(backlogq, skb); + link->backlog[imp].len++; seqno++; } link->next_out_no = seqno; @@ -807,13 +809,25 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) return __tipc_link_xmit(link->owner->net, link, &head); } +/* tipc_link_xmit_skb(): send single buffer to destination + * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * messages, which will not be rejected + * The only exception is datagram messages rerouted after secondary + * lookup, which are rare and safe to dispose of anyway. + * TODO: Return real return value, and let callers use + * tipc_wait_for_sendpkt() where applicable + */ int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, u32 selector) { struct sk_buff_head head; + int rc; skb2list(skb, &head); - return tipc_link_xmit(net, &head, dnode, selector); + rc = tipc_link_xmit(net, &head, dnode, selector); + if (rc == -ELINKCONG) + kfree_skb(skb); + return 0; } /** @@ -840,12 +854,15 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, if (link) rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); + tipc_node_put(node); } if (link) return rc; - if (likely(in_own_node(net, dnode))) - return tipc_sk_rcv(net, list); + if (likely(in_own_node(net, dnode))) { + tipc_sk_rcv(net, list); + return 0; + } __skb_queue_purge(list); return rc; @@ -892,14 +909,6 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) kfree_skb(buf); } -struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, - const struct sk_buff *skb) -{ - if (skb_queue_is_last(list, skb)) - return NULL; - return skb->next; -} - /* * tipc_link_push_packets - push unsent packets to bearer * @@ -908,30 +917,24 @@ struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, * * Called with node locked */ -void tipc_link_push_packets(struct tipc_link *l_ptr) +void tipc_link_push_packets(struct tipc_link *link) { - struct sk_buff_head *outqueue = &l_ptr->outqueue; - struct sk_buff *skb = l_ptr->next_out; + struct sk_buff *skb; struct tipc_msg *msg; - u32 next, first; + unsigned int ack = mod(link->next_in_no - 1); - skb_queue_walk_from(outqueue, skb) { - msg = buf_msg(skb); - next = msg_seqno(msg); - first = buf_seqno(skb_peek(outqueue)); - - if (mod(next - first) < l_ptr->queue_limit[0]) { - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - if (msg_user(msg) == MSG_BUNDLER) - TIPC_SKB_CB(skb)->bundling = false; - tipc_bearer_send(l_ptr->owner->net, - l_ptr->bearer_id, skb, - &l_ptr->media_addr); - l_ptr->next_out = tipc_skb_queue_next(outqueue, skb); - } else { + while (skb_queue_len(&link->transmq) < link->window) { + skb = __skb_dequeue(&link->backlogq); + if (!skb) break; - } + msg = buf_msg(skb); + link->backlog[msg_importance(msg)].len--; + msg_set_ack(msg, ack); + msg_set_bcast_ack(msg, link->owner->bclink.last_in); + link->rcv_unacked = 0; + __skb_queue_tail(&link->transmq, skb); + tipc_bearer_send(link->owner->net, link->bearer_id, + skb, &link->media_addr); } } @@ -978,7 +981,6 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, (unsigned long) TIPC_SKB_CB(buf)->handle); n_ptr = tipc_bclink_retransmit_to(net); - tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); pr_info("Broadcast link info for %s\n", addr_string); @@ -990,9 +992,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_node_unlock(n_ptr); - - tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); + n_ptr->action_flags |= TIPC_BCAST_RESET; l_ptr->stale_count = 0; } } @@ -1018,8 +1018,8 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, l_ptr->stale_count = 1; } - skb_queue_walk_from(&l_ptr->outqueue, skb) { - if (!retransmits || skb == l_ptr->next_out) + skb_queue_walk_from(&l_ptr->transmq, skb) { + if (!retransmits) break; msg = buf_msg(skb); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); @@ -1031,72 +1031,43 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, } } -static void link_retrieve_defq(struct tipc_link *link, - struct sk_buff_head *list) -{ - u32 seq_no; - - if (skb_queue_empty(&link->deferred_queue)) - return; - - seq_no = buf_seqno(skb_peek(&link->deferred_queue)); - if (seq_no == mod(link->next_in_no)) - skb_queue_splice_tail_init(&link->deferred_queue, list); -} - -/** - * link_recv_buf_validate - validate basic format of received message - * - * This routine ensures a TIPC message has an acceptable header, and at least - * as much data as the header indicates it should. The routine also ensures - * that the entire message header is stored in the main fragment of the message - * buffer, to simplify future access to message header fields. - * - * Note: Having extra info present in the message header or data areas is OK. - * TIPC will ignore the excess, under the assumption that it is optional info - * introduced by a later release of the protocol. +/* link_synch(): check if all packets arrived before the synch + * point have been consumed + * Returns true if the parallel links are synched, otherwise false */ -static int link_recv_buf_validate(struct sk_buff *buf) +static bool link_synch(struct tipc_link *l) { - static u32 min_data_hdr_size[8] = { - SHORT_H_SIZE, MCAST_H_SIZE, NAMED_H_SIZE, BASIC_H_SIZE, - MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE - }; - - struct tipc_msg *msg; - u32 tipc_hdr[2]; - u32 size; - u32 hdr_size; - u32 min_hdr_size; - - /* If this packet comes from the defer queue, the skb has already - * been validated - */ - if (unlikely(TIPC_SKB_CB(buf)->deferred)) - return 1; + unsigned int post_synch; + struct tipc_link *pl; - if (unlikely(buf->len < MIN_H_SIZE)) - return 0; + pl = tipc_parallel_link(l); + if (pl == l) + goto synched; - msg = skb_header_pointer(buf, 0, sizeof(tipc_hdr), tipc_hdr); - if (msg == NULL) - return 0; + /* Was last pre-synch packet added to input queue ? */ + if (less_eq(pl->next_in_no, l->synch_point)) + return false; - if (unlikely(msg_version(msg) != TIPC_VERSION)) - return 0; + /* Is it still in the input queue ? */ + post_synch = mod(pl->next_in_no - l->synch_point) - 1; + if (skb_queue_len(&pl->inputq) > post_synch) + return false; +synched: + l->flags &= ~LINK_SYNCHING; + return true; +} - size = msg_size(msg); - hdr_size = msg_hdr_sz(msg); - min_hdr_size = msg_isdata(msg) ? - min_data_hdr_size[msg_type(msg)] : INT_H_SIZE; +static void link_retrieve_defq(struct tipc_link *link, + struct sk_buff_head *list) +{ + u32 seq_no; - if (unlikely((hdr_size < min_hdr_size) || - (size < hdr_size) || - (buf->len < size) || - (size - hdr_size > TIPC_MAX_USER_MSG_SIZE))) - return 0; + if (skb_queue_empty(&link->deferdq)) + return; - return pskb_may_pull(buf, hdr_size); + seq_no = buf_seqno(skb_peek(&link->deferdq)); + if (seq_no == mod(link->next_in_no)) + skb_queue_splice_tail_init(&link->deferdq, list); } /** @@ -1124,16 +1095,11 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) while ((skb = __skb_dequeue(&head))) { /* Ensure message is well-formed */ - if (unlikely(!link_recv_buf_validate(skb))) - goto discard; - - /* Ensure message data is a single contiguous unit */ - if (unlikely(skb_linearize(skb))) + if (unlikely(!tipc_msg_validate(skb))) goto discard; /* Handle arrival of a non-unicast link message */ msg = buf_msg(skb); - if (unlikely(msg_non_seq(msg))) { if (msg_user(msg) == LINK_CONFIG) tipc_disc_rcv(net, skb, b_ptr); @@ -1151,8 +1117,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; - tipc_node_lock(n_ptr); + tipc_node_lock(n_ptr); /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) @@ -1174,21 +1140,20 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) ackd = msg_ack(msg); /* Release acked messages */ - if (n_ptr->bclink.recv_permitted) + if (unlikely(n_ptr->bclink.acked != msg_bcast_ack(msg))) tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); released = 0; - skb_queue_walk_safe(&l_ptr->outqueue, skb1, tmp) { - if (skb1 == l_ptr->next_out || - more(buf_seqno(skb1), ackd)) + skb_queue_walk_safe(&l_ptr->transmq, skb1, tmp) { + if (more(buf_seqno(skb1), ackd)) break; - __skb_unlink(skb1, &l_ptr->outqueue); + __skb_unlink(skb1, &l_ptr->transmq); kfree_skb(skb1); released = 1; } /* Try sending any messages link endpoint has pending */ - if (unlikely(l_ptr->next_out)) + if (unlikely(skb_queue_len(&l_ptr->backlogq))) tipc_link_push_packets(l_ptr); if (released && !skb_queue_empty(&l_ptr->wakeupq)) @@ -1222,11 +1187,18 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; goto unlock; } + /* Synchronize with parallel link if applicable */ + if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { + link_handle_out_of_seq_msg(l_ptr, skb); + if (link_synch(l_ptr)) + link_retrieve_defq(l_ptr, &head); + skb = NULL; + goto unlock; + } l_ptr->next_in_no++; - if (unlikely(!skb_queue_empty(&l_ptr->deferred_queue))) + if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) link_retrieve_defq(l_ptr, &head); - - if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { + if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { l_ptr->stats.sent_acks++; tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } @@ -1234,6 +1206,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; unlock: tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); discard: if (unlikely(skb)) kfree_skb(skb); @@ -1298,6 +1271,10 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) switch (msg_user(msg)) { case CHANGEOVER_PROTOCOL: + if (msg_dup(msg)) { + link->flags |= LINK_SYNCHING; + link->synch_point = msg_seqno(msg_get_wrapped(msg)); + } if (!tipc_link_tunnel_rcv(node, &skb)) break; if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { @@ -1393,10 +1370,9 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, return; } - if (tipc_link_defer_pkt(&l_ptr->deferred_queue, buf)) { + if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) { l_ptr->stats.deferred_recv++; - TIPC_SKB_CB(buf)->deferred = true; - if ((skb_queue_len(&l_ptr->deferred_queue) % 16) == 1) + if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1) tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } else { l_ptr->stats.duplicates++; @@ -1433,11 +1409,11 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, if (!tipc_link_is_up(l_ptr)) return; - if (l_ptr->next_out) - next_sent = buf_seqno(l_ptr->next_out); + if (skb_queue_len(&l_ptr->backlogq)) + next_sent = buf_seqno(skb_peek(&l_ptr->backlogq)); msg_set_next_sent(msg, next_sent); - if (!skb_queue_empty(&l_ptr->deferred_queue)) { - u32 rec = buf_seqno(skb_peek(&l_ptr->deferred_queue)); + if (!skb_queue_empty(&l_ptr->deferdq)) { + u32 rec = buf_seqno(skb_peek(&l_ptr->deferdq)); gap = mod(rec - mod(l_ptr->next_in_no)); } msg_set_seq_gap(msg, gap); @@ -1489,10 +1465,9 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, &l_ptr->media_addr); - l_ptr->unacked_window = 0; + l_ptr->rcv_unacked = 0; kfree_skb(buf); } @@ -1627,7 +1602,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, } if (msg_seq_gap(msg)) { l_ptr->stats.recv_nacks++; - tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->outqueue), + tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq), msg_seq_gap(msg)); } break; @@ -1674,7 +1649,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, */ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) { - u32 msgcount = skb_queue_len(&l_ptr->outqueue); + int msgcount; struct tipc_link *tunnel = l_ptr->owner->active_links[0]; struct tipc_msg tunnel_hdr; struct sk_buff *skb; @@ -1685,10 +1660,13 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); + tipc_link_purge_backlog(l_ptr); + msgcount = skb_queue_len(&l_ptr->transmq); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); - if (skb_queue_empty(&l_ptr->outqueue)) { + if (skb_queue_empty(&l_ptr->transmq)) { skb = tipc_buf_acquire(INT_H_SIZE); if (skb) { skb_copy_to_linear_data(skb, &tunnel_hdr, INT_H_SIZE); @@ -1704,7 +1682,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) split_bundles = (l_ptr->owner->active_links[0] != l_ptr->owner->active_links[1]); - skb_queue_walk(&l_ptr->outqueue, skb) { + skb_queue_walk(&l_ptr->transmq, skb) { struct tipc_msg *msg = buf_msg(skb); if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) { @@ -1735,80 +1713,66 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) * and sequence order is preserved per sender/receiver socket pair. * Owner node is locked. */ -void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, - struct tipc_link *tunnel) +void tipc_link_dup_queue_xmit(struct tipc_link *link, + struct tipc_link *tnl) { struct sk_buff *skb; - struct tipc_msg tunnel_hdr; - - tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); - msg_set_msgcnt(&tunnel_hdr, skb_queue_len(&l_ptr->outqueue)); - msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); - skb_queue_walk(&l_ptr->outqueue, skb) { + struct tipc_msg tnl_hdr; + struct sk_buff_head *queue = &link->transmq; + int mcnt; + + tipc_msg_init(link_own_addr(link), &tnl_hdr, CHANGEOVER_PROTOCOL, + DUPLICATE_MSG, INT_H_SIZE, link->addr); + mcnt = skb_queue_len(&link->transmq) + skb_queue_len(&link->backlogq); + msg_set_msgcnt(&tnl_hdr, mcnt); + msg_set_bearer_id(&tnl_hdr, link->peer_bearer_id); + +tunnel_queue: + skb_queue_walk(queue, skb) { struct sk_buff *outskb; struct tipc_msg *msg = buf_msg(skb); - u32 length = msg_size(msg); + u32 len = msg_size(msg); - if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */ - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_size(&tunnel_hdr, length + INT_H_SIZE); - outskb = tipc_buf_acquire(length + INT_H_SIZE); + msg_set_ack(msg, mod(link->next_in_no - 1)); + msg_set_bcast_ack(msg, link->owner->bclink.last_in); + msg_set_size(&tnl_hdr, len + INT_H_SIZE); + outskb = tipc_buf_acquire(len + INT_H_SIZE); if (outskb == NULL) { pr_warn("%sunable to send duplicate msg\n", link_co_err); return; } - skb_copy_to_linear_data(outskb, &tunnel_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(outskb, INT_H_SIZE, skb->data, - length); - __tipc_link_xmit_skb(tunnel, outskb); - if (!tipc_link_is_up(l_ptr)) + skb_copy_to_linear_data(outskb, &tnl_hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(outskb, INT_H_SIZE, + skb->data, len); + __tipc_link_xmit_skb(tnl, outskb); + if (!tipc_link_is_up(link)) return; } -} - -/** - * buf_extract - extracts embedded TIPC message from another message - * @skb: encapsulating message buffer - * @from_pos: offset to extract from - * - * Returns a new message buffer containing an embedded message. The - * encapsulating buffer is left unchanged. - */ -static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) -{ - struct tipc_msg *msg = (struct tipc_msg *)(skb->data + from_pos); - u32 size = msg_size(msg); - struct sk_buff *eb; - - eb = tipc_buf_acquire(size); - if (eb) - skb_copy_to_linear_data(eb, msg, size); - return eb; + if (queue == &link->backlogq) + return; + queue = &link->backlogq; + goto tunnel_queue; } /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. * Owner node is locked. */ -static void tipc_link_dup_rcv(struct tipc_link *l_ptr, - struct sk_buff *t_buf) +static void tipc_link_dup_rcv(struct tipc_link *link, + struct sk_buff *skb) { - struct sk_buff *buf; + struct sk_buff *iskb; + int pos = 0; - if (!tipc_link_is_up(l_ptr)) + if (!tipc_link_is_up(link)) return; - buf = buf_extract(t_buf, INT_H_SIZE); - if (buf == NULL) { + if (!tipc_msg_extract(skb, &iskb, &pos)) { pr_warn("%sfailed to extract inner dup pkt\n", link_co_err); return; } - - /* Add buffer to deferred queue, if applicable: */ - link_handle_out_of_seq_msg(l_ptr, buf); + /* Append buffer to deferred queue, if applicable: */ + link_handle_out_of_seq_msg(link, iskb); } /* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet @@ -1820,6 +1784,7 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, struct tipc_msg *t_msg = buf_msg(t_buf); struct sk_buff *buf = NULL; struct tipc_msg *msg; + int pos = 0; if (tipc_link_is_up(l_ptr)) tipc_link_reset(l_ptr); @@ -1831,8 +1796,7 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, /* Should there be an inner packet? */ if (l_ptr->exp_msg_count) { l_ptr->exp_msg_count--; - buf = buf_extract(t_buf, INT_H_SIZE); - if (buf == NULL) { + if (!tipc_msg_extract(t_buf, &buf, &pos)) { pr_warn("%sno inner failover pkt\n", link_co_err); goto exit; } @@ -1900,23 +1864,16 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); } -void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) +void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { - /* Data messages from this node, inclusive FIRST_FRAGM */ - l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window; - l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = (window / 3) * 4; - l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = (window / 3) * 5; - l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = (window / 3) * 6; - /* Transiting data messages,inclusive FIRST_FRAGM */ - l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300; - l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600; - l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900; - l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200; - l_ptr->queue_limit[CONN_MANAGER] = 1200; - l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500; - l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000; - /* FRAGMENT and LAST_FRAGMENT packets */ - l_ptr->queue_limit[MSG_FRAGMENTER] = 4000; + int max_bulk = TIPC_MAX_PUBLICATIONS / (l->max_pkt / ITEM_SIZE); + + l->window = win; + l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win; + l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3; + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2; + l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } /* tipc_link_find_owner - locate owner node of link by link's name @@ -2215,7 +2172,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, - link->queue_limit[TIPC_LOW_IMPORTANCE])) + link->window)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) goto prop_msg_full; @@ -2281,7 +2238,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - if (prev_node) { node = tipc_node_find(net, prev_node); if (!node) { @@ -2294,6 +2250,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->prev_seq = 1; goto out; } + tipc_node_put(node); list_for_each_entry_continue_rcu(node, &tn->node_list, list) { @@ -2301,6 +2258,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); + tipc_node_put(node); if (err) goto out; diff --git a/net/tipc/link.h b/net/tipc/link.h index 7aeb52092bf3..d2b5663643da 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -60,6 +60,7 @@ */ #define LINK_STARTED 0x0001 #define LINK_STOPPED 0x0002 +#define LINK_SYNCHING 0x0004 /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) @@ -118,13 +119,14 @@ struct tipc_stats { * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') - * @queue_limit: outbound message queue congestion thresholds (indexed by user) + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset * @max_pkt: current maximum packet size for this link * @max_pkt_target: desired maximum packet size for this link * @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target) - * @outqueue: outbound message queue + * @transmitq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent * @next_out_no: next sequence number to use for outbound messages * @last_retransmitted: sequence number of most recently retransmitted message * @stale_count: # of identical retransmit requests made by peer @@ -165,11 +167,11 @@ struct tipc_link { struct tipc_msg *pmsg; u32 priority; char net_plane; - u32 queue_limit[15]; /* queue_limit[0]==window limit */ /* Changeover */ u32 exp_msg_count; u32 reset_checkpoint; + u32 synch_point; /* Max packet negotiation */ u32 max_pkt; @@ -177,20 +179,25 @@ struct tipc_link { u32 max_pkt_probes; /* Sending */ - struct sk_buff_head outqueue; + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; u32 next_out_no; + u32 window; u32 last_retransmitted; u32 stale_count; /* Reception */ u32 next_in_no; - struct sk_buff_head deferred_queue; - u32 unacked_window; + u32 rcv_unacked; + struct sk_buff_head deferdq; struct sk_buff_head inputq; struct sk_buff_head namedq; /* Congestion handling */ - struct sk_buff *next_out; struct sk_buff_head wakeupq; /* Fragmentation/reassembly */ @@ -302,9 +309,4 @@ static inline int link_reset_reset(struct tipc_link *l_ptr) return l_ptr->state == RESET_RESET; } -static inline int link_congested(struct tipc_link *l_ptr) -{ - return skb_queue_len(&l_ptr->outqueue) >= l_ptr->queue_limit[0]; -} - #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b6eb90cd3ef7..3bb499c61918 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -1,7 +1,7 @@ /* * net/tipc/msg.c: TIPC message header routines * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -165,6 +165,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } if (fragid == LAST_FRAGMENT) { + TIPC_SKB_CB(head)->validated = false; + if (unlikely(!tipc_msg_validate(head))) + goto err; *buf = head; TIPC_SKB_CB(head)->tail = NULL; *headbuf = NULL; @@ -172,7 +175,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } *buf = NULL; return 0; - err: pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); @@ -181,6 +183,48 @@ err: return 0; } +/* tipc_msg_validate - validate basic format of received message + * + * This routine ensures a TIPC message has an acceptable header, and at least + * as much data as the header indicates it should. The routine also ensures + * that the entire message header is stored in the main fragment of the message + * buffer, to simplify future access to message header fields. + * + * Note: Having extra info present in the message header or data areas is OK. + * TIPC will ignore the excess, under the assumption that it is optional info + * introduced by a later release of the protocol. + */ +bool tipc_msg_validate(struct sk_buff *skb) +{ + struct tipc_msg *msg; + int msz, hsz; + + if (unlikely(TIPC_SKB_CB(skb)->validated)) + return true; + if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) + return false; + + hsz = msg_hdr_sz(buf_msg(skb)); + if (unlikely(hsz < MIN_H_SIZE) || (hsz > MAX_H_SIZE)) + return false; + if (unlikely(!pskb_may_pull(skb, hsz))) + return false; + + msg = buf_msg(skb); + if (unlikely(msg_version(msg) != TIPC_VERSION)) + return false; + + msz = msg_size(msg); + if (unlikely(msz < hsz)) + return false; + if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) + return false; + if (unlikely(skb->len < msz)) + return false; + + TIPC_SKB_CB(skb)->validated = true; + return true; +} /** * tipc_msg_build - create buffer chain containing specified header and data @@ -228,6 +272,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); msg_set_size(&pkthdr, pktmax); msg_set_fragm_no(&pkthdr, pktno); + msg_set_importance(&pkthdr, msg_importance(mhdr)); /* Prepare first fragment */ skb = tipc_buf_acquire(pktmax); @@ -286,33 +331,36 @@ error: /** * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one - * @list: the buffer chain of the existing buffer ("bundle") + * @bskb: the buffer to append to ("bundle") * @skb: buffer to be appended * @mtu: max allowable size for the bundle buffer * Consumes buffer if successful * Returns true if bundling could be performed, otherwise false */ -bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) +bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) { - struct sk_buff *bskb = skb_peek_tail(list); - struct tipc_msg *bmsg = buf_msg(bskb); + struct tipc_msg *bmsg; struct tipc_msg *msg = buf_msg(skb); - unsigned int bsz = msg_size(bmsg); + unsigned int bsz; unsigned int msz = msg_size(msg); - u32 start = align(bsz); + u32 start, pad; u32 max = mtu - INT_H_SIZE; - u32 pad = start - bsz; if (likely(msg_user(msg) == MSG_FRAGMENTER)) return false; + if (!bskb) + return false; + bmsg = buf_msg(bskb); + bsz = msg_size(bmsg); + start = align(bsz); + pad = start - bsz; + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) return false; if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) return false; if (likely(msg_user(bmsg) != MSG_BUNDLER)) return false; - if (likely(!TIPC_SKB_CB(bskb)->bundling)) - return false; if (unlikely(skb_tailroom(bskb) < (pad + msz))) return false; if (unlikely(max < (start + msz))) @@ -328,34 +376,40 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) /** * tipc_msg_extract(): extract bundled inner packet from buffer - * @skb: linear outer buffer, to be extracted from. + * @skb: buffer to be extracted from. * @iskb: extracted inner buffer, to be returned - * @pos: position of msg to be extracted. Returns with pointer of next msg + * @pos: position in outer message of msg to be extracted. + * Returns position of next msg * Consumes outer buffer when last packet extracted * Returns true when when there is an extracted buffer, otherwise false */ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) { - struct tipc_msg *msg = buf_msg(skb); - int imsz; - struct tipc_msg *imsg = (struct tipc_msg *)(msg_data(msg) + *pos); + struct tipc_msg *msg; + int imsz, offset; - /* Is there space left for shortest possible message? */ - if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE)) + *iskb = NULL; + if (unlikely(skb_linearize(skb))) + goto none; + + msg = buf_msg(skb); + offset = msg_hdr_sz(msg) + *pos; + if (unlikely(offset > (msg_size(msg) - MIN_H_SIZE))) goto none; - imsz = msg_size(imsg); - /* Is there space left for current message ? */ - if ((*pos + imsz) > msg_data_sz(msg)) + *iskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!*iskb)) goto none; - *iskb = tipc_buf_acquire(imsz); - if (!*iskb) + skb_pull(*iskb, offset); + imsz = msg_size(buf_msg(*iskb)); + skb_trim(*iskb, imsz); + if (unlikely(!tipc_msg_validate(*iskb))) goto none; - skb_copy_to_linear_data(*iskb, imsg, imsz); *pos += align(imsz); return true; none: kfree_skb(skb); + kfree_skb(*iskb); *iskb = NULL; return false; } @@ -369,12 +423,11 @@ none: * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct sk_buff_head *list, - struct sk_buff *skb, u32 mtu, u32 dnode) +bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) { struct sk_buff *bskb; struct tipc_msg *bmsg; - struct tipc_msg *msg = buf_msg(skb); + struct tipc_msg *msg = buf_msg(*skb); u32 msz = msg_size(msg); u32 max = mtu - INT_H_SIZE; @@ -398,9 +451,9 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); - TIPC_SKB_CB(bskb)->bundling = true; - __skb_queue_tail(list, bskb); - return tipc_msg_bundle(list, skb, mtu); + tipc_msg_bundle(bskb, *skb, mtu); + *skb = bskb; + return true; } /** @@ -415,21 +468,17 @@ bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err) { struct tipc_msg *msg = buf_msg(buf); - uint imp = msg_importance(msg); struct tipc_msg ohdr; uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); if (skb_linearize(buf)) goto exit; + msg = buf_msg(buf); if (msg_dest_droppable(msg)) goto exit; if (msg_errcode(msg)) goto exit; - memcpy(&ohdr, msg, msg_hdr_sz(msg)); - imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); - if (msg_isdata(msg)) - msg_set_importance(msg, imp); msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); @@ -462,15 +511,18 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, { struct tipc_msg *msg = buf_msg(skb); u32 dport; + u32 own_addr = tipc_own_addr(net); if (!msg_isdata(msg)) return false; if (!msg_named(msg)) return false; + if (msg_errcode(msg)) + return false; *err = -TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; - if (msg_reroute_cnt(msg) > 0) + if (msg_reroute_cnt(msg)) return false; *dnode = addr_domain(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), @@ -478,6 +530,8 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, if (!dport) return false; msg_incr_reroute_cnt(msg); + if (*dnode != own_addr) + msg_set_prevnode(msg, own_addr); msg_set_destnode(msg, *dnode); msg_set_destport(msg, dport); *err = TIPC_OK; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 9ace47f44a69..d273207ede28 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, 2014, Ericsson AB + * Copyright (c) 2000-2007, 2014-2015 Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -54,6 +54,8 @@ struct plist; * - TIPC_HIGH_IMPORTANCE * - TIPC_CRITICAL_IMPORTANCE */ +#define TIPC_SYSTEM_IMPORTANCE 4 + /* * Payload message types @@ -64,6 +66,19 @@ struct plist; #define TIPC_DIRECT_MSG 3 /* + * Internal message users + */ +#define BCAST_PROTOCOL 5 +#define MSG_BUNDLER 6 +#define LINK_PROTOCOL 7 +#define CONN_MANAGER 8 +#define CHANGEOVER_PROTOCOL 10 +#define NAME_DISTRIBUTOR 11 +#define MSG_FRAGMENTER 12 +#define LINK_CONFIG 13 +#define SOCK_WAKEUP 14 /* pseudo user */ + +/* * Message header sizes */ #define SHORT_H_SIZE 24 /* In-cluster basic payload message */ @@ -76,7 +91,7 @@ struct plist; #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -#define TIPC_MEDIA_ADDR_OFFSET 5 +#define TIPC_MEDIA_INFO_OFFSET 5 /** * TIPC message buffer code @@ -87,12 +102,12 @@ struct plist; * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields * are word aligned for quicker access */ -#define BUF_HEADROOM LL_MAX_HEADER +#define BUF_HEADROOM (LL_MAX_HEADER + 48) struct tipc_skb_cb { void *handle; struct sk_buff *tail; - bool deferred; + bool validated; bool wakeup_pending; bool bundling; u16 chain_sz; @@ -170,16 +185,6 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n) msg_set_bits(m, 0, 25, 0xf, n); } -static inline u32 msg_importance(struct tipc_msg *m) -{ - return msg_bits(m, 0, 25, 0xf); -} - -static inline void msg_set_importance(struct tipc_msg *m, u32 i) -{ - msg_set_user(m, i); -} - static inline u32 msg_hdr_sz(struct tipc_msg *m) { return msg_bits(m, 0, 21, 0xf) << 2; @@ -235,6 +240,15 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz) m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); } +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + +static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +{ + return (struct tipc_msg *)msg_data(m); +} /* * Word 1 @@ -336,6 +350,25 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) /* * Words 3-10 */ +static inline u32 msg_importance(struct tipc_msg *m) +{ + if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + return msg_bits(m, 5, 13, 0x7); + if (likely(msg_isdata(m) && !msg_errcode(m))) + return msg_user(m); + return TIPC_SYSTEM_IMPORTANCE; +} + +static inline void msg_set_importance(struct tipc_msg *m, u32 i) +{ + if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + msg_set_bits(m, 5, 13, 0x7, i); + else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) + msg_set_user(m, i); + else + pr_warn("Trying to set illegal importance in message\n"); +} + static inline u32 msg_prevnode(struct tipc_msg *m) { return msg_word(m, 3); @@ -348,6 +381,8 @@ static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) static inline u32 msg_origport(struct tipc_msg *m) { + if (msg_user(m) == MSG_FRAGMENTER) + m = msg_get_wrapped(m); return msg_word(m, 4); } @@ -443,35 +478,11 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) msg_set_word(m, 10, n); } -static inline unchar *msg_data(struct tipc_msg *m) -{ - return ((unchar *)m) + msg_hdr_sz(m); -} - -static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) -{ - return (struct tipc_msg *)msg_data(m); -} - /* * Constants and routines used to read and write TIPC internal message headers */ /* - * Internal message users - */ -#define BCAST_PROTOCOL 5 -#define MSG_BUNDLER 6 -#define LINK_PROTOCOL 7 -#define CONN_MANAGER 8 -#define ROUTE_DISTRIBUTOR 9 /* obsoleted */ -#define CHANGEOVER_PROTOCOL 10 -#define NAME_DISTRIBUTOR 11 -#define MSG_FRAGMENTER 12 -#define LINK_CONFIG 13 -#define SOCK_WAKEUP 14 /* pseudo user */ - -/* * Connection management protocol message types */ #define CONN_PROBE 0 @@ -510,7 +521,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 - /* * Word 1 */ @@ -534,6 +544,24 @@ static inline void msg_set_node_sig(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 0, 0xffff, n); } +static inline u32 msg_node_capabilities(struct tipc_msg *m) +{ + return msg_bits(m, 1, 15, 0x1fff); +} + +static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 15, 0x1fff, n); +} + +static inline bool msg_dup(struct tipc_msg *m) +{ + if (likely(msg_user(m) != CHANGEOVER_PROTOCOL)) + return false; + if (msg_type(m) != DUPLICATE_MSG) + return false; + return true; +} /* * Word 2 @@ -688,7 +716,7 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) static inline char *msg_media_addr(struct tipc_msg *m) { - return (char *)&m->hdr[TIPC_MEDIA_ADDR_OFFSET]; + return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; } /* @@ -734,21 +762,8 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) -{ - if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); -} - -static inline u32 msg_tot_origport(struct tipc_msg *m) -{ - if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) - return msg_origport(msg_get_wrapped(m)); - return msg_origport(m); -} - struct sk_buff *tipc_buf_acquire(u32 size); +bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, @@ -757,9 +772,9 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); -bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); -bool tipc_msg_make_bundle(struct sk_buff_head *list, - struct sk_buff *skb, u32 mtu, u32 dnode); +bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu); + +bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index fcb07915aaac..41e7b7e4dda0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -98,7 +98,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) continue; if (!tipc_node_active_links(node)) continue; - oskb = skb_copy(skb, GFP_ATOMIC); + oskb = pskb_copy(skb, GFP_ATOMIC); if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); @@ -244,6 +244,7 @@ static void tipc_publ_subscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_add_tail(&publ->nodesub_list, &node->publ_list); tipc_node_unlock(node); + tipc_node_put(node); } static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, @@ -258,6 +259,7 @@ static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_del_init(&publ->nodesub_list); tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 105ba7adf06f..ab0ac62a1287 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -811,8 +811,8 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq) sseq = seq->sseqs; info = sseq->info; list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { - tipc_nametbl_remove_publ(net, publ->type, publ->lower, - publ->node, publ->ref, publ->key); + tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node, + publ->ref, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); diff --git a/net/tipc/node.c b/net/tipc/node.c index 86152de8248d..3e4f04897c03 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,7 @@ static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); +static void tipc_node_delete(struct tipc_node *node); struct tipc_sock_conn { u32 port; @@ -67,6 +68,23 @@ static unsigned int tipc_hashfn(u32 addr) return addr & (NODE_HTABLE_SIZE - 1); } +static void tipc_node_kref_release(struct kref *kref) +{ + struct tipc_node *node = container_of(kref, struct tipc_node, kref); + + tipc_node_delete(node); +} + +void tipc_node_put(struct tipc_node *node) +{ + kref_put(&node->kref, tipc_node_kref_release); +} + +static void tipc_node_get(struct tipc_node *node) +{ + kref_get(&node->kref); +} + /* * tipc_node_find - locate specified node object, if it exists */ @@ -82,6 +100,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) { + tipc_node_get(node); rcu_read_unlock(); return node; } @@ -106,12 +125,13 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) } n_ptr->addr = addr; n_ptr->net = net; + kref_init(&n_ptr->kref); spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - __skb_queue_head_init(&n_ptr->bclink.deferred_queue); + __skb_queue_head_init(&n_ptr->bclink.deferdq); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -120,16 +140,17 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; + tipc_node_get(n_ptr); exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_node *node) { - list_del_rcu(&n_ptr->list); - hlist_del_rcu(&n_ptr->hash); - kfree_rcu(n_ptr, rcu); + list_del_rcu(&node->list); + hlist_del_rcu(&node->hash); + kfree_rcu(node, rcu); } void tipc_node_stop(struct net *net) @@ -139,7 +160,7 @@ void tipc_node_stop(struct net *net) spin_lock_bh(&tn->node_list_lock); list_for_each_entry_safe(node, t_node, &tn->node_list, list) - tipc_node_delete(tn, node); + tipc_node_put(node); spin_unlock_bh(&tn->node_list_lock); } @@ -147,6 +168,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; + int err = 0; if (in_own_node(net, dnode)) return 0; @@ -157,8 +179,10 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) return -EHOSTUNREACH; } conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (!conn) - return -EHOSTUNREACH; + if (!conn) { + err = -EHOSTUNREACH; + goto exit; + } conn->peer_node = dnode; conn->port = port; conn->peer_port = peer_port; @@ -166,7 +190,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) tipc_node_lock(node); list_add_tail(&conn->list, &node->conn_sks); tipc_node_unlock(node); - return 0; +exit: + tipc_node_put(node); + return err; } void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) @@ -189,6 +215,7 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) kfree(conn); } tipc_node_unlock(node); + tipc_node_put(node); } /** @@ -354,7 +381,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { - __skb_queue_purge(&n_ptr->bclink.deferred_queue); + __skb_queue_purge(&n_ptr->bclink.deferdq); if (n_ptr->bclink.reasm_buf) { kfree_skb(n_ptr->bclink.reasm_buf); @@ -417,19 +444,25 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) { struct tipc_link *link; + int err = -EINVAL; struct tipc_node *node = tipc_node_find(net, addr); - if ((bearer_id >= MAX_BEARERS) || !node) - return -EINVAL; + if (!node) + return err; + + if (bearer_id >= MAX_BEARERS) + goto exit; + tipc_node_lock(node); link = node->links[bearer_id]; if (link) { strncpy(linkname, link->name, len); - tipc_node_unlock(node); - return 0; + err = 0; } +exit: tipc_node_unlock(node); - return -EINVAL; + tipc_node_put(node); + return err; } void tipc_node_unlock(struct tipc_node *node) @@ -459,7 +492,7 @@ void tipc_node_unlock(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_NAMED_MSG_EVT); + TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET); spin_unlock_bh(&node->lock); @@ -488,6 +521,9 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_BCAST_MSG_EVT) tipc_bclink_input(net); + + if (flags & TIPC_BCAST_RESET) + tipc_link_reset_all(node); } /* Caller should hold node lock for the passed node */ @@ -542,17 +578,21 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - - if (last_addr && !tipc_node_find(net, last_addr)) { - rcu_read_unlock(); - /* We never set seq or call nl_dump_check_consistent() this - * means that setting prev_seq here will cause the consistence - * check to fail in the netlink callback handler. Resulting in - * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if - * the node state changed while we released the lock. - */ - cb->prev_seq = 1; - return -EPIPE; + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. + */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); } list_for_each_entry_rcu(node, &tn->node_list, list) { diff --git a/net/tipc/node.h b/net/tipc/node.h index 3d18c66b7f78..02d5c20dc551 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,8 @@ enum { TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7), TIPC_NAMED_MSG_EVT = (1 << 8), - TIPC_BCAST_MSG_EVT = (1 << 9) + TIPC_BCAST_MSG_EVT = (1 << 9), + TIPC_BCAST_RESET = (1 << 10) }; /** @@ -84,7 +85,7 @@ struct tipc_node_bclink { u32 last_sent; u32 oos_state; u32 deferred_size; - struct sk_buff_head deferred_queue; + struct sk_buff_head deferdq; struct sk_buff *reasm_buf; int inputq_map; bool recv_permitted; @@ -93,6 +94,7 @@ struct tipc_node_bclink { /** * struct tipc_node - TIPC node structure * @addr: network address of node + * @ref: reference counter to node object * @lock: spinlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain @@ -106,6 +108,7 @@ struct tipc_node_bclink { * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities * @signature: node instance identifier * @link_id: local and remote bearer ids of changing link, if any * @publ_list: list of publications @@ -113,6 +116,7 @@ struct tipc_node_bclink { */ struct tipc_node { u32 addr; + struct kref kref; spinlock_t lock; struct net *net; struct hlist_node hash; @@ -125,7 +129,8 @@ struct tipc_node { struct tipc_node_bclink bclink; struct list_head list; int link_cnt; - int working_links; + u16 working_links; + u16 capabilities; u32 signature; u32 link_id; struct list_head publ_list; @@ -134,6 +139,7 @@ struct tipc_node { }; struct tipc_node *tipc_node_find(struct net *net, u32 addr); +void tipc_node_put(struct tipc_node *node); struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); @@ -168,10 +174,12 @@ static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) node = tipc_node_find(net, addr); - if (likely(node)) + if (likely(node)) { mtu = node->act_mtus[selector & 1]; - else + tipc_node_put(node); + } else { mtu = MAX_MSG_SIZE; + } return mtu; } diff --git a/net/tipc/server.c b/net/tipc/server.c index eadd4ed45905..ab6183cdb121 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -37,11 +37,13 @@ #include "core.h" #include "socket.h" #include <net/sock.h> +#include <linux/module.h> /* Number of messages to send before rescheduling */ #define MAX_SEND_MSG_COUNT 25 #define MAX_RECV_MSG_COUNT 25 #define CF_CONNECTED 1 +#define CF_SERVER 2 #define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) @@ -88,9 +90,19 @@ static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct sockaddr_tipc *saddr = con->server->saddr; + struct socket *sock = con->sock; + struct sock *sk; - if (con->sock) { - tipc_sock_release_local(con->sock); + if (sock) { + sk = sock->sk; + if (test_bit(CF_SERVER, &con->flags)) { + __module_get(sock->ops->owner); + __module_get(sk->sk_prot_creator->owner); + } + saddr->scope = -TIPC_NODE_SCOPE; + kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); + sk_release_kernel(sk); con->sock = NULL; } @@ -281,7 +293,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) struct tipc_conn *newcon; int ret; - ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK); + ret = kernel_accept(sock, &newsock, O_NONBLOCK); if (ret < 0) return ret; @@ -309,9 +321,12 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = tipc_sock_create_local(s->net, s->type, &sock); + ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock); if (ret < 0) return NULL; + + sk_change_net(sock->sk, s->net); + ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, (char *)&s->imp, sizeof(s->imp)); if (ret < 0) @@ -337,11 +352,31 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) pr_err("Unknown socket type %d\n", s->type); goto create_err; } + + /* As server's listening socket owner and creator is the same module, + * we have to decrease TIPC module reference count to guarantee that + * it remains zero after the server socket is created, otherwise, + * executing "rmmod" command is unable to make TIPC module deleted + * after TIPC module is inserted successfully. + * + * However, the reference count is ever increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of TIPC socket's proto_ops struct; another is to increment the + * reference count of owner of TIPC proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. + */ + module_put(sock->ops->owner); + module_put(sock->sk->sk_prot_creator->owner); + set_bit(CF_SERVER, &con->flags); + return sock; create_err: - sock_release(sock); - con->sock = NULL; + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); return NULL; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f73e975af80b..ee90d74d7516 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,7 +35,6 @@ */ #include <linux/rhashtable.h> -#include <linux/jhash.h> #include "core.h" #include "name_table.h" #include "node.h" @@ -74,6 +73,7 @@ * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @remote: 'connected' peer for dgram/rdm * @node: hash table node * @rcu: rcu struct for tipc_sock */ @@ -96,6 +96,7 @@ struct tipc_sock { bool link_cong; uint sent_unacked; uint rcv_unacked; + struct sockaddr_tipc remote; struct rhash_head node; struct rcu_head rcu; }; @@ -114,13 +115,14 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); +static int __tipc_send_stream(struct socket *sock, struct msghdr *m, + size_t dsz); +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; - static struct proto tipc_proto; -static struct proto tipc_proto_kern; static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, @@ -130,6 +132,8 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG } }; +static const struct rhashtable_params tsk_rht_params; + /* * Revised TIPC socket locking policy: * @@ -338,11 +342,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, } /* Allocate socket's protocol area */ - if (!kern) - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); - else - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern); - + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); if (sk == NULL) return -ENOMEM; @@ -380,75 +380,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return 0; } -/** - * tipc_sock_create_local - create TIPC socket from inside TIPC module - * @type: socket type - SOCK_RDM or SOCK_SEQPACKET - * - * We cannot use sock_creat_kern here because it bumps module user count. - * Since socket owner and creator is the same module we must make sure - * that module count remains zero for module local sockets, otherwise - * we cannot do rmmod. - * - * Returns 0 on success, errno otherwise - */ -int tipc_sock_create_local(struct net *net, int type, struct socket **res) -{ - int rc; - - rc = sock_create_lite(AF_TIPC, type, 0, res); - if (rc < 0) { - pr_err("Failed to create kernel socket\n"); - return rc; - } - tipc_sk_create(net, *res, 0, 1); - - return 0; -} - -/** - * tipc_sock_release_local - release socket created by tipc_sock_create_local - * @sock: the socket to be released. - * - * Module reference count is not incremented when such sockets are created, - * so we must keep it from being decremented when they are released. - */ -void tipc_sock_release_local(struct socket *sock) -{ - tipc_release(sock); - sock->ops = NULL; - sock_release(sock); -} - -/** - * tipc_sock_accept_local - accept a connection on a socket created - * with tipc_sock_create_local. Use this function to avoid that - * module reference count is inadvertently incremented. - * - * @sock: the accepting socket - * @newsock: reference to the new socket to be created - * @flags: socket flags - */ - -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags) -{ - struct sock *sk = sock->sk; - int ret; - - ret = sock_create_lite(sk->sk_family, sk->sk_type, - sk->sk_protocol, newsock); - if (ret < 0) - return ret; - - ret = tipc_accept(sock, *newsock, flags); - if (ret < 0) { - sock_release(*newsock); - return ret; - } - (*newsock)->ops = sock->ops; - return ret; -} - static void tipc_sk_callback(struct rcu_head *head) { struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); @@ -892,7 +823,6 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) /** * tipc_sendmsg - send message in connectionless manner - * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send * @dsz: amount of user data to be sent @@ -904,9 +834,21 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, +static int tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) { + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_sendmsg(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) +{ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -915,49 +857,40 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, u32 dnode, dport; struct sk_buff_head *pktchain = &sk->sk_write_queue; struct sk_buff *skb; - struct tipc_name_seq *seq = &dest->addr.nameseq; + struct tipc_name_seq *seq; struct iov_iter save; u32 mtu; long timeo; int rc; - if (unlikely(!dest)) - return -EDESTADDRREQ; - - if (unlikely((m->msg_namelen < sizeof(*dest)) || - (dest->family != AF_TIPC))) - return -EINVAL; - if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; - - if (iocb) - lock_sock(sk); - + if (unlikely(!dest)) { + if (tsk->connected && sock->state == SS_READY) + dest = &tsk->remote; + else + return -EDESTADDRREQ; + } else if (unlikely(m->msg_namelen < sizeof(*dest)) || + dest->family != AF_TIPC) { + return -EINVAL; + } if (unlikely(sock->state != SS_READY)) { - if (sock->state == SS_LISTENING) { - rc = -EPIPE; - goto exit; - } - if (sock->state != SS_UNCONNECTED) { - rc = -EISCONN; - goto exit; - } - if (tsk->published) { - rc = -EOPNOTSUPP; - goto exit; - } + if (sock->state == SS_LISTENING) + return -EPIPE; + if (sock->state != SS_UNCONNECTED) + return -EISCONN; + if (tsk->published) + return -EOPNOTSUPP; if (dest->addrtype == TIPC_ADDR_NAME) { tsk->conn_type = dest->addr.name.name.type; tsk->conn_instance = dest->addr.name.name.instance; } } - + seq = &dest->addr.nameseq; timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); if (dest->addrtype == TIPC_ADDR_MCAST) { - rc = tipc_sendmcast(sock, seq, m, dsz, timeo); - goto exit; + return tipc_sendmcast(sock, seq, m, dsz, timeo); } else if (dest->addrtype == TIPC_ADDR_NAME) { u32 type = dest->addr.name.name.type; u32 inst = dest->addr.name.name.instance; @@ -972,10 +905,8 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(mhdr, dnode); msg_set_destport(mhdr, dport); - if (unlikely(!dport && !dnode)) { - rc = -EHOSTUNREACH; - goto exit; - } + if (unlikely(!dport && !dnode)) + return -EHOSTUNREACH; } else if (dest->addrtype == TIPC_ADDR_ID) { dnode = dest->addr.id.node; msg_set_type(mhdr, TIPC_DIRECT_MSG); @@ -990,7 +921,7 @@ new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); if (rc < 0) - goto exit; + return rc; do { skb = skb_peek(pktchain); @@ -1013,9 +944,6 @@ new_mtu: if (rc) __skb_queue_purge(pktchain); } while (!rc); -exit: - if (iocb) - release_sock(sk); return rc; } @@ -1052,7 +980,6 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) /** * tipc_send_stream - send stream-oriented data - * @iocb: (unused) * @sock: socket structure * @m: data to send * @dsz: total length of data to be transmitted @@ -1062,8 +989,19 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) * Returns the number of bytes sent on success (or partial success), * or errno if no data sent */ -static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t dsz) +static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_send_stream(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); @@ -1080,7 +1018,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, /* Handle implied connection establishment */ if (unlikely(dest)) { - rc = tipc_sendmsg(iocb, sock, m, dsz); + rc = __tipc_sendmsg(sock, m, dsz); if (dsz && (dsz == rc)) tsk->sent_unacked = 1; return rc; @@ -1088,15 +1026,11 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, if (dsz > (uint)INT_MAX) return -EMSGSIZE; - if (iocb) - lock_sock(sk); - if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) - rc = -EPIPE; + return -EPIPE; else - rc = -ENOTCONN; - goto exit; + return -ENOTCONN; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); @@ -1108,7 +1042,7 @@ next: send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); if (unlikely(rc < 0)) - goto exit; + return rc; do { if (likely(!tsk_conn_cong(tsk))) { rc = tipc_link_xmit(net, pktchain, dnode, portid); @@ -1133,15 +1067,12 @@ next: if (rc) __skb_queue_purge(pktchain); } while (!rc); -exit: - if (iocb) - release_sock(sk); + return sent ? sent : rc; } /** * tipc_send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send * @dsz: length of data to be transmitted @@ -1150,13 +1081,12 @@ exit: * * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t dsz) +static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) { if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; - return tipc_send_stream(iocb, sock, m, dsz); + return tipc_send_stream(sock, m, dsz); } /* tipc_sk_finish_conn - complete the setup of a connection @@ -1317,12 +1247,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; err = -EAGAIN; if (!timeo) break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; } finish_wait(sk_sleep(sk), &wait); *timeop = timeo; @@ -1331,7 +1261,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) /** * tipc_recvmsg - receive packet-oriented message - * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area * @flags: receive flags @@ -1341,8 +1270,8 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) * * Returns size of returned message data, errno otherwise */ -static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, + int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -1426,7 +1355,6 @@ exit: /** * tipc_recv_stream - receive stream-oriented data - * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area * @flags: receive flags @@ -1436,8 +1364,8 @@ exit: * * Returns size of returned message data, errno otherwise */ -static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recv_stream(struct socket *sock, struct msghdr *m, + size_t buf_len, int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -1909,17 +1837,26 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, int destlen, int flags) { struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; - long timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout; + long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout; socket_state previous; - int res; + int res = 0; lock_sock(sk); - /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */ + /* DGRAM/RDM connect(), just save the destaddr */ if (sock->state == SS_READY) { - res = -EOPNOTSUPP; + if (dst->family == AF_UNSPEC) { + memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); + tsk->connected = 0; + } else if (destlen != sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + } else { + memcpy(&tsk->remote, dest, destlen); + tsk->connected = 1; + } goto exit; } @@ -1947,7 +1884,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (!timeout) m.msg_flags = MSG_DONTWAIT; - res = tipc_sendmsg(NULL, sock, &m, 0); + res = __tipc_sendmsg(sock, &m, 0); if ((res < 0) && (res != -EWOULDBLOCK)) goto exit; @@ -2027,12 +1964,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) err = -EINVAL; if (sock->state != SS_LISTENING) break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; err = -EAGAIN; if (!timeo) break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; } finish_wait(sk_sleep(sk), &wait); return err; @@ -2103,7 +2040,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) struct msghdr m = {NULL,}; tsk_advance_rx_queue(sk); - tipc_send_packet(NULL, new_sock, &m, 0); + __tipc_send_stream(new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); @@ -2154,7 +2091,6 @@ restart: TIPC_CONN_SHUTDOWN)) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); - tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); @@ -2312,7 +2248,7 @@ static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) struct tipc_sock *tsk; rcu_read_lock(); - tsk = rhashtable_lookup(&tn->sk_rht, &portid); + tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params); if (tsk) sock_hold(&tsk->sk); rcu_read_unlock(); @@ -2334,7 +2270,8 @@ static int tipc_sk_insert(struct tipc_sock *tsk) portid = TIPC_MIN_PORT; tsk->portid = portid; sock_hold(&tsk->sk); - if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node)) + if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node, + tsk_rht_params)) return 0; sock_put(&tsk->sk); } @@ -2347,28 +2284,27 @@ static void tipc_sk_remove(struct tipc_sock *tsk) struct sock *sk = &tsk->sk; struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (rhashtable_remove(&tn->sk_rht, &tsk->node)) { + if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } +static const struct rhashtable_params tsk_rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, +}; + int tipc_sk_rht_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct rhashtable_params rht_params = { - .nelem_hint = 192, - .head_offset = offsetof(struct tipc_sock, node), - .key_offset = offsetof(struct tipc_sock, portid), - .key_len = sizeof(u32), /* portid */ - .hashfn = jhash, - .max_shift = 20, /* 1M */ - .min_shift = 8, /* 256 */ - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, - }; - return rhashtable_init(&tn->sk_rht, &rht_params); + return rhashtable_init(&tn->sk_rht, &tsk_rht_params); } void tipc_sk_rht_destroy(struct net *net) @@ -2611,12 +2547,6 @@ static struct proto tipc_proto = { .sysctl_rmem = sysctl_tipc_rmem }; -static struct proto tipc_proto_kern = { - .name = "TIPC", - .obj_size = sizeof(struct tipc_sock), - .sysctl_rmem = sysctl_tipc_rmem -}; - /** * tipc_socket_init - initialize TIPC socket interface * diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 238f1b7bd9bd..bf6551389522 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -44,10 +44,6 @@ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) int tipc_socket_init(void); void tipc_socket_stop(void); -int tipc_sock_create_local(struct net *net, int type, struct socket **res); -void tipc_sock_release_local(struct socket *sock); -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags); int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 72c339e432aa..1c147c869c2e 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -162,19 +162,6 @@ static void subscr_del(struct tipc_subscription *sub) atomic_dec(&tn->subscription_count); } -/** - * subscr_terminate - terminate communication with a subscriber - * - * Note: Must call it in process context since it might sleep. - */ -static void subscr_terminate(struct tipc_subscription *sub) -{ - struct tipc_subscriber *subscriber = sub->subscriber; - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - - tipc_conn_terminate(tn->topsrv, subscriber->conid); -} - static void subscr_release(struct tipc_subscriber *subscriber) { struct tipc_subscription *sub; @@ -312,16 +299,14 @@ static void subscr_conn_msg_event(struct net *net, int conid, { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); spin_lock_bh(&subscriber->lock); - if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, - &sub) < 0) { - spin_unlock_bh(&subscriber->lock); - subscr_terminate(sub); - return; - } + subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub); if (sub) tipc_nametbl_subscribe(sub); + else + tipc_conn_terminate(tn->topsrv, subscriber->conid); spin_unlock_bh(&subscriber->lock); } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c new file mode 100644 index 000000000000..ef3d7aa2854a --- /dev/null +++ b/net/tipc/udp_media.c @@ -0,0 +1,446 @@ +/* net/tipc/udp_media.c: IP bearer support for TIPC + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/socket.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/inet.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/list.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/udp_tunnel.h> +#include <net/addrconf.h> +#include <linux/tipc_netlink.h> +#include "core.h" +#include "bearer.h" + +/* IANA assigned UDP port */ +#define UDP_PORT_DEFAULT 6118 + +static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { + [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, + [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, + [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, +}; + +/** + * struct udp_media_addr - IP/UDP addressing information + * + * This is the bearer level originating address used in neighbor discovery + * messages, and all fields should be in network byte order + */ +struct udp_media_addr { + __be16 proto; + __be16 udp_port; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; +}; + +/** + * struct udp_bearer - ip/udp bearer data structure + * @bearer: associated generic tipc bearer + * @ubsock: bearer associated socket + * @ifindex: local address scope + * @work: used to schedule deferred work on a bearer + */ +struct udp_bearer { + struct tipc_bearer __rcu *bearer; + struct socket *ubsock; + u32 ifindex; + struct work_struct work; +}; + +/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ +static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, + struct udp_media_addr *ua) +{ + memset(addr, 0, sizeof(struct tipc_media_addr)); + addr->media_id = TIPC_MEDIA_TYPE_UDP; + memcpy(addr->value, ua, sizeof(struct udp_media_addr)); + if (ntohs(ua->proto) == ETH_P_IP) { + if (ipv4_is_multicast(ua->ipv4.s_addr)) + addr->broadcast = 1; + } else if (ntohs(ua->proto) == ETH_P_IPV6) { + if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST) + addr->broadcast = 1; + } else { + pr_err("Invalid UDP media address\n"); + } +} + +/* tipc_udp_addr2str - convert ip/udp address to string */ +static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) +{ + struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; + + if (ntohs(ua->proto) == ETH_P_IP) + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port)); + else if (ntohs(ua->proto) == ETH_P_IPV6) + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port)); + else + pr_err("Invalid UDP media address\n"); + return 0; +} + +/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */ +static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a, + char *msg) +{ + struct udp_media_addr *ua; + + ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET); + if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; + tipc_udp_media_addr_set(a, ua); + return 0; +} + +/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */ +static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value, + sizeof(struct udp_media_addr)); + return 0; +} + +/* tipc_send_msg - enqueue a send request */ +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dest) +{ + int ttl, err = 0; + struct udp_bearer *ub; + struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct sk_buff *clone; + struct rtable *rt; + + clone = skb_clone(skb, GFP_ATOMIC); + skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto tx_error; + } + if (dst->proto == htons(ETH_P_IP)) { + struct flowi4 fl = { + .daddr = dst->ipv4.s_addr, + .saddr = src->ipv4.s_addr, + .flowi4_mark = clone->mark, + .flowi4_proto = IPPROTO_UDP + }; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto tx_error; + } + ttl = ip4_dst_hoplimit(&rt->dst); + err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, + src->udp_port, dst->udp_port, + false, true); + if (err < 0) { + ip_rt_put(rt); + goto tx_error; + } +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct dst_entry *ndst; + struct flowi6 fl6 = { + .flowi6_oif = ub->ifindex, + .daddr = dst->ipv6, + .saddr = src->ipv6, + .flowi6_proto = IPPROTO_UDP + }; + err = ipv6_stub->ipv6_dst_lookup(ub->ubsock->sk, &ndst, &fl6); + if (err) + goto tx_error; + ttl = ip6_dst_hoplimit(ndst); + err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6, + &dst->ipv6, 0, ttl, src->udp_port, + dst->udp_port, false); +#endif + } + return err; + +tx_error: + kfree_skb(clone); + return err; +} + +/* tipc_udp_recv - read data from bearer socket */ +static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_bearer *ub; + struct tipc_bearer *b; + + ub = rcu_dereference_sk_user_data(sk); + if (!ub) { + pr_err_ratelimited("Failed to get UDP bearer reference"); + kfree_skb(skb); + return 0; + } + + skb_pull(skb, sizeof(struct udphdr)); + rcu_read_lock(); + b = rcu_dereference_rtnl(ub->bearer); + + if (b) { + tipc_rcv(sock_net(sk), skb, b); + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + kfree_skb(skb); + return 0; +} + +static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) +{ + int err = 0; + struct ip_mreqn mreqn; + struct sock *sk = ub->ubsock->sk; + + if (ntohs(remote->proto) == ETH_P_IP) { + if (!ipv4_is_multicast(remote->ipv4.s_addr)) + return 0; + mreqn.imr_multiaddr = remote->ipv4; + mreqn.imr_ifindex = ub->ifindex; + err = ip_mc_join_group(sk, &mreqn); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (!ipv6_addr_is_multicast(&remote->ipv6)) + return 0; + err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, + &remote->ipv6); +#endif + } + return err; +} + +/** + * parse_options - build local/remote addresses from configuration + * @attrs: netlink config data + * @ub: UDP bearer instance + * @local: local bearer IP address/port + * @remote: peer or multicast IP/port + */ +static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub, + struct udp_media_addr *local, + struct udp_media_addr *remote) +{ + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct sockaddr_storage *sa_local, *sa_remote; + + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy)) + goto err; + if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { + sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]); + sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]); + } else { +err: + pr_err("Invalid UDP bearer configuration"); + return -EINVAL; + } + if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) { + struct sockaddr_in *ip4; + + ip4 = (struct sockaddr_in *)sa_local; + local->proto = htons(ETH_P_IP); + local->udp_port = ip4->sin_port; + local->ipv4.s_addr = ip4->sin_addr.s_addr; + + ip4 = (struct sockaddr_in *)sa_remote; + remote->proto = htons(ETH_P_IP); + remote->udp_port = ip4->sin_port; + remote->ipv4.s_addr = ip4->sin_addr.s_addr; + return 0; + +#if IS_ENABLED(CONFIG_IPV6) + } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) { + struct sockaddr_in6 *ip6; + + ip6 = (struct sockaddr_in6 *)sa_local; + local->proto = htons(ETH_P_IPV6); + local->udp_port = ip6->sin6_port; + local->ipv6 = ip6->sin6_addr; + ub->ifindex = ip6->sin6_scope_id; + + ip6 = (struct sockaddr_in6 *)sa_remote; + remote->proto = htons(ETH_P_IPV6); + remote->udp_port = ip6->sin6_port; + remote->ipv6 = ip6->sin6_addr; + return 0; +#endif + } + return -EADDRNOTAVAIL; +} + +/** + * tipc_udp_enable - callback to create a new udp bearer instance + * @net: network namespace + * @b: pointer to generic tipc_bearer + * @attrs: netlink bearer configuration + * + * validate the bearer parameters and initialize the udp bearer + * rtnl_lock should be held + */ +static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]) +{ + int err = -EINVAL; + struct udp_bearer *ub; + struct udp_media_addr *remote; + struct udp_media_addr local = {0}; + struct udp_port_cfg udp_conf = {0}; + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + + ub = kzalloc(sizeof(*ub), GFP_ATOMIC); + if (!ub) + return -ENOMEM; + + remote = (struct udp_media_addr *)&b->bcast_addr.value; + memset(remote, 0, sizeof(struct udp_media_addr)); + err = parse_options(attrs, ub, &local, remote); + if (err) + goto err; + + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; + b->bcast_addr.broadcast = 1; + rcu_assign_pointer(b->media_ptr, ub); + rcu_assign_pointer(ub->bearer, b); + tipc_udp_media_addr_set(&b->addr, &local); + if (local.proto == htons(ETH_P_IP)) { + struct net_device *dev; + + dev = __ip_dev_find(net, local.ipv4.s_addr, false); + if (!dev) { + err = -ENODEV; + goto err; + } + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.use_udp_checksums = false; + ub->ifindex = dev->ifindex; + b->mtu = dev->mtu - sizeof(struct iphdr) + - sizeof(struct udphdr); +#if IS_ENABLED(CONFIG_IPV6) + } else if (local.proto == htons(ETH_P_IPV6)) { + udp_conf.family = AF_INET6; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; + udp_conf.local_ip6 = in6addr_any; + b->mtu = 1280; +#endif + } else { + err = -EAFNOSUPPORT; + goto err; + } + udp_conf.local_udp_port = local.udp_port; + err = udp_sock_create(net, &udp_conf, &ub->ubsock); + if (err) + goto err; + tuncfg.sk_user_data = ub; + tuncfg.encap_type = 1; + tuncfg.encap_rcv = tipc_udp_recv; + tuncfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); + + if (enable_mcast(ub, remote)) + goto err; + return 0; +err: + kfree(ub); + return err; +} + +/* cleanup_bearer - break the socket/bearer association */ +static void cleanup_bearer(struct work_struct *work) +{ + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); + synchronize_net(); + kfree(ub); +} + +/* tipc_udp_disable - detach bearer from socket */ +static void tipc_udp_disable(struct tipc_bearer *b) +{ + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err("UDP bearer instance not found\n"); + return; + } + if (ub->ubsock) + sock_set_flag(ub->ubsock->sk, SOCK_DEAD); + RCU_INIT_POINTER(b->media_ptr, NULL); + RCU_INIT_POINTER(ub->bearer, NULL); + + /* sock_release need to be done outside of rtnl lock */ + INIT_WORK(&ub->work, cleanup_bearer); + schedule_work(&ub->work); +} + +struct tipc_media udp_media_info = { + .send_msg = tipc_udp_send_msg, + .enable_media = tipc_udp_enable, + .disable_media = tipc_udp_disable, + .addr2str = tipc_udp_addr2str, + .addr2msg = tipc_udp_addr2msg, + .msg2addr = tipc_udp_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_UDP, + .hwaddr_len = 0, + .name = "udp" +}; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 526b6edab018..433f287ee548 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -516,20 +516,15 @@ static unsigned int unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); -static int unix_stream_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_stream_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); -static int unix_dgram_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_dgram_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); +static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); +static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); -static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); +static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, + int); static int unix_set_peek_off(struct sock *sk, int val) { @@ -1442,8 +1437,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, * Send AF_UNIX data. */ -static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); @@ -1622,8 +1617,8 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) -static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct sock *other = NULL; @@ -1725,8 +1720,8 @@ out_err: return sent ? : err; } -static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { int err; struct sock *sk = sock->sk; @@ -1741,19 +1736,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_namelen) msg->msg_namelen = 0; - return unix_dgram_sendmsg(kiocb, sock, msg, len); + return unix_dgram_sendmsg(sock, msg, len); } -static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; - return unix_dgram_recvmsg(iocb, sock, msg, size, flags); + return unix_dgram_recvmsg(sock, msg, size, flags); } static void unix_copy_addr(struct msghdr *msg, struct sock *sk) @@ -1766,9 +1760,8 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) } } -static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; @@ -1900,9 +1893,8 @@ static unsigned int unix_skb_len(const struct sk_buff *skb) return skb->len - UNIXCB(skb).consumed; } -static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 1d0e39c9a3e2..2ec86e652a19 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -949,8 +949,8 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, return mask; } -static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { int err; struct sock *sk; @@ -1062,11 +1062,10 @@ out: return err; } -static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { - return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, - flags); + return transport->dgram_dequeue(vsock_sk(sock->sk), msg, len, flags); } static const struct proto_ops vsock_dgram_ops = { @@ -1505,8 +1504,8 @@ static int vsock_stream_getsockopt(struct socket *sock, return 0; } -static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk; struct vsock_sock *vsk; @@ -1644,9 +1643,8 @@ out: static int -vsock_stream_recvmsg(struct kiocb *kiocb, - struct socket *sock, - struct msghdr *msg, size_t len, int flags) +vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk; struct vsock_sock *vsk; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7f3255084a6c..c294da095461 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1730,8 +1730,7 @@ static int vmci_transport_dgram_enqueue( return err - sizeof(*dg); } -static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, - struct vsock_sock *vsk, +static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags) { diff --git a/net/wireless/core.c b/net/wireless/core.c index 3af0ecf1cc16..2a0bbd22854b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1199,6 +1199,7 @@ out_fail_wq: regulatory_exit(); out_fail_reg: debugfs_remove(ieee80211_debugfs_dir); + nl80211_exit(); out_fail_nl80211: unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 1a65662a5d73..4c55fab9b4e4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -533,7 +533,7 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, else if (wdev->wext.ibss.bssid) memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); else - memset(ap_addr->sa_data, 0, ETH_ALEN); + eth_zero_addr(ap_addr->sa_data); wdev_unlock(wdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f60ee5b45c0c..6dd1ab3b10ea 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2653,10 +2653,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return err; } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); @@ -2665,6 +2661,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), NET_NAME_USER, type, err ? NULL : &flags, @@ -4400,6 +4400,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; + /* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT + * as userspace might just pass through the capabilities from the IEs + * directly, rather than enforcing this restriction and returning an + * error in this case. + */ + if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) { + params.ht_capa = NULL; + params.vht_capa = NULL; + } + /* When you run into this, adjust the code below for the new flag */ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); @@ -5694,8 +5704,8 @@ static int nl80211_parse_random_mac(struct nlattr **attrs, int i; if (!attrs[NL80211_ATTR_MAC] && !attrs[NL80211_ATTR_MAC_MASK]) { - memset(mac_addr, 0, ETH_ALEN); - memset(mac_addr_mask, 0, ETH_ALEN); + eth_zero_addr(mac_addr); + eth_zero_addr(mac_addr_mask); mac_addr[0] = 0x2; mac_addr_mask[0] = 0x3; @@ -8783,8 +8793,8 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, if (!nl_tcp) return -ENOBUFS; - if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || - nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + if (nla_put_in_addr(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_in_addr(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || @@ -9018,8 +9028,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, cfg = kzalloc(size, GFP_KERNEL); if (!cfg) return -ENOMEM; - cfg->src = nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); - cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]); + cfg->src = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); + cfg->dst = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_DST_IPV4]); memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), ETH_ALEN); if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) @@ -12585,9 +12595,7 @@ static int cfg80211_net_detect_results(struct sk_buff *msg, } for (j = 0; j < match->n_channels; j++) { - if (nla_put_u32(msg, - NL80211_ATTR_WIPHY_FREQ, - match->channels[j])) { + if (nla_put_u32(msg, j, match->channels[j])) { nla_nest_cancel(msg, nl_freqs); nla_nest_cancel(msg, nl_match); goto out; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8c6cf52b9f1d..be5f81caa488 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -226,7 +226,7 @@ static DECLARE_DELAYED_WORK(reg_timeout, reg_timeout_work); /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { - .n_reg_rules = 6, + .n_reg_rules = 8, .alpha2 = "00", .reg_rules = { /* IEEE 802.11b/g, channels 1..11 */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index e4e39143728c..af3617c9879e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -7,6 +7,7 @@ #include <linux/tracepoint.h> #include <linux/rtnetlink.h> +#include <linux/etherdevice.h> #include <net/cfg80211.h> #include "core.h" @@ -15,7 +16,7 @@ if (given_mac) \ memcpy(__entry->entry_mac, given_mac, ETH_ALEN); \ else \ - memset(__entry->entry_mac, 0, ETH_ALEN); \ + eth_zero_addr(__entry->entry_mac); \ } while (0) #define MAC_PR_FMT "%pM" #define MAC_PR_ARG(entry_mac) (__entry->entry_mac) @@ -1081,7 +1082,7 @@ TRACE_EVENT(rdev_auth, if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else - memset(__entry->bssid, 0, ETH_ALEN); + eth_zero_addr(__entry->bssid); __entry->auth_type = req->auth_type; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", auth type: %d, bssid: " MAC_PR_FMT, @@ -1107,7 +1108,7 @@ TRACE_EVENT(rdev_assoc, if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else - memset(__entry->bssid, 0, ETH_ALEN); + eth_zero_addr(__entry->bssid); MAC_ASSIGN(prev_bssid, req->prev_bssid); __entry->use_mfp = req->use_mfp; __entry->flags = req->flags; @@ -1157,7 +1158,7 @@ TRACE_EVENT(rdev_disassoc, if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else - memset(__entry->bssid, 0, ETH_ALEN); + eth_zero_addr(__entry->bssid); __entry->reason_code = req->reason_code; __entry->local_state_change = req->local_state_change; ), diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 368611c05739..a4e8af3321d2 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -322,7 +322,7 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, if (wdev->current_bss) memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); else - memset(ap_addr->sa_data, 0, ETH_ALEN); + eth_zero_addr(ap_addr->sa_data); wdev_unlock(wdev); return 0; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d9149b68b9bc..c3ab230e4493 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1077,8 +1077,7 @@ out_clear_request: goto out; } -static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); @@ -1252,8 +1251,7 @@ out_kfree_skb: } -static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, +static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cee479bc655c..638af0655aaf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2269,11 +2269,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, * have the xfrm_state's. We need to wait for KM to * negotiate new SA's or bail out with error.*/ if (net->xfrm.sysctl_larval_drop) { - dst_release(dst); - xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - - return ERR_PTR(-EREMOTE); + err = -EREMOTE; + goto error; } err = -EAGAIN; @@ -2324,7 +2322,8 @@ nopol: error: dst_release(dst); dropdst: - dst_release(dst_orig); + if (!(flags & XFRM_LOOKUP_KEEP_DST_REF)) + dst_release(dst_orig); xfrm_pols_put(pols, drop_pols); return ERR_PTR(err); } @@ -2338,7 +2337,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, struct sock *sk, int flags) { struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, - flags | XFRM_LOOKUP_QUEUE); + flags | XFRM_LOOKUP_QUEUE | + XFRM_LOOKUP_KEEP_DST_REF); if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index de971b6d38c5..f5e39e35d73a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1043,12 +1043,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, break; case AF_INET6: - *(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr; - *(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr; + x->sel.daddr.in6 = daddr->in6; + x->sel.saddr.in6 = saddr->in6; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; - *(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr; - *(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr; + x->props.saddr.in6 = saddr->in6; + x->id.daddr.in6 = daddr->in6; break; } |